/*
 * arch/powerpc/mm/tlb-radix.c
 *
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

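/*
 * RIC (Radix Invalidation Control) selects what a tlbie[l] invalidates:
 * 0 flushes TLB entries only, 1 flushes only the Page Walk Cache (PWC),
 * and 2 flushes the TLB, the PWC and the cached process/partition table
 * entries.
 */
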
/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
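/*
 * A note on the field encoding, assuming the usual IBM bit numbering
 * where PPC_BIT(0) is the most significant bit: the set number is
 * placed with its low-order bit at RB bit 51, IS occupies RB bits
 * 52:53, and the PID goes in the upper word of RS (RS bits 0:31).
 */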
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
                                           unsigned int pid,
                                           unsigned int ric, unsigned int prs)
{
        unsigned long rb;
        unsigned long rs;

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
                     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
                     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and the entire Page Walk Cache
         * and partition table entries. Then flush the remaining sets of the
         * TLB.
         */
        tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
        for (set = 1; set < num_sets; set++)
                tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

        /* Do the same for process scoped entries. */
        tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
        for (set = 1; set < num_sets; set++)
                tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

        asm volatile("ptesync": : :"memory");
}

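/*
 * IS (Invalidation Selector) values used here: IS=2 invalidates all
 * entries matching the current LPID, IS=3 invalidates all entries
 * regardless of LPID.
 */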
void radix__tlbiel_all(unsigned int action)
{
        unsigned int is;

        switch (action) {
        case TLB_INVAL_SCOPE_GLOBAL:
                is = 3;
                break;
        case TLB_INVAL_SCOPE_LPID:
                is = 2;
                break;
        default:
                BUG();
        }

        if (early_cpu_has_feature(CPU_FTR_ARCH_300))
                tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
        else
                WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

        asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void __tlbiel_pid(unsigned long pid, int set,
                                unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rb |= set << PPC_BITLSHIFT(51);
        rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

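/*
 * tlbiel above only invalidates the TLB of the executing core; the
 * tlbie variants below broadcast the invalidation to all processors
 * (and the nest MMU) over the fabric, which is why their callers must
 * close the sequence with eieio; tlbsync; ptesync.
 */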
static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid(unsigned long lpid, int set,
                                 unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rb |= set << PPC_BITLSHIFT(51);
        rs = 0;  /* LPID comes from LPIDR */
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
                                       unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rb |= set << PPC_BITLSHIFT(51);
        rs = 0;  /* LPID comes from LPIDR */
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbiel_va(unsigned long va, unsigned long pid,
                               unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_va(unsigned long va, unsigned long pid,
                              unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                   unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

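/*
 * CPU_FTR_P9_TLBIE_BUG marks a POWER9 erratum where a tlbie sequence
 * may leave stale entries behind. The workaround coded below is to
 * follow up with a ptesync and one extra tlbie to a high, presumably
 * unused, effective address before the closing tlbsync.
 */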
static inline void fixup_tlbie(void)
{
        unsigned long pid = 0;
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

/*
 * We use 128 TLB sets in radix mode and 256 sets in hash (HPT) mode.
 */
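/*
 * tlbiel invalidates only one congruence class (set) at a time, so a
 * full local PID flush has to step through every set. The trailing
 * PPC_INVALIDATE_ERAT also drops the ERAT, which caches effective-to-
 * real translations independently of the TLB.
 */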
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
        int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
         * also flush the entire Page Walk Cache.
         */
        __tlbiel_pid(pid, 0, ric);

        /* For PWC, only one flush is needed */
        if (ric == RIC_FLUSH_PWC) {
                asm volatile("ptesync": : :"memory");
                return;
        }

        /* For the remaining sets, just flush the TLB */
        for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
                __tlbiel_pid(pid, set, RIC_FLUSH_TLB);

        asm volatile("ptesync": : :"memory");
        asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

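/*
 * Global flush sequence: the leading ptesync makes prior PTE updates
 * visible before the invalidation, and eieio; tlbsync; ptesync waits
 * for the broadcast tlbie to complete on all processors.
 */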
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Work around the fact that the "ric" argument to __tlbie_pid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_pid(pid, RIC_FLUSH_TLB);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_pid(pid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_pid(pid, RIC_FLUSH_ALL);
        }
        fixup_tlbie();
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
{
        int set;

        VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
         * also flush the entire Page Walk Cache.
         */
        __tlbiel_lpid(lpid, 0, ric);

        /* For PWC, only one flush is needed */
        if (ric == RIC_FLUSH_PWC) {
                asm volatile("ptesync": : :"memory");
                return;
        }

        /* For the remaining sets, just flush the TLB */
        for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
                __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);

        asm volatile("ptesync": : :"memory");
        asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Work around the fact that the "ric" argument to __tlbie_lpid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid(lpid, RIC_FLUSH_TLB);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid(lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid(lpid, RIC_FLUSH_ALL);
        }
        fixup_tlbie_lpid(lpid);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
{
        int set;

        VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
         * also flush the entire Page Walk Cache.
         */
        __tlbiel_lpid_guest(lpid, 0, ric);

        /* For PWC, only one flush is needed */
        if (ric == RIC_FLUSH_PWC) {
                asm volatile("ptesync": : :"memory");
                return;
        }

        /* For the remaining sets, just flush the TLB */
        for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
                __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);

        asm volatile("ptesync": : :"memory");
        asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
                                     unsigned long pid, unsigned long page_size,
                                     unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
                              unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbiel_va(va, pid, ap, ric);
        asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
                                    unsigned long pid, unsigned long page_size,
                                    unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
        __tlbiel_va_range(start, end, pid, page_size, psize);
        asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
                                    unsigned long pid, unsigned long page_size,
                                    unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
                             unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_va(va, pid, ap, ric);
        fixup_tlbie();
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                  unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_lpid_va(va, lpid, ap, ric);
        fixup_tlbie_lpid(lpid);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
                                   unsigned long pid, unsigned long page_size,
                                   unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbie_pid(pid, RIC_FLUSH_PWC);
        __tlbie_va_range(start, end, pid, page_size, psize);
        fixup_tlbie();
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_pid(pid, RIC_FLUSH_TLB);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                       int psize)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        /* need the return fix for nohash.c */
        if (is_vm_hugetlb_page(vma))
                return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
        radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

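/*
 * An mm counts as single threaded when no coprocessor context (e.g. a
 * VAS/NX window, tracked in context.copros) holds a reference and the
 * current task is its only user; such an mm can be flushed with local
 * tlbiel once any lazy users are evicted.
 */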
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
        if (atomic_read(&mm->context.copros) > 0)
                return false;
        if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
                return true;
        return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
        /*
         * The P9 nest MMU has issues with the page walk cache caching
         * PTEs and not flushing them properly when RIC = 0 for a
         * PID/LPID invalidate.
         */
        if (atomic_read(&mm->context.copros) > 0)
                return true;
        return false;
}

#ifdef CONFIG_SMP
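/*
 * When a multi-threaded mm shrinks back to a single user, other CPUs
 * may still hold it as a lazy active_mm. The IPI handler below moves
 * such CPUs over to init_mm and flushes the PID there, after which
 * flushes for this mm can safely stay local.
 */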
static void do_exit_flush_lazy_tlb(void *arg)
{
        struct mm_struct *mm = arg;
        unsigned long pid = mm->context.id;

        if (current->mm == mm)
                return; /* Local CPU */

        if (current->active_mm == mm) {
                /*
                 * Must be a kernel thread because sender is single-threaded.
                 */
                BUG_ON(current->mm);
                mmgrab(&init_mm);
                switch_mm(mm, &init_mm, current);
                current->active_mm = &init_mm;
                mmdrop(mm);
        }
        _tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
        /*
         * Would be nice if this was async so it could be run in
         * parallel with our local flush, but generic code does not
         * give a good API for it. Could extend the generic code or
         * make a special powerpc IPI for flushing TLBs.
         * For now it's not too performance critical.
         */
        smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
                               (void *)mm, 1);
        mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        /*
         * Order loads of mm_cpumask vs previous stores to clear ptes before
         * the invalidate. See barrier in switch_mm_irqs_off
         */
        smp_mb();
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }

                if (mm_needs_flush_escalation(mm))
                        _tlbie_pid(pid, RIC_FLUSH_ALL);
                else
                        _tlbie_pid(pid, RIC_FLUSH_TLB);
        } else {
local:
                _tlbiel_pid(pid, RIC_FLUSH_TLB);
        }
        preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (!fullmm) {
                                exit_flush_lazy_tlbs(mm);
                                goto local;
                        }
                }
                _tlbie_pid(pid, RIC_FLUSH_ALL);
        } else {
local:
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
        }
        preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
        __flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
        tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                 int psize)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }
                _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        } else {
local:
                _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        }
        preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_page(vma, vmaddr);
#endif
        radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        _tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

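/*
 * Rough rationale for the defaults: a full local flush costs one tlbiel
 * per TLB set (128 of them), so flushing single pages locally stays
 * worthwhile up to about two pages per set, while a global full-PID
 * flush is a single broadcast tlbie, hence the much lower ceiling.
 */
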
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
                                        unsigned long start, unsigned long end,
                                        bool flush_all_sizes)
{
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool local, full;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (end != TLB_FLUSH_ALL) {
                                exit_flush_lazy_tlbs(mm);
                                goto is_local;
                        }
                }
                local = false;
                full = (end == TLB_FLUSH_ALL ||
                                nr_pages > tlb_single_page_flush_ceiling);
        } else {
is_local:
                local = true;
                full = (end == TLB_FLUSH_ALL ||
                                nr_pages > tlb_local_single_page_flush_ceiling);
        }

        if (full) {
                if (local) {
                        _tlbiel_pid(pid, RIC_FLUSH_TLB);
                } else {
                        if (mm_needs_flush_escalation(mm))
                                _tlbie_pid(pid, RIC_FLUSH_ALL);
                        else
                                _tlbie_pid(pid, RIC_FLUSH_TLB);
                }
        } else {
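                /*
                 * A range flushed page by page may be backed by a mix of
                 * base pages and huge pages, and tlbie encodes the page
                 * size of the translation being invalidated. The PMD- and
                 * PUD-aligned portions of the range are therefore also
                 * flushed at 2M and 1G below, since the base-page flush
                 * cannot hit huge entries.
                 */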
                bool hflush = flush_all_sizes;
                bool gflush = flush_all_sizes;
                unsigned long hstart, hend;
                unsigned long gstart, gend;

                if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                        hflush = true;

                if (hflush) {
                        hstart = (start + PMD_SIZE - 1) & PMD_MASK;
                        hend = end & PMD_MASK;
                        if (hstart == hend)
                                hflush = false;
                }

                if (gflush) {
                        gstart = (start + PUD_SIZE - 1) & PUD_MASK;
                        gend = end & PUD_MASK;
                        if (gstart == gend)
                                gflush = false;
                }

                asm volatile("ptesync": : :"memory");
                if (local) {
                        __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbiel_va_range(hstart, hend, pid,
                                                PMD_SIZE, MMU_PAGE_2M);
                        if (gflush)
                                __tlbiel_va_range(gstart, gend, pid,
                                                PUD_SIZE, MMU_PAGE_1G);
                        asm volatile("ptesync": : :"memory");
                } else {
                        __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbie_va_range(hstart, hend, pid,
                                                PMD_SIZE, MMU_PAGE_2M);
                        if (gflush)
                                __tlbie_va_range(gstart, gend, pid,
                                                PUD_SIZE, MMU_PAGE_1G);
                        fixup_tlbie();
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                }
        }
        preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

        __radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
        int psize;

        if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
                psize = mmu_virtual_psize;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
                psize = MMU_PAGE_2M;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
                psize = MMU_PAGE_1G;
        else
                return -1;
        return psize;
}

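/*
 * radix_get_mmu_psize() maps a page size in bytes onto an index into
 * mmu_psize_defs[]; -1 means the size is not supported here, and
 * callers such as radix__tlb_flush() fall back to a full mm flush in
 * that case.
 */
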
/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
                                unsigned long addr,
                                unsigned long page_size)
{
        int psize = radix_get_mmu_psize(page_size);

        _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_tlb_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR), local CPU only.
 */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
        _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 * An important difference from the above: the guest normally manages its
 * own translations, but some cases, e.g. vCPU migration, require KVM to
 * flush on the guest's behalf.
 */
void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
{
        _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
                                             unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
        int psize = 0;
        struct mm_struct *mm = tlb->mm;
        int page_size = tlb->page_size;
        unsigned long start = tlb->start;
        unsigned long end = tlb->end;

        /*
         * If the page size is not something we understand, do a full mm flush.
         *
         * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
         * that flushes the process table entry cache upon process teardown.
         * See the comment for radix in arch_exit_mmap().
         */
        if (tlb->fullmm) {
                __flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
        } else if (mm_tlb_flush_nested(mm)) {
                /*
                 * If there is a concurrent invalidation that is clearing ptes,
                 * then it's possible this invalidation will miss one of those
                 * cleared ptes and miss flushing the TLB. If this invalidate
                 * returns before the other one flushes TLBs, that can result
                 * in it returning while there are still valid TLBs inside the
                 * range to be invalidated.
                 *
                 * See mm/memory.c:tlb_finish_mmu() for more details.
                 *
                 * The solution to this is to ensure the entire range is always
                 * flushed here. The problem for powerpc is that the flushes
                 * are page size specific, so this "forced flush" would not
                 * do the right thing if there is a mix of page sizes in
                 * the range to be invalidated. So use __radix__flush_tlb_range
                 * which invalidates all possible page sizes in the range.
                 *
                 * A PWC flush is probably not required because the core code
                 * shouldn't free page tables in this path, but accounting
                 * for the possibility makes us a bit more robust.
                 *
                 * need_flush_all is an uncommon case because page table
                 * teardown should be done with exclusive locks held (but
                 * after locks are dropped another invalidate could come
                 * in), it could be optimized further if necessary.
                 */
                if (!tlb->need_flush_all)
                        __radix__flush_tlb_range(mm, start, end, true);
                else
                        radix__flush_all_mm(mm);
#endif
        } else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
                if (!tlb->need_flush_all)
                        radix__flush_tlb_mm(mm);
                else
                        radix__flush_all_mm(mm);
        } else {
                if (!tlb->need_flush_all)
                        radix__flush_tlb_range_psize(mm, start, end, psize);
                else
                        radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
        }
        tlb->need_flush_all = 0;
}

static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                int psize, bool also_pwc)
{
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool local, full;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (end != TLB_FLUSH_ALL) {
                                exit_flush_lazy_tlbs(mm);
                                goto is_local;
                        }
                }
                local = false;
                full = (end == TLB_FLUSH_ALL ||
                                nr_pages > tlb_single_page_flush_ceiling);
        } else {
is_local:
                local = true;
                full = (end == TLB_FLUSH_ALL ||
                                nr_pages > tlb_local_single_page_flush_ceiling);
        }

        if (full) {
                if (local) {
                        _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
                } else {
                        if (mm_needs_flush_escalation(mm))
                                also_pwc = true;

                        _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
                }
        } else {
                if (local)
                        _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
                else
                        _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
        }
        preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
                                  unsigned long end, int psize)
{
        return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
                                             unsigned long end, int psize)
{
        __radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
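/*
 * A THP collapse replaces a page-table page with a single 2M PMD entry,
 * so the PWC may still cache a pointer to the old page table. The flush
 * below therefore passes also_pwc = true as well as invalidating the
 * base-page translations that used to cover the region.
 */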
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
        unsigned long pid, end;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        /* 4k page size, just blow the world */
        if (PAGE_SIZE == 0x1000) {
                radix__flush_all_mm(mm);
                return;
        }

        end = addr + HPAGE_PMD_SIZE;

        /* Otherwise first do the PWC, then iterate the pages. */
        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }
                _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        } else {
local:
                _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        }

        preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
        unsigned long rb, prs, r, rs;
        unsigned long ric = RIC_FLUSH_ALL;

        rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */
        rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

        asm volatile("ptesync": : :"memory");
        /*
         * Flush guest entries by passing PRS = 1 and LPID != 0.
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
        /*
         * Flush host entries by passing PRS = 0 and LPID == 0.
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
        unsigned long pid = mm->context.id;

        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        /*
         * If this context hasn't run on that CPU before and KVM is
         * around, there's a slim chance that the guest on another
         * CPU just brought an obsolete translation into the TLB of
         * this CPU due to a bad prefetch using the guest PID on
         * the way into the hypervisor.
         *
         * We work around this here. If KVM is possible, we check if
         * any sibling thread is in KVM. If it is, the window may exist
         * and thus we flush that PID from the core.
         *
         * A potential future improvement would be to mark which PIDs
         * have never been used on the system and avoid it if the PID
         * is new and the process has no other cpumask bit set.
         */
        if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
                int cpu = smp_processor_id();
                int sib = cpu_first_thread_sibling(cpu);
                bool flush = false;

                for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
                        if (sib == cpu)
                                continue;
                        if (!cpu_possible(sib))
                                continue;
                        if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
                                flush = true;
                }
                if (flush)
                        _tlbiel_pid(pid, RIC_FLUSH_ALL);
        }
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */