/*
 * PowerPC64 SLB support.
 *
 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
 * Based on earlier code written by:
 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
 *    Copyright (c) 2001 Dave Engebretsen
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>

#include <asm/udbg.h>
#include <asm/code-patching.h>

enum slb_index {
        LINEAR_INDEX    = 0, /* Kernel linear map (0xc000000000000000) */
        VMALLOC_INDEX   = 1, /* Kernel virtual map (0xd000000000000000) */
        KSTACK_INDEX    = 2, /* Kernel stack map */
};

extern void slb_allocate(unsigned long ea);

#define slb_esid_mask(ssize)    \
        (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
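/*
 * Illustrative note (shift values are assumptions taken from the hash MMU
 * headers): with SID_SHIFT = 28 and SID_SHIFT_1T = 40, ESID_MASK keeps the
 * top 36 bits of the EA (256MB segments), while ESID_MASK_1T keeps only
 * the top 24 bits (1TB segments).
 */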

static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
                                         enum slb_index index)
{
        return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
}
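/*
 * Worked example (a sketch, assuming SLB_ESID_V is bit 36 as in the hash
 * MMU headers): for the kernel linear mapping with 256MB segments,
 * mk_esid_data(0xc000000000000000, MMU_SEGSIZE_256M, LINEAR_INDEX) would
 * yield 0xc000000008000000: the ESID in the top bits, the valid bit set,
 * and SLB slot 0 in the low bits.
 */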

static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
                                         unsigned long flags)
{
        return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
                ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
}
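/*
 * Layout note (a sketch of the SLB VSID dword; the shift values are
 * assumptions from the hash MMU headers): the VSID sits in the high bits,
 * shifted by 12 for 256MB segments or 24 for 1TB segments, the segment
 * size selector sits at SLB_VSID_SSIZE_SHIFT, and the low bits carry the
 * protection and LLP (page size encoding) flags.
 */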

static inline void slb_shadow_update(unsigned long ea, int ssize,
                                     unsigned long flags,
                                     enum slb_index index)
{
        struct slb_shadow *p = get_slb_shadow();

        /*
         * Clear the ESID first so the entry is not valid while we are
         * updating it.  No write barriers are needed here, provided
         * we only update the current CPU's SLB shadow buffer.
         */
        WRITE_ONCE(p->save_area[index].esid, 0);
        WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
        WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
}

static inline void slb_shadow_clear(enum slb_index index)
{
        WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0);
}

static inline void create_shadowed_slbe(unsigned long ea, int ssize,
                                        unsigned long flags,
                                        enum slb_index index)
{
        /*
         * Updating the shadow buffer before writing the SLB ensures
         * we don't get a stale entry here if we get preempted by PHYP
         * between these two statements.
         */
        slb_shadow_update(ea, ssize, flags, index);

        asm volatile("slbmte %0,%1" :
                     : "r" (mk_vsid_data(ea, ssize, flags)),
                       "r" (mk_esid_data(ea, ssize, index))
                     : "memory" );
}
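/*
 * Note on the slbmte operands: per the ISA, RS (%0 here) carries the VSID
 * dword and RB (%1) carries the ESID dword, which also encodes the valid
 * bit and the SLB slot to write.
 */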

static void __slb_flush_and_rebolt(void)
{
        /*
         * If you change this make sure you change SLB_NUM_BOLTED
         * and PR KVM appropriately too.
         */
        unsigned long linear_llp, vmalloc_llp, lflags, vflags;
        unsigned long ksp_esid_data, ksp_vsid_data;

        linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
        vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
        lflags = SLB_VSID_KERNEL | linear_llp;
        vflags = SLB_VSID_KERNEL | vmalloc_llp;

        ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX);
        if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
                ksp_esid_data &= ~SLB_ESID_V;
                ksp_vsid_data = 0;
                slb_shadow_clear(KSTACK_INDEX);
        } else {
                /* Update stack entry; others don't change */
                slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
                ksp_vsid_data =
                        be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
        }

        /*
         * We need to do this all in asm, so we're sure we don't touch
         * the stack between the slbia and rebolting it.  Note that slbia
         * leaves SLB entry 0 (the kernel linear mapping) intact, so only
         * the vmalloc and kernel stack entries need rebolting.
         */
        asm volatile("isync\n"
                     "slbia\n"
                     /* Slot 1 - first VMALLOC segment */
                     "slbmte    %0,%1\n"
                     /* Slot 2 - kernel stack */
                     "slbmte    %2,%3\n"
                     "isync"
                     :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
                        "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)),
                        "r"(ksp_vsid_data),
                        "r"(ksp_esid_data)
                     : "memory");
}

void slb_flush_and_rebolt(void)
{
        WARN_ON(!irqs_disabled());

        /*
         * We can't take a PMU exception in the following code, so hard
         * disable interrupts.
         */
        hard_irq_disable();

        __slb_flush_and_rebolt();
        get_paca()->slb_cache_ptr = 0;
}

void slb_vmalloc_update(void)
{
        unsigned long vflags;

        vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
        slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
        slb_flush_and_rebolt();
}

/*
 * Helper function to compare esids.  There are four cases to handle.
 * 1. The system is not 1T segment size capable.  Use the GET_ESID compare.
 * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
 * 3. The system is 1T capable, only one of the two addresses is > 1T.  This is not a match.
 * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
 */
static inline int esids_match(unsigned long addr1, unsigned long addr2)
{
        int esid_1t_count;

        /* System is not 1T segment size capable. */
        if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
                return (GET_ESID(addr1) == GET_ESID(addr2));

        esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
                         ((addr2 >> SID_SHIFT_1T) != 0));

        /* both addresses are < 1T */
        if (esid_1t_count == 0)
                return (GET_ESID(addr1) == GET_ESID(addr2));

        /* One address < 1T, the other > 1T.  Not a match */
        if (esid_1t_count == 1)
                return 0;

        /* Both addresses are > 1T. */
        return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
}
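/*
 * Worked example (illustrative, assuming GET_ESID(ea) reduces to
 * (ea) >> SID_SHIFT with SID_SHIFT = 28): on a non-1T system, 0x10000000
 * and 0x1ffff000 both yield ESID 1 and so match, while 0x10000000 and
 * 0x20000000 yield ESIDs 1 and 2 and do not.
 */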

/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
        unsigned long offset;
        unsigned long slbie_data = 0;
        unsigned long pc = KSTK_EIP(tsk);
        unsigned long stack = KSTK_ESP(tsk);
        unsigned long exec_base;

        /*
         * We need interrupts hard-disabled here, not just soft-disabled,
         * so that a PMU interrupt can't occur, which might try to access
         * user memory (to get a stack trace) and possibly cause an SLB miss
         * which would update the slb_cache/slb_cache_ptr fields in the PACA.
         */
        hard_irq_disable();
        offset = get_paca()->slb_cache_ptr;
        if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
            offset <= SLB_CACHE_ENTRIES) {
                int i;
                asm volatile("isync" : : : "memory");
                for (i = 0; i < offset; i++) {
                        slbie_data = (unsigned long)get_paca()->slb_cache[i]
                                << SID_SHIFT; /* EA */
                        slbie_data |= user_segment_size(slbie_data)
                                << SLBIE_SSIZE_SHIFT;
                        slbie_data |= SLBIE_C; /* C set for user addresses */
                        asm volatile("slbie %0" : : "r" (slbie_data));
                }
                asm volatile("isync" : : : "memory");
        } else {
                __slb_flush_and_rebolt();
        }

        /* Workaround POWER5 < DD2.1 issue */
        if (offset == 1 || offset > SLB_CACHE_ENTRIES)
                asm volatile("slbie %0" : : "r" (slbie_data));

        get_paca()->slb_cache_ptr = 0;
        copy_mm_to_paca(mm);

        /*
         * Preload some userspace segments into the SLB.
         * Almost all 32-bit and 64-bit PowerPC executables are linked at
         * 0x10000000, so it makes sense to preload this segment.
         */
        exec_base = 0x10000000;

        if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
            is_kernel_addr(exec_base))
                return;

        slb_allocate(pc);

        if (!esids_match(pc, stack))
                slb_allocate(stack);

        if (!esids_match(pc, exec_base) &&
            !esids_match(stack, exec_base))
                slb_allocate(exec_base);
}

static inline void patch_slb_encoding(unsigned int *insn_addr,
                                      unsigned int immed)
{
        /*
         * This function patches either an li or a cmpldi instruction with
         * a new immediate value.  This relies on the fact that both li
         * (which is actually addi) and cmpldi take a 16-bit immediate
         * value, and that it is situated in the same location in the
         * instruction, i.e. bits 16-31 (big-endian bit order), or the
         * lower 16 bits.  The signedness of the immediate operand differs
         * between the two instructions; however, this code only ever
         * patches a small value, much less than 1 << 15, so we can get
         * away with it.
         * To patch the value we read the existing instruction, clear the
         * immediate value, and or in our new value, then write the
         * instruction back.
         */
        unsigned int insn = (*insn_addr & 0xffff0000) | immed;
        patch_instruction(insn_addr, insn);
}
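/*
 * Illustrative example (the encoding is an assumption from the ISA, shown
 * only to make the patching concrete): "li r11,0" encodes as 0x39600000,
 * so patching it with immed = 0x20 would produce 0x39600020, i.e.
 * "li r11,32".
 */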

extern u32 slb_miss_kernel_load_linear[];
extern u32 slb_miss_kernel_load_io[];
extern u32 slb_compare_rr_to_size[];
extern u32 slb_miss_kernel_load_vmemmap[];

void slb_set_size(u16 size)
{
        if (mmu_slb_size == size)
                return;

        mmu_slb_size = size;
        patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
}

void slb_initialize(void)
{
        unsigned long linear_llp, vmalloc_llp, io_llp;
        unsigned long lflags, vflags;
        static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
        unsigned long vmemmap_llp;
#endif

        /* Prepare our SLB miss handler based on our page size */
        linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
        io_llp = mmu_psize_defs[mmu_io_psize].sllp;
        vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
        get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
        vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
        if (!slb_encoding_inited) {
                slb_encoding_inited = 1;
                patch_slb_encoding(slb_miss_kernel_load_linear,
                                   SLB_VSID_KERNEL | linear_llp);
                patch_slb_encoding(slb_miss_kernel_load_io,
                                   SLB_VSID_KERNEL | io_llp);
                patch_slb_encoding(slb_compare_rr_to_size,
                                   mmu_slb_size);

                pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
                pr_devel("SLB: io      LLP = %04lx\n", io_llp);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
                patch_slb_encoding(slb_miss_kernel_load_vmemmap,
                                   SLB_VSID_KERNEL | vmemmap_llp);
                pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
        }

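        /*
         * stab_rr is the round-robin pointer used when choosing a victim
         * SLB slot; start it past the bolted entries so they are never
         * replaced.
         */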
        get_paca()->stab_rr = SLB_NUM_BOLTED;

        lflags = SLB_VSID_KERNEL | linear_llp;
        vflags = SLB_VSID_KERNEL | vmalloc_llp;

        /* Invalidate the entire SLB (even entry 0) & all the ERATS */
        asm volatile("isync":::"memory");
        asm volatile("slbmte %0,%0"::"r" (0) : "memory");
        asm volatile("isync; slbia; isync":::"memory");
        create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
        create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);

        /*
         * For the boot cpu, we're running on the stack in init_thread_union,
         * which is in the first segment of the linear mapping, and also
         * get_paca()->kstack hasn't been initialized yet.
         * For secondary cpus, we need to bolt the kernel stack entry now.
         */
        slb_shadow_clear(KSTACK_INDEX);
        if (raw_smp_processor_id() != boot_cpuid &&
            (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
                create_shadowed_slbe(get_paca()->kstack,
                                     mmu_kernel_ssize, lflags, KSTACK_INDEX);

        asm volatile("isync":::"memory");
}

static void insert_slb_entry(unsigned long vsid, unsigned long ea,
                             int bpsize, int ssize)
{
        unsigned long flags, vsid_data, esid_data;
        enum slb_index index;
        int slb_cache_index;

        /*
         * We are called with irqs disabled, hence it should be safe to
         * access the PACA.
         */
        VM_WARN_ON(!irqs_disabled());

        /*
         * We can't take a PMU exception in the following code, so hard
         * disable interrupts.
         */
        hard_irq_disable();

        index = get_paca()->stab_rr;

        /*
         * Simple round-robin replacement of the SLB, starting at
         * SLB_NUM_BOLTED so the bolted entries are never victimized.
         */
        if (index < (mmu_slb_size - 1))
                index++;
        else
                index = SLB_NUM_BOLTED;

        get_paca()->stab_rr = index;

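        /*
         * The VSID dword is open-coded here rather than built with
         * mk_vsid_data(), which only knows about kernel VSIDs: this is a
         * user segment, so the VSID comes from the caller and the flags
         * use SLB_VSID_USER plus the base page size encoding.
         */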
        flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
        vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
                    ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
        esid_data = mk_esid_data(ea, ssize, index);

        /*
         * No need for an isync before or after this slbmte. The exception
         * we enter with and the rfid we exit with are context synchronizing.
         * Also we only handle user segments here.
         */
        asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
                     : "memory");

        /*
         * Now update the slb cache entries
         */
        slb_cache_index = get_paca()->slb_cache_ptr;
        if (slb_cache_index < SLB_CACHE_ENTRIES) {
                /*
                 * We have space in the slb cache for an optimized
                 * switch_slb(). Cache the top 36 bits of esid_data, as
                 * per the ISA's ESID field.
                 */
                get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
                get_paca()->slb_cache_ptr++;
        } else {
                /*
                 * Our cache is full and its contents no longer reflect
                 * the actual SLB contents. Bump the ptr so that
                 * switch_slb() will ignore the cache.
                 */
                get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
        }
}

static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
{
        struct mm_struct *mm = current->mm;
        unsigned long vsid;
        int bpsize;

        /*
         * We are always above 1TB, hence use the high user segment size.
         */
        vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
        bpsize = get_slice_psize(mm, ea);
        insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
}

void slb_miss_large_addr(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();
        unsigned long ea = regs->dar;
        int context;

        if (REGION_ID(ea) != USER_REGION_ID)
                goto slb_bad_addr;

        /*
         * Are we beyond what the page table layout supports?
         */
        if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
                goto slb_bad_addr;

        /* Lower addresses should have been handled by asm code */
        if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
                goto slb_bad_addr;

        /*
         * Consider this a bad access if we take an SLB miss on an
         * address above the addr limit.
         */
        if (ea >= current->mm->context.slb_addr_limit)
                goto slb_bad_addr;

        context = get_ea_context(&current->mm->context, ea);
        if (!context)
                goto slb_bad_addr;

        handle_multi_context_slb_miss(context, ea);
        exception_exit(prev_state);
        return;

slb_bad_addr:
        if (user_mode(regs))
                _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
        else
                bad_page_fault(regs, ea, SIGSEGV);
        exception_exit(prev_state);
}