Merge tag 'powerpc-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jul 2021 19:54:34 +0000 (12:54 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jul 2021 19:54:34 +0000 (12:54 -0700)
Pull powerpc updates from Michael Ellerman:

 - A big series refactoring parts of our KVM code, and converting some
   to C.

 - Support for ARCH_HAS_SET_MEMORY, and ARCH_HAS_STRICT_MODULE_RWX on
   some CPUs (see the first sketch after this list).

 - Support for the Microwatt soft-core.

 - Optimisations to our interrupt return path on 64-bit.

 - Support for userspace access to the NX GZIP accelerator on PowerVM on
   Power10 (see the second sketch after this list).

 - Enable KUAP and KUEP by default on 32-bit Book3S CPUs.

 - Other smaller features, fixes & cleanups.
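
Two hedged sketches for the features flagged above; they are
illustrations written against the interfaces added in this series, not
code taken from it.

First, ARCH_HAS_SET_MEMORY wires powerpc into the generic
set_memory_*() helpers (see the new arch/powerpc/mm/pageattr.c). A
minimal sketch, assuming a page-aligned, single-page kernel buffer:

    #include <linux/set_memory.h>

    /* Write-protect one page at 'buf', then make it writable again. */
    static int protect_one_page(void *buf)
    {
            return set_memory_ro((unsigned long)buf, 1);
    }

    static int unprotect_one_page(void *buf)
    {
            return set_memory_rw((unsigned long)buf, 1);
    }

Second, the NX GZIP work exposes a VAS send window to userspace through
/dev/crypto/nx-gzip (uapi in asm/vas-api.h). A hedged userspace sketch;
error cleanup and the actual copy/paste request submission are omitted:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <asm/vas-api.h>

    /* Open a GZIP send window and return the mmap'd paste address. */
    static void *open_gzip_window(int *fdp)
    {
            struct vas_tx_win_open_attr attr = {
                    .version = 1,
                    .vas_id  = -1,  /* any VAS instance */
            };
            int fd = open("/dev/crypto/nx-gzip", O_RDWR);

            if (fd < 0)
                    return NULL;
            if (ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0)
                    return NULL;
            *fdp = fd;
            return mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, 0);
    }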

Thanks to: Andy Shevchenko, Aneesh Kumar K.V, Arnd Bergmann, Athira
Rajeev, Baokun Li, Benjamin Herrenschmidt, Bharata B Rao, Christophe
Leroy, Daniel Axtens, Daniel Henrique Barboza, Finn Thain, Geoff Levand,
Haren Myneni, Jason Wang, Jiapeng Chong, Joel Stanley, Jordan Niethe,
Kajol Jain, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas
Piggin, Nick Desaulniers, Paul Mackerras, Russell Currey, Sathvika
Vasireddy, Shaokun Zhang, Stephen Rothwell, Sudeep Holla, Suraj Jitindar
Singh, Tom Rix, Vaibhav Jain, YueHaibing, Zhang Jianhua, and Zhen Lei.

* tag 'powerpc-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (218 commits)
  powerpc: Only build restart_table.c for 64s
  powerpc/64s: move ret_from_fork etc above __end_soft_masked
  powerpc/64s/interrupt: clean up interrupt return labels
  powerpc/64/interrupt: add missing kprobe annotations on interrupt exit symbols
  powerpc/64: enable MSR[EE] in irq replay pt_regs
  powerpc/64s/interrupt: preserve regs->softe for NMI interrupts
  powerpc/64s: add a table of implicit soft-masked addresses
  powerpc/64e: remove implicit soft-masking and interrupt exit restart logic
  powerpc/64e: fix CONFIG_RELOCATABLE build warnings
  powerpc/64s: fix hash page fault interrupt handler
  powerpc/4xx: Fix setup_kuep() on SMP
  powerpc/32s: Fix setup_{kuap/kuep}() on SMP
  powerpc/interrupt: Use names in check_return_regs_valid()
  powerpc/interrupt: Also use exit_must_hard_disable() on PPC32
  powerpc/sysfs: Replace sizeof(arr)/sizeof(arr[0]) with ARRAY_SIZE
  powerpc/ptrace: Refactor regs_set_return_{msr/ip}
  powerpc/ptrace: Move set_return_regs_changed() before regs_set_return_{msr/ip}
  powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()
  powerpc/pseries/vas: Include irqdomain.h
  powerpc: mark local variables around longjmp as volatile
  ...

233 files changed:
Documentation/ABI/testing/sysfs-bus-papr-pmem
arch/powerpc/Kconfig
arch/powerpc/Kconfig.debug
arch/powerpc/Makefile
arch/powerpc/boot/Makefile
arch/powerpc/boot/decompress.c
arch/powerpc/boot/devtree.c
arch/powerpc/boot/dts/microwatt.dts [new file with mode: 0644]
arch/powerpc/boot/microwatt.c [new file with mode: 0644]
arch/powerpc/boot/ns16550.c
arch/powerpc/boot/wrapper
arch/powerpc/boot/zImage.ps3.lds.S
arch/powerpc/configs/32-bit.config [new file with mode: 0644]
arch/powerpc/configs/64-bit.config [new file with mode: 0644]
arch/powerpc/configs/microwatt_defconfig [new file with mode: 0644]
arch/powerpc/configs/mpc885_ads_defconfig
arch/powerpc/configs/powernv_defconfig
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/barrier.h
arch/powerpc/include/asm/book3s/32/hash.h [deleted file]
arch/powerpc/include/asm/book3s/32/kup.h
arch/powerpc/include/asm/book3s/32/mmu-hash.h
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/kup.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/checksum.h
arch/powerpc/include/asm/code-patching.h
arch/powerpc/include/asm/head-64.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/inst.h
arch/powerpc/include/asm/interrupt.h
arch/powerpc/include/asm/kup.h
arch/powerpc/include/asm/kvm_guest.h
arch/powerpc/include/asm/livepatch.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/nohash/32/kup-8xx.h
arch/powerpc/include/asm/nohash/32/mmu-44x.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/ppc_asm.h
arch/powerpc/include/asm/probes.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/ps3.h
arch/powerpc/include/asm/ptrace.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/security_features.h
arch/powerpc/include/asm/set_memory.h [new file with mode: 0644]
arch/powerpc/include/asm/setup.h
arch/powerpc/include/asm/sstep.h
arch/powerpc/include/asm/uprobes.h
arch/powerpc/include/asm/vas.h
arch/powerpc/include/asm/xics.h
arch/powerpc/include/uapi/asm/papr_pdsm.h
arch/powerpc/include/uapi/asm/vas-api.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/crash_dump.c
arch/powerpc/kernel/entry_32.S
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/epapr_paravirt.c
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/firmware.c
arch/powerpc/kernel/fpu.S
arch/powerpc/kernel/head_32.h
arch/powerpc/kernel/head_40x.S
arch/powerpc/kernel/head_44x.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/head_book3s_32.S
arch/powerpc/kernel/head_booke.h
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kernel/hw_breakpoint.c
arch/powerpc/kernel/interrupt.c
arch/powerpc/kernel/interrupt_64.S [new file with mode: 0644]
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/jump_label.c
arch/powerpc/kernel/kgdb.c
arch/powerpc/kernel/kprobes-ftrace.c
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/mce_power.c
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/module.c
arch/powerpc/kernel/module_32.c
arch/powerpc/kernel/module_64.c
arch/powerpc/kernel/optprobes.c
arch/powerpc/kernel/paca.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace/ptrace-adv.c
arch/powerpc/kernel/ptrace/ptrace-noadv.c
arch/powerpc/kernel/ptrace/ptrace-view.c
arch/powerpc/kernel/rtas-rtc.c
arch/powerpc/kernel/rtas.c
arch/powerpc/kernel/security.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/signal.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/stacktrace.c
arch/powerpc/kernel/syscalls.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/tau_6xx.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/trace/ftrace.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/udbg_16550.c
arch/powerpc/kernel/uprobes.c
arch/powerpc/kernel/vector.S
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kernel/watchdog.c
arch/powerpc/kexec/crash.c
arch/powerpc/kvm/book3s_32_mmu_host.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/code-patching.c
arch/powerpc/lib/error-inject.c
arch/powerpc/lib/feature-fixups.c
arch/powerpc/lib/restart_table.c [new file with mode: 0644]
arch/powerpc/lib/sstep.c
arch/powerpc/lib/test_emulate_step.c
arch/powerpc/math-emu/math.c
arch/powerpc/math-emu/math_efp.c
arch/powerpc/mm/Makefile
arch/powerpc/mm/book3s32/Makefile
arch/powerpc/mm/book3s32/hash_low.S
arch/powerpc/mm/book3s32/kuap.c [new file with mode: 0644]
arch/powerpc/mm/book3s32/kuep.c
arch/powerpc/mm/book3s32/mmu.c
arch/powerpc/mm/book3s32/mmu_context.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/radix_tlb.c
arch/powerpc/mm/ioremap_32.c
arch/powerpc/mm/ioremap_64.c
arch/powerpc/mm/maccess.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/nohash/44x.c
arch/powerpc/mm/nohash/8xx.c
arch/powerpc/mm/nohash/mmu_context.c
arch/powerpc/mm/nohash/tlb_low.S
arch/powerpc/mm/pageattr.c [new file with mode: 0644]
arch/powerpc/mm/pgtable.c
arch/powerpc/mm/pgtable_32.c
arch/powerpc/mm/ptdump/ptdump.c
arch/powerpc/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp32.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/Makefile
arch/powerpc/perf/callchain.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/generic-compat-pmu.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/powerpc/platforms/86xx/mpc86xx_smp.c
arch/powerpc/platforms/Kconfig
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/Makefile
arch/powerpc/platforms/book3s/Kconfig [new file with mode: 0644]
arch/powerpc/platforms/book3s/Makefile [new file with mode: 0644]
arch/powerpc/platforms/book3s/vas-api.c [new file with mode: 0644]
arch/powerpc/platforms/cell/spider-pci.c
arch/powerpc/platforms/cell/spufs/switch.c
arch/powerpc/platforms/embedded6xx/holly.c
arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
arch/powerpc/platforms/microwatt/Kconfig [new file with mode: 0644]
arch/powerpc/platforms/microwatt/Makefile [new file with mode: 0644]
arch/powerpc/platforms/microwatt/rng.c [new file with mode: 0644]
arch/powerpc/platforms/microwatt/setup.c [new file with mode: 0644]
arch/powerpc/platforms/pasemi/idle.c
arch/powerpc/platforms/powermac/bootx_init.c
arch/powerpc/platforms/powermac/smp.c
arch/powerpc/platforms/powernv/Kconfig
arch/powerpc/platforms/powernv/Makefile
arch/powerpc/platforms/powernv/opal-call.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/subcore.c
arch/powerpc/platforms/powernv/vas-api.c [deleted file]
arch/powerpc/platforms/powernv/vas-debug.c
arch/powerpc/platforms/powernv/vas-fault.c
arch/powerpc/platforms/powernv/vas-trace.h
arch/powerpc/platforms/powernv/vas-window.c
arch/powerpc/platforms/powernv/vas.h
arch/powerpc/platforms/ps3/Kconfig
arch/powerpc/platforms/ps3/mm.c
arch/powerpc/platforms/ps3/setup.c
arch/powerpc/platforms/ps3/system-bus.c
arch/powerpc/platforms/pseries/Makefile
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/platforms/pseries/hvCall.S
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/smp.c
arch/powerpc/platforms/pseries/vas.c [new file with mode: 0644]
arch/powerpc/platforms/pseries/vas.h [new file with mode: 0644]
arch/powerpc/sysdev/fsl_pci.c
arch/powerpc/sysdev/fsl_rio.c
arch/powerpc/sysdev/xics/Kconfig
arch/powerpc/sysdev/xics/Makefile
arch/powerpc/sysdev/xics/ics-native.c [new file with mode: 0644]
arch/powerpc/sysdev/xics/xics-common.c
arch/powerpc/xmon/xmon.c
drivers/crypto/nx/Kconfig
drivers/crypto/nx/Makefile
drivers/crypto/nx/nx-842-pseries.c [deleted file]
drivers/crypto/nx/nx-common-powernv.c
drivers/crypto/nx/nx-common-pseries.c [new file with mode: 0644]
drivers/ps3/ps3-vuart.c
drivers/ps3/ps3av.c
drivers/tty/hvc/hvc_vio.c
include/linux/kprobes.h
kernel/kprobes.c
tools/testing/selftests/powerpc/benchmarks/null_syscall.c
tools/testing/selftests/powerpc/nx-gzip/Makefile
tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/pmu/ebb/ebb.h
tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/security/Makefile
tools/testing/selftests/powerpc/security/mitigation-patching.sh [new file with mode: 0755]
tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c

index 92e2db0e2d3de331216112eb77e7d2dfe5429b23..95254cec92bfbc3614f8445be6d5cb818d555c3a 100644 (file)
@@ -39,9 +39,11 @@ KernelVersion:       v5.9
 Contact:       linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
 Description:
                (RO) Report various performance stats related to papr-scm NVDIMM
-               device.  Each stat is reported on a new line with each line
-               composed of a stat-identifier followed by it value. Below are
-               currently known dimm performance stats which are reported:
+               device. This attribute is only available for NVDIMM devices
+               that support reporting NVDIMM performance stats. Each stat is
+               reported on a new line with each line composed of a
+               stat-identifier followed by its value. Below are currently known
+               dimm performance stats which are reported:
 
                * "CtlResCt" : Controller Reset Count
                * "CtlResTm" : Controller Reset Elapsed Time
index df46324d509016f4b75692e168f9564b6619d290..d01e3401581d63f5a842150d8bb3cc434bd94971 100644 (file)
@@ -140,7 +140,9 @@ config PPC
        select ARCH_HAS_PTE_DEVMAP              if PPC_BOOK3S_64
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+       select ARCH_HAS_SET_MEMORY
        select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
+       select ARCH_HAS_STRICT_MODULE_RWX       if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32
        select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UACCESS_FLUSHCACHE
        select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -266,6 +268,7 @@ config PPC
        select PPC_DAWR                         if PPC64
        select RTC_LIB
        select SPARSE_IRQ
+       select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        select VIRT_TO_BUS                      if !PPC64
@@ -289,6 +292,7 @@ config PANIC_TIMEOUT
 config COMPAT
        bool "Enable support for 32bit binaries"
        depends on PPC64
+       depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
        default y if !CPU_LITTLE_ENDIAN
        select ARCH_WANT_OLD_COMPAT_IPC
        select COMPAT_OLD_SIGACTION
@@ -422,7 +426,7 @@ source "kernel/Kconfig.hz"
 
 config MATH_EMULATION
        bool "Math emulation"
-       depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
+       depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
        select PPC_FPU_REGS
        help
          Some PowerPC chips designed for embedded applications do not have
index 6342f9da454551a3d72117af402010d8c552f99e..205cd77f321fdd5aa9ffd73c0dfa06af2fd3d8f8 100644 (file)
@@ -84,6 +84,11 @@ config MSI_BITMAP_SELFTEST
 
 config PPC_IRQ_SOFT_MASK_DEBUG
        bool "Include extra checks for powerpc irq soft masking"
+       depends on PPC64
+
+config PPC_RFI_SRR_DEBUG
+       bool "Include extra checks for RFI SRR register validity"
+       depends on PPC_BOOK3S_64
 
 config XMON
        bool "Include xmon kernel debugger"
index 3212d076ac6a4318e76a55bd98ecf230d7a520f6..712c5e8768ce1a779baf97f33599d9901c85f8d3 100644 (file)
@@ -376,6 +376,16 @@ ppc64_book3e_allmodconfig:
        $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \
                -f $(srctree)/Makefile allmodconfig
 
+PHONY += ppc32_randconfig
+ppc32_randconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \
+               -f $(srctree)/Makefile randconfig
+
+PHONY += ppc64_randconfig
+ppc64_randconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \
+               -f $(srctree)/Makefile randconfig
+
 define archhelp
   @echo '* zImage          - Build default images selected by kernel config'
   @echo '  zImage.*        - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
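
Usage note (not part of the diff): the new targets are invoked like any
other configuration target, for example

    $ make ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- ppc32_randconfig

where the cross-compiler prefix is illustrative.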
index 2b8da923ceca26a9cb12e0d8994ffc40867a7720..e312ea802aa6a94271da256d4acc1d94d30dbd7e 100644 (file)
@@ -163,6 +163,8 @@ src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
 src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
 src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c
 
+src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c
+
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
 src-boot := $(src-wlib) $(src-plat) empty.c
@@ -227,7 +229,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE
 
 hostprogs      := addnote hack-coff mktree
 
-targets                += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a)
+targets                += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds
 extra-y                := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
                   $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds
 
@@ -355,6 +357,8 @@ image-$(CONFIG_MVME5100)            += dtbImage.mvme5100
 # Board port in arch/powerpc/platform/amigaone/Kconfig
 image-$(CONFIG_AMIGAONE)               += cuImage.amigaone
 
+image-$(CONFIG_PPC_MICROWATT)          += dtbImage.microwatt
+
 # For 32-bit powermacs, build the COFF and miboot images
 # as well as the ELF images.
 ifdef CONFIG_PPC32
index 6098b879ac97b60b576c015038b631b5ca1cf7ac..977eb15a6d17af5c76f2113357a38bd33907882b 100644 (file)
@@ -99,8 +99,8 @@ static void print_err(char *s)
  * partial_decompress - decompresses part or all of a compressed buffer
  * @inbuf:       input buffer
  * @input_size:  length of the input buffer
- * @outbuf:      input buffer
- * @output_size: length of the input buffer
+ * @outbuf:      output buffer
+ * @output_size: length of the output buffer
  * @skip         number of output bytes to ignore
  *
  * This function takes compressed data from inbuf, decompresses and write it to
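
For context, a hedged example of a call with the corrected parameter
meanings, mirroring the wrapper's own use in prep_kernel(); the output
size is illustrative:

    /* Decompress up to 64KiB into outbuf, skipping no output bytes. */
    long len = partial_decompress(vmlinuz_addr, vmlinuz_size,
                                  outbuf, 0x10000, 0);
    if (len < 0)
            fatal("decompression failed\n\r");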
index 5d91036ad626d5a330a83ded285afba9c4d04c52..58fbcfcc98c9e9a31ba585300b14ef507c509a0a 100644 (file)
@@ -13,6 +13,7 @@
 #include "string.h"
 #include "stdio.h"
 #include "ops.h"
+#include "of.h"
 
 void dt_fixup_memory(u64 start, u64 size)
 {
@@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size)
        root = finddevice("/");
        if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0)
                naddr = 2;
+       else
+               naddr = be32_to_cpu(naddr);
        if (naddr < 1 || naddr > 2)
                fatal("Can't cope with #address-cells == %d in /\n\r", naddr);
 
        if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0)
                nsize = 1;
+       else
+               nsize = be32_to_cpu(nsize);
        if (nsize < 1 || nsize > 2)
                fatal("Can't cope with #size-cells == %d in /\n\r", nsize);
 
        i = 0;
        if (naddr == 2)
-               memreg[i++] = start >> 32;
-       memreg[i++] = start & 0xffffffff;
+               memreg[i++] = cpu_to_be32(start >> 32);
+       memreg[i++] = cpu_to_be32(start & 0xffffffff);
        if (nsize == 2)
-               memreg[i++] = size >> 32;
-       memreg[i++] = size & 0xffffffff;
+               memreg[i++] = cpu_to_be32(size >> 32);
+       memreg[i++] = cpu_to_be32(size & 0xffffffff);
 
        memory = finddevice("/memory");
        if (! memory) {
@@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size)
                setprop_str(memory, "device_type", "memory");
        }
 
-       printf("Memory <- <0x%x", memreg[0]);
+       printf("Memory <- <0x%x", be32_to_cpu(memreg[0]));
        for (i = 1; i < (naddr + nsize); i++)
-               printf(" 0x%x", memreg[i]);
+               printf(" 0x%x", be32_to_cpu(memreg[i]));
        printf("> (%ldMB)\n\r", (unsigned long)(size >> 20));
 
        setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32));
@@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus)
                printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus));
 
        while ((devp = find_node_by_devtype(devp, "cpu"))) {
-               setprop_val(devp, "clock-frequency", cpu);
-               setprop_val(devp, "timebase-frequency", tb);
+               setprop_val(devp, "clock-frequency", cpu_to_be32(cpu));
+               setprop_val(devp, "timebase-frequency", cpu_to_be32(tb));
                if (bus > 0)
-                       setprop_val(devp, "bus-frequency", bus);
+                       setprop_val(devp, "bus-frequency", cpu_to_be32(bus));
        }
 
        timebase_period_ns = 1000000000 / tb;
@@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq)
 
        if (devp) {
                printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq));
-               setprop_val(devp, "clock-frequency", freq);
+               setprop_val(devp, "clock-frequency", cpu_to_be32(freq));
        }
 }
 
@@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize)
 {
        if (getprop(node, "#address-cells", naddr, 4) != 4)
                *naddr = 2;
+       else
+               *naddr = be32_to_cpu(*naddr);
        if (getprop(node, "#size-cells", nsize, 4) != 4)
                *nsize = 1;
+       else
+               *nsize = be32_to_cpu(*nsize);
 }
 
 static void copy_val(u32 *dest, u32 *src, int naddr)
@@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr)
        int i, carry = 0;
 
        for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) {
-               u64 tmp = (u64)reg[i] + add[i] + carry;
+               u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry;
                carry = tmp >> 32;
-               reg[i] = (u32)tmp;
+               reg[i] = cpu_to_be32((u32)tmp);
        }
 
        return !carry;
@@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize)
        u32 end;
 
        for (i = 0; i < MAX_ADDR_CELLS; i++) {
-               if (reg[i] < range[i])
+               if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i]))
                        return 0;
-               if (reg[i] > range[i])
+               if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i]))
                        break;
        }
 
        for (i = 0; i < MAX_ADDR_CELLS; i++) {
-               end = range[i] + rangesize[i];
+               end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]);
 
-               if (reg[i] < end)
+               if (be32_to_cpu(reg[i]) < end)
                        break;
-               if (reg[i] > end)
+               if (be32_to_cpu(reg[i]) > end)
                        return 0;
        }
 
@@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
                return 0;
 
        dt_get_reg_format(parent, &naddr, &nsize);
-
        if (nsize > 2)
                return 0;
 
@@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
 
        copy_val(last_addr, prop_buf + offset, naddr);
 
-       ret_size = prop_buf[offset + naddr];
+       ret_size = be32_to_cpu(prop_buf[offset + naddr]);
        if (nsize == 2) {
                ret_size <<= 32;
-               ret_size |= prop_buf[offset + naddr + 1];
+               ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]);
        }
 
        for (;;) {
@@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
 
                offset = find_range(last_addr, prop_buf, prev_naddr,
                                    naddr, prev_nsize, buflen / 4);
-
                if (offset < 0)
                        return 0;
 
@@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
        if (naddr > 2)
                return 0;
 
-       ret_addr = ((u64)last_addr[2] << 32) | last_addr[3];
-
+       ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]);
        if (sizeof(void *) == 4 &&
            (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL ||
             ret_addr + ret_size > 0x100000000ULL))
@@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat)
 int dt_get_virtual_reg(void *node, void **addr, int nres)
 {
        unsigned long xaddr;
-       int n;
+       int n, i;
 
        n = getprop(node, "virtual-reg", addr, nres * 4);
-       if (n > 0)
+       if (n > 0) {
+               for (i = 0; i < n/4; i ++)
+                       ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]);
                return n / 4;
+       }
 
        for (n = 0; n < nres; n++) {
                if (!dt_xlate_reg(node, n, &xaddr, NULL))
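
The recurring pattern in this file: flattened-device-tree cells are
big-endian, while the wrapper may now run little-endian (e.g. on
Microwatt), so values are swapped after getprop() and before setprop().
A minimal sketch of the read side, using the wrapper's helpers:

    u32 naddr;

    if (getprop(devp, "#address-cells", &naddr, sizeof(naddr)) < 0)
            naddr = 2;                      /* absent: use the default */
    else
            naddr = be32_to_cpu(naddr);     /* present: byte-swap */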
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
new file mode 100644 (file)
index 0000000..974abbd
--- /dev/null
@@ -0,0 +1,138 @@
+/dts-v1/;
+
+/ {
+       #size-cells = <0x02>;
+       #address-cells = <0x02>;
+       model-name = "microwatt";
+       compatible = "microwatt-soc";
+
+       aliases {
+               serial0 = &UART0;
+       };
+
+       reserved-memory {
+               #size-cells = <0x02>;
+               #address-cells = <0x02>;
+               ranges;
+       };
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
+       };
+
+       cpus {
+               #size-cells = <0x00>;
+               #address-cells = <0x01>;
+
+               ibm,powerpc-cpu-features {
+                       display-name = "Microwatt";
+                       isa = <3000>;
+                       device_type = "cpu-features";
+                       compatible = "ibm,powerpc-cpu-features";
+
+                       mmu-radix {
+                               isa = <3000>;
+                               usable-privilege = <2>;
+                       };
+
+                       little-endian {
+                               isa = <2050>;
+                               usable-privilege = <3>;
+                               hwcap-bit-nr = <1>;
+                       };
+
+                       cache-inhibited-large-page {
+                               isa = <2040>;
+                               usable-privilege = <2>;
+                       };
+
+                       fixed-point-v3 {
+                               isa = <3000>;
+                               usable-privilege = <3>;
+                       };
+
+                       no-execute {
+                               isa = <2010>;
+                               usable-privilege = <2>;
+                       };
+
+                       floating-point {
+                               hwcap-bit-nr = <27>;
+                               isa = <0>;
+                               usable-privilege = <3>;
+                       };
+               };
+
+               PowerPC,Microwatt@0 {
+                       i-cache-sets = <2>;
+                       ibm,dec-bits = <64>;
+                       reservation-granule-size = <64>;
+                       clock-frequency = <100000000>;
+                       timebase-frequency = <100000000>;
+                       i-tlb-sets = <1>;
+                       ibm,ppc-interrupt-server#s = <0>;
+                       i-cache-block-size = <64>;
+                       d-cache-block-size = <64>;
+                       d-cache-sets = <2>;
+                       i-tlb-size = <64>;
+                       cpu-version = <0x990000>;
+                       status = "okay";
+                       i-cache-size = <0x1000>;
+                       ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
+                       tlb-size = <0>;
+                       tlb-sets = <0>;
+                       device_type = "cpu";
+                       d-tlb-size = <128>;
+                       d-tlb-sets = <2>;
+                       reg = <0>;
+                       general-purpose;
+                       64-bit;
+                       d-cache-size = <0x1000>;
+                       ibm,chip-id = <0>;
+               };
+       };
+
+       soc@c0000000 {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               interrupt-parent = <&ICS>;
+
+               ranges = <0 0 0xc0000000 0x40000000>;
+
+               interrupt-controller@4000 {
+                       compatible = "openpower,xics-presentation", "ibm,ppc-xicp";
+                       ibm,interrupt-server-ranges = <0x0 0x1>;
+                       reg = <0x4000 0x100>;
+               };
+
+               ICS: interrupt-controller@5000 {
+                       compatible = "openpower,xics-sources";
+                       interrupt-controller;
+                       interrupt-ranges = <0x10 0x10>;
+                       reg = <0x5000 0x100>;
+                       #address-cells = <0>;
+                       #size-cells = <0>;
+                       #interrupt-cells = <2>;
+               };
+
+               UART0: serial@2000 {
+                       device_type = "serial";
+                       compatible = "ns16550";
+                       reg = <0x2000 0x8>;
+                       clock-frequency = <100000000>;
+                       current-speed = <115200>;
+                       reg-shift = <2>;
+                       fifo-size = <16>;
+                       interrupts = <0x10 0x1>;
+               };
+       };
+
+       chosen {
+               bootargs = "";
+               ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
+                                         00 00 00 00 00 00 00 00 40 00 40];
+               stdout-path = &UART0;
+       };
+};
diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c
new file mode 100644 (file)
index 0000000..ca9d836
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+BSS_STACK(8192);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+       unsigned long heapsize = 16*1024*1024 - (unsigned long)_end;
+
+       /*
+        * Disable interrupts and turn off MSR_RI, since we'll
+        * shortly be overwriting the interrupt vectors.
+        */
+       __asm__ volatile("mtmsrd %0,1" : : "r" (0));
+
+       simple_alloc_init(_end, heapsize, 32, 64);
+       fdt_init(_dtb_start);
+       serial_console_init();
+}
index b0da4466d41983590326abb8b08f69a4a7d64ebd..f16d2be1d0f311726a83b3ce70c4c09bf0445462 100644 (file)
@@ -15,6 +15,7 @@
 #include "stdio.h"
 #include "io.h"
 #include "ops.h"
+#include "of.h"
 
 #define UART_DLL       0       /* Out: Divisor Latch Low */
 #define UART_DLM       1       /* Out: Divisor Latch High */
@@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp)
        int n;
        u32 reg_offset;
 
-       if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
+       if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1) {
+               printf("virt reg parse fail...\r\n");
                return -1;
+       }
 
        n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
        if (n == sizeof(reg_offset))
-               reg_base += reg_offset;
+               reg_base += be32_to_cpu(reg_offset);
 
        n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
        if (n != sizeof(reg_shift))
                reg_shift = 0;
+       else
+               reg_shift = be32_to_cpu(reg_shift);
 
        scdp->open = ns16550_open;
        scdp->putc = ns16550_putc;
index cdb796b76e2eac6817c9d724a7e619e12d49f4eb..1f4676bab786b3bbef11c8c8539e7b9daa3b34c9 100755 (executable)
@@ -342,6 +342,11 @@ gamecube|wii)
     link_address='0x600000'
     platformo="$object/$platform-head.o $object/$platform.o"
     ;;
+microwatt)
+    link_address='0x500000'
+    platformo="$object/fixed-head.o $object/$platform.o"
+    binary=y
+    ;;
 treeboot-currituck)
     link_address='0x1000000'
     ;;
index 7b2ff2eaa73a61179ea795b14981cd7f11fa9e54..d0ffb493614d5351a6cf6f580b40b6ae464f7efa 100644 (file)
@@ -8,7 +8,7 @@ SECTIONS
   .kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) }
   _vmlinux_end =  .;
 
-  . = ALIGN(4096);
+  . = ALIGN(8);
   _dtb_start = .;
   .kernel:dtb : { *(.kernel:dtb) }
   _dtb_end = .;
diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config
new file mode 100644 (file)
index 0000000..ad65468
--- /dev/null
@@ -0,0 +1 @@
+# CONFIG_PPC64 is not set
diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config
new file mode 100644 (file)
index 0000000..0fe6406
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_PPC64=y
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
new file mode 100644 (file)
index 0000000..a08b739
--- /dev/null
@@ -0,0 +1,98 @@
+# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TICK_CPU_ACCOUNTING=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PPC64=y
+# CONFIG_PPC_KUEP is not set
+# CONFIG_PPC_KUAP is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_NR_IRQS=64
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_MICROWATT=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ=y
+CONFIG_HZ_100=y
+# CONFIG_PPC_MEM_KEYS is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_INET=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_PARTITIONED_MASTER=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_NVRAM is not set
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_PPC_DISABLE_WERROR=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_XMON_DEFAULT_RO_MODE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
index 949ff9ccda5e7799dafa3c70e0f072a47143af27..d21f266cea9a5da8bef6334f614ec4ae6c70e744 100644 (file)
@@ -57,3 +57,28 @@ CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PPC_16K_PAGES=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_FS=y
+CONFIG_PPC_PTDUMP=y
+CONFIG_MODULES=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_8xx_GPIO=y
+CONFIG_WATCHDOG=y
+CONFIG_8xxx_WDT=y
+CONFIG_SMC_UCODE_PATCH=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_PIN_TLB=y
+CONFIG_PERF_EVENTS=y
+CONFIG_MATH_EMULATION=y
+CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_IPV6=y
+CONFIG_BPF_JIT=y
+CONFIG_DEBUG_VM_PGTABLE=y
+CONFIG_BDI_SWITCH=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008
index 2c87e856d839b07c0046092dff68d28e7ea8292e..8bfeea6c7de7b4406fea322e6ef74ed60092374d 100644 (file)
@@ -309,6 +309,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PPC_EMULATED_STATS=y
index 701811c91a6f3f3cfdec97501df31f4a4c3a24c5..0ad2291337a713dbb48580daab80e9188ab7c331 100644 (file)
@@ -368,7 +368,9 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_FUNCTION_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
 CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
index 50168dde4ea5987769f4c5365f036abcc69b8ef3..b183629f1bcfb8964db7c67bbe7051d291697a5b 100644 (file)
@@ -289,7 +289,9 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_FUNCTION_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
 CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
index 02ee6f5ac9fe9e1f91f19fc9ed3a2f7be2b4f373..222823861a67479e93d14620a167eb5b2570cde1 100644 (file)
@@ -71,8 +71,13 @@ void __init machine_init(u64 dt_ptr);
 #endif
 long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr);
-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr);
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
+#ifdef CONFIG_PPC64
+unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs);
+unsigned long interrupt_exit_user_restart(struct pt_regs *regs);
+unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs);
+#endif
 
 long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
                      u32 len_high, u32 len_low);
index 7ae29cfb06c07fb006ba4dde6b45d76947116ba9..f0e68723648429b0f4a1068030a864fb69d8054e 100644 (file)
@@ -46,6 +46,8 @@
 #    define SMPWMB      eieio
 #endif
 
+/* clang defines this macro for a builtin, which will not work with runtime patching */
+#undef __lwsync
 #define __lwsync()     __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
 #define dma_rmb()      __lwsync()
 #define dma_wmb()      __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
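
For orientation, the classic use of these barriers (the descriptor
layout and names here are hypothetical, not from this patch):

    /* Producer: publish a DMA descriptor. */
    desc->addr  = buf_phys;
    desc->len   = len;
    dma_wmb();              /* payload stores visible before the flag */
    desc->flags = DESC_VALID;

    /* Consumer: */
    if (desc->flags & DESC_VALID) {
            dma_rmb();      /* flag read ordered before payload reads */
            handle(desc->addr, desc->len);
    }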
diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h
deleted file mode 100644 (file)
index 2a0a467..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H
-#define _ASM_POWERPC_BOOK3S_32_HASH_H
-#ifdef __KERNEL__
-
-/*
- * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
- * table containing PTEs, together with a set of 16 segment registers,
- * to define the virtual to physical address mapping.
- *
- * We use the hash table as an extended TLB, i.e. a cache of currently
- * active mappings.  We maintain a two-level page table tree, much
- * like that used by the i386, for the sake of the Linux memory
- * management code.  Low-level assembler code in hash_low_32.S
- * (procedure hash_page) is responsible for extracting ptes from the
- * tree and putting them into the hash table when necessary, and
- * updating the accessed and modified bits in the page table tree.
- */
-
-#define _PAGE_PRESENT  0x001   /* software: pte contains a translation */
-#define _PAGE_HASHPTE  0x002   /* hash_page has made an HPTE for this pte */
-#define _PAGE_USER     0x004   /* usermode access allowed */
-#define _PAGE_GUARDED  0x008   /* G: prohibit speculative access */
-#define _PAGE_COHERENT 0x010   /* M: enforce memory coherence (SMP systems) */
-#define _PAGE_NO_CACHE 0x020   /* I: cache inhibit */
-#define _PAGE_WRITETHRU        0x040   /* W: cache write-through */
-#define _PAGE_DIRTY    0x080   /* C: page changed */
-#define _PAGE_ACCESSED 0x100   /* R: page referenced */
-#define _PAGE_EXEC     0x200   /* software: exec allowed */
-#define _PAGE_RW       0x400   /* software: user write access allowed */
-#define _PAGE_SPECIAL  0x800   /* software: Special page */
-
-#ifdef CONFIG_PTE_64BIT
-/* We never clear the high word of the pte */
-#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
-#else
-#define _PTE_NONE_MASK _PAGE_HASHPTE
-#endif
-
-#define _PMD_PRESENT   0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD       (~PAGE_MASK)
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */
index 1670dfe9d4f10bfd346eb01afd936861f32db057..64201125a287bd22a5ff3fbbee9b23309f00a52e 100644 (file)
 
 #ifndef __ASSEMBLY__
 
+#include <linux/jump_label.h>
+
+extern struct static_key_false disable_kuap_key;
+extern struct static_key_false disable_kuep_key;
+
+static __always_inline bool kuap_is_disabled(void)
+{
+       return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key);
+}
+
+static __always_inline bool kuep_is_disabled(void)
+{
+       return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key);
+}
+
+static inline void kuep_lock(void)
+{
+       if (kuep_is_disabled())
+               return;
+
+       update_user_segments(mfsr(0) | SR_NX);
+}
+
+static inline void kuep_unlock(void)
+{
+       if (kuep_is_disabled())
+               return;
+
+       update_user_segments(mfsr(0) & ~SR_NX);
+}
+
 #ifdef CONFIG_PPC_KUAP
 
 #include <linux/sched.h>
 
-static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+#define KUAP_NONE      (~0UL)
+#define KUAP_ALL       (~1UL)
+
+static inline void kuap_lock_one(unsigned long addr)
 {
-       addr &= 0xf0000000;     /* align addr to start of segment */
-       barrier();      /* make sure thread.kuap is updated before playing with SRs */
-       while (addr < end) {
-               mtsr(sr, addr);
-               sr += 0x111;            /* next VSID */
-               sr &= 0xf0ffffff;       /* clear VSID overflow */
-               addr += 0x10000000;     /* address of next segment */
-       }
+       mtsr(mfsr(addr) | SR_KS, addr);
+       isync();        /* Context sync required after mtsr() */
+}
+
+static inline void kuap_unlock_one(unsigned long addr)
+{
+       mtsr(mfsr(addr) & ~SR_KS, addr);
+       isync();        /* Context sync required after mtsr() */
+}
+
+static inline void kuap_lock_all(void)
+{
+       update_user_segments(mfsr(0) | SR_KS);
+       isync();        /* Context sync required after mtsr() */
+}
+
+static inline void kuap_unlock_all(void)
+{
+       update_user_segments(mfsr(0) & ~SR_KS);
        isync();        /* Context sync required after mtsr() */
 }
 
+void kuap_lock_all_ool(void);
+void kuap_unlock_all_ool(void);
+
+static inline void kuap_lock(unsigned long addr, bool ool)
+{
+       if (likely(addr != KUAP_ALL))
+               kuap_lock_one(addr);
+       else if (!ool)
+               kuap_lock_all();
+       else
+               kuap_lock_all_ool();
+}
+
+static inline void kuap_unlock(unsigned long addr, bool ool)
+{
+       if (likely(addr != KUAP_ALL))
+               kuap_unlock_one(addr);
+       else if (!ool)
+               kuap_unlock_all();
+       else
+               kuap_unlock_all_ool();
+}
+
 static inline void kuap_save_and_lock(struct pt_regs *regs)
 {
        unsigned long kuap = current->thread.kuap;
-       u32 addr = kuap & 0xf0000000;
-       u32 end = kuap << 28;
+
+       if (kuap_is_disabled())
+               return;
 
        regs->kuap = kuap;
-       if (unlikely(!kuap))
+       if (unlikely(kuap == KUAP_NONE))
                return;
 
-       current->thread.kuap = 0;
-       kuap_update_sr(mfsr(addr) | SR_KS, addr, end);  /* Set Ks */
+       current->thread.kuap = KUAP_NONE;
+       kuap_lock(kuap, false);
 }
 
 static inline void kuap_user_restore(struct pt_regs *regs)
@@ -44,22 +113,22 @@ static inline void kuap_user_restore(struct pt_regs *regs)
 
 static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
 {
-       u32 addr = regs->kuap & 0xf0000000;
-       u32 end = regs->kuap << 28;
+       if (kuap_is_disabled())
+               return;
 
        current->thread.kuap = regs->kuap;
 
-       if (unlikely(regs->kuap == kuap))
-               return;
-
-       kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+       kuap_unlock(regs->kuap, false);
 }
 
 static inline unsigned long kuap_get_and_assert_locked(void)
 {
        unsigned long kuap = current->thread.kuap;
 
-       WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+       if (kuap_is_disabled())
+               return KUAP_NONE;
+
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE);
 
        return kuap;
 }
@@ -72,84 +141,78 @@ static inline void kuap_assert_locked(void)
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
                                              u32 size, unsigned long dir)
 {
-       u32 addr, end;
+       if (kuap_is_disabled())
+               return;
 
        BUILD_BUG_ON(!__builtin_constant_p(dir));
-       BUILD_BUG_ON(dir & ~KUAP_READ_WRITE);
 
        if (!(dir & KUAP_WRITE))
                return;
 
-       addr = (__force u32)to;
-
-       if (unlikely(addr >= TASK_SIZE || !size))
-               return;
-
-       end = min(addr + size, TASK_SIZE);
-
-       current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
-       kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+       current->thread.kuap = (__force u32)to;
+       kuap_unlock_one((__force u32)to);
 }
 
-static __always_inline void prevent_user_access(void __user *to, const void __user *from,
-                                               u32 size, unsigned long dir)
+static __always_inline void prevent_user_access(unsigned long dir)
 {
-       u32 addr, end;
-
-       BUILD_BUG_ON(!__builtin_constant_p(dir));
+       u32 kuap = current->thread.kuap;
 
-       if (dir & KUAP_CURRENT_WRITE) {
-               u32 kuap = current->thread.kuap;
-
-               if (unlikely(!kuap))
-                       return;
+       if (kuap_is_disabled())
+               return;
 
-               addr = kuap & 0xf0000000;
-               end = kuap << 28;
-       } else if (dir & KUAP_WRITE) {
-               addr = (__force u32)to;
-               end = min(addr + size, TASK_SIZE);
+       BUILD_BUG_ON(!__builtin_constant_p(dir));
 
-               if (unlikely(addr >= TASK_SIZE || !size))
-                       return;
-       } else {
+       if (!(dir & KUAP_WRITE))
                return;
-       }
 
-       current->thread.kuap = 0;
-       kuap_update_sr(mfsr(addr) | SR_KS, addr, end);  /* set Ks */
+       current->thread.kuap = KUAP_NONE;
+       kuap_lock(kuap, true);
 }
 
 static inline unsigned long prevent_user_access_return(void)
 {
        unsigned long flags = current->thread.kuap;
-       unsigned long addr = flags & 0xf0000000;
-       unsigned long end = flags << 28;
-       void __user *to = (__force void __user *)addr;
 
-       if (flags)
-               prevent_user_access(to, to, end - addr, KUAP_READ_WRITE);
+       if (kuap_is_disabled())
+               return KUAP_NONE;
+
+       if (flags != KUAP_NONE) {
+               current->thread.kuap = KUAP_NONE;
+               kuap_lock(flags, true);
+       }
 
        return flags;
 }
 
 static inline void restore_user_access(unsigned long flags)
 {
-       unsigned long addr = flags & 0xf0000000;
-       unsigned long end = flags << 28;
-       void __user *to = (__force void __user *)addr;
+       if (kuap_is_disabled())
+               return;
 
-       if (flags)
-               allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
+       if (flags != KUAP_NONE) {
+               current->thread.kuap = flags;
+               kuap_unlock(flags, true);
+       }
 }
 
 static inline bool
 bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
-       unsigned long begin = regs->kuap & 0xf0000000;
-       unsigned long end = regs->kuap << 28;
+       unsigned long kuap = regs->kuap;
+
+       if (kuap_is_disabled())
+               return false;
+
+       if (!is_write || kuap == KUAP_ALL)
+               return false;
+       if (kuap == KUAP_NONE)
+               return true;
+
+       /* If faulting address doesn't match unlocked segment, unlock all */
+       if ((kuap ^ address) & 0xf0000000)
+               regs->kuap = KUAP_ALL;
 
-       return is_write && (address < begin || address >= end);
+       return false;
 }
 
 #endif /* CONFIG_PPC_KUAP */
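
Taken together, a hedged sketch of how a user-space write is bracketed
with the reworked helpers (the copy routine is illustrative):

    /* Unlock only the segment containing 'to', store, then re-lock. */
    allow_user_access(to, NULL, size, KUAP_WRITE);
    ret = __copy_tofrom_user(to, from, size);
    prevent_user_access(KUAP_WRITE);

Note that thread.kuap now records a single unlocked address, KUAP_NONE
or KUAP_ALL, rather than the old packed segment range.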
index b85f8e114a9c0f52f15f78251ed43db90d1afa19..f5be185cbdf8da15df447dd94931e570595367c0 100644 (file)
@@ -66,6 +66,16 @@ struct ppc_bat {
 
 #ifndef __ASSEMBLY__
 
+/*
+ * This macro defines the mapping from contexts to VSIDs (virtual
+ * segment IDs).  We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table.  Note, if this
+ * function is changed then hash functions will have to be
+ * changed to correspond.
+ */
+#define CTX_TO_VSID(c, id)     ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
+
 /*
  * Hardware Page Table Entry
  * Note that the xpn and x bitfields are used only by processors that
@@ -102,6 +112,37 @@ extern s32 patch__hash_page_B, patch__hash_page_C;
 extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2;
 extern s32 patch__flush_hash_B;
 
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+
+static __always_inline void update_user_segment(u32 n, u32 val)
+{
+       if (n << 28 < TASK_SIZE)
+               mtsr(val + n * 0x111, n << 28);
+}
+
+static __always_inline void update_user_segments(u32 val)
+{
+       val &= 0xf0ffffff;
+
+       update_user_segment(0, val);
+       update_user_segment(1, val);
+       update_user_segment(2, val);
+       update_user_segment(3, val);
+       update_user_segment(4, val);
+       update_user_segment(5, val);
+       update_user_segment(6, val);
+       update_user_segment(7, val);
+       update_user_segment(8, val);
+       update_user_segment(9, val);
+       update_user_segment(10, val);
+       update_user_segment(11, val);
+       update_user_segment(12, val);
+       update_user_segment(13, val);
+       update_user_segment(14, val);
+       update_user_segment(15, val);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 /* We happily ignore the smaller BATs on 601, we don't actually use
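
As a worked example of the skew (values purely illustrative):
CTX_TO_VSID(1, 2) = ((1 * 897 * 16) + (2 * 0x111)) & 0xffffff
= (14352 + 546) & 0xffffff = 14898, so consecutive ESIDs within one
context land 0x111 VSIDs apart, matching the n * 0x111 stride used by
update_user_segment() above.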
index 83c65845a1a9d6062d8c916b8bb17627b1d5452a..609c80f671943b540a6c7352ec2619b68566a1b6 100644 (file)
@@ -4,7 +4,43 @@
 
 #include <asm-generic/pgtable-nopmd.h>
 
-#include <asm/book3s/32/hash.h>
+/*
+ * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
+ * table containing PTEs, together with a set of 16 segment registers,
+ * to define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings.  We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code.  Low-level assembler code in hash_low_32.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+#define _PAGE_PRESENT  0x001   /* software: pte contains a translation */
+#define _PAGE_HASHPTE  0x002   /* hash_page has made an HPTE for this pte */
+#define _PAGE_USER     0x004   /* usermode access allowed */
+#define _PAGE_GUARDED  0x008   /* G: prohibit speculative access */
+#define _PAGE_COHERENT 0x010   /* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE 0x020   /* I: cache inhibit */
+#define _PAGE_WRITETHRU        0x040   /* W: cache write-through */
+#define _PAGE_DIRTY    0x080   /* C: page changed */
+#define _PAGE_ACCESSED 0x100   /* R: page referenced */
+#define _PAGE_EXEC     0x200   /* software: exec allowed */
+#define _PAGE_RW       0x400   /* software: user write access allowed */
+#define _PAGE_SPECIAL  0x800   /* software: Special page */
+
+#ifdef CONFIG_PTE_64BIT
+/* We never clear the high word of the pte */
+#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
+#else
+#define _PTE_NONE_MASK _PAGE_HASHPTE
+#endif
+
+#define _PMD_PRESENT   0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD       (~PAGE_MASK)
 
 /* And here we include common definitions */
 
index 9700da3a40933b083ab9302cad8d03cbf94ee10e..a1cc73a88710e2dc5e1618eb2d4a67aef8e9b189 100644 (file)
@@ -398,8 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 
 #endif /* !CONFIG_PPC_KUAP */
 
-static inline void prevent_user_access(void __user *to, const void __user *from,
-                                      unsigned long size, unsigned long dir)
+static inline void prevent_user_access(unsigned long dir)
 {
        set_kuap(AMR_KUAP_BLOCKED);
        if (static_branch_unlikely(&uaccess_flush_key))
index a666d561b44d2825e079dd7efa7f37b7b9f4f136..4d9941b2fe517e9a1dd6fc9f939ba23687809dcc 100644 (file)
@@ -232,6 +232,9 @@ extern unsigned long __pmd_frag_size_shift;
 #define PTRS_PER_PUD   (1 << PUD_INDEX_SIZE)
 #define PTRS_PER_PGD   (1 << PGD_INDEX_SIZE)
 
+#define MAX_PTRS_PER_PGD       (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
+                                      H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
+
 /* PMD_SHIFT determines what a second-level page table entry can map */
 #define PMD_SHIFT      (PAGE_SHIFT + PTE_INDEX_SIZE)
 #define PMD_SIZE       (1UL << PMD_SHIFT)
index d5da7ddbf0fc71b6fb6de1c558b3007a68e89ee8..350de8f90250cbd73694110c7ac6530b1b94aa17 100644 (file)
@@ -91,7 +91,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
 }
 
 #define HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
 {
 #ifdef __powerpc64__
        u64 res = (__force u64)csum;
index f1d029bf906e59b52506624eb77539d90d652726..a95f63788c6b1423e957f7e89871987f541b62b1 100644 (file)
 #define BRANCH_ABSOLUTE        0x2
 
 bool is_offset_in_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_branch(struct ppc_inst *instr, const u32 *addr,
                  unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
                       unsigned long target, int flags);
-int patch_branch(struct ppc_inst *addr, unsigned long target, int flags);
-int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
-int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
+int patch_branch(u32 *addr, unsigned long target, int flags);
+int patch_instruction(u32 *addr, struct ppc_inst instr);
+int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
@@ -38,18 +38,18 @@ static inline unsigned long patch_site_addr(s32 *site)
 
 static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
 {
-       return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr);
+       return patch_instruction((u32 *)patch_site_addr(site), instr);
 }
 
 static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
 {
-       return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags);
+       return patch_branch((u32 *)patch_site_addr(site), target, flags);
 }
 
 static inline int modify_instruction(unsigned int *addr, unsigned int clr,
                                     unsigned int set)
 {
-       return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set));
+       return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
 }
 
 static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set)
@@ -59,10 +59,8 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned
 
 int instr_is_relative_branch(struct ppc_inst instr);
 int instr_is_relative_link_branch(struct ppc_inst instr);
-int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr);
-unsigned long branch_target(const struct ppc_inst *instr);
-int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest,
-                    const struct ppc_inst *src);
+unsigned long branch_target(const u32 *instr);
+int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
 extern bool is_conditional_branch(struct ppc_inst instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
@@ -73,9 +71,9 @@ void __patch_exception(int exc, unsigned long addr);
 #endif
 
 #define OP_RT_RA_MASK  0xffff0000UL
-#define LIS_R2         (PPC_INST_ADDIS | __PPC_RT(R2))
-#define ADDIS_R2_R12   (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12))
-#define ADDI_R2_R2     (PPC_INST_ADDI  | __PPC_RT(R2) | __PPC_RA(R2))
+#define LIS_R2         (PPC_RAW_LIS(_R2, 0))
+#define ADDIS_R2_R12   (PPC_RAW_ADDIS(_R2, _R12, 0))
+#define ADDI_R2_R2     (PPC_RAW_ADDI(_R2, _R2, 0))
 
 
 static inline unsigned long ppc_function_entry(void *func)
@@ -180,12 +178,10 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
 #define R2_STACK_OFFSET         40
 #endif
 
-#define PPC_INST_LD_TOC                (PPC_INST_LD  | ___PPC_RT(__REG_R2) | \
-                                ___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
+#define PPC_INST_LD_TOC                PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET)
 
 /* usually preceded by a mflr r0 */
-#define PPC_INST_STD_LR                (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
-                                ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
+#define PPC_INST_STD_LR                PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)
 #endif /* CONFIG_PPC64 */
 
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
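
With this change the code-patching API takes plain u32 * for instruction addresses, keeping struct ppc_inst for instruction values only. A hedged usage sketch against the signatures above (the address is a placeholder supplied by the caller):

	#include <asm/code-patching.h>
	#include <asm/ppc-opcode.h>

	/* Sketch: replace the instruction at addr with a nop. */
	static int example_patch_nop(u32 *addr)
	{
		return patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
	}
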
index 4cb9efa2eb218b6448f50ab82a499ed3c210cd22..242204e12993b1690b716ff0832e278f37b79b35 100644 (file)
@@ -16,7 +16,7 @@
        .section ".head.data.\name\()","a",@progbits
 .endm
 .macro use_ftsec name
-       .section ".head.text.\name\()"
+       .section ".head.text.\name\()","ax",@progbits
 .endm
 
 /*
index 7e4b2cef40c291728fa5b986c98633be845ff547..9bcf345cb208ba0011d92f95a9214cac9f762f9f 100644 (file)
 #define H_RESIZE_HPT_COMMIT    0x370
 #define H_REGISTER_PROC_TBL    0x37C
 #define H_SIGNAL_SYS_RESET     0x380
+#define H_ALLOCATE_VAS_WINDOW  0x388
+#define H_MODIFY_VAS_WINDOW    0x38C
+#define H_DEALLOCATE_VAS_WINDOW        0x390
+#define H_QUERY_VAS_WINDOW     0x394
+#define H_QUERY_VAS_CAPABILITIES       0x398
+#define H_QUERY_NX_CAPABILITIES        0x39C
+#define H_GET_NX_FAULT         0x3A0
 #define H_INT_GET_SOURCE_INFO   0x3A8
 #define H_INT_SET_SOURCE_CONFIG 0x3AC
 #define H_INT_GET_SOURCE_CONFIG 0x3B0
 #define H_CPU_BEHAV_FAVOUR_SECURITY_H  (1ull << 60) // IBM bit 3
 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE  (1ull << 58) // IBM bit 5
 #define H_CPU_BEHAV_FLUSH_LINK_STACK   (1ull << 57) // IBM bit 6
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8
+#define H_CPU_BEHAV_NO_STF_BARRIER     (1ull << 54) // IBM bit 9
 
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK     0x18
index 56a98936a6a93734c4993133cdf6060b7bc3ee79..21cc571ea9c2d150834d7bf15788d06009aa1db7 100644 (file)
  * PACA flags in paca->irq_happened.
  *
  * These bits are set when interrupts occur while soft-disabled
- * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS
- * is set whenever we manually hard disable.
+ * and allow a proper replay.
+ *
+ * PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost
+ * always in sync with the MSR[EE] state, except:
+ * - A window in interrupt entry, where hardware disables MSR[EE] and that
+ *   must be "reconciled" with the soft mask state.
+ * - NMI interrupts that hit in awkward places, until they fix the state.
+ * - When local irqs are being enabled and state is being fixed up.
+ * - When returning from an interrupt there are some windows where this
+ *   can fall out of sync, but it is fixed before the RFI or before
+ *   executing the next user instruction (see arch/powerpc/kernel/interrupt.c).
  */
 #define PACA_IRQ_HARD_DIS      0x01
 #define PACA_IRQ_DBELL         0x02
@@ -389,7 +398,15 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
        return !(regs->msr & MSR_EE);
 }
 
-static inline void may_hard_irq_enable(void) { }
+static inline bool may_hard_irq_enable(void)
+{
+       return false;
+}
+
+static inline void do_hard_irq_enable(void)
+{
+       BUILD_BUG();
+}
 
 static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
 {
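
The expanded comment above describes the invariant between PACA_IRQ_HARD_DIS and MSR[EE]. A sketch of the long-standing 64-bit manual hard-disable idiom that maintains it (the existing pattern, not code added by this series; the function name is illustrative):

	static inline void example_hard_irq_disable(void)
	{
		__hard_irq_disable();				/* clear MSR[EE] */
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;	/* record it */
	}
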
index 268d3bd073c8ac0b93221992fcc1377d79eccd1d..b11c0e2f963977d066c807fa7b1aea053ec86a25 100644 (file)
@@ -8,17 +8,17 @@
 
 #define ___get_user_instr(gu_op, dest, ptr)                            \
 ({                                                                     \
-       long __gui_ret = 0;                                             \
-       unsigned long __gui_ptr = (unsigned long)ptr;                   \
+       long __gui_ret;                                                 \
+       u32 __user *__gui_ptr = (u32 __user *)ptr;                      \
        struct ppc_inst __gui_inst;                                     \
        unsigned int __prefix, __suffix;                                \
-       __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr);  \
+                                                                       \
+       __chk_user_ptr(ptr);                                            \
+       __gui_ret = gu_op(__prefix, __gui_ptr);                         \
        if (__gui_ret == 0) {                                           \
                if ((__prefix >> 26) == OP_PREFIX) {                    \
-                       __gui_ret = gu_op(__suffix,                     \
-                               (unsigned int __user *)__gui_ptr + 1);  \
-                       __gui_inst = ppc_inst_prefix(__prefix,          \
-                                                    __suffix);         \
+                       __gui_ret = gu_op(__suffix, __gui_ptr + 1);     \
+                       __gui_inst = ppc_inst_prefix(__prefix, __suffix); \
                } else {                                                \
                        __gui_inst = ppc_inst(__prefix);                \
                }                                                       \
 })
 #else /* !CONFIG_PPC64 */
 #define ___get_user_instr(gu_op, dest, ptr)                            \
-       gu_op((dest).val, (u32 __user *)(ptr))
+({                                                                     \
+       __chk_user_ptr(ptr);                                            \
+       gu_op((dest).val, (u32 __user *)(ptr));                         \
+})
 #endif /* CONFIG_PPC64 */
 
-#define get_user_instr(x, ptr) \
-       ___get_user_instr(get_user, x, ptr)
+#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
 
-#define __get_user_instr(x, ptr) \
-       ___get_user_instr(__get_user, x, ptr)
+#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr)
 
 /*
  * Instruction data type for POWER
@@ -59,9 +60,9 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x)
        return ppc_inst_val(x) >> 26;
 }
 
-#ifdef CONFIG_PPC64
-#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff })
+#define ppc_inst(x) ((struct ppc_inst){ .val = (x) })
 
+#ifdef CONFIG_PPC64
 #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
 
 static inline u32 ppc_inst_suffix(struct ppc_inst x)
@@ -69,68 +70,43 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x)
        return x.suffix;
 }
 
-static inline bool ppc_inst_prefixed(struct ppc_inst x)
-{
-       return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff;
-}
+#else
+#define ppc_inst_prefix(x, y) ppc_inst(x)
 
-static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(struct ppc_inst x)
 {
-       return ppc_inst_prefix(swab32(ppc_inst_val(x)),
-                              swab32(ppc_inst_suffix(x)));
+       return 0;
 }
 
-static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
-{
-       u32 val, suffix;
-
-       val = *(u32 *)ptr;
-       if ((val >> 26) == OP_PREFIX) {
-               suffix = *((u32 *)ptr + 1);
-               return ppc_inst_prefix(val, suffix);
-       } else {
-               return ppc_inst(val);
-       }
-}
+#endif /* CONFIG_PPC64 */
 
-static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
+static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
 {
-       return *(u64 *)&x == *(u64 *)&y;
+       if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
+               return ppc_inst_prefix(*ptr, *(ptr + 1));
+       else
+               return ppc_inst(*ptr);
 }
 
-#else
-
-#define ppc_inst(x) ((struct ppc_inst){ .val = x })
-
-#define ppc_inst_prefix(x, y) ppc_inst(x)
-
 static inline bool ppc_inst_prefixed(struct ppc_inst x)
 {
-       return false;
-}
-
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
-{
-       return 0;
+       return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
 }
 
 static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
 {
-       return ppc_inst(swab32(ppc_inst_val(x)));
-}
-
-static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
-{
-       return *ptr;
+       return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
 }
 
 static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
 {
-       return ppc_inst_val(x) == ppc_inst_val(y);
+       if (ppc_inst_val(x) != ppc_inst_val(y))
+               return false;
+       if (!ppc_inst_prefixed(x))
+               return true;
+       return ppc_inst_suffix(x) == ppc_inst_suffix(y);
 }
 
-#endif /* CONFIG_PPC64 */
-
 static inline int ppc_inst_len(struct ppc_inst x)
 {
        return ppc_inst_prefixed(x) ? 8 : 4;
@@ -140,13 +116,13 @@ static inline int ppc_inst_len(struct ppc_inst x)
  * Return the address of the next instruction, if the instruction @value was
  * located at @location.
  */
-static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value)
+static inline u32 *ppc_inst_next(u32 *location, u32 *value)
 {
        struct ppc_inst tmp;
 
        tmp = ppc_inst_read(value);
 
-       return location + ppc_inst_len(tmp);
+       return (void *)location + ppc_inst_len(tmp);
 }
 
 static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
@@ -178,6 +154,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
        __str;                          \
 })
 
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src);
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src);
 
 #endif /* _ASM_POWERPC_INST_H */
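
After the refactor there is a single ppc_inst() definition, and ppc_inst_read()/ppc_inst_prefixed() work from u32 pointers on both 32- and 64-bit. A sketch of stepping through kernel text with the new API (the pointer is a placeholder):

	/* Sketch: read one instruction, then step past it (4 or 8 bytes). */
	static u32 *example_step(u32 *p)
	{
		struct ppc_inst insn = ppc_inst_read(p);

		if (ppc_inst_prefixed(insn))
			pr_debug("prefixed, suffix=%x\n", ppc_inst_suffix(insn));

		return ppc_inst_next(p, p);
	}
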
index 59f704408d65d37ab67cd94f2cd5c3ffcc4e1199..d4bdf7d274ac1492e703370ad20b64c6ce7a8a2d 100644 (file)
 #include <asm/kprobes.h>
 #include <asm/runlatch.h>
 
+#ifdef CONFIG_PPC_BOOK3S_64
+extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
+unsigned long search_kernel_restart_table(unsigned long addr);
+
+DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+       if (regs->msr & MSR_PR)
+               return false;
+
+       if (regs->nip >= (unsigned long)__end_soft_masked)
+               return false;
+
+       return search_kernel_soft_mask_table(regs->nip);
+}
+
+static inline void srr_regs_clobbered(void)
+{
+       local_paca->srr_valid = 0;
+       local_paca->hsrr_valid = 0;
+}
+#else
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+       return false;
+}
+
+static inline void srr_regs_clobbered(void)
+{
+}
+#endif
+
 static inline void nap_adjust_return(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC_970_NAP
        if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
                /* Can avoid a test-and-clear because NMIs do not call this */
                clear_thread_local_flags(_TLF_NAPPING);
-               regs->nip = (unsigned long)power4_idle_nap_return;
+               regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
        }
 #endif
 }
@@ -129,9 +163,18 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
                 * CT_WARN_ON comes here via program_check_exception,
                 * so avoid recursion.
                 */
-               if (TRAP(regs) != INTERRUPT_PROGRAM)
+               if (TRAP(regs) != INTERRUPT_PROGRAM) {
                        CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+                       BUG_ON(is_implicit_soft_masked(regs));
+               }
+#ifdef CONFIG_PPC_BOOK3S
+               /* Move this under a debugging check */
+               if (arch_irq_disabled_regs(regs))
+                       BUG_ON(search_kernel_restart_table(regs->nip));
+#endif
        }
+       if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+               BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
 #endif
 
        booke_restore_dbcr0();
@@ -186,6 +229,7 @@ struct interrupt_nmi_state {
        u8 irq_soft_mask;
        u8 irq_happened;
        u8 ftrace_enabled;
+       u64 softe;
 #endif
 };
 
@@ -211,6 +255,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 #ifdef CONFIG_PPC64
        state->irq_soft_mask = local_paca->irq_soft_mask;
        state->irq_happened = local_paca->irq_happened;
+       state->softe = regs->softe;
 
        /*
         * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
@@ -220,12 +265,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
        local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
        local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
-       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
-                               regs->nip < (unsigned long)__end_interrupts) {
-               // Kernel code running below __end_interrupts is
-               // implicitly soft-masked.
+       if (is_implicit_soft_masked(regs)) {
+               // Adjust regs->softe to reflect the implicit soft-mask,
+               // so arch_irq_disabled_regs(regs) behaves as expected.
                regs->softe = IRQS_ALL_DISABLED;
        }
+       if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+               BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
 
        /* Don't do any per-CPU operations until interrupt state is fixed */
 
@@ -258,11 +304,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
         */
 
 #ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
+       if (arch_irq_disabled_regs(regs)) {
+               unsigned long rst = search_kernel_restart_table(regs->nip);
+               if (rst)
+                       regs_set_return_ip(regs, rst);
+       }
+#endif
+
        if (nmi_disables_ftrace(regs))
                this_cpu_set_ftrace_enabled(state->ftrace_enabled);
 
        /* Check we didn't change the pending interrupt mask. */
        WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+       regs->softe = state->softe;
        local_paca->irq_happened = state->irq_happened;
        local_paca->irq_soft_mask = state->irq_soft_mask;
 #endif
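
regs->softe is now saved in interrupt_nmi_state and restored on exit, so an NMI that forces IRQS_ALL_DISABLED no longer corrupts the interrupted context's view of the soft-mask state. The pairing looks like this (a sketch; the real callers are the interrupt wrapper macros in asm/interrupt.h):

	static void example_nmi(struct pt_regs *regs)
	{
		struct interrupt_nmi_state state;

		interrupt_nmi_enter_prepare(regs, &state); /* snapshots regs->softe */
		/* ... NMI handler body ... */
		interrupt_nmi_exit_prepare(regs, &state);  /* restores regs->softe */
	}
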
index ec96232529ac27f3d0e29a16a7df7b14d3c588a3..1df763002726a8b8eba5d79029d86298922eeeaa 100644 (file)
@@ -5,14 +5,6 @@
 #define KUAP_READ      1
 #define KUAP_WRITE     2
 #define KUAP_READ_WRITE        (KUAP_READ | KUAP_WRITE)
-/*
- * For prevent_user_access() only.
- * Use the current saved situation instead of the to/from/size params.
- * Used on book3s/32
- */
-#define KUAP_CURRENT_READ      4
-#define KUAP_CURRENT_WRITE     8
-#define KUAP_CURRENT           (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kup.h>
@@ -46,10 +38,7 @@ void setup_kuep(bool disabled);
 static inline void setup_kuep(bool disabled) { }
 #endif /* CONFIG_PPC_KUEP */
 
-#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
-void kuep_lock(void);
-void kuep_unlock(void);
-#else
+#ifndef CONFIG_PPC_BOOK3S_32
 static inline void kuep_lock(void) { }
 static inline void kuep_unlock(void) { }
 #endif
@@ -83,8 +72,7 @@ static inline unsigned long kuap_get_and_assert_locked(void)
 #ifndef CONFIG_PPC_BOOK3S_64
 static inline void allow_user_access(void __user *to, const void __user *from,
                                     unsigned long size, unsigned long dir) { }
-static inline void prevent_user_access(void __user *to, const void __user *from,
-                                      unsigned long size, unsigned long dir) { }
+static inline void prevent_user_access(unsigned long dir) { }
 static inline unsigned long prevent_user_access_return(void) { return 0UL; }
 static inline void restore_user_access(unsigned long flags) { }
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -96,53 +84,53 @@ static __always_inline void setup_kup(void)
        setup_kuap(disable_kuap);
 }
 
-static inline void allow_read_from_user(const void __user *from, unsigned long size)
+static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
 {
        barrier_nospec();
        allow_user_access(NULL, from, size, KUAP_READ);
 }
 
-static inline void allow_write_to_user(void __user *to, unsigned long size)
+static __always_inline void allow_write_to_user(void __user *to, unsigned long size)
 {
        allow_user_access(to, NULL, size, KUAP_WRITE);
 }
 
-static inline void allow_read_write_user(void __user *to, const void __user *from,
-                                        unsigned long size)
+static __always_inline void allow_read_write_user(void __user *to, const void __user *from,
+                                                 unsigned long size)
 {
        barrier_nospec();
        allow_user_access(to, from, size, KUAP_READ_WRITE);
 }
 
-static inline void prevent_read_from_user(const void __user *from, unsigned long size)
+static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size)
 {
-       prevent_user_access(NULL, from, size, KUAP_READ);
+       prevent_user_access(KUAP_READ);
 }
 
-static inline void prevent_write_to_user(void __user *to, unsigned long size)
+static __always_inline void prevent_write_to_user(void __user *to, unsigned long size)
 {
-       prevent_user_access(to, NULL, size, KUAP_WRITE);
+       prevent_user_access(KUAP_WRITE);
 }
 
-static inline void prevent_read_write_user(void __user *to, const void __user *from,
-                                          unsigned long size)
+static __always_inline void prevent_read_write_user(void __user *to, const void __user *from,
+                                                   unsigned long size)
 {
-       prevent_user_access(to, from, size, KUAP_READ_WRITE);
+       prevent_user_access(KUAP_READ_WRITE);
 }
 
-static inline void prevent_current_access_user(void)
+static __always_inline void prevent_current_access_user(void)
 {
-       prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT);
+       prevent_user_access(KUAP_READ_WRITE);
 }
 
-static inline void prevent_current_read_from_user(void)
+static __always_inline void prevent_current_read_from_user(void)
 {
-       prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ);
+       prevent_user_access(KUAP_READ);
 }
 
-static inline void prevent_current_write_to_user(void)
+static __always_inline void prevent_current_write_to_user(void)
 {
-       prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE);
+       prevent_user_access(KUAP_WRITE);
 }
 
 #endif /* !__ASSEMBLY__ */
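
The prevent_user_access() family loses its to/from/size arguments along with the KUAP_CURRENT_* flags, keeping only the direction. The open/close window pattern these helpers guard looks like this (a sketch; the helper name is illustrative):

	static int example_read_u32(u32 __user *p, u32 *val)
	{
		int err;

		allow_read_from_user(p, sizeof(*p));
		err = __get_user(*val, p);
		prevent_read_from_user(p, sizeof(*p));

		return err;
	}
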
index 2fca299f7e1929e6f1f1f3dac6b86c1fb1c85c9e..c63105d2c9e7c3cf8fec91f130088a1a6418ea92 100644 (file)
@@ -16,10 +16,10 @@ static inline bool is_kvm_guest(void)
        return static_branch_unlikely(&kvm_guest);
 }
 
-bool check_kvm_guest(void);
+int check_kvm_guest(void);
 #else
 static inline bool is_kvm_guest(void) { return false; }
-static inline bool check_kvm_guest(void) { return false; }
+static inline int check_kvm_guest(void) { return 0; }
 #endif
 
 #endif /* _ASM_POWERPC_KVM_GUEST_H_ */
index ae25e6e72997953a1403ed9c5f02e05592cd8a57..4fe018cc207bbfb3fa3145f28b7ecbef1d62e006 100644 (file)
@@ -16,7 +16,7 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip)
 {
        struct pt_regs *regs = ftrace_get_regs(fregs);
 
-       regs->nip = ip;
+       regs_set_return_ip(regs, ip);
 }
 
 #define klp_get_ftrace_location klp_get_ftrace_location
index 607168b1aef46a936540a43601960ae6b17eb5bb..27016b98ecb2bc24aa82d9392330007307e0313d 100644 (file)
@@ -220,7 +220,7 @@ enum {
 #elif defined(CONFIG_44x)
 #define MMU_FTRS_ALWAYS                MMU_FTR_TYPE_44x
 #endif
-#if defined(CONFIG_E200) || defined(CONFIG_E500)
+#ifdef CONFIG_E500
 #define MMU_FTRS_ALWAYS                MMU_FTR_TYPE_FSL_E
 #endif
 
@@ -324,7 +324,6 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 }
 #endif /* !CONFIG_DEBUG_VM */
 
-#ifdef CONFIG_PPC_RADIX_MMU
 static inline bool radix_enabled(void)
 {
        return mmu_has_feature(MMU_FTR_TYPE_RADIX);
@@ -334,17 +333,6 @@ static inline bool early_radix_enabled(void)
 {
        return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
 }
-#else
-static inline bool radix_enabled(void)
-{
-       return false;
-}
-
-static inline bool early_radix_enabled(void)
-{
-       return false;
-}
-#endif
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static inline bool strict_kernel_rwx_enabled(void)
@@ -357,6 +345,11 @@ static inline bool strict_kernel_rwx_enabled(void)
        return false;
 }
 #endif
+
+static inline bool strict_module_rwx_enabled(void)
+{
+       return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled();
+}
 #endif /* !__ASSEMBLY__ */
 
 /* The kernel uses the constants below to index into the page sizes array.
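
strict_module_rwx_enabled() folds the Kconfig and runtime checks into one predicate, so module-permission code can pair it with the helpers from the new asm/set_memory.h added later in this series. A hedged sketch (names are placeholders):

	/* Sketch: apply RO only when strict module RWX is actually in force. */
	static void example_protect_text(void *text, int npages)
	{
		if (strict_module_rwx_enabled())
			set_memory_ro((unsigned long)text, npages);
	}
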
index db186c539d37efdfaf95edf29c4db64583a5de25..9ba6b585337f90fefc602214b5958b573e0783fd 100644 (file)
@@ -57,7 +57,6 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 static inline void mm_iommu_init(struct mm_struct *mm) { }
 #endif
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
-extern void set_context(unsigned long id, pgd_t *pgd);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 extern void radix__switch_mmu_context(struct mm_struct *prev,
index 295ef563960932cd52e93a599c3c0d730af3dad3..882a0bc7887a57c52a09d5ec89969a391a0bdb50 100644 (file)
@@ -9,10 +9,22 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/jump_label.h>
+
 #include <asm/reg.h>
 
+extern struct static_key_false disable_kuap_key;
+
+static __always_inline bool kuap_is_disabled(void)
+{
+       return static_branch_unlikely(&disable_kuap_key);
+}
+
 static inline void kuap_save_and_lock(struct pt_regs *regs)
 {
+       if (kuap_is_disabled())
+               return;
+
        regs->kuap = mfspr(SPRN_MD_AP);
        mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
@@ -23,12 +35,20 @@ static inline void kuap_user_restore(struct pt_regs *regs)
 
 static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
 {
+       if (kuap_is_disabled())
+               return;
+
        mtspr(SPRN_MD_AP, regs->kuap);
 }
 
 static inline unsigned long kuap_get_and_assert_locked(void)
 {
-       unsigned long kuap = mfspr(SPRN_MD_AP);
+       unsigned long kuap;
+
+       if (kuap_is_disabled())
+               return MD_APG_INIT;
+
+       kuap = mfspr(SPRN_MD_AP);
 
        if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
                WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
@@ -38,25 +58,35 @@ static inline unsigned long kuap_get_and_assert_locked(void)
 
 static inline void kuap_assert_locked(void)
 {
-       if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+       if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled())
                kuap_get_and_assert_locked();
 }
 
 static inline void allow_user_access(void __user *to, const void __user *from,
                                     unsigned long size, unsigned long dir)
 {
+       if (kuap_is_disabled())
+               return;
+
        mtspr(SPRN_MD_AP, MD_APG_INIT);
 }
 
-static inline void prevent_user_access(void __user *to, const void __user *from,
-                                      unsigned long size, unsigned long dir)
+static inline void prevent_user_access(unsigned long dir)
 {
+       if (kuap_is_disabled())
+               return;
+
        mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
 
 static inline unsigned long prevent_user_access_return(void)
 {
-       unsigned long flags = mfspr(SPRN_MD_AP);
+       unsigned long flags;
+
+       if (kuap_is_disabled())
+               return MD_APG_INIT;
+
+       flags = mfspr(SPRN_MD_AP);
 
        mtspr(SPRN_MD_AP, MD_APG_KUAP);
 
@@ -65,12 +95,18 @@ static inline unsigned long prevent_user_access_return(void)
 
 static inline void restore_user_access(unsigned long flags)
 {
+       if (kuap_is_disabled())
+               return;
+
        mtspr(SPRN_MD_AP, flags);
 }
 
 static inline bool
 bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
+       if (kuap_is_disabled())
+               return false;
+
        return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
 }
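
Every 8xx KUAP helper now bails out early through the disable_kuap_key static branch, so booting with KUAP disabled costs one patched-out branch instead of SPR accesses. A sketch of how setup code would flip the key (an assumption for illustration; the real setup lives in the 8xx mm code):

	void __init example_setup_kuap(bool disabled)
	{
		if (disabled) {
			static_branch_enable(&disable_kuap_key);
			return;
		}
		mtspr(SPRN_MD_AP, MD_APG_KUAP);	/* lock out userspace access */
	}
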
 
index 2d92a39d8f2e808ffd8ace7afc0dcc14e8743d6b..43ceca128531f965d2850b61bd1b8032be66d4f4 100644 (file)
@@ -113,6 +113,7 @@ typedef struct {
 
 /* patch sites */
 extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I;
+extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep;
 
 #endif /* !__ASSEMBLY__ */
 
index ec18ac818e3a6f7bbec3e858b7c4c94d971ce36a..dc05a862e72a1ce15dc9893ef830ff504ec8253c 100644 (file)
@@ -149,11 +149,9 @@ struct paca_struct {
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_BOOK3S
-       mm_context_id_t mm_ctx_id;
 #ifdef CONFIG_PPC_MM_SLICES
        unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
        unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
-       unsigned long mm_ctx_slb_addr_limit;
 #else
        u16 mm_ctx_user_psize;
        u16 mm_ctx_sllp;
@@ -167,8 +165,15 @@ struct paca_struct {
        u64 kstack;                     /* Saved Kernel stack addr */
        u64 saved_r1;                   /* r1 save for RTAS calls or PM or EE=0 */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
+#ifdef CONFIG_PPC64
+       u64 exit_save_r1;               /* Syscall/interrupt R1 save */
+#endif
 #ifdef CONFIG_PPC_BOOK3E
        u16 trap_save;                  /* Used when bad stack is encountered */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       u8 hsrr_valid;                  /* HSRRs set for HRFID */
+       u8 srr_valid;                   /* SRRs set for RFID */
 #endif
        u8 irq_soft_mask;               /* mask for irq soft masking */
        u8 irq_happened;                /* irq happened while soft-disabled */
index 5969743719bc6db919d0835d179c77d957d6c400..d564d0ecd4cd7ed82c92b3cf76f4265a5619f24c 100644 (file)
@@ -41,6 +41,10 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
+#ifndef MAX_PTRS_PER_PGD
+#define MAX_PTRS_PER_PGD PTRS_PER_PGD
+#endif
+
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)            pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
@@ -72,6 +76,7 @@ extern unsigned long empty_zero_page[];
 extern pgd_t swapper_pg_dir[];
 
 extern void paging_init(void);
+void poking_init(void);
 
 extern unsigned long ioremap_bot;
 
index ac41776661e96300f5f2d66c1094f9800db2eb63..bede76dd3db78e73c8cad4cd4dc6f8c4cff3e0ed 100644 (file)
 #define        __REGA0_R30     30
 #define        __REGA0_R31     31
 
+/* For use with PPC_RAW_() macros */
+#define        _R0     0
+#define        _R1     1
+#define        _R2     2
+#define        _R3     3
+#define        _R4     4
+#define        _R5     5
+#define        _R6     6
+#define        _R7     7
+#define        _R8     8
+#define        _R9     9
+#define        _R10    10
+#define        _R11    11
+#define        _R12    12
+#define        _R13    13
+#define        _R14    14
+#define        _R15    15
+#define        _R16    16
+#define        _R17    17
+#define        _R18    18
+#define        _R19    19
+#define        _R20    20
+#define        _R21    21
+#define        _R22    22
+#define        _R23    23
+#define        _R24    24
+#define        _R25    25
+#define        _R26    26
+#define        _R27    27
+#define        _R28    28
+#define        _R29    29
+#define        _R30    30
+#define        _R31    31
+
 #define IMM_L(i)               ((uintptr_t)(i) & 0xffff)
 #define IMM_DS(i)              ((uintptr_t)(i) & 0xfffc)
 #define IMM_DQ(i)              ((uintptr_t)(i) & 0xfff0)
 #define PPC_INST_LWSYNC                        0x7c2004ac
 #define PPC_INST_SYNC                  0x7c0004ac
 #define PPC_INST_SYNC_MASK             0xfc0007fe
-#define PPC_INST_ISYNC                 0x4c00012c
 #define PPC_INST_MCRXR                 0x7c000400
 #define PPC_INST_MCRXR_MASK            0xfc0007fe
 #define PPC_INST_MFSPR_PVR             0x7c1f42a6
 #define PPC_INST_MFSPR_PVR_MASK                0xfc1ffffe
 #define PPC_INST_MTMSRD                        0x7c000164
-#define PPC_INST_NOP                   0x60000000
 #define PPC_INST_POPCNTB               0x7c0000f4
 #define PPC_INST_POPCNTB_MASK          0xfc0007fe
 #define PPC_INST_RFEBB                 0x4c000124
 #define PPC_INST_MFSPR_DSCR_USER_MASK  0xfc1ffffe
 #define PPC_INST_MTSPR_DSCR_USER       0x7c0303a6
 #define PPC_INST_MTSPR_DSCR_USER_MASK  0xfc1ffffe
-#define PPC_INST_SC                    0x44000002
 #define PPC_INST_STRING                        0x7c00042a
 #define PPC_INST_STRING_MASK           0xfc0007fe
 #define PPC_INST_STRING_GEN_MASK       0xfc00067e
+#define PPC_INST_SETB                  0x7c000100
 #define PPC_INST_STSWI                 0x7c0005aa
 #define PPC_INST_STSWX                 0x7c00052a
 #define PPC_INST_TRECHKPT              0x7c0007dd
 #define PPC_INST_TSR                   0x7c0005dd
 #define PPC_INST_LD                    0xe8000000
 #define PPC_INST_STD                   0xf8000000
-#define PPC_INST_MFLR                  0x7c0802a6
-#define PPC_INST_MTCTR                 0x7c0903a6
-#define PPC_INST_ADDI                  0x38000000
 #define PPC_INST_ADDIS                 0x3c000000
 #define PPC_INST_ADD                   0x7c000214
-#define PPC_INST_BLR                   0x4e800020
-#define PPC_INST_BCTR                  0x4e800420
-#define PPC_INST_BCTRL                 0x4e800421
 #define PPC_INST_DIVD                  0x7c0003d2
-#define PPC_INST_RLDICR                        0x78000004
-#define PPC_INST_ORI                   0x60000000
-#define PPC_INST_ORIS                  0x64000000
 #define PPC_INST_BRANCH                        0x48000000
 #define PPC_INST_BL                    0x48000001
 #define PPC_INST_BRANCH_COND           0x40800000
 #define PPC_LO(v)      ((v) & 0xffff)
 #define PPC_HI(v)      (((v) >> 16) & 0xffff)
 #define PPC_HA(v)      PPC_HI((v) + 0x8000)
+#define PPC_HIGHER(v)  (((v) >> 32) & 0xffff)
+#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff)
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
 #define PPC_RAW_STBCIX(s, a, b)                (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
 #define PPC_RAW_DCBFPS(a, b)           (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
 #define PPC_RAW_DCBSTPS(a, b)          (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+#define PPC_RAW_SC()                   (0x44000002)
+#define PPC_RAW_SYNC()                 (0x7c0004ac)
+#define PPC_RAW_ISYNC()                        (0x4c00012c)
+
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
 #define PPC_RAW_STXVP(xsp, a, i)       (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i))
 #define PPC_RAW_LXVPX(xtp, a, b)       (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_STXVPX(xsp, a, b)      (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_RAW_PLXVP(xtp, i, a, pr) \
-       ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i)))
-#define PPC_RAW_PSTXVP(xsp, i, a, pr) \
-       ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i)))
+#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_PSTXVP_P(xsp, i, a, pr)        (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PSTXVP_S(xsp, i, a, pr)        (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))
 #define PPC_RAW_NAP                    (0x4c000364)
 #define PPC_RAW_SLEEP                  (0x4c0003a4)
 #define PPC_RAW_WINKLE                 (0x4c0003e4)
 #define PPC_RAW_ADD_DOT(t, a, b)       (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_ADDC(t, a, b)          (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_ADDC_DOT(t, a, b)      (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
-#define PPC_RAW_NOP()                  (PPC_INST_NOP)
-#define PPC_RAW_BLR()                  (PPC_INST_BLR)
+#define PPC_RAW_NOP()                  PPC_RAW_ORI(0, 0, 0)
+#define PPC_RAW_BLR()                  (0x4e800020)
 #define PPC_RAW_BLRL()                 (0x4e800021)
 #define PPC_RAW_MTLR(r)                        (0x7c0803a6 | ___PPC_RT(r))
-#define PPC_RAW_MFLR(t)                        (PPC_INST_MFLR | ___PPC_RT(t))
-#define PPC_RAW_BCTR()                 (PPC_INST_BCTR)
-#define PPC_RAW_MTCTR(r)               (PPC_INST_MTCTR | ___PPC_RT(r))
-#define PPC_RAW_ADDI(d, a, i)          (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_MFLR(t)                        (0x7c0802a6 | ___PPC_RT(t))
+#define PPC_RAW_BCTR()                 (0x4e800420)
+#define PPC_RAW_BCTRL()                        (0x4e800421)
+#define PPC_RAW_MTCTR(r)               (0x7c0903a6 | ___PPC_RT(r))
+#define PPC_RAW_ADDI(d, a, i)          (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LI(r, i)               PPC_RAW_ADDI(r, 0, i)
-#define PPC_RAW_ADDIS(d, a, i)         (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIS(d, a, i)         (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_ADDIC(d, a, i)         (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_ADDIC_DOT(d, a, i)     (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LIS(r, i)              PPC_RAW_ADDIS(r, 0, i)
 #define PPC_RAW_AND_DOT(d, a, b)       (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_OR(d, a, b)            (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_MR(d, a)               PPC_RAW_OR(d, a, a)
-#define PPC_RAW_ORI(d, a, i)           (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
-#define PPC_RAW_ORIS(d, a, i)          (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORI(d, a, i)           (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORIS(d, a, i)          (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_NOR(d, a, b)           (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XOR(d, a, b)           (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XORI(d, a, i)          (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
                                        (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
 #define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
 #define PPC_RAW_RLDICL(d, a, i, mb)     (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb))
-#define PPC_RAW_RLDICR(d, a, i, me)     (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
+#define PPC_RAW_RLDICR(d, a, i, me)     (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
 
 /* slwi = rlwinm Rx, Ry, n, 0, 31-n */
 #define PPC_RAW_SLWI(d, a, i)          PPC_RAW_RLWINM(d, a, i, 0, 31-(i))
 #define PPC_RAW_NEG(d, a)              (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
 
 #define PPC_RAW_MFSPR(d, spr)          (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr))
+#define PPC_RAW_MTSPR(spr, d)          (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
+#define PPC_RAW_EIEIO()                        (0x7c0006ac)
 
 /* Deal with instructions that older assemblers aren't aware of */
 #define        PPC_BCCTR_FLUSH         stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
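
With the duplicated PPC_INST_* constants folded into the PPC_RAW_*() macros and the _Rn register names added above, instruction words are composed in one step. A sketch using only macros defined in this header:

	/* Sketch: emit li r3,1 ; addi r3,r3,4 ; blr into a buffer. */
	static void example_emit(u32 *buf)
	{
		buf[0] = PPC_RAW_LI(_R3, 1);
		buf[1] = PPC_RAW_ADDI(_R3, _R3, 4);
		buf[2] = PPC_RAW_BLR();
	}
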
index d6739d700f0a3bd75afc834fe42c0e7a7c30b677..116c1519728a74fd83db6fc99c69a9b97ccfb522 100644 (file)
@@ -762,6 +762,21 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
        stringify_in_c(.long (_target) - . ;)   \
        stringify_in_c(.previous)
 
+#define SOFT_MASK_TABLE(_start, _end)          \
+       stringify_in_c(.section __soft_mask_table,"a";)\
+       stringify_in_c(.balign 8;)              \
+       stringify_in_c(.llong (_start);)        \
+       stringify_in_c(.llong (_end);)          \
+       stringify_in_c(.previous)
+
+#define RESTART_TABLE(_start, _end, _target)   \
+       stringify_in_c(.section __restart_table,"a";)\
+       stringify_in_c(.balign 8;)              \
+       stringify_in_c(.llong (_start);)        \
+       stringify_in_c(.llong (_end);)          \
+       stringify_in_c(.llong (_target);)       \
+       stringify_in_c(.previous)
+
 #ifdef CONFIG_PPC_FSL_BOOK3E
 #define BTB_FLUSH(reg)                 \
        lis reg,BUCSR_INIT@h;           \
index 84dd1addd4344f3ee8a5de61e59e873fa050f0a7..c5d984700d241a976e9d1039d16a410de9875293 100644 (file)
@@ -34,14 +34,14 @@ typedef u32 ppc_opcode_t;
 /* Enable single stepping for the current task */
 static inline void enable_single_step(struct pt_regs *regs)
 {
-       regs->msr |= MSR_SINGLESTEP;
+       regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
        /*
         * We turn off Critical Input Exception (CE) to ensure that the single
         * step will be for the instruction we have the probe on; if we don't,
         * it is possible we'd get the single step reported for CE.
         */
-       regs->msr &= ~MSR_CE;
+       regs_set_return_msr(regs, regs->msr & ~MSR_CE);
        mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
 #ifdef CONFIG_PPC_47x
        isync();
index 7bf8a15af22469dca1ff15e340639b6816f83922..f348e564f7dd5407737aad61e785fb023c2a51f8 100644 (file)
@@ -276,7 +276,15 @@ struct thread_struct {
 #define SPEFSCR_INIT
 #endif
 
-#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+#define INIT_THREAD { \
+       .ksp = INIT_SP, \
+       .pgdir = swapper_pg_dir, \
+       .kuap = ~0UL, /* KUAP_NONE */ \
+       .fpexc_mode = MSR_FE0 | MSR_FE1, \
+       SPEFSCR_INIT \
+}
+#elif defined(CONFIG_PPC32)
 #define INIT_THREAD { \
        .ksp = INIT_SP, \
        .pgdir = swapper_pg_dir, \
@@ -339,17 +347,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 
 #define spin_end()     HMT_medium()
 
-#define spin_until_cond(cond)                                  \
-do {                                                           \
-       if (unlikely(!(cond))) {                                \
-               spin_begin();                                   \
-               do {                                            \
-                       spin_cpu_relax();                       \
-               } while (!(cond));                              \
-               spin_end();                                     \
-       }                                                       \
-} while (0)
-
 #endif
 
 /* Check that a certain kernel stack pointer is valid in task_struct p */
index e646c7f218bc8c2c2766ce02216fc634b9813e33..8a0d8fb3532863c1b3fb590fe5f203a776ad7306 100644 (file)
@@ -71,6 +71,7 @@ struct ps3_dma_region_ops;
  * @bus_addr: The 'translated' bus address of the region.
  * @len: The length in bytes of the region.
  * @offset: The offset from the start of memory of the region.
+ * @dma_mask: Device dma_mask.
  * @ioid: The IOID of the device that owns this region.
  * @chunk_list: Opaque variable used by the ioc page manager.
  * @region_ops: struct ps3_dma_region_ops - dma region operations
@@ -85,6 +86,7 @@ struct ps3_dma_region {
        enum ps3_dma_region_type region_type;
        unsigned long len;
        unsigned long offset;
+       u64 dma_mask;
 
        /* driver variables  (set by ps3_dma_region_create) */
        unsigned long bus_addr;
@@ -232,7 +234,7 @@ enum lv1_result {
 
 static inline const char* ps3_result(int result)
 {
-#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT)
+#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT)
        switch (result) {
        case LV1_SUCCESS:
                return "LV1_SUCCESS (0)";
index b476a685f066ec77f080f923a1b25fdfcb8ca658..3e5d470a6155a7226e456fa7bed81c23cb6a892e 100644 (file)
@@ -48,11 +48,12 @@ struct pt_regs
                        unsigned long result;
                };
        };
-
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP)
        union {
                struct {
 #ifdef CONFIG_PPC64
                        unsigned long ppr;
+                       unsigned long exit_result;
 #endif
                        union {
 #ifdef CONFIG_PPC_KUAP
@@ -68,6 +69,7 @@ struct pt_regs
                };
                unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */
        };
+#endif
 };
 #endif
 
@@ -122,6 +124,41 @@ struct pt_regs
 #endif /* __powerpc64__ */
 
 #ifndef __ASSEMBLY__
+#include <asm/paca.h>
+
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *regs);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
+static inline void set_return_regs_changed(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+       local_paca->hsrr_valid = 0;
+       local_paca->srr_valid = 0;
+#endif
+}
+
+static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
+{
+       regs->nip = ip;
+       set_return_regs_changed();
+}
+
+static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
+{
+       regs->msr = msr;
+       set_return_regs_changed();
+}
+
+static inline void regs_add_return_ip(struct pt_regs *regs, long offset)
+{
+       regs_set_return_ip(regs, regs->nip + offset);
+}
 
 static inline unsigned long instruction_pointer(struct pt_regs *regs)
 {
@@ -131,7 +168,7 @@ static inline unsigned long instruction_pointer(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
                unsigned long val)
 {
-       regs->nip = val;
+       regs_set_return_ip(regs, val);
 }
 
 static inline unsigned long user_stack_pointer(struct pt_regs *regs)
@@ -144,15 +181,6 @@ static inline unsigned long frame_pointer(struct pt_regs *regs)
        return 0;
 }
 
-#ifdef CONFIG_SMP
-extern unsigned long profile_pc(struct pt_regs *regs);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
-
-long do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_leave(struct pt_regs *regs);
-
 #ifdef __powerpc64__
 #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
 #else
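
All writers of regs->nip and regs->msr now go through these accessors so that, on Book3S-64, the cached SRR0/SRR1 validity flags in the PACA are invalidated whenever the return state changes. A sketch of the intended use (the function name is illustrative):

	/* Sketch: advance past an emulated instruction. */
	static void example_skip_insn(struct pt_regs *regs)
	{
		regs_add_return_ip(regs, 4);	/* not: regs->nip += 4 */
	}
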
index 7c81d3e563b28e94b8d5e1e4e56fdef0f063b302..be85cf156a1f5085b41d942c82807b61ac7a668a 100644 (file)
 #define SPRN_PMMAR     0x356   /* Power Management Memory Activity Register */
 #define SPRN_PSSCR     0x357   /* Processor Stop Status and Control Register (ISA 3.0) */
 #define SPRN_PSSCR_PR  0x337   /* PSSCR ISA 3.0, privileged mode access */
+#define SPRN_TRIG2     0x372
 #define SPRN_PMCR      0x374   /* Power Management Control Register */
 #define SPRN_RWMR      0x375   /* Region-Weighting Mode Register */
 
@@ -1435,8 +1436,6 @@ static inline void mtsr(u32 val, u32 idx)
 }
 #endif
 
-#define proc_trap()    asm volatile("trap")
-
 extern unsigned long current_stack_frame(void);
 
 register unsigned long current_stack_pointer asm("r1");
@@ -1447,16 +1446,6 @@ extern void scom970_write(unsigned int address, unsigned long value);
 struct pt_regs;
 
 extern void ppc_save_regs(struct pt_regs *regs);
-
-static inline void update_power8_hid0(unsigned long hid0)
-{
-       /*
-        *  The HID0 update on Power8 should at the very least be
-        *  preceded by a SYNC instruction followed by an ISYNC
-        *  instruction
-        */
-       asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
-}
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_REG_H */
index b774a4477d5f1a256895c1778af1e7372fb7eeb9..792eefaf230b80b52aaac7d54ae5ca71e2ced11c 100644 (file)
@@ -92,6 +92,9 @@ static inline bool security_ftr_enabled(u64 feature)
 // The L1-D cache should be flushed after user accesses from the kernel
 #define SEC_FTR_L1D_FLUSH_UACCESS      0x0000000000008000ull
 
+// The STF flush should be executed on privilege state switch
+#define SEC_FTR_STF_BARRIER            0x0000000000010000ull
+
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
        (SEC_FTR_L1D_FLUSH_HV | \
@@ -99,6 +102,7 @@ static inline bool security_ftr_enabled(u64 feature)
         SEC_FTR_BNDS_CHK_SPEC_BAR | \
         SEC_FTR_L1D_FLUSH_ENTRY | \
         SEC_FTR_L1D_FLUSH_UACCESS | \
+        SEC_FTR_STF_BARRIER | \
         SEC_FTR_FAVOUR_SECURITY)
 
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
new file mode 100644 (file)
index 0000000..b040094
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+#define SET_MEMORY_RO  0
+#define SET_MEMORY_RW  1
+#define SET_MEMORY_NX  2
+#define SET_MEMORY_X   3
+
+int change_memory_attr(unsigned long addr, int numpages, long action);
+
+static inline int set_memory_ro(unsigned long addr, int numpages)
+{
+       return change_memory_attr(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+       return change_memory_attr(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+       return change_memory_attr(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+       return change_memory_attr(addr, numpages, SET_MEMORY_X);
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot);
+
+#endif
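
These helpers back the new ARCH_HAS_SET_MEMORY support. A hedged usage sketch (the function name, address, and page count are placeholders):

	/* Sketch: seal a freshly written code page as read-only + executable. */
	static int example_seal(unsigned long addr, int npages)
	{
		int err = set_memory_ro(addr, npages);

		return err ? err : set_memory_x(addr, npages);
	}
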
index e89bfebd4e00e06e4d9833b23b9366442fa65e30..6c1a7d217d1a28bcd8b227b3ef57860b9e6a5477 100644 (file)
@@ -10,7 +10,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
 extern unsigned int rtas_data;
 extern unsigned long long memory_limit;
 extern bool init_mem_is_free;
-extern unsigned long klimit;
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 struct device_node;
index 972ed0df154d60122353c0d82a4d33103c88fe8d..1df867c2e054e55d1ee1dea381a87d5854d43fd4 100644 (file)
@@ -13,12 +13,11 @@ struct pt_regs;
  * we don't allow putting a breakpoint on an mtmsrd instruction.
  * Similarly we don't allow breakpoints on rfid instructions.
  * These macros tell us if an instruction is a mtmsrd or rfid.
- * Note that IS_MTMSRD returns true for both an mtmsr (32-bit)
- * and an mtmsrd (64-bit).
+ * Note that these return true for both mtmsr/rfi (32-bit)
+ * and mtmsrd/rfid (64-bit).
  */
 #define IS_MTMSRD(instr)       ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124)
-#define IS_RFID(instr)         ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024)
-#define IS_RFI(instr)          ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064)
+#define IS_RFID(instr)         ((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024)
 
 enum instruction_type {
        COMPUTE,                /* arith/logical/CR op, etc. */
index 5bf65f5d44a99d61aa5f02dade46ab4e479f9527..fe683371336fefd16a0f65ca9b383107ed48ddcf 100644 (file)
@@ -24,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t;
 
 struct arch_uprobe {
        union {
-               struct ppc_inst insn;
-               struct ppc_inst ixol;
+               u32 insn[2];
+               u32 ixol[2];
        };
 };
 
index e33f80b0ea81952525a68d5cc8587517210730b3..57573d9c1e09199688b2e385464150f670173536 100644 (file)
@@ -5,8 +5,10 @@
 
 #ifndef _ASM_POWERPC_VAS_H
 #define _ASM_POWERPC_VAS_H
-
-struct vas_window;
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+#include <uapi/asm/vas-api.h>
 
 /*
  * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
@@ -48,6 +50,64 @@ enum vas_cop_type {
        VAS_COP_TYPE_MAX,
 };
 
+/*
+ * User space VAS windows are opened by tasks and take references
+ * to pid and mm until windows are closed.
+ * Stores pid, mm, and tgid for each window.
+ */
+struct vas_user_win_ref {
+       struct pid *pid;        /* PID of owner */
+       struct pid *tgid;       /* Thread group ID of owner */
+       struct mm_struct *mm;   /* Linux process mm_struct */
+};
+
+/*
+ * Common VAS window struct on PowerNV and PowerVM
+ */
+struct vas_window {
+       u32 winid;
+       u32 wcreds_max; /* Window credits */
+       enum vas_cop_type cop;
+       struct vas_user_win_ref task_ref;
+       char *dbgname;
+       struct dentry *dbgdir;
+};
+
+/*
+ * User space window operations used for PowerNV and PowerVM
+ */
+struct vas_user_win_ops {
+       struct vas_window * (*open_win)(int vas_id, u64 flags,
+                               enum vas_cop_type);
+       u64 (*paste_addr)(struct vas_window *);
+       int (*close_win)(struct vas_window *);
+};
+
+static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref)
+{
+       /* Drop references to pid, tgid, and mm */
+       put_pid(ref->pid);
+       put_pid(ref->tgid);
+       if (ref->mm)
+               mmdrop(ref->mm);
+}
+
+static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+{
+       mm_context_add_vas_window(ref->mm);
+       /*
+        * Even a process that has no foreign real address mapping can
+        * use an unpaired COPY instruction (to no real effect). Issue
+        * CP_ABORT to clear any pending COPY and prevent a covert
+        * channel.
+        *
+        * __switch_to() will issue CP_ABORT on future context switches
+        * if process / thread has any open VAS window (Use
+        * current->mm->context.vas_windows).
+        */
+       asm volatile(PPC_CP_ABORT);
+}
+
 /*
  * Receive window attributes specified by the (in-kernel) owner of window.
  */
@@ -100,6 +160,7 @@ struct vas_tx_win_attr {
        bool rx_win_ord_mode;
 };
 
+#ifdef CONFIG_PPC_POWERNV
 /*
  * Helper to map a chip id to VAS id.
  * For POWER9, this is a 1:1 mapping. In the future this may be a 1:N
@@ -162,6 +223,43 @@ int vas_copy_crb(void *crb, int offset);
  */
 int vas_paste_crb(struct vas_window *win, int offset, bool re);
 
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+                            const char *name);
+void vas_unregister_api_powernv(void);
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* VAS Capabilities */
+#define VAS_GZIP_QOS_FEAT      0x1
+#define VAS_GZIP_DEF_FEAT      0x2
+#define VAS_GZIP_QOS_FEAT_BIT  PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */
+#define VAS_GZIP_DEF_FEAT_BIT  PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */
+
+/* NX Capabilities */
+#define VAS_NX_GZIP_FEAT       0x1
+#define VAS_NX_GZIP_FEAT_BIT   PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */
+
+/*
+ * These structs are used to retrieve overall VAS capabilities that
+ * the hypervisor provides.
+ */
+struct hv_vas_all_caps {
+       __be64  descriptor;
+       __be64  feat_type;
+} __packed __aligned(0x1000);
+
+struct vas_all_caps {
+       u64     descriptor;
+       u64     feat_type;
+};
+
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result);
+int vas_register_api_pseries(struct module *mod,
+                            enum vas_cop_type cop_type, const char *name);
+void vas_unregister_api_pseries(void);
+#endif
+
 /*
  * Register / unregister coprocessor type to VAS API which will be exported
  * to user space. Applications can use this API to open / close window
@@ -171,7 +269,12 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re);
  * used for others in future.
  */
 int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
-                               const char *name);
+                           const char *name,
+                           const struct vas_user_win_ops *vops);
 void vas_unregister_coproc_api(void);
 
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref);
+void vas_update_csb(struct coprocessor_request_block *crb,
+                   struct vas_user_win_ref *task_ref);
+void vas_dump_crb(struct coprocessor_request_block *crb);
 #endif /* __ASM_POWERPC_VAS_H */
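
vas_register_coproc_api() now takes a vas_user_win_ops so the common user-space VAS API can drive either the PowerNV or the PowerVM backend. A registration sketch; all example_* names are hypothetical callbacks a driver would supply, and VAS_COP_TYPE_GZIP is assumed from the enum truncated above:

	#include <linux/module.h>
	#include <asm/vas.h>

	static const struct vas_user_win_ops example_vops = {
		.open_win   = example_open_win,
		.paste_addr = example_paste_addr,
		.close_win  = example_close_win,
	};

	static int __init example_init(void)
	{
		return vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP,
					       "example-gzip", &example_vops);
	}
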
index 8e903b3f9c24902cf3548704cd5dda4af3687ba2..d9cf192368ad2c4f9154511f68d0af02a867f9c6 100644 (file)
@@ -65,8 +65,12 @@ struct icp_ops {
 
 extern const struct icp_ops *icp_ops;
 
+#ifdef CONFIG_PPC_ICS_NATIVE
 /* Native ICS */
 extern int ics_native_init(void);
+#else
+static inline int ics_native_init(void) { return -ENODEV; }
+#endif
 
 /* RTAS ICS */
 #ifdef CONFIG_PPC_ICS_RTAS
index 50ef95e2f5b185dde90bd27991a2a4b5beda6347..82488b1e7276e6560c423a49837a6899c2c415d0 100644 (file)
@@ -77,6 +77,9 @@
 /* Indicate that the 'dimm_fuel_gauge' field is valid */
 #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
 
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
 /*
  * Struct exchanged between kernel & ndctl for PAPR_PDSM_HEALTH
  * Various flags indicate the health status of the dimm.
@@ -105,6 +108,9 @@ struct nd_papr_pdsm_health {
 
                        /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
                        __u16 dimm_fuel_gauge;
+
+                       /* Extension flag PDSM_DIMM_DSC_VALID */
+                       __u64 dimm_dsc;
                };
                __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
        };
index ebd4b242478597b23a0d0a93d5089f52e657b658..7c81301ecdba5601294789743deb01ab79684c0f 100644 (file)
 #define VAS_MAGIC      'v'
 #define VAS_TX_WIN_OPEN        _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr)
 
+/* Flags to VAS TX open window ioctl */
+/* To allocate a window with QoS credit, otherwise use default credit */
+#define VAS_TX_WIN_FLAG_QOS_CREDIT     0x0000000000000001
+
 struct vas_tx_win_open_attr {
        __u32   version;
        __s16   vas_id; /* specific instance of vas or -1 for default */
        __u16   reserved1;
-       __u64   flags;  /* Future use */
+       __u64   flags;
        __u64   reserved2[6];
 };
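
From userspace, the new flag requests a QoS credit at window-open time. A sketch of the ioctl call; the version value and the assumption that fd is an open NX device descriptor (e.g. /dev/crypto/nx-gzip) are for illustration only:

	#include <sys/ioctl.h>
	#include <asm/vas-api.h>

	struct vas_tx_win_open_attr attr = {
		.version = 1,
		.vas_id  = -1,		/* default VAS instance */
		.flags   = VAS_TX_WIN_FLAG_QOS_CREDIT,
	};
	int rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
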
 
index aa267d173ded0ce2bd7ad9123bdc9e8d69d1e3b6..a47eefa09bcb2fb56b65263885bb59647cf9639c 100644 (file)
@@ -86,11 +86,7 @@ int main(void)
        OFFSET(PACA_CANARY, paca_struct, canary);
 #endif
 #endif
-       OFFSET(MMCONTEXTID, mm_struct, context.id);
-#ifdef CONFIG_PPC64
-       DEFINE(SIGSEGV, SIGSEGV);
-       DEFINE(NMI_MASK, NMI_MASK);
-#else
+#ifdef CONFIG_PPC32
 #ifdef CONFIG_PPC_RTAS
        OFFSET(RTAS_SP, thread_struct, rtas_sp);
 #endif
@@ -119,7 +115,6 @@ int main(void)
 #ifdef CONFIG_ALTIVEC
        OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
        OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
-       OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
        OFFSET(THREAD_USED_VR, thread_struct, used_vr);
        OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
        OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
@@ -150,22 +145,15 @@ int main(void)
 #ifdef CONFIG_SPE
        OFFSET(THREAD_EVR0, thread_struct, evr[0]);
        OFFSET(THREAD_ACC, thread_struct, acc);
-       OFFSET(THREAD_SPEFSCR, thread_struct, spefscr);
        OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-       OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0);
-#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
 #endif
 #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
        OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
 #endif
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
-       OFFSET(KUAP, thread_struct, kuap);
-#endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -185,19 +173,12 @@ int main(void)
               sizeof(struct pt_regs) + 16);
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-       OFFSET(TI_FLAGS, thread_info, flags);
        OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
-       OFFSET(TI_PREEMPT, thread_info, preempt_count);
 
 #ifdef CONFIG_PPC64
        OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
        OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
-       OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page);
-       OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size);
-       OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size);
-       OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page);
        /* paca */
-       DEFINE(PACA_SIZE, sizeof(struct paca_struct));
        OFFSET(PACAPACAINDEX, paca_struct, paca_index);
        OFFSET(PACAPROCSTART, paca_struct, cpu_start);
        OFFSET(PACAKSAVE, paca_struct, kstack);
@@ -209,18 +190,13 @@ int main(void)
        OFFSET(PACATOC, paca_struct, kernel_toc);
        OFFSET(PACAKBASE, paca_struct, kernelbase);
        OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+       OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+       OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
        OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
        OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
        OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
-#ifdef CONFIG_PPC_BOOK3S
-       OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
-#ifdef CONFIG_PPC_MM_SLICES
-       OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
-       OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
-       OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
-       DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
-#endif /* CONFIG_PPC_MM_SLICES */
-#endif
 
 #ifdef CONFIG_PPC_BOOK3E
        OFFSET(PACAPGD, paca_struct, pgd);
@@ -241,21 +217,9 @@ int main(void)
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_BOOK3S_64
-       OFFSET(PACASLBCACHE, paca_struct, slb_cache);
-       OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
-       OFFSET(PACASTABRR, paca_struct, stab_rr);
-       OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
-#ifdef CONFIG_PPC_MM_SLICES
-       OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
-#else
-       OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp);
-#endif /* CONFIG_PPC_MM_SLICES */
        OFFSET(PACA_EXGEN, paca_struct, exgen);
        OFFSET(PACA_EXMC, paca_struct, exmc);
        OFFSET(PACA_EXNMI, paca_struct, exnmi);
-#ifdef CONFIG_PPC_PSERIES
-       OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
-#endif
        OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
        OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
        OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
@@ -264,9 +228,7 @@ int main(void)
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
 #endif
-       OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
        OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
-       OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
 #endif /* CONFIG_PPC_BOOK3S_64 */
        OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -282,6 +244,9 @@ int main(void)
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
        OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
+#ifdef CONFIG_PPC64
+       OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#endif
 #ifdef CONFIG_PPC_BOOK3E
        OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
 #endif
@@ -343,10 +308,6 @@ int main(void)
        STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
        STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
 #endif
-#ifdef CONFIG_PPC_KUAP
-       STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
-#endif
-
 
 #if defined(CONFIG_PPC32)
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -368,10 +329,6 @@ int main(void)
 #endif
 #endif
 
-#ifndef CONFIG_PPC64
-       OFFSET(MM_PGD, mm_struct, pgd);
-#endif /* ! CONFIG_PPC64 */
-
        /* About the CPU features table */
        OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
        OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
@@ -404,13 +361,6 @@ int main(void)
        DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-       DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
-#else
-       DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
-#endif
-       DEFINE(PTE_SIZE, sizeof(pte_t));
-
 #ifdef CONFIG_KVM
        OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
        OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
@@ -482,11 +432,9 @@ int main(void)
        OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
        OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
        OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
-       OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits);
        OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
        OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
        OFFSET(KVM_RADIX, kvm, arch.radix);
-       OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
        OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
        OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
        OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
@@ -514,7 +462,6 @@ int main(void)
        OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
        OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
        OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
-       OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
        OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
        OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
        OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
@@ -525,7 +472,6 @@ int main(void)
        OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
        OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
        OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
-       OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
        OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
        OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
        OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
@@ -645,10 +591,8 @@ int main(void)
        HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
        HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
        HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
-       HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
        HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
        HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
-       HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
        HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_PTID, ptid);
        HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
@@ -756,7 +700,6 @@ int main(void)
 #endif
 
        DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
-       DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
 
 #ifdef CONFIG_PPC_8xx
        DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
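
For readers unfamiliar with this file: asm-offsets.c is never linked into the kernel. Each OFFSET()/DEFINE() invocation makes the compiler embed the constant in its assembly output, which Kbuild scrapes into a generated header that assembly sources include, so the large deletions above simply drop constants no longer referenced from assembly. A reduced standalone sketch of the mechanism (the macro bodies mirror include/linux/kbuild.h; the struct is hypothetical):

    #include <stddef.h>

    /* Emit "->SYM value" markers into the compiler's .s output. */
    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
    #define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem))

    struct demo_struct {
            long a;
            long b;                 /* offsetof == 8 on LP64 */
    };

    int main(void)
    {
            OFFSET(DEMO_B, demo_struct, b); /* becomes "->DEMO_B 8 ..." in the .s */
            return 0;
    }

Compiling this with "cc -S" and grepping the output for "->" shows the markers a build script would turn into #defines.
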
index 735e893373985e8361d527a3af99b5b38514e155..5693e1c67c2b469af88344ccc50ff4079f4db1d7 100644 (file)
@@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void)
 
 static void __init create_trampoline(unsigned long addr)
 {
-       struct ppc_inst *p = (struct ppc_inst *)addr;
+       u32 *p = (u32 *)addr;
 
        /* The maximum range of a single instruction branch is the current
         * instruction's address + (32 MB - 4) bytes. For the trampoline we
@@ -45,8 +45,8 @@ static void __init create_trampoline(unsigned long addr)
         * branch to "addr" we jump to ("addr" + 32 MB). Although it requires
         * two instructions it doesn't require any registers.
         */
-       patch_instruction(p, ppc_inst(PPC_INST_NOP));
-       patch_branch((void *)p + 4, addr + PHYSICAL_START, 0);
+       patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+       patch_branch(p + 1, addr + PHYSICAL_START, 0);
 }
 
 void __init setup_kdump_trampoline(void)
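
This hunk is part of the series retiring struct ppc_inst * as the patching type: a plain u32 * addresses the nop slot, and patch_branch() fills in the second word. A hedged, userspace-style sketch of the two-instruction trampoline being built (the encodings are standard Power ISA; the offsets are illustrative):

    #include <stdint.h>

    /* Build: [addr] nop; [addr+4] b (addr + phys_start). */
    static void build_trampoline(uint32_t *p, uintptr_t addr, uintptr_t phys_start)
    {
            int32_t disp = (int32_t)((addr + phys_start) - (addr + 4));

            p[0] = 0x60000000;                 /* ori r0,r0,0 == nop */
            /* 'b': primary opcode 18, signed 24-bit word-aligned displacement */
            p[1] = (18u << 26) | ((uint32_t)disp & 0x03fffffc);
    }
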
index 9160285cb2f444dd67e05672f295d8f270de073f..0273a13490063ddba9305cbb7376d619aab4fe8f 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/barrier.h>
 #include <asm/kup.h>
 #include <asm/bug.h>
+#include <asm/interrupt.h>
 
 #include "head_32.h"
 
@@ -74,6 +75,24 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
 
        .globl  transfer_to_syscall
 transfer_to_syscall:
+       stw     r11, GPR1(r1)
+       stw     r11, 0(r1)
+       mflr    r12
+       stw     r12, _LINK(r1)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+       rlwinm  r9,r9,0,14,12           /* clear MSR_WE (necessary?) */
+#endif
+       lis     r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+       SAVE_GPR(2, r1)
+       addi    r12,r12,STACK_FRAME_REGS_MARKER@l
+       stw     r9,_MSR(r1)
+       li      r2, INTERRUPT_SYSCALL
+       stw     r12,8(r1)
+       stw     r2,_TRAP(r1)
+       SAVE_GPR(0, r1)
+       SAVE_4GPRS(3, r1)
+       SAVE_2GPRS(7, r1)
+       addi    r2,r10,-THREAD
        SAVE_NVGPRS(r1)
 
        /* Calling convention has r9 = orig r0, r10 = regs */
@@ -176,28 +195,6 @@ _GLOBAL(_switch)
        /* r3-r12 are caller saved -- Cort */
        SAVE_NVGPRS(r1)
        stw     r0,_NIP(r1)     /* Return to switch caller */
-       mfmsr   r11
-       li      r0,MSR_FP       /* Disable floating-point */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
-       oris    r0,r0,MSR_VEC@h /* Disable altivec */
-       mfspr   r12,SPRN_VRSAVE /* save vrsave register value */
-       stw     r12,THREAD+THREAD_VRSAVE(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
-       oris    r0,r0,MSR_SPE@h  /* Disable SPE */
-       mfspr   r12,SPRN_SPEFSCR /* save spefscr register value */
-       stw     r12,THREAD+THREAD_SPEFSCR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-       and.    r0,r0,r11       /* FP or altivec or SPE enabled? */
-       beq+    1f
-       andc    r11,r11,r0
-       mtmsr   r11
-       isync
-1:     stw     r11,_MSR(r1)
        mfcr    r10
        stw     r10,_CCR(r1)
        stw     r1,KSP(r3)      /* Set old stack pointer */
@@ -218,19 +215,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
        mr      r3,r2
        addi    r2,r4,-THREAD   /* Update current */
 
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
-       lwz     r0,THREAD+THREAD_VRSAVE(r2)
-       mtspr   SPRN_VRSAVE,r0          /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
-       lwz     r0,THREAD+THREAD_SPEFSCR(r2)
-       mtspr   SPRN_SPEFSCR,r0         /* restore SPEFSCR reg */
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-
        lwz     r0,_CCR(r1)
        mtcrf   0xFF,r0
        /* r3-r12 are destroyed -- Cort */
index 03727308d8cc4596429274d7143ac65bb1bc5e55..15720f8661a1b2db4ef7ce9fb120829d4955824d 100644 (file)
@@ -32,7 +32,6 @@
 #include <asm/irqflags.h>
 #include <asm/hw_irq.h>
 #include <asm/context_tracking.h>
-#include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/barrier.h>
 #include <asm/export.h>
 /*
  * System calls.
  */
-       .section        ".toc","aw"
-SYS_CALL_TABLE:
-       .tc sys_call_table[TC],sys_call_table
-
-#ifdef CONFIG_COMPAT
-COMPAT_SYS_CALL_TABLE:
-       .tc compat_sys_call_table[TC],compat_sys_call_table
-#endif
-
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
-       .tc     ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
        .section        ".text"
-       .align 7
-
-#ifdef CONFIG_PPC_BOOK3S
-.macro system_call_vectored name trapnr
-       .globl system_call_vectored_\name
-system_call_vectored_\name:
-_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-       bne     .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
-       SCV_INTERRUPT_TO_KERNEL
-       mr      r10,r1
-       ld      r1,PACAKSAVE(r13)
-       std     r10,0(r1)
-       std     r11,_NIP(r1)
-       std     r12,_MSR(r1)
-       std     r0,GPR0(r1)
-       std     r10,GPR1(r1)
-       std     r2,GPR2(r1)
-       ld      r2,PACATOC(r13)
-       mfcr    r12
-       li      r11,0
-       /* Can we avoid saving r3-r8 in common case? */
-       std     r3,GPR3(r1)
-       std     r4,GPR4(r1)
-       std     r5,GPR5(r1)
-       std     r6,GPR6(r1)
-       std     r7,GPR7(r1)
-       std     r8,GPR8(r1)
-       /* Zero r9-r12, this should only be required when restoring all GPRs */
-       std     r11,GPR9(r1)
-       std     r11,GPR10(r1)
-       std     r11,GPR11(r1)
-       std     r11,GPR12(r1)
-       std     r9,GPR13(r1)
-       SAVE_NVGPRS(r1)
-       std     r11,_XER(r1)
-       std     r11,_LINK(r1)
-       std     r11,_CTR(r1)
-
-       li      r11,\trapnr
-       std     r11,_TRAP(r1)
-       std     r12,_CCR(r1)
-       addi    r10,r1,STACK_FRAME_OVERHEAD
-       ld      r11,exception_marker@toc(r2)
-       std     r11,-16(r10)            /* "regshere" marker */
-
-BEGIN_FTR_SECTION
-       HMT_MEDIUM
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
-       /*
-        * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
-        * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
-        * and interrupts may be masked and pending already.
-        * system_call_exception() will call trace_hardirqs_off() which means
-        * interrupts could already have been blocked before trace_hardirqs_off,
-        * but this is the best we can do.
-        */
-
-       /* Calling convention has r9 = orig r0, r10 = regs */
-       mr      r9,r0
-       bl      system_call_exception
-
-.Lsyscall_vectored_\name\()_exit:
-       addi    r4,r1,STACK_FRAME_OVERHEAD
-       li      r5,1 /* scv */
-       bl      syscall_exit_prepare
-
-       ld      r2,_CCR(r1)
-       ld      r4,_NIP(r1)
-       ld      r5,_MSR(r1)
-
-BEGIN_FTR_SECTION
-       stdcx.  r0,0,r1                 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-BEGIN_FTR_SECTION
-       HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
-       cmpdi   r3,0
-       bne     .Lsyscall_vectored_\name\()_restore_regs
-
-       /* rfscv returns with LR->NIA and CTR->MSR */
-       mtlr    r4
-       mtctr   r5
-
-       /* Could zero these as per ABI, but we may consider a stricter ABI
-        * which preserves these if libc implementations can benefit, so
-        * restore them for now until further measurement is done. */
-       ld      r0,GPR0(r1)
-       ld      r4,GPR4(r1)
-       ld      r5,GPR5(r1)
-       ld      r6,GPR6(r1)
-       ld      r7,GPR7(r1)
-       ld      r8,GPR8(r1)
-       /* Zero volatile regs that may contain sensitive kernel data */
-       li      r9,0
-       li      r10,0
-       li      r11,0
-       li      r12,0
-       mtspr   SPRN_XER,r0
-
-       /*
-        * We don't need to restore AMR on the way back to userspace for KUAP.
-        * The value of AMR only matters while we're in the kernel.
-        */
-       mtcr    r2
-       ld      r2,GPR2(r1)
-       ld      r3,GPR3(r1)
-       ld      r13,GPR13(r1)
-       ld      r1,GPR1(r1)
-       RFSCV_TO_USER
-       b       .       /* prevent speculative execution */
-
-.Lsyscall_vectored_\name\()_restore_regs:
-       li      r3,0
-       mtmsrd  r3,1
-       mtspr   SPRN_SRR0,r4
-       mtspr   SPRN_SRR1,r5
-
-       ld      r3,_CTR(r1)
-       ld      r4,_LINK(r1)
-       ld      r5,_XER(r1)
-
-       REST_NVGPRS(r1)
-       ld      r0,GPR0(r1)
-       mtcr    r2
-       mtctr   r3
-       mtlr    r4
-       mtspr   SPRN_XER,r5
-       REST_10GPRS(2, r1)
-       REST_2GPRS(12, r1)
-       ld      r1,GPR1(r1)
-       RFI_TO_USER
-.endm
-
-system_call_vectored common 0x3000
-/*
- * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
- * which is tested by system_call_exception when r0 is -1 (as set by vector
- * entry code).
- */
-system_call_vectored sigill 0x7ff0
-
-
-/*
- * Entered via kernel return set up by kernel/sstep.c, must match entry regs
- */
-       .globl system_call_vectored_emulate
-system_call_vectored_emulate:
-_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
-       li      r10,IRQS_ALL_DISABLED
-       stb     r10,PACAIRQSOFTMASK(r13)
-       b       system_call_vectored_common
-#endif
-
-       .balign IFETCH_ALIGN_BYTES
-       .globl system_call_common_real
-system_call_common_real:
-       ld      r10,PACAKMSR(r13)       /* get MSR value for kernel */
-       mtmsrd  r10
-
-       .balign IFETCH_ALIGN_BYTES
-       .globl system_call_common
-system_call_common:
-_ASM_NOKPROBE_SYMBOL(system_call_common)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-       bne     .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
-       mr      r10,r1
-       ld      r1,PACAKSAVE(r13)
-       std     r10,0(r1)
-       std     r11,_NIP(r1)
-       std     r12,_MSR(r1)
-       std     r0,GPR0(r1)
-       std     r10,GPR1(r1)
-       std     r2,GPR2(r1)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-START_BTB_FLUSH_SECTION
-       BTB_FLUSH(r10)
-END_BTB_FLUSH_SECTION
-#endif
-       ld      r2,PACATOC(r13)
-       mfcr    r12
-       li      r11,0
-       /* Can we avoid saving r3-r8 in common case? */
-       std     r3,GPR3(r1)
-       std     r4,GPR4(r1)
-       std     r5,GPR5(r1)
-       std     r6,GPR6(r1)
-       std     r7,GPR7(r1)
-       std     r8,GPR8(r1)
-       /* Zero r9-r12, this should only be required when restoring all GPRs */
-       std     r11,GPR9(r1)
-       std     r11,GPR10(r1)
-       std     r11,GPR11(r1)
-       std     r11,GPR12(r1)
-       std     r9,GPR13(r1)
-       SAVE_NVGPRS(r1)
-       std     r11,_XER(r1)
-       std     r11,_CTR(r1)
-       mflr    r10
-
-       /*
-        * This clears CR0.SO (bit 28), which is the error indication on
-        * return from this system call.
-        */
-       rldimi  r12,r11,28,(63-28)
-       li      r11,0xc00
-       std     r10,_LINK(r1)
-       std     r11,_TRAP(r1)
-       std     r12,_CCR(r1)
-       addi    r10,r1,STACK_FRAME_OVERHEAD
-       ld      r11,exception_marker@toc(r2)
-       std     r11,-16(r10)            /* "regshere" marker */
-
-       /*
-        * We always enter kernel from userspace with irq soft-mask enabled and
-        * nothing pending. system_call_exception() will call
-        * trace_hardirqs_off().
-        */
-       li      r11,IRQS_ALL_DISABLED
-       li      r12,PACA_IRQ_HARD_DIS
-       stb     r11,PACAIRQSOFTMASK(r13)
-       stb     r12,PACAIRQHAPPENED(r13)
-
-       /* Calling convention has r9 = orig r0, r10 = regs */
-       mr      r9,r0
-       bl      system_call_exception
-
-.Lsyscall_exit:
-       addi    r4,r1,STACK_FRAME_OVERHEAD
-       li      r5,0 /* !scv */
-       bl      syscall_exit_prepare
-
-       ld      r2,_CCR(r1)
-       ld      r4,_NIP(r1)
-       ld      r5,_MSR(r1)
-       ld      r6,_LINK(r1)
-
-BEGIN_FTR_SECTION
-       stdcx.  r0,0,r1                 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-       mtspr   SPRN_SRR0,r4
-       mtspr   SPRN_SRR1,r5
-       mtlr    r6
-
-       cmpdi   r3,0
-       bne     .Lsyscall_restore_regs
-       /* Zero volatile regs that may contain sensitive kernel data */
-       li      r0,0
-       li      r4,0
-       li      r5,0
-       li      r6,0
-       li      r7,0
-       li      r8,0
-       li      r9,0
-       li      r10,0
-       li      r11,0
-       li      r12,0
-       mtctr   r0
-       mtspr   SPRN_XER,r0
-.Lsyscall_restore_regs_cont:
-
-BEGIN_FTR_SECTION
-       HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
-       /*
-        * We don't need to restore AMR on the way back to userspace for KUAP.
-        * The value of AMR only matters while we're in the kernel.
-        */
-       mtcr    r2
-       ld      r2,GPR2(r1)
-       ld      r3,GPR3(r1)
-       ld      r13,GPR13(r1)
-       ld      r1,GPR1(r1)
-       RFI_TO_USER
-       b       .       /* prevent speculative execution */
-
-.Lsyscall_restore_regs:
-       ld      r3,_CTR(r1)
-       ld      r4,_XER(r1)
-       REST_NVGPRS(r1)
-       mtctr   r3
-       mtspr   SPRN_XER,r4
-       ld      r0,GPR0(r1)
-       REST_8GPRS(4, r1)
-       ld      r12,GPR12(r1)
-       b       .Lsyscall_restore_regs_cont
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-.Ltabort_syscall:
-       /* Firstly we need to enable TM in the kernel */
-       mfmsr   r10
-       li      r9, 1
-       rldimi  r10, r9, MSR_TM_LG, 63-MSR_TM_LG
-       mtmsrd  r10, 0
-
-       /* tabort, this dooms the transaction, nothing else */
-       li      r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-       TABORT(R9)
-
-       /*
-        * Return directly to userspace. We have corrupted user register state,
-        * but userspace will never see that register state. Execution will
-        * resume after the tbegin of the aborted transaction with the
-        * checkpointed register state.
-        */
-       li      r9, MSR_RI
-       andc    r10, r10, r9
-       mtmsrd  r10, 1
-       mtspr   SPRN_SRR0, r11
-       mtspr   SPRN_SRR1, r12
-       RFI_TO_USER
-       b       .       /* prevent speculative execution */
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S
-_GLOBAL(ret_from_fork_scv)
-       bl      schedule_tail
-       REST_NVGPRS(r1)
-       li      r3,0    /* fork() return value */
-       b       .Lsyscall_vectored_common_exit
-#endif
-
-_GLOBAL(ret_from_fork)
-       bl      schedule_tail
-       REST_NVGPRS(r1)
-       li      r3,0    /* fork() return value */
-       b       .Lsyscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
-       bl      schedule_tail
-       REST_NVGPRS(r1)
-       mtctr   r14
-       mr      r3,r15
-#ifdef PPC64_ELF_ABI_v2
-       mr      r12,r14
-#endif
-       bctrl
-       li      r3,0
-       b       .Lsyscall_exit
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
@@ -630,156 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        addi    r1,r1,SWITCH_FRAME_SIZE
        blr
 
-       /*
-        * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
-        * touched, no exit work created, then this can be used.
-        */
-       .balign IFETCH_ALIGN_BYTES
-       .globl fast_interrupt_return
-fast_interrupt_return:
-_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
-       kuap_check_amr r3, r4
-       ld      r5,_MSR(r1)
-       andi.   r0,r5,MSR_PR
-#ifdef CONFIG_PPC_BOOK3S
-       bne     .Lfast_user_interrupt_return_amr
-       kuap_kernel_restore r3, r4
-       andi.   r0,r5,MSR_RI
-       li      r3,0 /* 0 return value, no EMULATE_STACK_STORE */
-       bne+    .Lfast_kernel_interrupt_return
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unrecoverable_exception
-       b       . /* should not get here */
-#else
-       bne     .Lfast_user_interrupt_return
-       b       .Lfast_kernel_interrupt_return
-#endif
-
-       .balign IFETCH_ALIGN_BYTES
-       .globl interrupt_return
-interrupt_return:
-_ASM_NOKPROBE_SYMBOL(interrupt_return)
-       ld      r4,_MSR(r1)
-       andi.   r0,r4,MSR_PR
-       beq     .Lkernel_interrupt_return
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      interrupt_exit_user_prepare
-       cmpdi   r3,0
-       bne-    .Lrestore_nvgprs
-
-#ifdef CONFIG_PPC_BOOK3S
-.Lfast_user_interrupt_return_amr:
-       kuap_user_restore r3, r4
-#endif
-.Lfast_user_interrupt_return:
-       ld      r11,_NIP(r1)
-       ld      r12,_MSR(r1)
-BEGIN_FTR_SECTION
-       ld      r10,_PPR(r1)
-       mtspr   SPRN_PPR,r10
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-       mtspr   SPRN_SRR0,r11
-       mtspr   SPRN_SRR1,r12
-
-BEGIN_FTR_SECTION
-       stdcx.  r0,0,r1         /* to clear the reservation */
-FTR_SECTION_ELSE
-       ldarx   r0,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-       ld      r3,_CCR(r1)
-       ld      r4,_LINK(r1)
-       ld      r5,_CTR(r1)
-       ld      r6,_XER(r1)
-       li      r0,0
-
-       REST_4GPRS(7, r1)
-       REST_2GPRS(11, r1)
-       REST_GPR(13, r1)
-
-       mtcr    r3
-       mtlr    r4
-       mtctr   r5
-       mtspr   SPRN_XER,r6
-
-       REST_4GPRS(2, r1)
-       REST_GPR(6, r1)
-       REST_GPR(0, r1)
-       REST_GPR(1, r1)
-       RFI_TO_USER
-       b       .       /* prevent speculative execution */
-
-.Lrestore_nvgprs:
-       REST_NVGPRS(r1)
-       b       .Lfast_user_interrupt_return
-
-       .balign IFETCH_ALIGN_BYTES
-.Lkernel_interrupt_return:
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      interrupt_exit_kernel_prepare
-
-.Lfast_kernel_interrupt_return:
-       cmpdi   cr1,r3,0
-       ld      r11,_NIP(r1)
-       ld      r12,_MSR(r1)
-       mtspr   SPRN_SRR0,r11
-       mtspr   SPRN_SRR1,r12
-
-BEGIN_FTR_SECTION
-       stdcx.  r0,0,r1         /* to clear the reservation */
-FTR_SECTION_ELSE
-       ldarx   r0,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-       ld      r3,_LINK(r1)
-       ld      r4,_CTR(r1)
-       ld      r5,_XER(r1)
-       ld      r6,_CCR(r1)
-       li      r0,0
-
-       REST_4GPRS(7, r1)
-       REST_2GPRS(11, r1)
-
-       mtlr    r3
-       mtctr   r4
-       mtspr   SPRN_XER,r5
-
-       /*
-        * Leaving a stale exception_marker on the stack can confuse
-        * the reliable stack unwinder later on. Clear it.
-        */
-       std     r0,STACK_FRAME_OVERHEAD-16(r1)
-
-       REST_4GPRS(2, r1)
-
-       bne-    cr1,1f /* emulate stack store */
-       mtcr    r6
-       REST_GPR(6, r1)
-       REST_GPR(0, r1)
-       REST_GPR(1, r1)
-       RFI_TO_KERNEL
-       b       .       /* prevent speculative execution */
-
-1:     /*
-        * Emulate stack store with update. New r1 value was already calculated
-        * and updated in our interrupt regs by emulate_loadstore, but we can't
-        * store the previous value of r1 to the stack before re-loading our
-        * registers from it, otherwise they could be clobbered.  Use
-        * PACA_EXGEN as temporary storage to hold the store data, as
-        * interrupts are disabled here so it won't be clobbered.
-        */
-       mtcr    r6
-       std     r9,PACA_EXGEN+0(r13)
-       addi    r9,r1,INT_FRAME_SIZE /* get original r1 */
-       REST_GPR(6, r1)
-       REST_GPR(0, r1)
-       REST_GPR(1, r1)
-       std     r9,0(r1) /* perform store component of stdu */
-       ld      r9,PACA_EXGEN+0(r13)
-
-       RFI_TO_KERNEL
-       b       .       /* prevent speculative execution */
-
 #ifdef CONFIG_PPC_RTAS
 /*
  * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
index 2ed14d4a47f5997b0866f6c2d96533d565b238f4..93b0f3ec8fb059f230d589f771b023d3c48e75bf 100644 (file)
@@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
 
        for (i = 0; i < (len / 4); i++) {
                struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
-               patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst);
+               patch_instruction(epapr_hypercall_start + i, inst);
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
-               patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst);
+               patch_instruction(epapr_ev_idle_start + i, inst);
 #endif
        }
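
The same API change applies here: patch_instruction() now takes a u32 * directly instead of a cast to struct ppc_inst *. A minimal hedged example of a caller under that signature (kernel-internal headers; the helper name is hypothetical):

    #include <linux/types.h>         /* u32 */
    #include <asm/code-patching.h>   /* patch_instruction() */
    #include <asm/ppc-opcode.h>      /* PPC_RAW_NOP() */
    #include <asm/inst.h>            /* ppc_inst() */

    /* Hypothetical helper: replace one kernel instruction with a nop. */
    static int nop_out(u32 *insn)
    {
            return patch_instruction(insn, ppc_inst(PPC_RAW_NOP()));
    }
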
 
index f1ae710274bc9142d5fd7fe6b4bd4f065044f734..1401787b0b937f3edd429c5730309708814c8b3d 100644 (file)
 #include <asm/feature-fixups.h>
 #include <asm/context_tracking.h>
 
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
+
 /* XXX This will ultimately add space for a special exception save
  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
  *     when taking special interrupts. For now we don't support that,
@@ -897,6 +901,34 @@ kernel_dbg_exc:
        bl      unknown_exception
        b       interrupt_return
 
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+       ld      r11,PACATOC(r13)
+       ld      r14,__start___restart_table@got(r11)
+       ld      r15,__stop___restart_table@got(r11)
+#else
+       LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+       LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+       cmpd    r14,r15
+       beq     302f
+       ld      r11,0(r14)
+       cmpld   r10,r11
+       blt     301f
+       ld      r11,8(r14)
+       cmpld   r10,r11
+       bge     301f
+       ld      r11,16(r14)
+       b       303f
+301:
+       addi    r14,r14,24
+       b       300b
+302:
+       li      r11,0
+303:
+.endm
+
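
A C rendering of the SEARCH_RESTART_TABLE walk above may help: the table is an array of 24-byte {start, end, fixup} triples placed between linker-provided bounds, and the macro leaves the matching fixup address (or 0) in a register. Sketch, with the entry layout inferred from the 0/8/16-byte loads and the 24-byte stride:

    struct restart_entry {
            unsigned long start;    /* word at offset 0 */
            unsigned long end;      /* word at offset 8 (exclusive) */
            unsigned long fixup;    /* word at offset 16: restart address */
    };

    extern struct restart_entry __start___restart_table[];
    extern struct restart_entry __stop___restart_table[];

    static unsigned long search_restart_table(unsigned long nip)
    {
            struct restart_entry *e;

            for (e = __start___restart_table; e < __stop___restart_table; e++)
                    if (nip >= e->start && nip < e->end)
                            return e->fixup;
            return 0;               /* not inside a restartable region */
    }
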
 /*
  * An interrupt came in while soft-disabled; we mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
@@ -905,6 +937,9 @@ kernel_dbg_exc:
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask
+       std     r14,PACA_EXGEN+EX_R14(r13)
+       std     r15,PACA_EXGEN+EX_R15(r13)
+
        lbz     r10,PACAIRQHAPPENED(r13)
        .if \full_mask == 1
        ori     r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
@@ -914,15 +949,23 @@ kernel_dbg_exc:
        stb     r10,PACAIRQHAPPENED(r13)
 
        .if \full_mask == 1
-       rldicl  r10,r11,48,1            /* clear MSR_EE */
-       rotldi  r11,r10,16
+       xori    r11,r11,MSR_EE          /* clear MSR_EE */
        mtspr   SPRN_SRR1,r11
        .endif
 
+       mfspr   r10,SPRN_SRR0
+       SEARCH_RESTART_TABLE
+       cmpdi   r11,0
+       beq     1f
+       mtspr   SPRN_SRR0,r11           /* return to restart address */
+1:
+
        lwz     r11,PACA_EXGEN+EX_CR(r13)
        mtcr    r11
        ld      r10,PACA_EXGEN+EX_R10(r13)
        ld      r11,PACA_EXGEN+EX_R11(r13)
+       ld      r14,PACA_EXGEN+EX_R14(r13)
+       ld      r15,PACA_EXGEN+EX_R15(r13)
        mfspr   r13,SPRN_SPRG_GEN_SCRATCH
        rfi
        b       .
@@ -1282,7 +1325,12 @@ a2_tlbinit_code_start:
 a2_tlbinit_after_linear_map:
 
        /* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+       ld      r5,PACATOC(r13)
+       ld      r3,1f@got(r5)
+#else
        LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
        mtctr   r3
        bctr
 
index f7fc6e078d4edbc6c97df76830c7e459e6e10588..4aec59a77d4c540add7c3d923091fa23ab5ee4c2 100644 (file)
@@ -428,18 +428,31 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 
                /* If coming from user, skip soft-mask tests. */
                andi.   r10,r12,MSR_PR
-               bne     2f
-
-               /* Kernel code running below __end_interrupts is implicitly
-                * soft-masked */
-               LOAD_HANDLER(r10, __end_interrupts)
+               bne     3f
+
+               /*
+                * Kernel code running below __end_soft_masked may be
+                * implicitly soft-masked if it is within the regions
+                * in the soft mask table.
+                */
+               LOAD_HANDLER(r10, __end_soft_masked)
                cmpld   r11,r10
+               bge+    1f
+
+               /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+               mtctr   r12
+               stw     r9,PACA_EXGEN+EX_CCR(r13)
+               SEARCH_SOFT_MASK_TABLE
+               cmpdi   r12,0
+               mfctr   r12             /* Restore r12 to SRR1 */
+               lwz     r9,PACA_EXGEN+EX_CCR(r13)
+               beq     1f              /* Not in soft-mask table */
                li      r10,IMASK
-               blt-    1f
+               b       2f              /* In soft-mask table, always mask */
 
                /* Test the soft mask state against our interrupt's bit */
-               lbz     r10,PACAIRQSOFTMASK(r13)
-1:             andi.   r10,r10,IMASK
+1:             lbz     r10,PACAIRQSOFTMASK(r13)
+2:             andi.   r10,r10,IMASK
                /* Associate vector numbers with bits in paca->irq_happened */
                .if IVEC == 0x500 || IVEC == 0xea0
                li      r10,PACA_IRQ_EE
@@ -470,7 +483,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 
        .if ISTACK
        andi.   r10,r12,MSR_PR          /* See if coming from user      */
-2:     mr      r10,r1                  /* Save r1                      */
+3:     mr      r10,r1                  /* Save r1                      */
        subi    r1,r1,INT_FRAME_SIZE    /* alloc frame on kernel stack  */
        beq-    100f
        ld      r1,PACAKSAVE(r13)       /* kernel stack to use          */
@@ -485,6 +498,20 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
        std     r0,GPR0(r1)             /* save r0 in stackframe        */
        std     r10,GPR1(r1)            /* save r1 in stackframe        */
 
+       /* Mark our [H]SRRs valid for return */
+       li      r10,1
+       .if IHSRR_IF_HVMODE
+       BEGIN_FTR_SECTION
+       stb     r10,PACAHSRR_VALID(r13)
+       FTR_SECTION_ELSE
+       stb     r10,PACASRR_VALID(r13)
+       ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+       .elseif IHSRR
+       stb     r10,PACAHSRR_VALID(r13)
+       .else
+       stb     r10,PACASRR_VALID(r13)
+       .endif
+
        .if ISET_RI
        li      r10,MSR_RI
        mtmsrd  r10,1                   /* Set MSR_RI */
@@ -577,6 +604,66 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        __GEN_COMMON_BODY \name
 .endm
 
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+       mr      r12,r2
+       ld      r2,PACATOC(r13)
+       LOAD_REG_ADDR(r9, __start___restart_table)
+       LOAD_REG_ADDR(r10, __stop___restart_table)
+       mr      r2,r12
+#else
+       LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+       LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
+#endif
+300:
+       cmpd    r9,r10
+       beq     302f
+       ld      r12,0(r9)
+       cmpld   r11,r12
+       blt     301f
+       ld      r12,8(r9)
+       cmpld   r11,r12
+       bge     301f
+       ld      r12,16(r9)
+       b       303f
+301:
+       addi    r9,r9,24
+       b       300b
+302:
+       li      r12,0
+303:
+.endm
+
+.macro SEARCH_SOFT_MASK_TABLE
+#ifdef CONFIG_RELOCATABLE
+       mr      r12,r2
+       ld      r2,PACATOC(r13)
+       LOAD_REG_ADDR(r9, __start___soft_mask_table)
+       LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+       mr      r2,r12
+#else
+       LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+       LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+       cmpd    r9,r10
+       beq     302f
+       ld      r12,0(r9)
+       cmpld   r11,r12
+       blt     301f
+       ld      r12,8(r9)
+       cmpld   r11,r12
+       bge     301f
+       li      r12,1
+       b       303f
+301:
+       addi    r9,r9,16
+       b       300b
+302:
+       li      r12,0
+303:
+.endm
+
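
SEARCH_SOFT_MASK_TABLE is the same walk over 16-byte {start, end} pairs, answering a yes/no question instead of producing a fixup address. An equivalent C sketch:

    struct soft_mask_entry {
            unsigned long start;
            unsigned long end;      /* exclusive */
    };

    extern struct soft_mask_entry __start___soft_mask_table[];
    extern struct soft_mask_entry __stop___soft_mask_table[];

    /* Return 1 if nip falls in an implicitly soft-masked region, else 0. */
    static int in_soft_mask_table(unsigned long nip)
    {
            struct soft_mask_entry *e;

            for (e = __start___soft_mask_table; e < __stop___soft_mask_table; e++)
                    if (nip >= e->start && nip < e->end)
                            return 1;
            return 0;
    }
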
 /*
  * Restore all registers including H/SRR0/1 saved in a stack frame of a
  * standard exception.
@@ -584,10 +671,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 .macro EXCEPTION_RESTORE_REGS hsrr=0
        /* Move original SRR0 and SRR1 into the respective regs */
        ld      r9,_MSR(r1)
+       li      r10,0
        .if \hsrr
        mtspr   SPRN_HSRR1,r9
+       stb     r10,PACAHSRR_VALID(r13)
        .else
        mtspr   SPRN_SRR1,r9
+       stb     r10,PACASRR_VALID(r13)
        .endif
        ld      r9,_NIP(r1)
        .if \hsrr
@@ -704,17 +794,17 @@ __start_interrupts:
  * scv instructions enter the kernel without changing EE, RI, ME, or HV.
  * In particular, this means we can take a maskable interrupt at any point
  * in the scv handler, which is unlike any other interrupt. This is solved
- * by treating the instruction addresses below __end_interrupts as being
- * soft-masked.
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
  *
  * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
  * ensure scv is never executed with relocation off, which means AIL-0
  * should never happen.
  *
- * Before leaving the below __end_interrupts text, at least of the following
- * must be true:
+ * Before leaving the following inside-__end_soft_masked text, at least one of the
+ * following must be true:
  * - MSR[PR]=1 (i.e., return to userspace)
- * - MSR_EE|MSR_RI is set (no reentrant exceptions)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
  * - Standard kernel environment is set up (stack, paca, etc)
  *
  * Call convention:
@@ -722,6 +812,7 @@ __start_interrupts:
  * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
  */
 EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+1:
        /* SCV 0 */
        mr      r9,r13
        GET_PACA(r13)
@@ -751,8 +842,11 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
        b       system_call_vectored_sigill
 #endif
        .endr
+2:
 EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
 
+SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above.
+
 #ifdef CONFIG_RELOCATABLE
 TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
        __LOAD_HANDLER(r10, system_call_vectored_common)
@@ -1149,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common)
        mtmsrd  r10,1
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      machine_check_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 #ifdef CONFIG_PPC_P7_NAP
@@ -1275,7 +1369,7 @@ BEGIN_MMU_FTR_SECTION
 MMU_FTR_SECTION_ELSE
        bl      do_page_fault
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
-       b       interrupt_return
+       b       interrupt_return_srr
 
 1:     bl      do_break
        /*
@@ -1283,7 +1377,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
         * If so, we need to restore them with their updated values.
         */
        REST_NVGPRS(r1)
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1323,7 +1417,7 @@ BEGIN_MMU_FTR_SECTION
        bl      do_slb_fault
        cmpdi   r3,0
        bne-    1f
-       b       fast_interrupt_return
+       b       fast_interrupt_return_srr
 1:     /* Error case */
 MMU_FTR_SECTION_ELSE
        /* Radix case, access is outside page table range */
@@ -1332,7 +1426,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
        std     r3,RESULT(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_bad_slb_fault
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1368,7 +1462,7 @@ BEGIN_MMU_FTR_SECTION
 MMU_FTR_SECTION_ELSE
        bl      do_page_fault
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1403,7 +1497,7 @@ BEGIN_MMU_FTR_SECTION
        bl      do_slb_fault
        cmpdi   r3,0
        bne-    1f
-       b       fast_interrupt_return
+       b       fast_interrupt_return_srr
 1:     /* Error case */
 MMU_FTR_SECTION_ELSE
        /* Radix case, access is outside page table range */
@@ -1412,7 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
        std     r3,RESULT(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_bad_slb_fault
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1456,7 +1550,11 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
        GEN_COMMON hardware_interrupt
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
-       b       interrupt_return
+       BEGIN_FTR_SECTION
+       b       interrupt_return_hsrr
+       FTR_SECTION_ELSE
+       b       interrupt_return_srr
+       ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 
 
 /**
@@ -1483,7 +1581,7 @@ EXC_COMMON_BEGIN(alignment_common)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      alignment_exception
        REST_NVGPRS(r1) /* instruction emulation may change GPRs */
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1590,7 +1688,7 @@ EXC_COMMON_BEGIN(program_check_common)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      program_check_exception
        REST_NVGPRS(r1) /* instruction emulation may change GPRs */
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /*
@@ -1633,12 +1731,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
        bl      load_up_fpu
-       b       fast_interrupt_return
+       b       fast_interrupt_return_srr
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:     /* User process was in a transaction */
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      fp_unavailable_tm
-       b       interrupt_return
+       b       interrupt_return_srr
 #endif
 
 
@@ -1677,7 +1775,7 @@ EXC_COMMON_BEGIN(decrementer_common)
        GEN_COMMON decrementer
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      timer_interrupt
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1714,6 +1812,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
         *
         * Be careful to avoid touching the kernel stack.
         */
+       li      r10,0
+       stb     r10,PACAHSRR_VALID(r13)
        ld      r10,PACA_EXGEN+EX_CTR(r13)
        mtctr   r10
        mtcrf   0x80,r9
@@ -1761,7 +1861,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
 #else
        bl      unknown_async_exception
 #endif
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 EXC_REAL_NONE(0xb00, 0x100)
@@ -1838,8 +1938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
        mtctr   r10
        bctr
        .else
-       li      r10,MSR_RI
-       mtmsrd  r10,1                   /* Set RI (EE=0) */
 #ifdef CONFIG_RELOCATABLE
        __LOAD_HANDLER(r10, system_call_common)
        mtctr   r10
@@ -1925,7 +2023,7 @@ EXC_COMMON_BEGIN(single_step_common)
        GEN_COMMON single_step
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      single_step_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -1963,7 +2061,7 @@ BEGIN_MMU_FTR_SECTION
 MMU_FTR_SECTION_ELSE
        bl      unknown_exception
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 /**
@@ -1988,7 +2086,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
        GEN_COMMON h_instr_storage
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      unknown_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 /**
@@ -2012,7 +2110,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      emulation_assist_interrupt
        REST_NVGPRS(r1) /* instruction emulation may change GPRs */
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 /**
@@ -2089,7 +2187,7 @@ EXC_COMMON_BEGIN(hmi_exception_common)
        GEN_COMMON hmi_exception
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      handle_hmi_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 /**
@@ -2119,7 +2217,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
 #else
        bl      unknown_async_exception
 #endif
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 /**
@@ -2145,7 +2243,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
        GEN_COMMON h_virt_irq
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 EXC_REAL_NONE(0xec0, 0x20)
@@ -2188,7 +2286,7 @@ EXC_COMMON_BEGIN(performance_monitor_common)
        GEN_COMMON performance_monitor
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      performance_monitor_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -2225,19 +2323,19 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
        bl      load_up_altivec
-       b       fast_interrupt_return
+       b       fast_interrupt_return_srr
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:     /* User process was in a transaction */
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      altivec_unavailable_tm
-       b       interrupt_return
+       b       interrupt_return_srr
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      altivec_unavailable_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -2278,14 +2376,14 @@ BEGIN_FTR_SECTION
 2:     /* User process was in a transaction */
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      vsx_unavailable_tm
-       b       interrupt_return
+       b       interrupt_return_srr
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      vsx_unavailable_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -2313,7 +2411,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      facility_unavailable_exception
        REST_NVGPRS(r1) /* instruction emulation may change GPRs */
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 /**
@@ -2341,7 +2439,7 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      facility_unavailable_exception
        REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 EXC_REAL_NONE(0xfa0, 0x20)
@@ -2370,7 +2468,7 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
        GEN_COMMON cbe_system_error
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      cbe_system_error_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1200, 0x100)
@@ -2401,7 +2499,7 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
        GEN_COMMON instruction_breakpoint
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      instruction_breakpoint_exception
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 EXC_REAL_NONE(0x1400, 0x100)
@@ -2509,6 +2607,8 @@ BEGIN_FTR_SECTION
        ld      r10,PACA_EXGEN+EX_CFAR(r13)
        mtspr   SPRN_CFAR,r10
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+       li      r10,0
+       stb     r10,PACAHSRR_VALID(r13)
        ld      r10,PACA_EXGEN+EX_R10(r13)
        ld      r11,PACA_EXGEN+EX_R11(r13)
        ld      r12,PACA_EXGEN+EX_R12(r13)
@@ -2521,7 +2621,7 @@ EXC_COMMON_BEGIN(denorm_exception_common)
        GEN_COMMON denorm_exception
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      unknown_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 
 #ifdef CONFIG_CBE_RAS
@@ -2538,7 +2638,7 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
        GEN_COMMON cbe_maintenance
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      cbe_maintenance_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1600, 0x100)
@@ -2568,7 +2668,7 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 #else
        bl      unknown_exception
 #endif
-       b       interrupt_return
+       b       interrupt_return_srr
 
 
 #ifdef CONFIG_CBE_RAS
@@ -2585,7 +2685,7 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
        GEN_COMMON cbe_thermal
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      cbe_thermal_exception
-       b       interrupt_return
+       b       interrupt_return_hsrr
 
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1800, 0x100)
@@ -2610,7 +2710,6 @@ INT_DEFINE_END(soft_nmi)
  * and run it entirely with interrupts hard disabled.
  */
 EXC_COMMON_BEGIN(soft_nmi_common)
-       mfspr   r11,SPRN_SRR0
        mr      r10,r1
        ld      r1,PACAEMERGSP(r13)
        subi    r1,r1,INT_FRAME_SIZE
@@ -2624,6 +2723,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
        mtmsrd  r9,1
 
        kuap_kernel_restore r9, r10
+
        EXCEPTION_RESTORE_REGS hsrr=0
        RFI_TO_KERNEL
 
@@ -2645,19 +2745,24 @@ masked_Hinterrupt:
        .else
 masked_interrupt:
        .endif
-       lbz     r11,PACAIRQHAPPENED(r13)
-       or      r11,r11,r10
-       stb     r11,PACAIRQHAPPENED(r13)
+       stw     r9,PACA_EXGEN+EX_CCR(r13)
+       lbz     r9,PACAIRQHAPPENED(r13)
+       or      r9,r9,r10
+       stb     r9,PACAIRQHAPPENED(r13)
+
+       .if ! \hsrr
        cmpwi   r10,PACA_IRQ_DEC
        bne     1f
-       lis     r10,0x7fff
-       ori     r10,r10,0xffff
-       mtspr   SPRN_DEC,r10
+       LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+       mtspr   SPRN_DEC,r9
 #ifdef CONFIG_PPC_WATCHDOG
+       lwz     r9,PACA_EXGEN+EX_CCR(r13)
        b       soft_nmi_common
 #else
        b       2f
 #endif
+       .endif
+
 1:     andi.   r10,r10,PACA_IRQ_MUST_HARD_MASK
        beq     2f
        xori    r12,r12,MSR_EE  /* clear MSR_EE */
@@ -2666,11 +2771,29 @@ masked_interrupt:
        .else
        mtspr   SPRN_SRR1,r12
        .endif
-       ori     r11,r11,PACA_IRQ_HARD_DIS
-       stb     r11,PACAIRQHAPPENED(r13)
+       ori     r9,r9,PACA_IRQ_HARD_DIS
+       stb     r9,PACAIRQHAPPENED(r13)
 2:     /* done */
-       ld      r10,PACA_EXGEN+EX_CTR(r13)
-       mtctr   r10
+       li      r9,0
+       .if \hsrr
+       stb     r9,PACAHSRR_VALID(r13)
+       .else
+       stb     r9,PACASRR_VALID(r13)
+       .endif
+
+       SEARCH_RESTART_TABLE
+       cmpdi   r12,0
+       beq     3f
+       .if \hsrr
+       mtspr   SPRN_HSRR0,r12
+       .else
+       mtspr   SPRN_SRR0,r12
+       .endif
+3:
+
+       ld      r9,PACA_EXGEN+EX_CTR(r13)
+       mtctr   r9
+       lwz     r9,PACA_EXGEN+EX_CCR(r13)
        mtcrf   0x80,r9
        std     r1,PACAR1(r13)
        ld      r9,PACA_EXGEN+EX_R9(r13)
@@ -2881,7 +3004,7 @@ MASKED_INTERRUPT hsrr=1
 
 USE_FIXED_SECTION(virt_trampolines)
        /*
-        * All code below __end_interrupts is treated as soft-masked. If
+        * All code below __end_soft_masked is treated as soft-masked. If
         * any code runs here with MSR[EE]=1, it must then cope with pending
         * soft interrupt being raised (i.e., by ensuring it is replayed).
         *
index c9e2819b095abb23a52169f5e9ff0f17b6aab911..c7022c41cc314945c908ed6abc4aa95e03d63261 100644 (file)
@@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
 DEFINE_STATIC_KEY_FALSE(kvm_guest);
-bool check_kvm_guest(void)
+int __init check_kvm_guest(void)
 {
        struct device_node *hyper_node;
 
        hyper_node = of_find_node_by_path("/hypervisor");
        if (!hyper_node)
-               return false;
+               return 0;
 
        if (!of_device_is_compatible(hyper_node, "linux,kvm"))
-               return false;
+               return 0;
 
        static_branch_enable(&kvm_guest);
-       return true;
+
+       return 0;
 }
+core_initcall(check_kvm_guest); // before kvm_guest_init()
 #endif
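
The conversion above runs the device-tree detection once as a core_initcall and flips a static key, rather than having every caller re-walk the device tree. A hedged sketch of how such a key is consumed elsewhere (the kernel's actual helper is an is_kvm_guest()-style inline in a header; treat the names here as assumptions):

    #include <linux/types.h>
    #include <linux/jump_label.h>

    DECLARE_STATIC_KEY_FALSE(kvm_guest);

    /* Compiles down to a patched nop/branch: near-zero cost when not a guest. */
    static inline bool is_kvm_guest(void)
    {
            return static_branch_unlikely(&kvm_guest);
    }
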
index 2c57ece6671c492dd765df229b6cd30657da02f7..6010adcee16e2d575fd8e3112c37cb28751e03a4 100644 (file)
@@ -103,6 +103,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
        ori     r12,r12,MSR_FP
        or      r12,r12,r4
        std     r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+#endif
 #endif
        li      r4,1
        stb     r4,THREAD_LOAD_FP(r5)
index a8221ddcbd66dac128a7f14f82aeb24998dcebbd..6b1ec9e3541b9046ac0a81198ac3598d21f7c294 100644 (file)
@@ -142,42 +142,23 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 
 .macro SYSCALL_ENTRY trapno
        mfspr   r9, SPRN_SRR1
-       mfspr   r10, SPRN_SRR0
+       mfspr   r12, SPRN_SRR0
        LOAD_REG_IMMEDIATE(r11, MSR_KERNEL)             /* can take exceptions */
-       lis     r12, 1f@h
-       ori     r12, r12, 1f@l
+       lis     r10, 1f@h
+       ori     r10, r10, 1f@l
        mtspr   SPRN_SRR1, r11
-       mtspr   SPRN_SRR0, r12
-       mfspr   r12,SPRN_SPRG_THREAD
+       mtspr   SPRN_SRR0, r10
+       mfspr   r10,SPRN_SPRG_THREAD
        mr      r11, r1
-       lwz     r1,TASK_STACK-THREAD(r12)
-       tovirt(r12, r12)
+       lwz     r1,TASK_STACK-THREAD(r10)
+       tovirt(r10, r10)
        addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
        rfi
 1:
-       stw     r11,GPR1(r1)
-       stw     r11,0(r1)
-       mr      r11, r1
-       stw     r10,_NIP(r11)
-       mflr    r10
-       stw     r10, _LINK(r11)
-       mfcr    r10
-       rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
-       stw     r10,_CCR(r11)           /* save registers */
-#ifdef CONFIG_40x
-       rlwinm  r9,r9,0,14,12           /* clear MSR_WE (necessary?) */
-#endif
-       lis     r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
-       stw     r2,GPR2(r11)
-       addi    r10,r10,STACK_FRAME_REGS_MARKER@l
-       stw     r9,_MSR(r11)
-       li      r2, \trapno
-       stw     r10,8(r11)
-       stw     r2,_TRAP(r11)
-       SAVE_GPR(0, r11)
-       SAVE_4GPRS(3, r11)
-       SAVE_2GPRS(7, r11)
-       addi    r2,r12,-THREAD
+       stw     r12,_NIP(r1)
+       mfcr    r12
+       rlwinm  r12,r12,0,4,2   /* Clear SO bit in CR */
+       stw     r12,_CCR(r1)
        b       transfer_to_syscall             /* jump to handler */
 .endm
 
index e1360b88b6cb86e0c9f10ac9c4141248839b4c66..7d72ee5ab387c04623ea26537af8d970d263dd21 100644 (file)
@@ -701,39 +701,3 @@ _GLOBAL(abort)
         mfspr   r13,SPRN_DBCR0
         oris    r13,r13,DBCR0_RST_SYSTEM@h
         mtspr   SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is the second parameter.
-        */
-       lis     r5, abatron_pteptrs@ha
-       stw     r4, abatron_pteptrs@l + 0x4(r5)
-#endif
-       sync
-       mtspr   SPRN_PID,r3
-       isync                           /* Need an isync to flush shadow */
-                                       /* TLBs after changing PID */
-       blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
-       .data
-       .align  12
-       .globl  sdata
-sdata:
-       .globl  empty_zero_page
-empty_zero_page:
-       .space  4096
-EXPORT_SYMBOL(empty_zero_page)
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
-       .space  8
index 5c106ac3662603eee6581b8e739d2876e494be5e..ddc978a2d3819384ae0935bb41bcb18a94351e88 100644 (file)
@@ -532,6 +532,10 @@ finish_tlb_load_44x:
        andi.   r10,r12,_PAGE_USER              /* User page ? */
        beq     1f                              /* nope, leave U bits empty */
        rlwimi  r11,r11,3,26,28                 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0:     rlwinm  r11,r11,0,~PPC44x_TLB_SX        /* Clear SX if User page */
+       patch_site 0b, patch__tlb_44x_kuep
+#endif
 1:     tlbwe   r11,r13,PPC44x_TLB_ATTRIB       /* Write ATTRIB */
 
        /* Done...restore registers and get out of here.
@@ -743,6 +747,10 @@ finish_tlb_load_47x:
        andi.   r10,r12,_PAGE_USER              /* User page ? */
        beq     1f                              /* nope, leave U bits empty */
        rlwimi  r11,r11,3,26,28                 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0:     rlwinm  r11,r11,0,~PPC47x_TLB2_SX       /* Clear SX if User page */
+       patch_site 0b, patch__tlb_47x_kuep
+#endif
 1:     tlbwe   r11,r13,2
 
        /* Done...restore registers and get out of here.
@@ -780,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck)
        sync
        blr
 
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is the second parameter.
-        */
-       lis     r5, abatron_pteptrs@h
-       ori     r5, r5, abatron_pteptrs@l
-       stw     r4, 0x4(r5)
-#endif
-       mtspr   SPRN_PID,r3
-       isync                   /* Force context change */
-       blr
-
 /*
  * Init CPU state. This is called at boot time or for secondary CPUs
  * to setup initial TLB entries, setup IVORs, etc...
@@ -1239,34 +1233,8 @@ head_start_common:
        isync
        blr
 
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
-       .data
-       .align  PAGE_SHIFT
-       .globl  sdata
-sdata:
-       .globl  empty_zero_page
-empty_zero_page:
-       .space  PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
-/*
- * To support >32-bit physical addresses, we use an 8KB pgdir.
- */
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
-       .space  8
-
 #ifdef CONFIG_SMP
+       .data
        .align  12
 temp_boot_stack:
        .space  1024
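
In the two KUEP hunks above, the 0: label and patch_site pair record the address
of the SX-clearing rlwinm so boot code can rewrite that single instruction at
runtime. A minimal sketch of the boot-time consumer, assuming the generic
patch_instruction_site() helper and the PPC_RAW_NOP() encoding (the real
setup_kuep() body is not part of this diff):

        /* sketch: nop out the SX-clearing instruction when KUEP is disabled */
        #include <asm/code-patching.h>  /* patch_instruction_site(), patch__tlb_44x_kuep */
        #include <asm/inst.h>           /* ppc_inst() */
        #include <asm/ppc-opcode.h>     /* PPC_RAW_NOP() */

        void __init setup_kuep(bool disabled)
        {
                if (disabled)
                        patch_instruction_site(&patch__tlb_44x_kuep,
                                               ppc_inst(PPC_RAW_NOP()));
        }
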
index ece7f97bafff6af74a2adddf7f35016db539a50c..79930b0bc781aefcdcaeee77268902477e885594 100644 (file)
@@ -194,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B)
 
 /* This value is used to mark exception frames on the stack. */
        .section ".toc","aw"
+/* This value is used to mark exception frames on the stack. */
 exception_marker:
-       .tc     ID_72656773_68657265[TC],0x7265677368657265
+       .tc     ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
        .previous
 
 /*
@@ -211,6 +212,8 @@ OPEN_TEXT_SECTION(0x100)
 
 USE_TEXT_SECTION()
 
+#include "interrupt_64.S"
+
 #ifdef CONFIG_PPC_BOOK3E
 /*
  * The booting_thread_hwid holds the thread id we want to boot in cpu
@@ -997,23 +1000,3 @@ start_here_common:
 0:     trap
        EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
        .previous
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
-       .section ".bss"
-/*
- * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
- * We will need to find a better way to fix this
- */
-       .align  16
-
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-       .globl  empty_zero_page
-empty_zero_page:
-       .space  PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
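
In the exception_marker hunk above, the old TOC entry hand-encoded the marker:
0x7265677368657265 is ASCII for "regshere", which is exactly what
STACK_FRAME_REGS_MARKER expands to, so the new spelling changes no bytes. The
marker, stored 16 bytes below STACK_FRAME_OVERHEAD by the entry code, is how
stack walkers recognise an exception frame. A rough sketch of that consumer
(frame_has_regs() is a hypothetical helper; the offset constants are assumed
from asm/ptrace.h):

        /* sketch: how a stack walker spots an exception frame */
        static bool frame_has_regs(unsigned long sp)
        {
                unsigned long *stack = (unsigned long *)sp;

                /* entry code stored "regshere" at STACK_FRAME_OVERHEAD-16 */
                return stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER;
        }
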
index 7d445e4342c0c6eea1cd68eff063d4e276cf81ac..9bdb95f5694f7e8c81a25be374b8fc49293be09a 100644 (file)
@@ -786,28 +786,3 @@ _GLOBAL(mmu_pin_tlb)
        mtspr   SPRN_SRR1, r10
        mtspr   SPRN_SRR0, r11
        rfi
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
-       .data
-       .globl  sdata
-sdata:
-       .globl  empty_zero_page
-       .align  PAGE_SHIFT
-empty_zero_page:
-       .space  PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-/* Room for two PTE table pointers, usually the kernel and current user
- * pointer to their respective root page table (pgdir).
- */
-       .globl  abatron_pteptrs
-abatron_pteptrs:
-       .space  8
index 065178f19a3d62b0445ae38bd9c98bab0365cf4d..764edd860ed46cc8f8a5307f36ae804e999ec082 100644 (file)
@@ -518,8 +518,6 @@ BEGIN_FTR_SECTION
        rlwinm  r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
        mtspr   SPRN_RPA,r1
-       mfspr   r2,SPRN_SRR1            /* Need to restore CR0 */
-       mtcrf   0x80,r2
 BEGIN_MMU_FTR_SECTION
        li      r0,1
        mfspr   r1,SPRN_SPRG_603_LRU
@@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION
        mfspr   r2,SPRN_SRR1
        rlwimi  r2,r0,31-14,14,14
        mtspr   SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+       mtcrf   0x80,r2
+       tlbld   r3
+       rfi
+MMU_FTR_SECTION_ELSE
+       mfspr   r2,SPRN_SRR1            /* Need to restore CR0 */
+       mtcrf   0x80,r2
        tlbld   r3
        rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 DataAddressInvalid:
        mfspr   r3,SPRN_SRR1
        rlwinm  r1,r3,9,6,6     /* Get load/store bit */
@@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION
        mfspr   r2,SPRN_SRR1
        rlwimi  r2,r0,31-14,14,14
        mtspr   SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+       mtcrf   0x80,r2
+       tlbld   r3
+       rfi
+MMU_FTR_SECTION_ELSE
+       mfspr   r2,SPRN_SRR1            /* Need to restore CR0 */
+       mtcrf   0x80,r2
        tlbld   r3
        rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 
 #ifndef CONFIG_ALTIVEC
 #define altivec_assist_exception       unknown_exception
@@ -756,9 +766,6 @@ PerformanceMonitor:
  * the kernel image to physical address PHYSICAL_START.
  */
 relocate_kernel:
-       addis   r9,r26,klimit@ha        /* fetch klimit */
-       lwz     r25,klimit@l(r9)
-       addis   r25,r25,-KERNELBASE@h
        lis     r3,PHYSICAL_START@h     /* Destination base address */
        li      r6,0                    /* Destination offset */
        li      r5,0x4000               /* # bytes of memory to copy */
@@ -766,7 +773,8 @@ relocate_kernel:
        addi    r0,r3,4f@l              /* jump to the address of 4f */
        mtctr   r0                      /* in copy and do the rest. */
        bctr                            /* jump to the copy */
-4:     mr      r5,r25
+4:     lis     r5,_end-KERNELBASE@h
+       ori     r5,r5,_end-KERNELBASE@l
        bl      copy_and_flush          /* copy the rest */
        b       turn_on_mmu
 
@@ -924,12 +932,6 @@ _GLOBAL(load_segment_registers)
        li      r0, NUM_USER_SEGMENTS /* load up user segment register values */
        mtctr   r0              /* for context 0 */
        li      r3, 0           /* Kp = 0, Ks = 0, VSID = 0 */
-#ifdef CONFIG_PPC_KUEP
-       oris    r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
-       oris    r3, r3, SR_KS@h /* Set Ks */
-#endif
        li      r4, 0
 3:     mtsrin  r3, r4
        addi    r3, r3, 0x111   /* increment VSID */
@@ -1023,58 +1025,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
        mtspr   SPRN_SRR1,r4
        rfi
 
-/*
- * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
- *
- * Set up the segment registers for a new context.
- */
-_ENTRY(switch_mmu_context)
-       lwz     r3,MMCONTEXTID(r4)
-       cmpwi   cr0,r3,0
-       blt-    4f
-       mulli   r3,r3,897       /* multiply context by skew factor */
-       rlwinm  r3,r3,4,8,27    /* VSID = (context & 0xfffff) << 4 */
-#ifdef CONFIG_PPC_KUEP
-       oris    r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
-       oris    r3, r3, SR_KS@h /* Set Ks */
-#endif
-       li      r0,NUM_USER_SEGMENTS
-       mtctr   r0
-
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is passed as second argument.
-        */
-       lwz     r4, MM_PGD(r4)
-       lis     r5, abatron_pteptrs@ha
-       stw     r4, abatron_pteptrs@l + 0x4(r5)
-#endif
-BEGIN_MMU_FTR_SECTION
-#ifndef CONFIG_BDI_SWITCH
-       lwz     r4, MM_PGD(r4)
-#endif
-       tophys(r4, r4)
-       rlwinm  r4, r4, 4, 0xffff01ff
-       mtspr   SPRN_SDR1, r4
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
-       li      r4,0
-       isync
-3:
-       mtsrin  r3,r4
-       addi    r3,r3,0x111     /* next VSID */
-       rlwinm  r3,r3,0,8,3     /* clear out any overflow from VSID field */
-       addis   r4,r4,0x1000    /* address of next segment */
-       bdnz    3b
-       sync
-       isync
-       blr
-4:     trap
-       EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
-       blr
-EXPORT_SYMBOL(switch_mmu_context)
-
 /*
  * An undocumented "feature" of 604e requires that the v bit
  * be cleared before changing BAT values.
@@ -1256,61 +1206,4 @@ setup_usbgecko_bat:
        blr
 #endif
 
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
-       .globl  m8260_gorom
-m8260_gorom:
-       mfmsr   r0
-       rlwinm  r0,r0,0,17,15   /* clear MSR_EE in r0 */
-       sync
-       mtmsr   r0
-       sync
-       mfspr   r11, SPRN_HID0
-       lis     r10, 0
-       ori     r10,r10,HID0_ICE|HID0_DCE
-       andc    r11, r11, r10
-       mtspr   SPRN_HID0, r11
-       isync
-       li      r5, MSR_ME|MSR_RI
-       lis     r6,2f@h
-       addis   r6,r6,-KERNELBASE@h
-       ori     r6,r6,2f@l
-       mtspr   SPRN_SRR0,r6
-       mtspr   SPRN_SRR1,r5
-       isync
-       sync
-       rfi
-2:
-       mtlr    r4
-       blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
        .data
-       .globl  sdata
-sdata:
-       .globl  empty_zero_page
-empty_zero_page:
-       .space  4096
-EXPORT_SYMBOL(empty_zero_page)
-
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
-       .space  8
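
The switch_mmu_context() assembly deleted above is converted to C elsewhere in
this series. Derived only from the deleted instructions (897 VSID skew, 0x111
stride, SR_NX/SR_KS under KUEP/KUAP), a simplified sketch of what the C
replacement has to do (SDR1 and Abatron handling omitted, per-iteration VSID
overflow masking omitted):

        /* sketch: C equivalent of the deleted switch_mmu_context() loop */
        void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
        {
                long id = next->context.id;
                unsigned long sr = ((id * 897) << 4) & 0x00fffff0;  /* skewed VSID */
                int i;

                if (id < 0)                     /* the blt- 4f trap path */
                        panic("invalid MMU context");

                if (IS_ENABLED(CONFIG_PPC_KUEP))
                        sr |= SR_NX;            /* user pages not executable */
                if (IS_ENABLED(CONFIG_PPC_KUAP))
                        sr |= SR_KS;            /* key off kernel access */

                isync();
                for (i = 0; i < NUM_USER_SEGMENTS; i++, sr += 0x111)
                        asm volatile("mtsrin %0,%1" : : "r"(sr), "r"(i << 28) : "memory");
                mb();
                isync();
        }
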
index f8247009169703ee0c31ba03acbdd3d6a333902d..87b806e8eded959e1c235868c86be76645cfbc42 100644 (file)
@@ -128,37 +128,20 @@ BEGIN_FTR_SECTION
        mr      r12, r13
        lwz     r13, THREAD_NORMSAVE(2)(r10)
 FTR_SECTION_ELSE
-#endif
        mfcr    r12
-#ifdef CONFIG_KVM_BOOKE_HV
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+       mfcr    r12
 #endif
        mfspr   r9, SPRN_SRR1
        BOOKE_CLEAR_BTB(r11)
-       lwz     r11, TASK_STACK - THREAD(r10)
+       mr      r11, r1
+       lwz     r1, TASK_STACK - THREAD(r10)
        rlwinm  r12,r12,0,4,2   /* Clear SO bit in CR */
-       ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
-       stw     r12, _CCR(r11)          /* save various registers */
-       mflr    r12
-       stw     r12,_LINK(r11)
+       ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+       stw     r12, _CCR(r1)
        mfspr   r12,SPRN_SRR0
-       stw     r1, GPR1(r11)
-       stw     r1, 0(r11)
-       mr      r1, r11
-       stw     r12,_NIP(r11)
-       rlwinm  r9,r9,0,14,12           /* clear MSR_WE (necessary?)       */
-       lis     r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
-       stw     r2,GPR2(r11)
-       addi    r12, r12, STACK_FRAME_REGS_MARKER@l
-       stw     r9,_MSR(r11)
-       li      r2, \trapno
-       stw     r12, 8(r11)
-       stw     r2,_TRAP(r11)
-       SAVE_GPR(0, r11)
-       SAVE_4GPRS(3, r11)
-       SAVE_2GPRS(7, r11)
-
-       addi    r2,r10,-THREAD
+       stw     r12,_NIP(r1)
        b       transfer_to_syscall     /* jump to handler */
 .endm
 
index a1a5c3f10dc4245d16318f8ee1dc13514d162e29..0f9642f36b4900dee70507359451f93f57a7d4f2 100644 (file)
@@ -985,20 +985,6 @@ _GLOBAL(abort)
        mtspr   SPRN_DBCR0,r13
        isync
 
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is the second parameter.
-        */
-       lis     r5, abatron_pteptrs@h
-       ori     r5, r5, abatron_pteptrs@l
-       stw     r4, 0x4(r5)
-#endif
-       mtspr   SPRN_PID,r3
-       isync                   /* Force context change */
-       blr
-
 #ifdef CONFIG_SMP
 /* When we get here, r24 needs to hold the CPU # */
        .globl __secondary_start
@@ -1226,26 +1212,3 @@ _GLOBAL(restore_to_as0)
        */
 3:     mr      r3,r5
        bl      _start
-
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
-       .data
-       .align  12
-       .globl  sdata
-sdata:
-       .globl  empty_zero_page
-empty_zero_page:
-       .space  4096
-EXPORT_SYMBOL(empty_zero_page)
-       .globl  swapper_pg_dir
-swapper_pg_dir:
-       .space  PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
-       .space  8
index 8fc7a14e4d7100d748f294581d6637914cd782dd..21a638aff72fb9246f59d4d0fbf03bff1f5becd6 100644 (file)
@@ -486,7 +486,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
        return;
 
 reset:
-       regs->msr &= ~MSR_SE;
+       regs_set_return_msr(regs, regs->msr & ~MSR_SE);
        for (i = 0; i < nr_wp_slots(); i++) {
                info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
                __set_breakpoint(i, info);
@@ -537,7 +537,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
                        current->thread.last_hit_ubp[i] = bp[i];
                        info[i] = NULL;
                }
-               regs->msr |= MSR_SE;
+               regs_set_return_msr(regs, regs->msr | MSR_SE);
                return false;
        }
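
Both hw_breakpoint.c hunks above are part of a mechanical conversion running
through this series: direct writes to regs->msr become regs_set_return_msr(),
so changing the return MSR also invalidates the SRR values cached by the new
PACASRR_VALID machinery (see interrupt_64.S below). Roughly, with the helper's
shape assumed from its callers here and from set_return_regs_changed() in the
shortlog:

        /* sketch: why regs->msr writes now go through an accessor */
        static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
        {
                regs->msr = msr;
                /* cached SRR0/SRR1 no longer match regs; force a reload on exit */
                set_return_regs_changed();      /* clears paca srr_valid/hsrr_valid */
        }
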
 
index e0938ba298f2a2f928844706ca75ed057707d1f6..21bbd615ca41033b3051b86621f53f471652e6ec 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/context_tracking.h>
 #include <linux/err.h>
 #include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
 
 #include <asm/asm-prototypes.h>
 #include <asm/kup.h>
@@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS];
 
 typedef long (*syscall_fn)(long, long, long, long, long, long);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+       return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+       return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * If restartable is true then EE/RI can be left on, because interrupts are
+ * handled with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+       /* This must be done with RI=1 because tracing may touch vmaps */
+       trace_hardirqs_on();
+
+       if (exit_must_hard_disable() || !restartable)
+               __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+       /* This pattern matches prep_irq_for_idle */
+       if (unlikely(lazy_irq_pending_nocheck())) {
+               if (exit_must_hard_disable() || !restartable) {
+                       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+                       __hard_RI_enable();
+               }
+               trace_hardirqs_off();
+
+               return false;
+       }
+#endif
+       return true;
+}
+
 /* Has to run notrace because it is entered not completely "reconciled" */
 notrace long system_call_exception(long r3, long r4, long r5,
                                   long r6, long r7, long r8,
@@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
        return f(r3, r4, r5, r6, r7, r8);
 }
 
-/*
- * local irqs must be disabled. Returns false if the caller must re-enable
- * them, check for new work, and try again.
- *
- * This should be called with local irqs disabled, but if they were previously
- * enabled when the interrupt handler returns (indicating a process-context /
- * synchronous interrupt) then irqs_enabled should be true.
- */
-static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
-{
-       /* This must be done with RI=1 because tracing may touch vmaps */
-       trace_hardirqs_on();
-
-       /* This pattern matches prep_irq_for_idle */
-       if (clear_ri)
-               __hard_EE_RI_disable();
-       else
-               __hard_irq_disable();
-#ifdef CONFIG_PPC64
-       if (unlikely(lazy_irq_pending_nocheck())) {
-               /* Took an interrupt, may have more exit work to do. */
-               if (clear_ri)
-                       __hard_RI_enable();
-               trace_hardirqs_off();
-               local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
-               return false;
-       }
-       local_paca->irq_happened = 0;
-       irq_soft_mask_set(IRQS_ENABLED);
-#endif
-       return true;
-}
-
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
-{
-       if (__prep_irq_for_enabled_exit(clear_ri))
-               return true;
-
-       /*
-        * Must replay pending soft-masked interrupts now. Don't just
-        * local_irq_enabe(); local_irq_disable(); because if we are
-        * returning from an asynchronous interrupt here, another one
-        * might hit after irqs are enabled, and it would exit via this
-        * same path allowing another to fire, and so on unbounded.
-        *
-        * If interrupts were enabled when this interrupt exited,
-        * indicating a process context (synchronous) interrupt,
-        * local_irq_enable/disable can be used, which will enable
-        * interrupts rather than keeping them masked (unclear how
-        * much benefit this is over just replaying for all cases,
-        * because we immediately disable again, so all we're really
-        * doing is allowing hard interrupts to execute directly for
-        * a very small time, rather than being masked and replayed).
-        */
-       if (irqs_enabled) {
-               local_irq_enable();
-               local_irq_disable();
-       } else {
-               replay_soft_interrupts();
-       }
-
-       return false;
-}
-
 static notrace void booke_load_dbcr0(void)
 {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void)
 #endif
 }
 
-/*
- * This should be called after a syscall returns, with r3 the return value
- * from the syscall. If this function returns non-zero, the system call
- * exit assembly should additionally load all GPR registers and CTR and XER
- * from the interrupt frame.
- *
- * The function graph tracer can not trace the return side of this function,
- * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
- */
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
-                                          struct pt_regs *regs,
-                                          long scv)
+static void check_return_regs_valid(struct pt_regs *regs)
 {
-       unsigned long ti_flags;
-       unsigned long ret = 0;
-       bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+#ifdef CONFIG_PPC_BOOK3S_64
+       unsigned long trap, srr0, srr1;
+       static bool warned;
+       u8 *validp;
+       char *h;
 
-       CT_WARN_ON(ct_state() == CONTEXT_USER);
+       if (trap_is_scv(regs))
+               return;
 
-       kuap_assert_locked();
+       trap = regs->trap;
+       // External interrupts taken in HV mode set the HSRRs, like 0xea0
+       if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+               trap = 0xea0;
+
+       switch (trap) {
+       case 0x980:
+       case INTERRUPT_H_DATA_STORAGE:
+       case 0xe20:
+       case 0xe40:
+       case INTERRUPT_HMI:
+       case 0xe80:
+       case 0xea0:
+       case INTERRUPT_H_FAC_UNAVAIL:
+       case 0x1200:
+       case 0x1500:
+       case 0x1600:
+       case 0x1800:
+               validp = &local_paca->hsrr_valid;
+               if (!*validp)
+                       return;
+
+               srr0 = mfspr(SPRN_HSRR0);
+               srr1 = mfspr(SPRN_HSRR1);
+               h = "H";
+
+               break;
+       default:
+               validp = &local_paca->srr_valid;
+               if (!*validp)
+                       return;
+
+               srr0 = mfspr(SPRN_SRR0);
+               srr1 = mfspr(SPRN_SRR1);
+               h = "";
+               break;
+       }
 
-       regs->result = r3;
+       if (srr0 == regs->nip && srr1 == regs->msr)
+               return;
 
-       /* Check whether the syscall is issued inside a restartable sequence */
-       rseq_syscall(regs);
+       /*
+        * An NMI / soft-NMI interrupt may have come in after we found
+        * srr_valid and before the SRRs are loaded. The interrupt then
+        * comes in and clobbers SRRs and clears srr_valid. Then we load
+        * the SRRs here and test them above and find they don't match.
+        *
+        * Test validity again after that, to catch such false positives.
+        *
+        * This test still has a window for false negatives: an NMI that
+        * comes in later and clobbers the SRRs without clearing srr_valid
+        * is neither caught nor fixed here. Statistically, though, enough
+        * such cases should be caught to get a warning out.
+        */
+       barrier();
 
-       ti_flags = current_thread_info()->flags;
+       if (!*validp)
+               return;
 
-       if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
-               if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
-                       r3 = -r3;
-                       regs->ccr |= 0x10000000; /* Set SO bit in CR */
-               }
+       if (!warned) {
+               warned = true;
+               printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+               printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+               show_regs(regs);
        }
 
-       if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
-               if (ti_flags & _TIF_RESTOREALL)
-                       ret = _TIF_RESTOREALL;
-               else
-                       regs->gpr[3] = r3;
-               clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
-       } else {
-               regs->gpr[3] = r3;
-       }
-
-       if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
-               do_syscall_trace_leave(regs);
-               ret |= _TIF_RESTOREALL;
-       }
+       *validp = 0; /* fixup */
+#endif
+}
 
-       local_irq_disable();
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+       unsigned long ti_flags;
 
 again:
        ti_flags = READ_ONCE(current_thread_info()->flags);
@@ -303,7 +321,7 @@ again:
                ti_flags = READ_ONCE(current_thread_info()->flags);
        }
 
-       if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
                if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
                                unlikely((ti_flags & _TIF_RESTORE_TM))) {
                        restore_tm_state(regs);
@@ -327,10 +345,10 @@ again:
                }
        }
 
-       user_enter_irqoff();
+       check_return_regs_valid(regs);
 
-       /* scv need not set RI=0 because SRRs are not used */
-       if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
+       user_enter_irqoff();
+       if (!prep_irq_for_enabled_exit(true)) {
                user_exit_irqoff();
                local_irq_enable();
                local_irq_disable();
@@ -352,90 +370,131 @@ again:
        return ret;
 }
 
-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+                                          struct pt_regs *regs,
+                                          long scv)
 {
        unsigned long ti_flags;
-       unsigned long flags;
        unsigned long ret = 0;
+       bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
 
-       if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-               BUG_ON(!(regs->msr & MSR_RI));
-       BUG_ON(!(regs->msr & MSR_PR));
-       BUG_ON(arch_irq_disabled_regs(regs));
        CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-       /*
-        * We don't need to restore AMR on the way back to userspace for KUAP.
-        * AMR can only have been unlocked if we interrupted the kernel.
-        */
        kuap_assert_locked();
 
-       local_irq_save(flags);
-
-again:
-       ti_flags = READ_ONCE(current_thread_info()->flags);
-       while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
-               local_irq_enable(); /* returning to user: may enable */
-               if (ti_flags & _TIF_NEED_RESCHED) {
-                       schedule();
-               } else {
-                       if (ti_flags & _TIF_SIGPENDING)
-                               ret |= _TIF_RESTOREALL;
-                       do_notify_resume(regs, ti_flags);
-               }
-               local_irq_disable();
-               ti_flags = READ_ONCE(current_thread_info()->flags);
-       }
+       regs->result = r3;
 
-       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
-               if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-                               unlikely((ti_flags & _TIF_RESTORE_TM))) {
-                       restore_tm_state(regs);
-               } else {
-                       unsigned long mathflags = MSR_FP;
+       /* Check whether the syscall is issued inside a restartable sequence */
+       rseq_syscall(regs);
 
-                       if (cpu_has_feature(CPU_FTR_VSX))
-                               mathflags |= MSR_VEC | MSR_VSX;
-                       else if (cpu_has_feature(CPU_FTR_ALTIVEC))
-                               mathflags |= MSR_VEC;
+       ti_flags = current_thread_info()->flags;
 
-                       /* See above restore_math comment */
-                       if ((regs->msr & mathflags) != mathflags)
-                               restore_math(regs);
+       if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+               if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+                       r3 = -r3;
+                       regs->ccr |= 0x10000000; /* Set SO bit in CR */
                }
        }
 
-       user_enter_irqoff();
+       if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+               if (ti_flags & _TIF_RESTOREALL)
+                       ret = _TIF_RESTOREALL;
+               else
+                       regs->gpr[3] = r3;
+               clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+       } else {
+               regs->gpr[3] = r3;
+       }
 
-       if (unlikely(!__prep_irq_for_enabled_exit(true))) {
-               user_exit_irqoff();
-               local_irq_enable();
-               local_irq_disable();
-               goto again;
+       if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+               do_syscall_trace_leave(regs);
+               ret |= _TIF_RESTOREALL;
        }
 
-       booke_load_dbcr0();
+       local_irq_disable();
+       ret = interrupt_exit_user_prepare_main(ret, regs);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       local_paca->tm_scratch = regs->msr;
+#ifdef CONFIG_PPC64
+       regs->exit_result = ret;
 #endif
 
-       account_cpu_user_exit();
+       return ret;
+}
 
-       /* Restore user access locks last */
-       kuap_user_restore(regs);
-       kuep_unlock();
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+       /*
+        * This is called when detecting a soft-pending interrupt as well as
+        * an alternate-return interrupt. So we can't just have the alternate
+        * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+        * the soft-pending case were to fix things up as well). RI might be
+        * disabled, in which case it gets re-enabled by __hard_irq_disable().
+        */
+       __hard_irq_disable();
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+       trace_hardirqs_off();
+       user_exit_irqoff();
+       account_cpu_user_entry();
+
+       BUG_ON(!user_mode(regs));
+
+       regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+       return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+       unsigned long ret;
+
+       if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
+               BUG_ON(!(regs->msr & MSR_RI));
+       BUG_ON(!(regs->msr & MSR_PR));
+       BUG_ON(arch_irq_disabled_regs(regs));
+       CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+       /*
+        * We don't need to restore AMR on the way back to userspace for KUAP.
+        * AMR can only have been unlocked if we interrupted the kernel.
+        */
+       kuap_assert_locked();
+
+       local_irq_disable();
+
+       ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+       regs->exit_result = ret;
+#endif
 
        return ret;
 }
 
 void preempt_schedule_irq(void);
 
-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 {
        unsigned long flags;
        unsigned long ret = 0;
        unsigned long kuap;
+       bool stack_store = current_thread_info()->flags &
+                                               _TIF_EMULATE_STACK_STORE;
 
        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
            unlikely(!(regs->msr & MSR_RI)))
@@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 
        kuap = kuap_get_and_assert_locked();
 
-       if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
-               clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
-               ret = 1;
-       }
-
        local_irq_save(flags);
 
        if (!arch_irq_disabled_regs(regs)) {
@@ -469,17 +523,58 @@ again:
                        }
                }
 
-               if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+               check_return_regs_valid(regs);
+
+               /*
+                * Stack store exit can't be restarted because the interrupt
+                * stack frame might have been clobbered.
+                */
+               if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+                       /*
+                        * Replay pending soft-masked interrupts now. Don't
+                        * just local_irq_enable(); local_irq_disable(); because
+                        * if we are returning from an asynchronous interrupt
+                        * here, another one might hit after irqs are enabled,
+                        * and it would exit via this same path allowing
+                        * another to fire, and so on unbounded.
+                        */
+                       hard_irq_disable();
+                       replay_soft_interrupts();
+                       /* Took an interrupt, may have more exit work to do. */
                        goto again;
-       } else {
-               /* Returning to a kernel context with local irqs disabled. */
-               __hard_EE_RI_disable();
+               }
 #ifdef CONFIG_PPC64
+               /*
+                * An interrupt may clear MSR[EE] and set this concurrently,
+                * but it will be marked pending and the exit will be retried.
+                * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+                * clear, until interrupt_exit_kernel_restart() calls
+                * hard_irq_disable(), which will set HARD_DIS again.
+                */
+               local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+       } else {
+               check_return_regs_valid(regs);
+
+               if (unlikely(stack_store))
+                       __hard_EE_RI_disable();
+               /*
+                * Returning to a kernel context with local irqs disabled.
+                * Here, if EE was enabled in the interrupted context, enable
+                * it on return as well. A problem exists here where a soft
+                * masked interrupt may have cleared MSR[EE] and set HARD_DIS
+                * here, and it will still exist on return to the caller. This
+                * will be resolved by the masked interrupt firing again.
+                */
                if (regs->msr & MSR_EE)
                        local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-#endif
+#endif /* CONFIG_PPC64 */
        }
 
+       if (unlikely(stack_store)) {
+               clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+               ret = 1;
+       }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        local_paca->tm_scratch = regs->msr;
@@ -494,3 +589,46 @@ again:
 
        return ret;
 }
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+       __hard_irq_disable();
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+       trace_hardirqs_off();
+       user_exit_irqoff();
+       account_cpu_user_entry();
+
+       BUG_ON(!user_mode(regs));
+
+       regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+       return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+       __hard_irq_disable();
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+       if (regs->softe == IRQS_ENABLED)
+               trace_hardirqs_off();
+
+       BUG_ON(user_mode(regs));
+
+       return interrupt_exit_kernel_prepare(regs);
+}
+#endif
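
Every exit path above follows the same contract: do the exit work with local
irqs disabled, and if prep_irq_for_enabled_exit() returns false because a
soft-masked interrupt became pending in the window, let it run and try again.
A schematic of the user-return variant (do_exit_work() is a hypothetical
stand-in for the reschedule/signal/FP-restore block; the kernel-return path
replays soft interrupts directly instead):

        /* sketch: the retry contract shared by the exit paths above */
        for (;;) {
                do_exit_work(regs);             /* reschedule, signals, FP/TM restore */

                if (prep_irq_for_enabled_exit(restartable))
                        break;                  /* nothing pending, exit may proceed */

                /* a soft-masked interrupt is pending: replay it, then recheck */
                local_irq_enable();
                local_irq_disable();
        }
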
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644 (file)
index 0000000..4063e8a
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,770 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/tm.h>
+
+       .section        ".toc","aw"
+SYS_CALL_TABLE:
+       .tc sys_call_table[TC],sys_call_table
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYS_CALL_TABLE:
+       .tc compat_sys_call_table[TC],compat_sys_call_table
+#endif
+       .previous
+
+       .align 7
+
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+       .ifc \srr,srr
+       mfspr   r11,SPRN_SRR0
+       ld      r12,_NIP(r1)
+100:   tdne    r11,r12
+       EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+       mfspr   r11,SPRN_SRR1
+       ld      r12,_MSR(r1)
+100:   tdne    r11,r12
+       EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+       .else
+       mfspr   r11,SPRN_HSRR0
+       ld      r12,_NIP(r1)
+100:   tdne    r11,r12
+       EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+       mfspr   r11,SPRN_HSRR1
+       ld      r12,_MSR(r1)
+100:   tdne    r11,r12
+       EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+       .endif
+#endif
+.endm
+
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+       .globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+       bne     tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+       SCV_INTERRUPT_TO_KERNEL
+       mr      r10,r1
+       ld      r1,PACAKSAVE(r13)
+       std     r10,0(r1)
+       std     r11,_NIP(r1)
+       std     r12,_MSR(r1)
+       std     r0,GPR0(r1)
+       std     r10,GPR1(r1)
+       std     r2,GPR2(r1)
+       ld      r2,PACATOC(r13)
+       mfcr    r12
+       li      r11,0
+       /* Can we avoid saving r3-r8 in common case? */
+       std     r3,GPR3(r1)
+       std     r4,GPR4(r1)
+       std     r5,GPR5(r1)
+       std     r6,GPR6(r1)
+       std     r7,GPR7(r1)
+       std     r8,GPR8(r1)
+       /* Zero r9-r12, this should only be required when restoring all GPRs */
+       std     r11,GPR9(r1)
+       std     r11,GPR10(r1)
+       std     r11,GPR11(r1)
+       std     r11,GPR12(r1)
+       std     r9,GPR13(r1)
+       SAVE_NVGPRS(r1)
+       std     r11,_XER(r1)
+       std     r11,_LINK(r1)
+       std     r11,_CTR(r1)
+
+       li      r11,\trapnr
+       std     r11,_TRAP(r1)
+       std     r12,_CCR(r1)
+       addi    r10,r1,STACK_FRAME_OVERHEAD
+       ld      r11,exception_marker@toc(r2)
+       std     r11,-16(r10)            /* "regshere" marker */
+
+BEGIN_FTR_SECTION
+       HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+       /*
+        * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+        * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+        * and interrupts may be masked and pending already.
+        * system_call_exception() will call trace_hardirqs_off() which means
+        * interrupts could already have been blocked before trace_hardirqs_off,
+        * but this is the best we can do.
+        */
+
+       /* Calling convention has r9 = orig r0, r10 = regs */
+       mr      r9,r0
+       bl      system_call_exception
+
+.Lsyscall_vectored_\name\()_exit:
+       addi    r4,r1,STACK_FRAME_OVERHEAD
+       li      r5,1 /* scv */
+       bl      syscall_exit_prepare
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Lsyscall_vectored_\name\()_rst_start:
+       lbz     r11,PACAIRQHAPPENED(r13)
+       andi.   r11,r11,(~PACA_IRQ_HARD_DIS)@l
+       bne-    syscall_vectored_\name\()_restart
+       li      r11,IRQS_ENABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       li      r11,0
+       stb     r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+       ld      r2,_CCR(r1)
+       ld      r4,_NIP(r1)
+       ld      r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+       stdcx.  r0,0,r1                 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+       HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+       cmpdi   r3,0
+       bne     .Lsyscall_vectored_\name\()_restore_regs
+
+       /* rfscv returns with LR->NIA and CTR->MSR */
+       mtlr    r4
+       mtctr   r5
+
+       /* Could zero these as per ABI, but we may consider a stricter ABI
+        * which preserves these if libc implementations can benefit, so
+        * restore them for now until further measurement is done. */
+       ld      r0,GPR0(r1)
+       ld      r4,GPR4(r1)
+       ld      r5,GPR5(r1)
+       ld      r6,GPR6(r1)
+       ld      r7,GPR7(r1)
+       ld      r8,GPR8(r1)
+       /* Zero volatile regs that may contain sensitive kernel data */
+       li      r9,0
+       li      r10,0
+       li      r11,0
+       li      r12,0
+       mtspr   SPRN_XER,r0
+
+       /*
+        * We don't need to restore AMR on the way back to userspace for KUAP.
+        * The value of AMR only matters while we're in the kernel.
+        */
+       mtcr    r2
+       ld      r2,GPR2(r1)
+       ld      r3,GPR3(r1)
+       ld      r13,GPR13(r1)
+       ld      r1,GPR1(r1)
+       RFSCV_TO_USER
+       b       .       /* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+       mtspr   SPRN_SRR0,r4
+       mtspr   SPRN_SRR1,r5
+
+       ld      r3,_CTR(r1)
+       ld      r4,_LINK(r1)
+       ld      r5,_XER(r1)
+
+       REST_NVGPRS(r1)
+       ld      r0,GPR0(r1)
+       mtcr    r2
+       mtctr   r3
+       mtlr    r4
+       mtspr   SPRN_XER,r5
+       REST_10GPRS(2, r1)
+       REST_2GPRS(12, r1)
+       ld      r1,GPR1(r1)
+       RFI_TO_USER
+.Lsyscall_vectored_\name\()_rst_end:
+
+syscall_vectored_\name\()_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
+       GET_PACA(r13)
+       ld      r1,PACA_EXIT_SAVE_R1(r13)
+       ld      r2,PACATOC(r13)
+       ld      r3,RESULT(r1)
+       addi    r4,r1,STACK_FRAME_OVERHEAD
+       li      r11,IRQS_ALL_DISABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       bl      syscall_exit_restart
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+       b       .Lsyscall_vectored_\name\()_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
+
+.endm
+
+system_call_vectored common 0x3000
+
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+
+/*
+ * Entered via kernel return set up by kernel/sstep.c, must match entry regs
+ */
+       .globl system_call_vectored_emulate
+system_call_vectored_emulate:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
+       li      r10,IRQS_ALL_DISABLED
+       stb     r10,PACAIRQSOFTMASK(r13)
+       b       system_call_vectored_common
+#endif /* CONFIG_PPC_BOOK3S */
+
+       .balign IFETCH_ALIGN_BYTES
+       .globl system_call_common_real
+system_call_common_real:
+_ASM_NOKPROBE_SYMBOL(system_call_common_real)
+       ld      r10,PACAKMSR(r13)       /* get MSR value for kernel */
+       mtmsrd  r10
+
+       .balign IFETCH_ALIGN_BYTES
+       .globl system_call_common
+system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+       bne     tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+       mr      r10,r1
+       ld      r1,PACAKSAVE(r13)
+       std     r10,0(r1)
+       std     r11,_NIP(r1)
+       std     r12,_MSR(r1)
+       std     r0,GPR0(r1)
+       std     r10,GPR1(r1)
+       std     r2,GPR2(r1)
+#ifdef CONFIG_PPC_FSL_BOOK3E
+START_BTB_FLUSH_SECTION
+       BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
+       ld      r2,PACATOC(r13)
+       mfcr    r12
+       li      r11,0
+       /* Can we avoid saving r3-r8 in common case? */
+       std     r3,GPR3(r1)
+       std     r4,GPR4(r1)
+       std     r5,GPR5(r1)
+       std     r6,GPR6(r1)
+       std     r7,GPR7(r1)
+       std     r8,GPR8(r1)
+       /* Zero r9-r12, this should only be required when restoring all GPRs */
+       std     r11,GPR9(r1)
+       std     r11,GPR10(r1)
+       std     r11,GPR11(r1)
+       std     r11,GPR12(r1)
+       std     r9,GPR13(r1)
+       SAVE_NVGPRS(r1)
+       std     r11,_XER(r1)
+       std     r11,_CTR(r1)
+       mflr    r10
+
+       /*
+        * This clears CR0.SO (bit 28), which is the error indication on
+        * return from this system call.
+        */
+       rldimi  r12,r11,28,(63-28)
+       li      r11,0xc00
+       std     r10,_LINK(r1)
+       std     r11,_TRAP(r1)
+       std     r12,_CCR(r1)
+       addi    r10,r1,STACK_FRAME_OVERHEAD
+       ld      r11,exception_marker@toc(r2)
+       std     r11,-16(r10)            /* "regshere" marker */
+
+#ifdef CONFIG_PPC_BOOK3S
+       li      r11,1
+       stb     r11,PACASRR_VALID(r13)
+#endif
+
+       /*
+        * We always enter kernel from userspace with irq soft-mask enabled and
+        * nothing pending. system_call_exception() will call
+        * trace_hardirqs_off().
+        */
+       li      r11,IRQS_ALL_DISABLED
+       li      r12,-1 /* Set MSR_EE and MSR_RI */
+       stb     r11,PACAIRQSOFTMASK(r13)
+       mtmsrd  r12,1
+
+       /* Calling convention has r9 = orig r0, r10 = regs */
+       mr      r9,r0
+       bl      system_call_exception
+
+.Lsyscall_exit:
+       addi    r4,r1,STACK_FRAME_OVERHEAD
+       li      r5,0 /* !scv */
+       bl      syscall_exit_prepare
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Lsyscall_rst_start:
+       lbz     r11,PACAIRQHAPPENED(r13)
+       andi.   r11,r11,(~PACA_IRQ_HARD_DIS)@l
+       bne-    syscall_restart
+#endif
+       li      r11,IRQS_ENABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       li      r11,0
+       stb     r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+       ld      r2,_CCR(r1)
+       ld      r6,_LINK(r1)
+       mtlr    r6
+
+#ifdef CONFIG_PPC_BOOK3S
+       lbz     r4,PACASRR_VALID(r13)
+       cmpdi   r4,0
+       bne     1f
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+#endif
+       ld      r4,_NIP(r1)
+       ld      r5,_MSR(r1)
+       mtspr   SPRN_SRR0,r4
+       mtspr   SPRN_SRR1,r5
+1:
+       DEBUG_SRR_VALID srr
+
+BEGIN_FTR_SECTION
+       stdcx.  r0,0,r1                 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+       cmpdi   r3,0
+       bne     .Lsyscall_restore_regs
+       /* Zero volatile regs that may contain sensitive kernel data */
+       li      r0,0
+       li      r4,0
+       li      r5,0
+       li      r6,0
+       li      r7,0
+       li      r8,0
+       li      r9,0
+       li      r10,0
+       li      r11,0
+       li      r12,0
+       mtctr   r0
+       mtspr   SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
+
+BEGIN_FTR_SECTION
+       HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+       /*
+        * We don't need to restore AMR on the way back to userspace for KUAP.
+        * The value of AMR only matters while we're in the kernel.
+        */
+       mtcr    r2
+       ld      r2,GPR2(r1)
+       ld      r3,GPR3(r1)
+       ld      r13,GPR13(r1)
+       ld      r1,GPR1(r1)
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+
+.Lsyscall_restore_regs:
+       ld      r3,_CTR(r1)
+       ld      r4,_XER(r1)
+       REST_NVGPRS(r1)
+       mtctr   r3
+       mtspr   SPRN_XER,r4
+       ld      r0,GPR0(r1)
+       REST_8GPRS(4, r1)
+       ld      r12,GPR12(r1)
+       b       .Lsyscall_restore_regs_cont
+.Lsyscall_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+syscall_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_restart)
+       GET_PACA(r13)
+       ld      r1,PACA_EXIT_SAVE_R1(r13)
+       ld      r2,PACATOC(r13)
+       ld      r3,RESULT(r1)
+       addi    r4,r1,STACK_FRAME_OVERHEAD
+       li      r11,IRQS_ALL_DISABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       bl      syscall_exit_restart
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+       b       .Lsyscall_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tabort_syscall:
+_ASM_NOKPROBE_SYMBOL(tabort_syscall)
+       /* Firstly we need to enable TM in the kernel */
+       mfmsr   r10
+       li      r9, 1
+       rldimi  r10, r9, MSR_TM_LG, 63-MSR_TM_LG
+       mtmsrd  r10, 0
+
+       /* tabort, this dooms the transaction, nothing else */
+       li      r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+       TABORT(R9)
+
+       /*
+        * Return directly to userspace. We have corrupted user register state,
+        * but userspace will never see that register state. Execution will
+        * resume after the tbegin of the aborted transaction with the
+        * checkpointed register state.
+        */
+       li      r9, MSR_RI
+       andc    r10, r10, r9
+       mtmsrd  r10, 1
+       mtspr   SPRN_SRR0, r11
+       mtspr   SPRN_SRR1, r12
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+#endif
+
+       /*
+        * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+        * touched, no exit work created, then this can be used.
+        */
+       .balign IFETCH_ALIGN_BYTES
+       .globl fast_interrupt_return_srr
+fast_interrupt_return_srr:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
+       kuap_check_amr r3, r4
+       ld      r5,_MSR(r1)
+       andi.   r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
+       beq     1f
+       kuap_user_restore r3, r4
+       b       .Lfast_user_interrupt_return_srr
+1:     kuap_kernel_restore r3, r4
+       andi.   r0,r5,MSR_RI
+       li      r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+       bne+    .Lfast_kernel_interrupt_return_srr
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      unrecoverable_exception
+       b       . /* should not get here */
+#else
+       bne     .Lfast_user_interrupt_return_srr
+       b       .Lfast_kernel_interrupt_return_srr
+#endif
+
+.macro interrupt_return_macro srr
+       .balign IFETCH_ALIGN_BYTES
+       .globl interrupt_return_\srr
+interrupt_return_\srr\():
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
+       ld      r4,_MSR(r1)
+       andi.   r0,r4,MSR_PR
+       beq     interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      interrupt_exit_user_prepare
+       cmpdi   r3,0
+       bne-    .Lrestore_nvgprs_\srr
+.Lrestore_nvgprs_\srr\()_cont:
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Linterrupt_return_\srr\()_user_rst_start:
+       lbz     r11,PACAIRQHAPPENED(r13)
+       andi.   r11,r11,(~PACA_IRQ_HARD_DIS)@l
+       bne-    interrupt_return_\srr\()_user_restart
+#endif
+       li      r11,IRQS_ENABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       li      r11,0
+       stb     r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+.Lfast_user_interrupt_return_\srr\():
+#ifdef CONFIG_PPC_BOOK3S
+       .ifc \srr,srr
+       lbz     r4,PACASRR_VALID(r13)
+       .else
+       lbz     r4,PACAHSRR_VALID(r13)
+       .endif
+       cmpdi   r4,0
+       li      r4,0
+       bne     1f
+#endif
+       ld      r11,_NIP(r1)
+       ld      r12,_MSR(r1)
+       .ifc \srr,srr
+       mtspr   SPRN_SRR0,r11
+       mtspr   SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+       stb     r4,PACASRR_VALID(r13)
+#endif
+       .else
+       mtspr   SPRN_HSRR0,r11
+       mtspr   SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+       stb     r4,PACAHSRR_VALID(r13)
+#endif
+       .endif
+       DEBUG_SRR_VALID \srr
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       lbz     r4,PACAIRQSOFTMASK(r13)
+       tdnei   r4,IRQS_ENABLED
+#endif
+
+BEGIN_FTR_SECTION
+       ld      r10,_PPR(r1)
+       mtspr   SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+BEGIN_FTR_SECTION
+       stdcx.  r0,0,r1         /* to clear the reservation */
+FTR_SECTION_ELSE
+       ldarx   r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+       ld      r3,_CCR(r1)
+       ld      r4,_LINK(r1)
+       ld      r5,_CTR(r1)
+       ld      r6,_XER(r1)
+       li      r0,0
+
+       REST_4GPRS(7, r1)
+       REST_2GPRS(11, r1)
+       REST_GPR(13, r1)
+
+       mtcr    r3
+       mtlr    r4
+       mtctr   r5
+       mtspr   SPRN_XER,r6
+
+       REST_4GPRS(2, r1)
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
+       .ifc \srr,srr
+       RFI_TO_USER
+       .else
+       HRFI_TO_USER
+       .endif
+       b       .       /* prevent speculative execution */
+.Linterrupt_return_\srr\()_user_rst_end:
+
+.Lrestore_nvgprs_\srr\():
+       REST_NVGPRS(r1)
+       b       .Lrestore_nvgprs_\srr\()_cont
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_user_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
+       GET_PACA(r13)
+       ld      r1,PACA_EXIT_SAVE_R1(r13)
+       ld      r2,PACATOC(r13)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       li      r11,IRQS_ALL_DISABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       bl      interrupt_exit_user_restart
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+       b       .Linterrupt_return_\srr\()_user_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
+#endif
+
+       .balign IFETCH_ALIGN_BYTES
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      interrupt_exit_kernel_prepare
+
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Linterrupt_return_\srr\()_kernel_rst_start:
+       ld      r11,SOFTE(r1)
+       cmpwi   r11,IRQS_ENABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       bne     1f
+#ifdef CONFIG_PPC_BOOK3S
+       lbz     r11,PACAIRQHAPPENED(r13)
+       andi.   r11,r11,(~PACA_IRQ_HARD_DIS)@l
+       bne-    interrupt_return_\srr\()_kernel_restart
+#endif
+       li      r11,0
+       stb     r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+1:
+
+.Lfast_kernel_interrupt_return_\srr\():
+       cmpdi   cr1,r3,0
+#ifdef CONFIG_PPC_BOOK3S
+       .ifc \srr,srr
+       lbz     r4,PACASRR_VALID(r13)
+       .else
+       lbz     r4,PACAHSRR_VALID(r13)
+       .endif
+       cmpdi   r4,0
+       li      r4,0
+       bne     1f
+#endif
+       ld      r11,_NIP(r1)
+       ld      r12,_MSR(r1)
+       .ifc \srr,srr
+       mtspr   SPRN_SRR0,r11
+       mtspr   SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+       stb     r4,PACASRR_VALID(r13)
+#endif
+       .else
+       mtspr   SPRN_HSRR0,r11
+       mtspr   SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+       stb     r4,PACAHSRR_VALID(r13)
+#endif
+       .endif
+       DEBUG_SRR_VALID \srr
+
+BEGIN_FTR_SECTION
+       stdcx.  r0,0,r1         /* to clear the reservation */
+FTR_SECTION_ELSE
+       ldarx   r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+       ld      r3,_LINK(r1)
+       ld      r4,_CTR(r1)
+       ld      r5,_XER(r1)
+       ld      r6,_CCR(r1)
+       li      r0,0
+
+       REST_4GPRS(7, r1)
+       REST_2GPRS(11, r1)
+
+       mtlr    r3
+       mtctr   r4
+       mtspr   SPRN_XER,r5
+
+       /*
+        * Leaving a stale exception_marker on the stack can confuse
+        * the reliable stack unwinder later on. Clear it.
+        */
+       std     r0,STACK_FRAME_OVERHEAD-16(r1)
+
+       REST_4GPRS(2, r1)
+
+       bne-    cr1,1f /* emulate stack store */
+       mtcr    r6
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
+       .ifc \srr,srr
+       RFI_TO_KERNEL
+       .else
+       HRFI_TO_KERNEL
+       .endif
+       b       .       /* prevent speculative execution */
+
+1:     /*
+        * Emulate stack store with update. New r1 value was already calculated
+        * and updated in our interrupt regs by emulate_loadstore, but we can't
+        * store the previous value of r1 to the stack before re-loading our
+        * registers from it, otherwise they could be clobbered.  Use
+        * PACA_EXGEN as temporary storage to hold the store data, as
+        * interrupts are disabled here so it won't be clobbered.
+        */
+       mtcr    r6
+       std     r9,PACA_EXGEN+0(r13)
+       addi    r9,r1,INT_FRAME_SIZE /* get original r1 */
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
+       std     r9,0(r1) /* perform store component of stdu */
+       ld      r9,PACA_EXGEN+0(r13)
+
+       .ifc \srr,srr
+       RFI_TO_KERNEL
+       .else
+       HRFI_TO_KERNEL
+       .endif
+       b       .       /* prevent speculative execution */
+.Linterrupt_return_\srr\()_kernel_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_kernel_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
+       GET_PACA(r13)
+       ld      r1,PACA_EXIT_SAVE_R1(r13)
+       ld      r2,PACATOC(r13)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       li      r11,IRQS_ALL_DISABLED
+       stb     r11,PACAIRQSOFTMASK(r13)
+       bl      interrupt_exit_kernel_restart
+       std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+       b       .Linterrupt_return_\srr\()_kernel_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
+
+.endm
+
+interrupt_return_macro srr
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_macro hsrr
+
+       .globl __end_soft_masked
+__end_soft_masked:
+DEFINE_FIXED_SYMBOL(__end_soft_masked)
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+       bl      schedule_tail
+       REST_NVGPRS(r1)
+       li      r3,0    /* fork() return value */
+       b       .Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+       bl      schedule_tail
+       REST_NVGPRS(r1)
+       li      r3,0    /* fork() return value */
+       b       .Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_thread)
+       bl      schedule_tail
+       REST_NVGPRS(r1)
+       mtctr   r14
+       mr      r3,r15
+#ifdef PPC64_ELF_ABI_v2
+       mr      r12,r14
+#endif
+       bctrl
+       li      r3,0
+       b       .Lsyscall_exit
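
A note on the ret_from_kernel_thread path above: it consumes values that copy_thread() plants in the new task's non-volatile registers, r14 holding the thread function and r15 its argument, which is why they survive the _switch() into the new thread. The extra mr r12,r14 satisfies the ELFv2 ABI, which expects r12 to hold the entry address of the called function. A simplified, illustrative sketch of the seeding side (fn/arg are placeholder names, not the verbatim copy_thread() code):

	/* In copy_thread(), for a kernel thread (illustrative sketch): */
	childregs->gpr[14] = ppc_function_entry((void *)fn); /* read back via mtctr r14 */
	childregs->gpr[15] = (unsigned long)arg;             /* read back via mr r3,r15 */
	p->thread.regs = NULL;          /* a kernel thread has no user register frame */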
index 72cb45393ef29568dee0bb339842b2c7dce2b52d..91e63eac4e8fac47e6cf6cad85e93c9a53d016d6 100644 (file)
@@ -121,6 +121,7 @@ void replay_soft_interrupts(void)
 
        ppc_save_regs(&regs);
        regs.softe = IRQS_ENABLED;
+       regs.msr |= MSR_EE;
 
 again:
        if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
@@ -217,6 +218,100 @@ static inline void replay_soft_interrupts_irqrestore(void)
 #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
 #endif
 
+#ifdef CONFIG_CC_HAS_ASM_GOTO
+notrace void arch_local_irq_restore(unsigned long mask)
+{
+       unsigned char irq_happened;
+
+       /* Write the new soft-enabled value if it is a disable */
+       if (mask) {
+               irq_soft_mask_set(mask);
+               return;
+       }
+
+       /*
+        * After the stb, interrupts are unmasked and there are no interrupts
+        * pending replay. The restart sequence makes this atomic with
+        * respect to soft-masked interrupts. If this was just a simple code
+        * sequence, a soft-masked interrupt could become pending right after
+        * the comparison and before the stb.
+        *
+        * This allows interrupts to be unmasked without hard disabling, and
+        * also without new hard interrupts coming in ahead of pending ones.
+        */
+       asm_volatile_goto(
+"1:                                    \n"
+"              lbz     9,%0(13)        \n"
+"              cmpwi   9,0             \n"
+"              bne     %l[happened]    \n"
+"              stb     9,%1(13)        \n"
+"2:                                    \n"
+               RESTART_TABLE(1b, 2b, 1b)
+       : : "i" (offsetof(struct paca_struct, irq_happened)),
+           "i" (offsetof(struct paca_struct, irq_soft_mask))
+       : "cr0", "r9"
+       : happened);
+
+       if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+               WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+
+       return;
+
+happened:
+       irq_happened = get_irq_happened();
+       if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+               WARN_ON_ONCE(!irq_happened);
+
+       if (irq_happened == PACA_IRQ_HARD_DIS) {
+               if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+                       WARN_ON_ONCE(mfmsr() & MSR_EE);
+               irq_soft_mask_set(IRQS_ENABLED);
+               local_paca->irq_happened = 0;
+               __hard_irq_enable();
+               return;
+       }
+
+       /* Have interrupts to replay, need to hard disable first */
+       if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
+               if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+                       if (!(mfmsr() & MSR_EE)) {
+                               /*
+                                * An interrupt could have come in and cleared
+                                * MSR[EE] and set IRQ_HARD_DIS, so check
+                                * IRQ_HARD_DIS again and warn if it is still
+                                * clear.
+                                */
+                               irq_happened = get_irq_happened();
+                               WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS));
+                       }
+               }
+               __hard_irq_disable();
+               local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+       } else {
+               if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+                       if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+                               __hard_irq_disable();
+               }
+       }
+
+       /*
+        * Disable preempt here, so that the below preempt_enable will
+        * perform resched if required (a replayed interrupt may set
+        * need_resched).
+        */
+       preempt_disable();
+       irq_soft_mask_set(IRQS_ALL_DISABLED);
+       trace_hardirqs_off();
+
+       replay_soft_interrupts_irqrestore();
+       local_paca->irq_happened = 0;
+
+       trace_hardirqs_on();
+       irq_soft_mask_set(IRQS_ENABLED);
+       __hard_irq_enable();
+       preempt_enable();
+}
+#else
 notrace void arch_local_irq_restore(unsigned long mask)
 {
        unsigned char irq_happened;
@@ -288,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
        __hard_irq_enable();
        preempt_enable();
 }
+#endif
 EXPORT_SYMBOL(arch_local_irq_restore);
 
 /*
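
The asm goto version of arch_local_irq_restore() in the hunk above is easier to follow with the unsafe ordering written out. An illustrative plain-C sketch of the window the RESTART_TABLE entry closes (not kernel code):

	if (local_paca->irq_happened)           /* the lbz ; cmpwi ; bne */
		goto happened;
	/*
	 * A soft-masked interrupt arriving exactly here would set
	 * irq_happened, yet the store below would still mark interrupts
	 * as enabled, losing the replay until the next soft-disable.
	 */
	local_paca->irq_soft_mask = IRQS_ENABLED;       /* the stb */

Rather than hard-disabling around that window, the restart table makes any interrupt taken between labels 1 and 2 resume execution at label 1, so the check and the store behave as a single atomic step.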
index ce87dc5ea23cfae216f0297a5f56160753509be0..5277cf582c1611010c50fd56a9396f7d11d8d026 100644 (file)
 void arch_jump_label_transform(struct jump_entry *entry,
                               enum jump_label_type type)
 {
-       struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
+       u32 *addr = (u32 *)jump_entry_code(entry);
 
        if (type == JUMP_LABEL_JMP)
                patch_branch(addr, jump_entry_target(entry), 0);
        else
-               patch_instruction(addr, ppc_inst(PPC_INST_NOP));
+               patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
 }
index 7dd2ad3603ad263893662f4d37abd7d8342ffc90..bdee7262c080acb0ce7bbe15a42e6f1022db7c5b 100644 (file)
@@ -147,7 +147,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
                return 0;
 
        if (*(u32 *)regs->nip == BREAK_INSTR)
-               regs->nip += BREAK_INSTR_SIZE;
+               regs_add_return_ip(regs, BREAK_INSTR_SIZE);
 
        return 1;
 }
@@ -372,7 +372,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 
 void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 {
-       regs->nip = pc;
+       regs_set_return_ip(regs, pc);
 }
 
 /*
@@ -394,7 +394,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
        case 'c':
                /* handle the optional parameter */
                if (kgdb_hex2long(&ptr, &addr))
-                       linux_regs->nip = addr;
+                       regs_set_return_ip(linux_regs, addr);
 
                atomic_set(&kgdb_cpu_doing_single_step, -1);
                /* set the trace bit if we're stepping */
@@ -402,9 +402,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
                        mtspr(SPRN_DBCR0,
                              mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
-                       linux_regs->msr |= MSR_DE;
+                       regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE);
 #else
-                       linux_regs->msr |= MSR_SE;
+                       regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE);
 #endif
                        atomic_set(&kgdb_cpu_doing_single_step,
                                   raw_smp_processor_id());
@@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
+       u32 instr, *addr = (u32 *)bpt->bpt_addr;
        int err;
-       unsigned int instr;
-       struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr;
 
-       err = get_kernel_nofault(instr, (unsigned *) addr);
+       err = get_kernel_nofault(instr, addr);
        if (err)
                return err;
 
@@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
        if (err)
                return -EFAULT;
 
-       *(unsigned int *)bpt->saved_instr = instr;
+       *(u32 *)bpt->saved_instr = instr;
 
        return 0;
 }
@@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
        int err;
        unsigned int instr = *(unsigned int *)bpt->saved_instr;
-       struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr;
+       u32 *addr = (u32 *)bpt->bpt_addr;
 
        err = patch_instruction(addr, ppc_inst(instr));
        if (err)
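
The recurring regs->nip and regs->msr conversions in this and the following files feed the SRR-validity tracking added by the interrupt-return rework: the accessors update the register image and invalidate the cached SRR state, so the exit path knows it must rewrite SRR0/SRR1 before returning. Roughly, a sketch of the pattern rather than the verbatim helpers:

	static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
	{
		regs->nip = ip;
		set_return_regs_changed();      /* clears PACASRR_VALID/PACAHSRR_VALID */
	}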
index 660138f6c4b247d5f7220b2023b39d5f8994a8cb..7154d58338cc29314b13afe0c14d3cb5e0ff9e40 100644 (file)
@@ -39,7 +39,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
                 * On powerpc, NIP is *before* this instruction for the
                 * pre handler
                 */
-               regs->nip -= MCOUNT_INSN_SIZE;
+               regs_add_return_ip(regs, -MCOUNT_INSN_SIZE);
 
                __this_cpu_write(current_kprobe, p);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -48,7 +48,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
                         * Emulate singlestep (and also recover regs->nip)
                         * as if there is a nop
                         */
-                       regs->nip += MCOUNT_INSN_SIZE;
+                       regs_add_return_ip(regs, MCOUNT_INSN_SIZE);
                        if (unlikely(p->post_handler)) {
                                kcb->kprobe_status = KPROBE_HIT_SSDONE;
                                p->post_handler(p, regs, 0);
index c64a5feaebbead7b132ea4077eacd46930ab10e7..cbc28d1a2e1b1a767955ee3f0f77196aa50bee73 100644 (file)
 #include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/slab.h>
+#include <linux/moduleloader.h>
 #include <asm/code-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/sstep.h>
 #include <asm/sections.h>
 #include <asm/inst.h>
+#include <asm/set_memory.h>
 #include <linux/uaccess.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -103,28 +105,42 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
        return addr;
 }
 
+void *alloc_insn_page(void)
+{
+       void *page;
+
+       page = module_alloc(PAGE_SIZE);
+       if (!page)
+               return NULL;
+
+       if (strict_module_rwx_enabled()) {
+               set_memory_ro((unsigned long)page, 1);
+               set_memory_x((unsigned long)page, 1);
+       }
+       return page;
+}
+
 int arch_prepare_kprobe(struct kprobe *p)
 {
        int ret = 0;
        struct kprobe *prev;
-       struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
+       struct ppc_inst insn = ppc_inst_read(p->addr);
 
        if ((unsigned long)p->addr & 0x03) {
                printk("Attempt to register kprobe at an unaligned address\n");
                ret = -EINVAL;
-       } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
-               printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
+       } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+               printk("Cannot register a kprobe on mtmsr[d]/rfi[d]\n");
                ret = -EINVAL;
        } else if ((unsigned long)p->addr & ~PAGE_MASK &&
-                  ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
+                  ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) {
                printk("Cannot register a kprobe on the second word of prefixed instruction\n");
                ret = -EINVAL;
        }
        preempt_disable();
        prev = get_kprobe(p->addr - 1);
        preempt_enable_no_resched();
-       if (prev &&
-           ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) {
+       if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
                printk("Cannot register a kprobe on the second word of prefixed instruction\n");
                ret = -EINVAL;
        }
@@ -138,7 +154,7 @@ int arch_prepare_kprobe(struct kprobe *p)
        }
 
        if (!ret) {
-               patch_instruction((struct ppc_inst *)p->ainsn.insn, insn);
+               patch_instruction(p->ainsn.insn, insn);
                p->opcode = ppc_inst_val(insn);
        }
 
@@ -149,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-       patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION));
+       WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)));
 }
 NOKPROBE_SYMBOL(arch_arm_kprobe);
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
-       patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode));
+       WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode)));
 }
 NOKPROBE_SYMBOL(arch_disarm_kprobe);
 
@@ -178,7 +194,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs
         * variant as values in regs could play a part in
         * if the trap is taken or not
         */
-       regs->nip = (unsigned long)p->ainsn.insn;
+       regs_set_return_ip(regs, (unsigned long)p->ainsn.insn);
 }
 
 static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
@@ -228,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
 {
        int ret;
-       struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
+       struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
 
        /* regs->nip is also adjusted if emulate_step returns 1 */
        ret = emulate_step(regs, insn);
@@ -319,8 +335,9 @@ int kprobe_handler(struct pt_regs *regs)
                kprobe_opcode_t insn = *p->ainsn.insn;
                if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
                        /* Turn off 'trace' bits */
-                       regs->msr &= ~MSR_SINGLESTEP;
-                       regs->msr |= kcb->kprobe_saved_msr;
+                       regs_set_return_msr(regs,
+                               (regs->msr & ~MSR_SINGLESTEP) |
+                               kcb->kprobe_saved_msr);
                        goto no_kprobe;
                }
 
@@ -415,7 +432,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         * we end up emulating it in kprobe_handler(), which increments the nip
         * again.
         */
-       regs->nip = orig_ret_address - 4;
+       regs_set_return_ip(regs, orig_ret_address - 4);
        regs->link = orig_ret_address;
 
        return 0;
@@ -439,7 +456,7 @@ int kprobe_post_handler(struct pt_regs *regs)
        if (!cur || user_mode(regs))
                return 0;
 
-       len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn));
+       len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn));
        /* make sure we got here for instruction we have a kprobe on */
        if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
                return 0;
@@ -450,8 +467,8 @@ int kprobe_post_handler(struct pt_regs *regs)
        }
 
        /* Adjust nip to after the single-stepped instruction */
-       regs->nip = (unsigned long)cur->addr + len;
-       regs->msr |= kcb->kprobe_saved_msr;
+       regs_set_return_ip(regs, (unsigned long)cur->addr + len);
+       regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr);
 
        /*Restore back the original saved kprobes variables and continue. */
        if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -490,9 +507,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
                 * and allow the page fault handler to continue as a
                 * normal page fault.
                 */
-               regs->nip = (unsigned long)cur->addr;
-               regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
-               regs->msr |= kcb->kprobe_saved_msr;
+               regs_set_return_ip(regs, (unsigned long)cur->addr);
+               /* Turn off 'trace' bits */
+               regs_set_return_msr(regs,
+                       (regs->msr & ~MSR_SINGLESTEP) |
+                       kcb->kprobe_saved_msr);
                if (kcb->kprobe_status == KPROBE_REENTER)
                        restore_previous_kprobe(kcb);
                else
@@ -506,7 +525,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
                 * zero, try to fix up.
                 */
                if ((entry = search_exception_tables(regs->nip)) != NULL) {
-                       regs->nip = extable_fixup(entry);
+                       regs_set_return_ip(regs, extable_fixup(entry));
                        return 1;
                }
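
The new alloc_insn_page() above is what lets kprobes coexist with STRICT_MODULE_RWX: when strict module RWX is enabled, the detour page is made read-only and executable right after allocation, so all later writes to it must go through patch_instruction(), which can write read-only text via a temporary writable mapping. An illustrative sketch of the slot lifecycle (not verbatim kernel code):

	u32 *slot = alloc_insn_page();                    /* RO+X from the start */
	patch_instruction(slot, ppc_inst(PPC_RAW_NOP())); /* OK: uses a writable alias */
	/* A direct store such as *slot = 0x60000000 (nop) would fault here. */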
 
index 15e7b4900689af54b61000b4f405ba26d3cc849f..47a683cd00d246e6b8a5fe2f648802acd21cd8e4 100644 (file)
@@ -274,7 +274,7 @@ void mce_common_process_ue(struct pt_regs *regs,
        entry = search_kernel_exception_table(regs->nip);
        if (entry) {
                mce_err->ignore_event = true;
-               regs->nip = extable_fixup(entry);
+               regs_set_return_ip(regs, extable_fixup(entry));
        }
 }
 
index 667104d4c455042eeb698c8b8c9627df0d0bd887..c2f55fe7092d2b2bf4f42bf8fdb8ca2a808a4439 100644 (file)
@@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
        pfn = addr_to_pfn(regs, regs->nip);
        if (pfn != ULONG_MAX) {
                instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
-               instr = ppc_inst_read((struct ppc_inst *)instr_addr);
+               instr = ppc_inst_read((u32 *)instr_addr);
                if (!analyse_instr(&op, &tmp, instr)) {
                        pfn = addr_to_pfn(regs, op.ea);
                        *addr = op.ea;
@@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
        return -1;
 }
 
-static int mce_handle_ierror(struct pt_regs *regs,
+static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
                const struct mce_ierror_table table[],
                struct mce_error_info *mce_err, uint64_t *addr,
                uint64_t *phys_addr)
 {
-       uint64_t srr1 = regs->msr;
        int handled = 0;
        int i;
 
@@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs,
 }
 
 static long mce_handle_error(struct pt_regs *regs,
+               unsigned long srr1,
                const struct mce_derror_table dtable[],
                const struct mce_ierror_table itable[])
 {
        struct mce_error_info mce_err = { 0 };
        uint64_t addr, phys_addr = ULONG_MAX;
-       uint64_t srr1 = regs->msr;
        long handled;
 
        if (SRR1_MC_LOADSTORE(srr1))
                handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
                                &phys_addr);
        else
-               handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+               handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
                                &phys_addr);
 
        if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
@@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
        /* P7 DD1 leaves top bits of DSISR undefined */
        regs->dsisr &= 0x0000ffff;
 
-       return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
+       return mce_handle_error(regs, regs->msr,
+                       mce_p7_derror_table, mce_p7_ierror_table);
 }
 
 long __machine_check_early_realmode_p8(struct pt_regs *regs)
 {
-       return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
+       return mce_handle_error(regs, regs->msr,
+                       mce_p8_derror_table, mce_p8_ierror_table);
 }
 
 long __machine_check_early_realmode_p9(struct pt_regs *regs)
 {
+       unsigned long srr1 = regs->msr;
+
        /*
         * On POWER9 DD2.1 and below, it's possible to get a machine check
         * caused by a paste instruction where only DSISR bit 25 is set. This
@@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
        if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
                return 1;
 
-       return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
+       /*
+        * An async machine check due to a bad real address from a store or a
+        * foreign link timeout comes with the load/store bit (PPC bit 42) set
+        * in SRR1, but the cause is reported in SRR1 rather than DSISR. Clear
+        * bit 42 so we're directed to the ierror table, which will find the
+        * cause (and describes it correctly as a store error).
+        */
+       if (SRR1_MC_LOADSTORE(srr1) &&
+                       ((srr1 & 0x081c0000) == 0x08140000 ||
+                        (srr1 & 0x081c0000) == 0x08180000)) {
+               srr1 &= ~PPC_BIT(42);
+       }
+
+       return mce_handle_error(regs, srr1,
+                       mce_p9_derror_table, mce_p9_ierror_table);
 }
 
 long __machine_check_early_realmode_p10(struct pt_regs *regs)
 {
-       return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table);
+       unsigned long srr1 = regs->msr;
+
+       /*
+        * An async machine check due to a bad real address from a store comes
+        * with the load/store bit (PPC bit 42) set in SRR1, but the cause is
+        * reported in SRR1 rather than DSISR. Clear bit 42 so we're directed
+        * to the ierror table, which will find the cause (and describes it
+        * correctly as a store error).
+        */
+       if (SRR1_MC_LOADSTORE(srr1) &&
+                       (srr1 & 0x081c0000) == 0x08140000) {
+               srr1 &= ~PPC_BIT(42);
+       }
+
+       return mce_handle_error(regs, srr1,
+                       mce_p10_derror_table, mce_p10_ierror_table);
 }
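
A note on the bit arithmetic above: PPC bit numbering is big-endian, with bit 0 the most significant bit of the 64-bit register, so PPC_BIT(42) is 1UL << (63 - 42). A small self-contained check of the load/store bit tested by SRR1_MC_LOADSTORE() (the macro is restated here for illustration):

	#include <stdio.h>

	/* PPC (big-endian) bit numbering on a 64-bit register. */
	#define PPC_BIT(bit)	(1UL << (63 - (bit)))

	int main(void)
	{
		printf("PPC_BIT(42) = 0x%016lx\n", PPC_BIT(42));
		/* prints 0x0000000000200000 */
		return 0;
	}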
index 6a076bef29321527314b4333e2fcd47887dcdd0d..39ab1541959274091de14f15c276d5a71b7be018 100644 (file)
@@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume)
        bl      start_secondary
        b       .
 #endif /* CONFIG_SMP */
-       
-/*
- * This routine is just here to keep GCC happy - sigh...
- */
-_GLOBAL(__main)
-       blr
index 3f35c8d20be75ffeea1bda50b20f72c0f29034ca..ed04a3ba66fe8b33184be16357bb6e8d7188e161 100644 (file)
@@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr,
 static __always_inline void *
 __module_alloc(unsigned long size, unsigned long start, unsigned long end)
 {
+       pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+
        /*
         * Don't do huge page allocations for modules yet until more testing
         * is done. STRICT_MODULE_RWX may require extra work to support this
         * too.
         */
-       return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
+       return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
                                    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
                                    NUMA_NO_NODE, __builtin_return_address(0));
 }
index c27b8687b82aeb07557a11fa50643f50f7d1473a..f417afc08d33bc406b2e8d6f1a13fbc7fde57002 100644 (file)
@@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
 
 static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
 {
-       if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val)))
+       if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val)))
                return 0;
-       if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) |
-                              PPC_LO(val)))
+       if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))
                return 0;
        return 1;
 }
@@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location,
                entry++;
        }
 
-       /*
-        * lis r12, sym@ha
-        * addi r12, r12, sym@l
-        * mtctr r12
-        * bctr
-        */
-       entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val);
-       entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val);
-       entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12);
-       entry->jump[3] = PPC_INST_BCTR;
+       entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val));
+       entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val));
+       entry->jump[2] = PPC_RAW_MTCTR(_R12);
+       entry->jump[3] = PPC_RAW_BCTR();
 
        pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
        return (uint32_t)entry;
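
The lis/addi pair in the PLT entry rebuilds a 32-bit absolute address from two 16-bit halves. Because addi sign-extends its immediate, the high half must be pre-biased by 0x8000, which is the difference between PPC_HA() and plain PPC_HI(). A self-contained demonstration, with the field macros restated from ppc-opcode.h:

	#include <assert.h>
	#include <stdint.h>

	#define PPC_LO(v)	((v) & 0xffff)
	#define PPC_HI(v)	(((v) >> 16) & 0xffff)
	#define PPC_HA(v)	PPC_HI((v) + 0x8000)

	int main(void)
	{
		uint32_t val = 0x1234ffff;
		/* lis r12,PPC_HA(val) ; addi r12,r12,PPC_LO(val) computes: */
		int32_t lo = (int16_t)PPC_LO(val);      /* addi sign-extends */
		uint32_t r12 = ((uint32_t)PPC_HA(val) << 16) + lo;
		assert(r12 == val);     /* the +0x8000 bias absorbs the borrow */
		return 0;
	}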
index ae2b188365b1e18b6342b9f53a63d7dc70476181..6baa676e7cb60219795d6b40a99b27a674357723 100644 (file)
@@ -122,27 +122,19 @@ struct ppc64_stub_entry
  * the stub, but it's significantly shorter to put these values at the
  * end of the stub code, and patch the stub address (32-bits relative
  * to the TOC ptr, r2) into the stub.
- *
- * addis   r11,r2, <high>
- * addi    r11,r11, <low>
- * std     r2,R2_STACK_OFFSET(r1)
- * ld      r12,32(r11)
- * ld      r2,40(r11)
- * mtctr   r12
- * bctr
  */
 static u32 ppc64_stub_insns[] = {
-       PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2),
-       PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11),
+       PPC_RAW_ADDIS(_R11, _R2, 0),
+       PPC_RAW_ADDI(_R11, _R11, 0),
        /* Save current r2 value in magic place on the stack. */
-       PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET,
-       PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32,
+       PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET),
+       PPC_RAW_LD(_R12, _R11, 32),
 #ifdef PPC64_ELF_ABI_v1
        /* Set up new r2 from function descriptor */
-       PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40,
+       PPC_RAW_LD(_R2, _R11, 40),
 #endif
-       PPC_INST_MTCTR | __PPC_RS(R12),
-       PPC_INST_BCTR,
+       PPC_RAW_MTCTR(_R12),
+       PPC_RAW_BCTR(),
 };
 
 /* Count how many different 24-bit relocations (different symbol,
@@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 
 #ifdef CONFIG_MPROFILE_KERNEL
 
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-/*
- * ld      r12,PACATOC(r13)
- * addis   r12,r12,<high>
- * addi    r12,r12,<low>
- * mtctr   r12
- * bctr
- */
 static u32 stub_insns[] = {
-       PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC,
-       PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12),
-       PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12),
-       PPC_INST_MTCTR | __PPC_RS(R12),
-       PPC_INST_BCTR,
+       PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+       PPC_RAW_ADDIS(_R12, _R12, 0),
+       PPC_RAW_ADDI(_R12, _R12, 0),
+       PPC_RAW_MTCTR(_R12),
+       PPC_RAW_BCTR(),
 };
 
 /*
@@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
        if (!instr_is_relative_link_branch(ppc_inst(*prev_insn)))
                return 1;
 
-       if (*instruction != PPC_INST_NOP) {
+       if (*instruction != PPC_RAW_NOP()) {
                pr_err("%s: Expected nop after call, got %08x at %pS\n",
                        me->name, *instruction, instruction);
                return 0;
@@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                         *      ld r2, ...(r12)
                         *      add r2, r2, r12
                         */
-                       if ((((uint32_t *)location)[0] & ~0xfffc) !=
-                           (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12)))
+                       if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0))
                                break;
-                       if (((uint32_t *)location)[1] !=
-                           (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12)))
+                       if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12))
                                break;
                        /*
                         * If found, replace it with:
                         *      addis r2, r12, (.TOC.-func)@ha
                         *      addi  r2,  r2, (.TOC.-func)@l
                         */
-                       ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) |
-                                                   __PPC_RA(R12) | PPC_HA(value);
-                       ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) |
-                                                   __PPC_RA(R2) | PPC_LO(value);
+                       ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value));
+                       ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value));
                        break;
 
                case R_PPC64_REL16_HA:
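
The `& ~0xfffc` comparison above works because ld is a DS-form instruction: the displacement occupies exactly the 0xfffc bits of the word, so masking them off reduces any `ld r2,N(r12)` to the zero-offset template. A self-contained check of that encoding (the helper below is restated for illustration and mirrors PPC_RAW_LD()):

	#include <assert.h>
	#include <stdint.h>

	/* DS-form ld rt,ds(ra): primary opcode 58, displacement in mask 0xfffc. */
	static uint32_t ppc_ld(uint32_t rt, uint32_t ra, uint32_t ds)
	{
		return (58u << 26) | (rt << 21) | (ra << 16) | (ds & 0xfffc);
	}

	int main(void)
	{
		/* Any 'ld r2,N(r12)' matches the template once the offset
		 * bits are masked, as in the TOC-setup fixup above. */
		assert((ppc_ld(2, 12, 24) & ~0xfffcu) == ppc_ld(2, 12, 0));
		assert((ppc_ld(2, 12, 40) & ~0xfffcu) == ppc_ld(2, 12, 0));
		return 0;
	}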
index cdf87086fa33a029767e281a809dadb6f50578c4..c79899abcec8491847bb8cd9e2e170b495697c23 100644 (file)
 #include <asm/ppc-opcode.h>
 #include <asm/inst.h>
 
-#define TMPL_CALL_HDLR_IDX     \
-       (optprobe_template_call_handler - optprobe_template_entry)
-#define TMPL_EMULATE_IDX       \
-       (optprobe_template_call_emulate - optprobe_template_entry)
-#define TMPL_RET_IDX           \
-       (optprobe_template_ret - optprobe_template_entry)
-#define TMPL_OP_IDX            \
-       (optprobe_template_op_address - optprobe_template_entry)
-#define TMPL_INSN_IDX          \
-       (optprobe_template_insn - optprobe_template_entry)
-#define TMPL_END_IDX           \
-       (optprobe_template_end - optprobe_template_entry)
-
-DEFINE_INSN_CACHE_OPS(ppc_optinsn);
+#define TMPL_CALL_HDLR_IDX     (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX       (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX           (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP_IDX            (optprobe_template_op_address - optprobe_template_entry)
+#define TMPL_INSN_IDX          (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX           (optprobe_template_end - optprobe_template_entry)
 
 static bool insn_page_in_use;
 
-static void *__ppc_alloc_insn_page(void)
+void *alloc_optinsn_page(void)
 {
        if (insn_page_in_use)
                return NULL;
@@ -43,20 +35,11 @@ static void *__ppc_alloc_insn_page(void)
        return &optinsn_slot;
 }
 
-static void __ppc_free_insn_page(void *page __maybe_unused)
+void free_optinsn_page(void *page)
 {
        insn_page_in_use = false;
 }
 
-struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
-       .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
-       .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
-       /* insn_size initialized later */
-       .alloc = __ppc_alloc_insn_page,
-       .free = __ppc_free_insn_page,
-       .nr_garbage = 0,
-};
-
 /*
  * Check if we can optimize this probe. Returns NIP post-emulation if this can
  * be optimized and 0 otherwise.
@@ -66,6 +49,7 @@ static unsigned long can_optimize(struct kprobe *p)
        struct pt_regs regs;
        struct instruction_op op;
        unsigned long nip = 0;
+       unsigned long addr = (unsigned long)p->addr;
 
        /*
         * kprobe placed for kretprobe during boot time
@@ -73,7 +57,7 @@ static unsigned long can_optimize(struct kprobe *p)
         * So further checks can be skipped.
         */
        if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
-               return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
+               return addr + sizeof(kprobe_opcode_t);
 
        /*
         * We only support optimizing kernel addresses, but not
@@ -81,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p)
         *
         * FIXME: Optimize kprobes placed in module addresses.
         */
-       if (!is_kernel_addr((unsigned long)p->addr))
+       if (!is_kernel_addr(addr))
                return 0;
 
        memset(&regs, 0, sizeof(struct pt_regs));
-       regs.nip = (unsigned long)p->addr;
+       regs.nip = addr;
        regs.trap = 0x0;
        regs.msr = MSR_KERNEL;
 
@@ -100,9 +84,8 @@ static unsigned long can_optimize(struct kprobe *p)
         * Ensure that the instruction is not a conditional branch,
         * and that it can be emulated.
         */
-       if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) &&
-           analyse_instr(&op, &regs,
-                         ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) {
+       if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) &&
+           analyse_instr(&op, &regs, ppc_inst_read(p->ainsn.insn)) == 1) {
                emulate_update_regs(&regs, &op);
                nip = regs.nip;
        }
@@ -123,7 +106,7 @@ static void optimized_callback(struct optimized_kprobe *op,
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                __this_cpu_write(current_kprobe, &op->kp);
-               regs->nip = (unsigned long)op->kp.addr;
+               regs_set_return_ip(regs, (unsigned long)op->kp.addr);
                get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
@@ -136,19 +119,15 @@ NOKPROBE_SYMBOL(optimized_callback);
 void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 {
        if (op->optinsn.insn) {
-               free_ppc_optinsn_slot(op->optinsn.insn, 1);
+               free_optinsn_slot(op->optinsn.insn, 1);
                op->optinsn.insn = NULL;
        }
 }
 
 static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
 {
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
-       addr++;
-
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
+       patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val))));
+       patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
 }
 
 /*
@@ -157,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *
  */
 static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
 {
-       /* lis reg,(op)@highest */
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
-                                  ((val >> 48) & 0xffff)));
-       addr++;
-
-       /* ori reg,reg,(op)@higher */
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
-                                  ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
-       addr++;
-
-       /* rldicr reg,reg,32,31 */
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
-                                  ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
-       addr++;
-
-       /* oris reg,reg,(op)@h */
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
-                                  ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
-       addr++;
-
-       /* ori reg,reg,(op)@l */
-       patch_instruction((struct ppc_inst *)addr,
-                         ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
-                                  ___PPC_RS(reg) | (val & 0xffff)));
+       patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val))));
+       patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val))));
+       patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32)));
+       patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val))));
+       patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
 }
 
 static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
@@ -198,19 +154,18 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 {
        struct ppc_inst branch_op_callback, branch_emulate_step, temp;
-       kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
+       unsigned long op_callback_addr, emulate_step_addr;
+       kprobe_opcode_t *buff;
        long b_offset;
        unsigned long nip, size;
        int rc, i;
 
-       kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
-
        nip = can_optimize(p);
        if (!nip)
                return -EILSEQ;
 
        /* Allocate instruction slot for detour buffer */
-       buff = get_ppc_optinsn_slot();
+       buff = get_optinsn_slot();
        if (!buff)
                return -ENOMEM;
 
@@ -228,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
                goto error;
 
        /* Check if the return address is also within 32MB range */
-       b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
-                       (unsigned long)nip;
+       b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip;
        if (!is_offset_in_branch_range(b_offset))
                goto error;
 
@@ -238,8 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
        size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
        pr_devel("Copying template to %p, size %lu\n", buff, size);
        for (i = 0; i < size; i++) {
-               rc = patch_instruction((struct ppc_inst *)(buff + i),
-                                      ppc_inst(*(optprobe_template_entry + i)));
+               rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i)));
                if (rc < 0)
                        goto error;
        }
@@ -253,51 +206,48 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
        /*
         * 2. branch to optimized_callback() and emulate_step()
         */
-       op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
-       emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
+       op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback");
+       emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step");
        if (!op_callback_addr || !emulate_step_addr) {
                WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
                goto error;
        }
 
-       rc = create_branch(&branch_op_callback,
-                          (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
-                          (unsigned long)op_callback_addr,
-                          BRANCH_SET_LINK);
+       rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX,
+                          op_callback_addr, BRANCH_SET_LINK);
 
-       rc |= create_branch(&branch_emulate_step,
-                           (struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
-                           (unsigned long)emulate_step_addr,
-                           BRANCH_SET_LINK);
+       rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX,
+                           emulate_step_addr, BRANCH_SET_LINK);
 
        if (rc)
                goto error;
 
-       patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
-                         branch_op_callback);
-       patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
-                         branch_emulate_step);
+       patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+       patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
 
        /*
         * 3. load the instruction to be emulated into the relevant register
         */
-       temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
-       patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+       if (IS_ENABLED(CONFIG_PPC64)) {
+               temp = ppc_inst_read(p->ainsn.insn);
+               patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+       } else {
+               patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX);
+       }
 
        /*
         * 4. branch back from trampoline
         */
-       patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0);
+       patch_branch(buff + TMPL_RET_IDX, nip, 0);
 
-       flush_icache_range((unsigned long)buff,
-                          (unsigned long)(&buff[TMPL_END_IDX]));
+       flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX]));
 
        op->optinsn.insn = buff;
 
        return 0;
 
 error:
-       free_ppc_optinsn_slot(buff, 0);
+       free_optinsn_slot(buff, 0);
        return -ERANGE;
 
 }
@@ -328,12 +278,9 @@ void arch_optimize_kprobes(struct list_head *oplist)
                 * Backup instructions which will be replaced
                 * by jump address
                 */
-               memcpy(op->optinsn.copied_insn, op->kp.addr,
-                                              RELATIVEJUMP_SIZE);
-               create_branch(&instr,
-                             (struct ppc_inst *)op->kp.addr,
-                             (unsigned long)op->optinsn.insn, 0);
-               patch_instruction((struct ppc_inst *)op->kp.addr, instr);
+               memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE);
+               create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0);
+               patch_instruction(op->kp.addr, instr);
                list_del_init(&op->list);
        }
 }
@@ -343,8 +290,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
        arch_arm_kprobe(&op->kp);
 }
 
-void arch_unoptimize_kprobes(struct list_head *oplist,
-                            struct list_head *done_list)
+void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list)
 {
        struct optimized_kprobe *op;
        struct optimized_kprobe *tmp;
@@ -355,8 +301,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist,
        }
 }
 
-int arch_within_optimized_kprobe(struct optimized_kprobe *op,
-                                unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
 {
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
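
The rewritten patch_imm64_load_insns() uses the classic five-instruction sequence to materialise a 64-bit constant. The arithmetic can be checked in plain C (field macros restated from ppc-opcode.h; lis sign-extends in hardware, but the sldi discards the upper 32 bits, so the plain shifts below model the net effect):

	#include <assert.h>
	#include <stdint.h>

	#define PPC_LO(v)	((v) & 0xffff)
	#define PPC_HI(v)	(((v) >> 16) & 0xffff)
	#define PPC_HIGHER(v)	(((v) >> 32) & 0xffff)
	#define PPC_HIGHEST(v)	(((v) >> 48) & 0xffff)

	int main(void)
	{
		uint64_t val = 0x123456789abcdef0ULL, r;

		r  = PPC_HIGHEST(val) << 16;	/* lis  reg,PPC_HIGHEST(val)    */
		r |= PPC_HIGHER(val);		/* ori  reg,reg,PPC_HIGHER(val) */
		r <<= 32;			/* sldi reg,reg,32              */
		r |= PPC_HI(val) << 16;		/* oris reg,reg,PPC_HI(val)     */
		r |= PPC_LO(val);		/* ori  reg,reg,PPC_LO(val)     */
		assert(r == val);
		return 0;
	}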
index 7f5aae3c387d29e4b6862b7fd0aa16c98732ff4f..9bd30cac852bfefd551763e28bb06e9031b3a32d 100644 (file)
@@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
 #ifdef CONFIG_PPC_BOOK3S
        mm_context_t *context = &mm->context;
 
-       get_paca()->mm_ctx_id = context->id;
 #ifdef CONFIG_PPC_MM_SLICES
        VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
-       get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
        memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
               LOW_SLICE_ARRAY_SZ);
        memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
index 8935c5696bcefcfd59355de87d454250016d0a74..185beb29058011d91b389a7490cba98d0f57d04e 100644 (file)
@@ -96,7 +96,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
        if (tsk == current && tsk->thread.regs &&
            MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
            !test_thread_flag(TIF_RESTORE_TM)) {
-               tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
+               regs_set_return_msr(&tsk->thread.ckpt_regs,
+                                               tsk->thread.regs->msr);
                set_thread_flag(TIF_RESTORE_TM);
        }
 }
@@ -161,7 +162,7 @@ static void __giveup_fpu(struct task_struct *tsk)
        msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
        if (cpu_has_feature(CPU_FTR_VSX))
                msr &= ~MSR_VSX;
-       tsk->thread.regs->msr = msr;
+       regs_set_return_msr(tsk->thread.regs, msr);
 }
 
 void giveup_fpu(struct task_struct *tsk)
@@ -244,7 +245,7 @@ static void __giveup_altivec(struct task_struct *tsk)
        msr &= ~MSR_VEC;
        if (cpu_has_feature(CPU_FTR_VSX))
                msr &= ~MSR_VSX;
-       tsk->thread.regs->msr = msr;
+       regs_set_return_msr(tsk->thread.regs, msr);
 }
 
 void giveup_altivec(struct task_struct *tsk)
@@ -559,7 +560,7 @@ void notrace restore_math(struct pt_regs *regs)
 
                msr_check_and_clear(new_msr);
 
-               regs->msr |= new_msr | fpexc_mode;
+               regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
        }
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1114,7 +1115,7 @@ void restore_tm_state(struct pt_regs *regs)
 #endif
        restore_math(regs);
 
-       regs->msr |= msr_diff;
+       regs_set_return_msr(regs, regs->msr | msr_diff);
 }
 
 #else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1129,6 +1130,10 @@ static inline void save_sprs(struct thread_struct *t)
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                t->vrsave = mfspr(SPRN_VRSAVE);
 #endif
+#ifdef CONFIG_SPE
+       if (cpu_has_feature(CPU_FTR_SPE))
+               t->spefscr = mfspr(SPRN_SPEFSCR);
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
        if (cpu_has_feature(CPU_FTR_DSCR))
                t->dscr = mfspr(SPRN_DSCR);
@@ -1159,6 +1164,11 @@ static inline void restore_sprs(struct thread_struct *old_thread,
            old_thread->vrsave != new_thread->vrsave)
                mtspr(SPRN_VRSAVE, new_thread->vrsave);
 #endif
+#ifdef CONFIG_SPE
+       if (cpu_has_feature(CPU_FTR_SPE) &&
+           old_thread->spefscr != new_thread->spefscr)
+               mtspr(SPRN_SPEFSCR, new_thread->spefscr);
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
        if (cpu_has_feature(CPU_FTR_DSCR)) {
                u64 dscr = get_paca()->dscr_default;
@@ -1213,6 +1223,19 @@ struct task_struct *__switch_to(struct task_struct *prev,
                        __flush_tlb_pending(batch);
                batch->active = 0;
        }
+
+       /*
+        * On POWER9 the copy-paste buffer can only paste into
+        * foreign real addresses, so unprivileged processes can not
+        * see the data or use it in any way unless they have
+        * foreign real mappings. If the new process has the foreign
+        * real address mappings, we must issue a cp_abort to clear
+        * any state and prevent snooping, corruption or a covert
+        * channel. ISA v3.1 supports paste into local memory.
+        */
+       if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
+                       atomic_read(&new->mm->context.vas_windows)))
+               asm volatile(PPC_CP_ABORT);
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -1248,43 +1271,48 @@ struct task_struct *__switch_to(struct task_struct *prev,
        }
 
        /*
-        * Call restore_sprs() before calling _switch(). If we move it after
-        * _switch() then we miss out on calling it for new tasks. The reason
-        * for this is we manually create a stack frame for new tasks that
-        * directly returns through ret_from_fork() or
+        * Call restore_sprs() and set_return_regs_changed() before calling
+        * _switch(). If we move it after _switch() then we miss out on calling
+        * it for new tasks. The reason for this is we manually create a stack
+        * frame for new tasks that directly returns through ret_from_fork() or
         * ret_from_kernel_thread(). See copy_thread() for details.
         */
        restore_sprs(old_thread, new_thread);
 
+       set_return_regs_changed(); /* _switch changes stack (and regs) */
+
 #ifdef CONFIG_PPC32
        kuap_assert_locked();
 #endif
        last = _switch(old_thread, new_thread);
 
+       /*
+        * Nothing after _switch will be run for newly created tasks,
+        * because they switch directly to ret_from_fork/ret_from_kernel_thread
+        * etc. Code added here should have a comment explaining why that is
+        * okay.
+        */
+
 #ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * This applies to a process that was context switched while inside
+        * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
+        * deactivated above, before _switch(). This will never be the case
+        * for new tasks.
+        */
        if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
                current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
                batch = this_cpu_ptr(&ppc64_tlb_batch);
                batch->active = 1;
        }
 
-       if (current->thread.regs) {
+       /*
+        * Math facilities are masked out of the child MSR in copy_thread.
+        * A new task does not need to restore_math because it will fault
+        * them in on demand.
+        */
+       if (current->thread.regs)
                restore_math(current->thread.regs);
-
-               /*
-                * On POWER9 the copy-paste buffer can only paste into
-                * foreign real addresses, so unprivileged processes can not
-                * see the data or use it in any way unless they have
-                * foreign real mappings. If the new process has the foreign
-                * real address mappings, we must issue a cp_abort to clear
-                * any state and prevent snooping, corruption or a covert
-                * channel. ISA v3.1 supports paste into local memory.
-                */
-               if (current->mm &&
-                       (cpu_has_feature(CPU_FTR_ARCH_31) ||
-                       atomic_read(&current->mm->context.vas_windows)))
-                       asm volatile(PPC_CP_ABORT);
-       }
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
        return last;
@@ -1736,6 +1764,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 #ifdef CONFIG_ALTIVEC
        p->thread.vr_save_area = NULL;
 #endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+       p->thread.kuap = KUAP_NONE;
+#endif
 
        setup_ksp_vsid(p, sp);
 
@@ -1838,13 +1869,14 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
                        }
                        regs->gpr[2] = toc;
                }
-               regs->nip = entry;
-               regs->msr = MSR_USER64;
+               regs_set_return_ip(regs, entry);
+               regs_set_return_msr(regs, MSR_USER64);
        } else {
-               regs->nip = start;
                regs->gpr[2] = 0;
-               regs->msr = MSR_USER32;
+               regs_set_return_ip(regs, start);
+               regs_set_return_msr(regs, MSR_USER32);
        }
+
 #endif
 #ifdef CONFIG_VSX
        current->thread.used_vsr = 0;
@@ -1875,7 +1907,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
        current->thread.tm_tfiar = 0;
        current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
 }
 EXPORT_SYMBOL(start_thread);
 
@@ -1923,9 +1954,10 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
        if (val > PR_FP_EXC_PRECISE)
                return -EINVAL;
        tsk->thread.fpexc_mode = __pack_fe01(val);
-       if (regs != NULL && (regs->msr & MSR_FP) != 0)
-               regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
-                       | tsk->thread.fpexc_mode;
+       if (regs != NULL && (regs->msr & MSR_FP) != 0) {
+               regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
+                                               | tsk->thread.fpexc_mode);
+       }
        return 0;
 }
 
@@ -1971,9 +2003,9 @@ int set_endian(struct task_struct *tsk, unsigned int val)
                return -EINVAL;
 
        if (val == PR_ENDIAN_BIG)
-               regs->msr &= ~MSR_LE;
+               regs_set_return_msr(regs, regs->msr & ~MSR_LE);
        else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
-               regs->msr |= MSR_LE;
+               regs_set_return_msr(regs, regs->msr | MSR_LE);
        else
                return -EINVAL;
 
@@ -2121,8 +2153,9 @@ unsigned long get_wchan(struct task_struct *p)
 
 static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
 
-void show_stack(struct task_struct *tsk, unsigned long *stack,
-               const char *loglvl)
+void __no_sanitize_address show_stack(struct task_struct *tsk,
+                                     unsigned long *stack,
+                                     const char *loglvl)
 {
        unsigned long sp, ip, lr, newsp;
        int count = 0;
index fbe9deebc8e13eff1b844df914cdf9cbb54d0c2d..f620e04dc9bfea3c2160a44727f10cfac2796eb1 100644 (file)
@@ -758,7 +758,7 @@ void __init early_init_devtree(void *params)
                first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
        setup_initial_memory_limit(memstart_addr, first_memblock_size);
        /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
-       memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+       memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
        /* If relocatable, reserve first 32k for interrupt vectors etc. */
        if (PHYSICAL_START > MEMORY_START)
                memblock_reserve(MEMORY_START, 0x8000);
index 41ed7e33d8973c55f6294e725f9b40f439a826a3..a5bf355ce1d632f032b855ff4089365d1d5b3ee1 100644 (file)
 #include <linux/initrd.h>
 #include <linux/bitops.h>
 #include <linux/pgtable.h>
+#include <linux/printk.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <asm/interrupt.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -242,13 +244,31 @@ static int __init prom_strcmp(const char *cs, const char *ct)
        return 0;
 }
 
-static char __init *prom_strcpy(char *dest, const char *src)
+static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
 {
-       char *tmp = dest;
+       ssize_t rc;
+       size_t i;
 
-       while ((*dest++ = *src++) != '\0')
-               /* nothing */;
-       return tmp;
+       if (n == 0 || n > INT_MAX)
+               return -E2BIG;
+
+       // Copy up to n bytes
+       for (i = 0; i < n && src[i] != '\0'; i++)
+               dest[i] = src[i];
+
+       rc = i;
+
+       // If we copied all n then we have run out of space for the nul
+       if (rc == n) {
+               // Rewind by one character to ensure nul termination
+               i--;
+               rc = -E2BIG;
+       }
+
+       for (; i < n; i++)
+               dest[i] = '\0';
+
+       return rc;
 }
 
 static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
@@ -701,13 +721,12 @@ static int __init prom_setprop(phandle node, const char *nodename,
 }
 
 /* We can't use the standard versions because of relocation headaches. */
-#define isxdigit(c)    (('0' <= (c) && (c) <= '9') \
-                        || ('a' <= (c) && (c) <= 'f') \
-                        || ('A' <= (c) && (c) <= 'F'))
+#define prom_isxdigit(c) \
+       (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
 
-#define isdigit(c)     ('0' <= (c) && (c) <= '9')
-#define islower(c)     ('a' <= (c) && (c) <= 'z')
-#define toupper(c)     (islower(c) ? ((c) - 'a' + 'A') : (c))
+#define prom_isdigit(c)        ('0' <= (c) && (c) <= '9')
+#define prom_islower(c)        ('a' <= (c) && (c) <= 'z')
+#define prom_toupper(c)        (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
 
 static unsigned long prom_strtoul(const char *cp, const char **endp)
 {
@@ -716,14 +735,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
        if (*cp == '0') {
                base = 8;
                cp++;
-               if (toupper(*cp) == 'X') {
+               if (prom_toupper(*cp) == 'X') {
                        cp++;
                        base = 16;
                }
        }
 
-       while (isxdigit(*cp) &&
-              (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+       while (prom_isxdigit(*cp) &&
+              (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) {
                result = result * base + value;
                cp++;
        }
@@ -927,6 +946,10 @@ struct option_vector6 {
        u8 os_name;
 } __packed;
 
+struct option_vector7 {
+       u8 os_id[256];
+} __packed;
+
 struct ibm_arch_vec {
        struct { u32 mask, val; } pvrs[14];
 
@@ -949,6 +972,9 @@ struct ibm_arch_vec {
 
        u8 vec6_len;
        struct option_vector6 vec6;
+
+       u8 vec7_len;
+       struct option_vector7 vec7;
 } __packed;
 
 static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
@@ -1095,6 +1121,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
                .secondary_pteg = 0,
                .os_name = OV6_LINUX,
        },
+
+       /* option vector 7: OS Identification */
+       .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
 };
 
 static struct ibm_arch_vec __prombss ibm_architecture_vec  ____cacheline_aligned;
@@ -1323,6 +1352,8 @@ static void __init prom_check_platform_support(void)
        memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
               sizeof(ibm_architecture_vec));
 
+       prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
+
        if (prop_len > 1) {
                int i;
                u8 vec[8];
@@ -1762,6 +1793,8 @@ static int prom_rtas_hcall(uint64_t args)
        asm volatile("sc 1\n" : "=r" (arg1) :
                        "r" (arg1),
                        "r" (arg2) :);
+       srr_regs_clobbered();
+
        return arg1;
 }
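
The added srr_regs_clobbered() call records that firmware trashed SRR0/SRR1 behind the kernel's back: the reworked 64s interrupt exit caches SRR contents to skip redundant mtspr on return, and must not trust that cache after an hcall. A sketch of the assumed helper, per the interrupt-return series in this merge (the paca field names are assumptions):

	static inline void srr_regs_clobbered(void)
	{
		local_paca->srr_valid = 0;
		local_paca->hsrr_valid = 0;
	}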
 
@@ -2702,7 +2735,7 @@ static void __init flatten_device_tree(void)
 
        /* Add "phandle" in there, we'll need it */
        namep = make_room(&mem_start, &mem_end, 16, 1);
-       prom_strcpy(namep, "phandle");
+       prom_strscpy_pad(namep, "phandle", sizeof("phandle"));
        mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
 
        /* Build string array */
@@ -3210,54 +3243,6 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
 
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_RELOCATABLE
-static void reloc_toc(void)
-{
-}
-
-static void unreloc_toc(void)
-{
-}
-#else
-static void __reloc_toc(unsigned long offset, unsigned long nr_entries)
-{
-       unsigned long i;
-       unsigned long *toc_entry;
-
-       /* Get the start of the TOC by using r2 directly. */
-       asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry));
-
-       for (i = 0; i < nr_entries; i++) {
-               *toc_entry = *toc_entry + offset;
-               toc_entry++;
-       }
-}
-
-static void reloc_toc(void)
-{
-       unsigned long offset = reloc_offset();
-       unsigned long nr_entries =
-               (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
-       __reloc_toc(offset, nr_entries);
-
-       mb();
-}
-
-static void unreloc_toc(void)
-{
-       unsigned long offset = reloc_offset();
-       unsigned long nr_entries =
-               (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
-       mb();
-
-       __reloc_toc(-offset, nr_entries);
-}
-#endif
-#endif
-
 #ifdef CONFIG_PPC_SVM
 /*
  * Perform the Enter Secure Mode ultracall.
@@ -3291,14 +3276,12 @@ static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
         * relocated it so the check will fail. Restore the original image by
         * relocating it back to the kernel virtual base address.
         */
-       if (IS_ENABLED(CONFIG_RELOCATABLE))
-               relocate(KERNELBASE);
+       relocate(KERNELBASE);
 
        ret = enter_secure_mode(kbase, fdt);
 
        /* Relocate the kernel again. */
-       if (IS_ENABLED(CONFIG_RELOCATABLE))
-               relocate(kbase);
+       relocate(kbase);
 
        if (ret != U_SUCCESS) {
                prom_printf("Returned %d from switching to secure mode.\n", ret);
@@ -3326,8 +3309,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 #ifdef CONFIG_PPC32
        unsigned long offset = reloc_offset();
        reloc_got2(offset);
-#else
-       reloc_toc();
 #endif
 
        /*
@@ -3504,8 +3485,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 
 #ifdef CONFIG_PPC32
        reloc_got2(-offset);
-#else
-       unreloc_toc();
 #endif
 
        /* Move to secure memory if we're supposed to be secure guests. */
index 3990c01ef8cfa9303dd7513c52389635b148292c..399f5d94a3dfd234b2f7c99dda6c65d5e10656a4 100644 (file)
@@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *task)
        if (regs != NULL) {
                task->thread.debug.dbcr0 &= ~DBCR0_BT;
                task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
-               regs->msr |= MSR_DE;
+               regs_set_return_msr(regs, regs->msr | MSR_DE);
        }
        set_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
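
This hunk is the first instance of a pattern repeated throughout the series: direct writes to regs->msr and regs->nip are funnelled through accessors, so the 64s interrupt-return fast path can tell when its cached SRR values went stale. A sketch of the assumed shape of the helpers, based on the set_return_regs_changed() commit named in this merge (guard and field names are assumptions):

	static inline void set_return_regs_changed(void)
	{
	#ifdef CONFIG_PPC64
		local_paca->hsrr_valid = 0;
		local_paca->srr_valid = 0;
	#endif
	}

	static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
	{
		regs->msr = msr;
		set_return_regs_changed();
	}

	static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
	{
		regs->nip = ip;
		set_return_regs_changed();
	}

regs_add_return_ip(regs, off), seen later in this diff, is presumably regs_set_return_ip(regs, regs->nip + off).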
@@ -24,7 +24,7 @@ void user_enable_block_step(struct task_struct *task)
        if (regs != NULL) {
                task->thread.debug.dbcr0 &= ~DBCR0_IC;
                task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
-               regs->msr |= MSR_DE;
+               regs_set_return_msr(regs, regs->msr | MSR_DE);
        }
        set_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
@@ -50,7 +50,7 @@ void user_disable_single_step(struct task_struct *task)
                         * All debug events were off.....
                         */
                        task->thread.debug.dbcr0 &= ~DBCR0_IDM;
-                       regs->msr &= ~MSR_DE;
+                       regs_set_return_msr(regs, regs->msr & ~MSR_DE);
                }
        }
        clear_tsk_thread_flag(task, TIF_SINGLESTEP);
@@ -82,6 +82,7 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
 
 int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
 {
+       struct pt_regs *regs = task->thread.regs;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        int ret;
        struct thread_struct *thread = &task->thread;
@@ -112,7 +113,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
                dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
                if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
                                        task->thread.debug.dbcr1)) {
-                       task->thread.regs->msr &= ~MSR_DE;
+                       regs_set_return_msr(regs, regs->msr & ~MSR_DE);
                        task->thread.debug.dbcr0 &= ~DBCR0_IDM;
                }
                return 0;
@@ -132,7 +133,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
                dbcr_dac(task) |= DBCR_DAC1R;
        if (data & 0x2UL)
                dbcr_dac(task) |= DBCR_DAC1W;
-       task->thread.regs->msr |= MSR_DE;
+       regs_set_return_msr(regs, regs->msr | MSR_DE);
        return 0;
 }
 
@@ -220,7 +221,7 @@ static long set_instruction_bp(struct task_struct *child,
        }
 out:
        child->thread.debug.dbcr0 |= DBCR0_IDM;
-       child->thread.regs->msr |= MSR_DE;
+       regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
 
        return slot;
 }
@@ -336,7 +337,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
                return -ENOSPC;
        }
        child->thread.debug.dbcr0 |= DBCR0_IDM;
-       child->thread.regs->msr |= MSR_DE;
+       regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
 
        return slot + 4;
 }
@@ -430,7 +431,7 @@ static int set_dac_range(struct task_struct *child,
                child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
        else    /* PPC_BREAKPOINT_MODE_MASK */
                child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
-       child->thread.regs->msr |= MSR_DE;
+       regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
 
        return 5;
 }
@@ -485,7 +486,8 @@ long ppc_del_hwdebug(struct task_struct *child, long data)
                if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
                                        child->thread.debug.dbcr1)) {
                        child->thread.debug.dbcr0 &= ~DBCR0_IDM;
-                       child->thread.regs->msr &= ~MSR_DE;
+                       regs_set_return_msr(child->thread.regs,
+                                       child->thread.regs->msr & ~MSR_DE);
                }
        }
        return rc;
index aa36fcad36cd609d8efe04c47db3ec603d98f645..a5dd7d2e2c9eb508dafa0fc44a145d35ce21dbce 100644 (file)
@@ -11,10 +11,8 @@ void user_enable_single_step(struct task_struct *task)
 {
        struct pt_regs *regs = task->thread.regs;
 
-       if (regs != NULL) {
-               regs->msr &= ~MSR_BE;
-               regs->msr |= MSR_SE;
-       }
+       if (regs != NULL)
+               regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE);
        set_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
@@ -22,10 +20,8 @@ void user_enable_block_step(struct task_struct *task)
 {
        struct pt_regs *regs = task->thread.regs;
 
-       if (regs != NULL) {
-               regs->msr &= ~MSR_SE;
-               regs->msr |= MSR_BE;
-       }
+       if (regs != NULL)
+               regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE);
        set_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
@@ -34,7 +30,7 @@ void user_disable_single_step(struct task_struct *task)
        struct pt_regs *regs = task->thread.regs;
 
        if (regs != NULL)
-               regs->msr &= ~(MSR_SE | MSR_BE);
+               regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE));
 
        clear_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
index 773bcc4ca8431b3852cdaedb75f1bb527914bb5f..b8be1d6668b59a36aba451912ceaafa823f95dc1 100644 (file)
@@ -113,8 +113,9 @@ static unsigned long get_user_msr(struct task_struct *task)
 
 static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
 {
-       task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
-       task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
+       unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) |
+                               (msr & MSR_DEBUGCHANGE);
+       regs_set_return_msr(task->thread.regs, newmsr);
        return 0;
 }
 
index a28239b8b0c0bc4eb11b87d502edc8b8294d724d..33c07c8af6c8b379852cc8f8784b70b5381ccfd2 100644 (file)
@@ -12,7 +12,7 @@
 
 
 #define MAX_RTC_WAIT 5000      /* 5 sec */
-#define RTAS_CLOCK_BUSY (-2)
+
 time64_t __init rtas_get_boot_time(void)
 {
        int ret[8];
index 6bada744402b1c6fc19afc93b5470ea93a515561..99f2cce635fb11c04f3a5055f8820d9d5b72f7b6 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/reboot.h>
 #include <linux/syscalls.h>
 
+#include <asm/interrupt.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/hvcall.h>
 /* This is here deliberately so it's only used in this file */
 void enter_rtas(unsigned long);
 
+static inline void do_enter_rtas(unsigned long args)
+{
+       enter_rtas(args);
+
+       srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
+}
+
 struct rtas_t rtas = {
        .lock = __ARCH_SPIN_LOCK_UNLOCKED
 };
@@ -384,7 +392,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
        save_args = rtas.args;
        rtas.args = err_args;
 
-       enter_rtas(__pa(&rtas.args));
+       do_enter_rtas(__pa(&rtas.args));
 
        err_args = rtas.args;
        rtas.args = save_args;
@@ -430,7 +438,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
        for (i = 0; i < nret; ++i)
                args->rets[i] = 0;
 
-       enter_rtas(__pa(args));
+       do_enter_rtas(__pa(args));
 }
 
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
@@ -1138,7 +1146,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
        flags = lock_rtas();
 
        rtas.args = args;
-       enter_rtas(__pa(&rtas.args));
+       do_enter_rtas(__pa(&rtas.args));
        args = rtas.args;
 
        /* A -1 return code indicates that the last command couldn't
index c17d1c9362b50ead08c9178c25ff129458a3d0b3..cc51fa52e783150dabff0f634fd5ddc2162ee321 100644 (file)
@@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable)
 void setup_stf_barrier(void)
 {
        enum stf_barrier_type type;
-       bool enable, hv;
-
-       hv = cpu_has_feature(CPU_FTR_HVMODE);
+       bool enable;
 
        /* Default to fallback in case fw-features are not available */
        if (cpu_has_feature(CPU_FTR_ARCH_300))
@@ -315,8 +313,7 @@ void setup_stf_barrier(void)
                type = STF_BARRIER_NONE;
 
        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
-               (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
-                (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+                security_ftr_enabled(SEC_FTR_STF_BARRIER);
 
        if (type == STF_BARRIER_FALLBACK) {
                pr_info("stf-barrier: fallback barrier available\n");
@@ -439,8 +436,8 @@ static void update_branch_cache_flush(void)
        site2 = &patch__call_kvm_flush_link_stack_p9;
        // This controls the branch from guest_exit_cont to kvm_flush_link_stack
        if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
-               patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
-               patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
+               patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+               patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
        } else {
                // Could use HW flush, but that could also flush count cache
                patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
@@ -450,11 +447,11 @@ static void update_branch_cache_flush(void)
 
        // Patch out the bcctr first, then nop the rest
        site = &patch__call_flush_branch_caches3;
-       patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+       patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
        site = &patch__call_flush_branch_caches2;
-       patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+       patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
        site = &patch__call_flush_branch_caches1;
-       patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+       patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
 
        // This controls the branch from _switch to flush_branch_caches
        if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
@@ -477,12 +474,12 @@ static void update_branch_cache_flush(void)
                // If we just need to flush the link stack, early return
                if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
                        patch_instruction_site(&patch__flush_link_stack_return,
-                                              ppc_inst(PPC_INST_BLR));
+                                              ppc_inst(PPC_RAW_BLR()));
 
                // If we have flush instruction, early return
                } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
                        patch_instruction_site(&patch__flush_count_cache_return,
-                                              ppc_inst(PPC_INST_BLR));
+                                              ppc_inst(PPC_RAW_BLR()));
                }
        }
 }
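
The PPC_RAW_*() helpers from asm/ppc-opcode.h build instruction encodings compositionally rather than as ad-hoc PPC_INST_* constants; the values are unchanged. For the two used here, the expansions amount to (standard Power ISA encodings, shown for reference, not copied from the header):

	#define PPC_RAW_NOP()	(0x60000000)	/* ori r0,r0,0 */
	#define PPC_RAW_BLR()	(0x4e800020)	/* blr */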
index 046fe21b5c3b02685db5f96a2fc9087280d9f419..26328fd2990c78ad077c4d8c9219aecf33f07c60 100644 (file)
@@ -92,8 +92,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid);
 int dcache_bsize;
 int icache_bsize;
 
-unsigned long klimit = (unsigned long) _end;
-
 /*
  * This still seems to be needed... -- paulus
  */ 
@@ -931,7 +929,7 @@ void __init setup_arch(char **cmdline_p)
        init_mm.start_code = (unsigned long)_stext;
        init_mm.end_code = (unsigned long) _etext;
        init_mm.end_data = (unsigned long) _edata;
-       init_mm.brk = klimit;
+       init_mm.brk = (unsigned long)_end;
 
        mm_iommu_init(&init_mm);
        irqstack_early_init();
index d7c1f92152af6720e26d3220d0a2f2c08cdc809f..7ec5c47fce0e3030e2f0b79092b1aab391bf2d33 100644 (file)
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
  */
 notrace void __init machine_init(u64 dt_ptr)
 {
-       struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache);
+       u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
        struct ppc_inst insn;
 
        /* Configure static keys first, now that we're relocated. */
@@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr)
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
-       patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP));
+       patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP()));
 
        create_cond_branch(&insn, addr, branch_target(addr), 0x820000);
        patch_instruction(addr, insn);  /* replace b by bne cr0 */
index a35fbf4d0bcef9fc8efd6ae1cd824b7c951173a6..1ff258f6c76c2733ce5c7dde8f0e6868be3e6aae 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/pgtable.h>
 
 #include <asm/debugfs.h>
+#include <asm/kvm_guest.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
  * disable it by default. Book3S has a soft-nmi hardlockup detector based
  * on the decrementer interrupt, so it does not suffer from this problem.
  *
- * It is likely to get false positives in VM guests, so disable it there
- * by default too.
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but guarantees a minimum regular allotment even with SPLPAR, so
+ * enable the detector for non-KVM guests, which we assume are PowerVM.
  */
 static int __init disable_hardlockup_detector(void)
 {
 #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
        hardlockup_detector_disable();
 #else
-       if (firmware_has_feature(FW_FEATURE_LPAR))
-               hardlockup_detector_disable();
+       if (firmware_has_feature(FW_FEATURE_LPAR)) {
+               if (is_kvm_guest())
+                       hardlockup_detector_disable();
+       }
 #endif
 
        return 0;
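
is_kvm_guest() comes from the newly included asm/kvm_guest.h. Assumed implementation (a static key flipped during early boot when KVM is detected; a sketch, not verified against this tree):

	DECLARE_STATIC_KEY_FALSE(kvm_guest);

	static inline bool is_kvm_guest(void)
	{
		return static_branch_unlikely(&kvm_guest);
	}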
index 9ded046edb0e16aa850c423a084465b8abfa42bb..e600764a926c67b89254f6314ba91e3af69a1306 100644 (file)
@@ -214,7 +214,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
                        regs->gpr[0] = __NR_restart_syscall;
                else
                        regs->gpr[3] = regs->orig_gpr3;
-               regs->nip -= 4;
+               regs_add_return_ip(regs, -4);
                regs->result = 0;
        } else {
                if (trap_is_scv(regs)) {
@@ -322,16 +322,16 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk)
         * For signals taken in non-TM or suspended mode, we use the
         * normal/non-checkpointed stack pointer.
         */
-
-       unsigned long ret = tsk->thread.regs->gpr[1];
+       struct pt_regs *regs = tsk->thread.regs;
+       unsigned long ret = regs->gpr[1];
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        BUG_ON(tsk != current);
 
-       if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
+       if (MSR_TM_ACTIVE(regs->msr)) {
                preempt_disable();
                tm_reclaim_current(TM_CAUSE_SIGNAL);
-               if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
+               if (MSR_TM_TRANSACTIONAL(regs->msr))
                        ret = tsk->thread.ckpt_regs.gpr[1];
 
                /*
@@ -341,7 +341,7 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk)
                 * (tm_recheckpoint_new_task() would recheckpoint). Besides, we
                 * enter the signal handler in non-transactional state.
                 */
-               tsk->thread.regs->msr &= ~MSR_TS_MASK;
+               regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
                preempt_enable();
        }
 #endif
index 8f05ed0da292b09e4303919d0f51456f43da3d4a..0608581967f093187cf2970a46033ec9dedfa5f8 100644 (file)
@@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void)
 {
        WARN_ON(tm_suspend_disabled);
 
-#ifdef CONFIG_ALTIVEC
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
-#endif
-#ifdef CONFIG_SPE
-       if (current->thread.used_spe)
-               flush_spe_to_thread(current);
-#endif
 }
 
 static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
@@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
         */
        unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed);
 
-#ifdef CONFIG_ALTIVEC
        /* save altivec registers */
        if (current->thread.used_vr) {
                unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
@@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
        else
                unsafe_put_user(current->thread.ckvrsave,
                                (u32 __user *)&tm_frame->mc_vregs[32], failed);
-#endif /* CONFIG_ALTIVEC */
 
        unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed);
        if (msr & MSR_FP)
@@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
        else
                unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed);
 
-#ifdef CONFIG_VSX
        /*
         * Copy VSR 0-31 upper half from thread_struct to local
         * buffer, then write that to userspace.  Also set MSR_VSX in
@@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
 
                msr |= MSR_VSX;
        }
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-       /* SPE regs are not checkpointed with TM, so this section is
-        * simply the same as in __unsafe_save_user_regs().
-        */
-       if (current->thread.used_spe) {
-               unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
-                                   ELF_NEVRREG * sizeof(u32), failed);
-               /* set MSR_SPE in the saved MSR value to indicate that
-                * frame->mc_vregs contains valid data */
-               msr |= MSR_SPE;
-       }
-
-       /* We always copy to/from spefscr */
-       unsafe_put_user(current->thread.spefscr,
-                       (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
 
        unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
 
@@ -505,14 +479,14 @@ static long restore_user_regs(struct pt_regs *regs,
 
        /* if doing signal return, restore the previous little-endian mode */
        if (sig)
-               regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+               regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
 #ifdef CONFIG_ALTIVEC
        /*
         * Force the process to reload the altivec registers from
         * current->thread when it next does altivec instructions
         */
-       regs->msr &= ~MSR_VEC;
+       regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
        if (msr & MSR_VEC) {
                /* restore altivec registers from the stack */
                unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
@@ -534,7 +508,7 @@ static long restore_user_regs(struct pt_regs *regs,
         * Force the process to reload the VSX registers from
         * current->thread when it next does VSX instruction.
         */
-       regs->msr &= ~MSR_VSX;
+       regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
        if (msr & MSR_VSX) {
                /*
                 * Restore altivec registers from the stack to a local
@@ -550,12 +524,12 @@ static long restore_user_regs(struct pt_regs *regs,
         * force the process to reload the FP registers from
         * current->thread when it next does FP instructions
         */
-       regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+       regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
 #ifdef CONFIG_SPE
        /* force the process to reload the spe registers from
           current->thread when it next does spe instructions */
-       regs->msr &= ~MSR_SPE;
+       regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
        if (msr & MSR_SPE) {
                /* restore spe registers from the stack */
                unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
@@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                                 struct mcontext __user *tm_sr)
 {
        unsigned long msr, msr_hi;
-#ifdef CONFIG_VSX
        int i;
-#endif
 
        if (tm_suspend_disabled)
                return 1;
@@ -608,10 +580,9 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
 
        /* Restore the previous little-endian mode */
-       regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
-#ifdef CONFIG_ALTIVEC
-       regs->msr &= ~MSR_VEC;
+       regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
        if (msr & MSR_VEC) {
                /* restore altivec registers from the stack */
                unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
@@ -629,14 +600,12 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                        (u32 __user *)&sr->mc_vregs[32], failed);
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
-#endif /* CONFIG_ALTIVEC */
 
-       regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+       regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
        unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
-#ifdef CONFIG_VSX
-       regs->msr &= ~MSR_VSX;
+       regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
        if (msr & MSR_VSX) {
                /*
                 * Restore altivec registers from the stack to a local
@@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                        current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
                        current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
                }
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
-       /* SPE regs are not checkpointed with TM, so this section is
-        * simply the same as in restore_user_regs().
-        */
-       regs->msr &= ~MSR_SPE;
-       if (msr & MSR_SPE) {
-               unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
-                                     ELF_NEVRREG * sizeof(u32), failed);
-               current->thread.used_spe = true;
-       } else if (current->thread.used_spe)
-               memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
-
-       /* Always get SPEFSCR back */
-       unsafe_get_user(current->thread.spefscr,
-                       (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
 
        user_read_access_end();
 
@@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 
        unsafe_restore_general_regs(regs, tm_sr, failed);
 
-#ifdef CONFIG_ALTIVEC
        /* restore altivec registers from the stack */
        if (msr & MSR_VEC)
                unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
@@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        /* Always get VRSAVE back */
        unsafe_get_user(current->thread.vrsave,
                        (u32 __user *)&tm_sr->mc_vregs[32], failed);
-#endif /* CONFIG_ALTIVEC */
 
        unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
 
-#ifdef CONFIG_VSX
        if (msr & MSR_VSX) {
                /*
                 * Restore altivec registers from the stack to a local
@@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
                current->thread.used_vsr = true;
        }
-#endif /* CONFIG_VSX */
 
        /* Get the top half of the MSR from the user context */
        unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
@@ -725,7 +672,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
         *
         * Pull in the MSR TM bits from the user context
         */
-       regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK));
        /* Now, recheckpoint.  This loads up all of the checkpointed (older)
         * registers, including FP and V[S]Rs.  After recheckpointing, the
         * transactional versions should be loaded.
@@ -740,14 +687,12 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        msr_check_and_set(msr & (MSR_FP | MSR_VEC));
        if (msr & MSR_FP) {
                load_fp_state(&current->thread.fp_state);
-               regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+               regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode));
        }
-#ifdef CONFIG_ALTIVEC
        if (msr & MSR_VEC) {
                load_vr_state(&current->thread.vr_state);
-               regs->msr |= MSR_VEC;
+               regs_set_return_msr(regs, regs->msr | MSR_VEC);
        }
-#endif
 
        preempt_enable();
 
@@ -828,10 +773,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
                tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
        } else {
                tramp = (unsigned long)mctx->mc_pad;
-               /* Set up the sigreturn trampoline: li r0,sigret; sc */
-               unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0],
-                               failed);
-               unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed);
+               unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
+               unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
                asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
        }
        unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
@@ -858,10 +801,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
        regs->gpr[4] = (unsigned long)&frame->info;
        regs->gpr[5] = (unsigned long)&frame->uc;
        regs->gpr[6] = (unsigned long)frame;
-       regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
+       regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
        /* enter the signal handler in native-endian mode */
-       regs->msr &= ~MSR_LE;
-       regs->msr |= (MSR_KERNEL & MSR_LE);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
        return 0;
 
 failed:
@@ -926,9 +869,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
                tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
        } else {
                tramp = (unsigned long)mctx->mc_pad;
-               /* Set up the sigreturn trampoline: li r0,sigret; sc */
-               unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed);
-               unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed);
+               unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
+               unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
                asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
        }
        user_access_end();
@@ -947,10 +889,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
        regs->gpr[1] = newsp;
        regs->gpr[3] = ksig->sig;
        regs->gpr[4] = (unsigned long) sc;
-       regs->nip = (unsigned long)ksig->ka.sa.sa_handler;
+       regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
        /* enter the signal handler in native-endian mode */
-       regs->msr &= ~MSR_LE;
-       regs->msr |= (MSR_KERNEL & MSR_LE);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
        return 0;
 
 failed:
@@ -1200,7 +1142,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
                 * set, and recheckpoint was not called. This avoid
                 * hitting a TM Bad thing at RFID
                 */
-               regs->msr &= ~MSR_TS_MASK;
+               regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
        }
        /* Fall through, for non-TM restore */
 #endif
@@ -1289,7 +1231,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
           affect the contents of these registers.  After this point,
           failure is a problem, anyway, and it's very unlikely unless
           the user is really doing something wrong. */
-       regs->msr = new_msr;
+       regs_set_return_msr(regs, new_msr);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
        current->thread.debug.dbcr0 = new_dbcr0;
 #endif
index f9e1f5428b9e3d4006c0af034c7ff1bd3d7c5ea1..1831bba0582e10dd0ab6ccafed3b1a6a045f1246 100644 (file)
@@ -354,7 +354,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_
        /* get MSR separately, transfer the LE bit if doing signal return */
        unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
        if (sig)
-               regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+               regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
        unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
        unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
        unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
@@ -376,7 +376,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_
         * This has to be done before copying stuff into tsk->thread.fpr/vr
         * for the reasons explained in the previous comment.
         */
-       regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+       regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
 
 #ifdef CONFIG_ALTIVEC
        unsafe_get_user(v_regs, &sc->v_regs, efault_out);
@@ -468,7 +468,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
                return -EINVAL;
 
        /* pull in MSR LE from user context */
-       regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
        /* The following non-GPR non-FPR non-VR state is also checkpointed: */
        err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
@@ -495,7 +495,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
         * This has to be done before copying stuff into tsk->thread.fpr/vr
         * for the reasons explained in the previous comment.
         */
-       regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+       regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
 
 #ifdef CONFIG_ALTIVEC
        err |= __get_user(v_regs, &sc->v_regs);
@@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
        preempt_disable();
 
        /* pull in MSR TS bits from user context */
-       regs->msr |= msr & MSR_TS_MASK;
+       regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK));
 
        /*
         * Ensure that TM is enabled in regs->msr before we leave the signal
@@ -583,7 +583,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
         * to be de-scheduled with MSR[TS] set but without calling
         * tm_recheckpoint(). This can cause a bug.
         */
-       regs->msr |= MSR_TM;
+       regs_set_return_msr(regs, regs->msr | MSR_TM);
 
        /* This loads the checkpointed FP/VEC state, if used */
        tm_recheckpoint(&tsk->thread);
@@ -591,11 +591,11 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
        msr_check_and_set(msr & (MSR_FP | MSR_VEC));
        if (msr & MSR_FP) {
                load_fp_state(&tsk->thread.fp_state);
-               regs->msr |= (MSR_FP | tsk->thread.fpexc_mode);
+               regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode));
        }
        if (msr & MSR_VEC) {
                load_vr_state(&tsk->thread.vr_state);
-               regs->msr |= MSR_VEC;
+               regs_set_return_msr(regs, regs->msr | MSR_VEC);
        }
 
        preempt_enable();
@@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
        int i;
        long err = 0;
 
-       /* bctrl # call the handler */
-       err |= __put_user(PPC_INST_BCTRL, &tramp[0]);
-       /* addi r1, r1, __SIGNAL_FRAMESIZE  # Pop the dummy stackframe */
-       err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
-                         (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]);
-       /* li r0, __NR_[rt_]sigreturn| */
-       err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]);
-       /* sc */
-       err |= __put_user(PPC_INST_SC, &tramp[3]);
+       /* Call the handler and pop the dummy stackframe */
+       err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]);
+       err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]);
+
+       err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]);
+       err |= __put_user(PPC_RAW_SC(), &tramp[3]);
 
        /* Minimal traceback info */
        for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
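
Functionally identical to the removed open-coded constants; restating the trampoline as assembly (from the deleted per-word comments):

	/*
	 *	bctrl					# call the handler
	 *	addi	r1,r1,__SIGNAL_FRAMESIZE	# pop the dummy stack frame
	 *	li	r0,syscall			# __NR_[rt_]sigreturn
	 *	sc
	 */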
@@ -720,6 +717,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 
        /* This returns like rt_sigreturn */
        set_thread_flag(TIF_RESTOREALL);
+
        return 0;
 
 efault_out:
@@ -786,7 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
                 * the MSR[TS] that came from user context later, at
                 * restore_tm_sigcontexts.
                 */
-               regs->msr &= ~MSR_TS_MASK;
+               regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
 
                if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
                        goto badframe;
@@ -818,7 +816,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
                 * MSR[TS] set, but without CPU in the proper state,
                 * causing a TM bad thing.
                 */
-               current->thread.regs->msr &= ~MSR_TS_MASK;
+               regs_set_return_msr(current->thread.regs,
+                               current->thread.regs->msr & ~MSR_TS_MASK);
                if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
                        goto badframe;
 
@@ -832,6 +831,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
                goto badframe;
 
        set_thread_flag(TIF_RESTOREALL);
+
        return 0;
 
 badframe_block:
@@ -911,12 +911,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
        /* Set up to return from userspace. */
        if (tsk->mm->context.vdso) {
-               regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64);
+               regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64));
        } else {
                err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
                if (err)
                        goto badframe;
-               regs->nip = (unsigned long) &frame->tramp[0];
+               regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
        }
 
        /* Allocate a dummy caller frame for the signal handler. */
@@ -941,14 +941,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
        }
 
        /* enter the signal handler in native-endian mode */
-       regs->msr &= ~MSR_LE;
-       regs->msr |= (MSR_KERNEL & MSR_LE);
+       regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
        regs->gpr[1] = newsp;
        regs->gpr[3] = ksig->sig;
        regs->result = 0;
        if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-               err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
-               err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+               regs->gpr[4] = (unsigned long)&frame->info;
+               regs->gpr[5] = (unsigned long)&frame->uc;
                regs->gpr[6] = (unsigned long) frame;
        } else {
                regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
index 7ddc2d32c39e9db2bd27b414e769be7d6c693b71..447b78a87c8f2adcb5f46c0073b7cdbea84ec275 100644 (file)
@@ -619,6 +619,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
        /*
         * IRQs are already hard disabled by the smp_handle_nmi_ipi.
         */
+       set_cpu_online(smp_processor_id(), false);
+
        spin_begin();
        while (1)
                spin_cpu_relax();
@@ -634,6 +636,15 @@ void smp_send_stop(void)
 static void stop_this_cpu(void *dummy)
 {
        hard_irq_disable();
+
+       /*
+        * Offlining CPUs in stop_this_cpu can result in scheduler warnings
+        * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+        * to know other CPUs are offline before it breaks locks to flush
+        * printk buffers, in case we panic()ed while holding the lock.
+        */
+       set_cpu_online(smp_processor_id(), false);
+
        spin_begin();
        while (1)
                spin_cpu_relax();
@@ -1541,6 +1552,10 @@ void start_secondary(void *unused)
 {
        unsigned int cpu = raw_smp_processor_id();
 
+       /* PPC64 calls setup_kup() in early_setup_secondary() */
+       if (IS_ENABLED(CONFIG_PPC32))
+               setup_kup();
+
        mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
index 1deb1bf331ddbdb1e0b527cdb87f4e159c70cf48..2b0d04a1b7d2d8caaf896c153fbd9316ea016b4d 100644 (file)
@@ -23,8 +23,8 @@
 
 #include <asm/paca.h>
 
-void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
-                    struct task_struct *task, struct pt_regs *regs)
+void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+                                          struct task_struct *task, struct pt_regs *regs)
 {
        unsigned long sp;
 
@@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
  *
  * If the task is not 'current', the caller *must* ensure the task is inactive.
  */
-int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
-                            void *cookie, struct task_struct *task)
+int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+                                                  void *cookie, struct task_struct *task)
 {
        unsigned long sp;
        unsigned long newsp;
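
These walkers read the stacks of other tasks and of interrupted contexts, memory KASAN has no valid shadow state for, so address-sanitizer instrumentation is suppressed on both. The attribute is assumed to be the usual compiler-header definition:

	#define __no_sanitize_address __attribute__((no_sanitize_address))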
@@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs)
 
 static void raise_backtrace_ipi(cpumask_t *mask)
 {
+       struct paca_struct *p;
        unsigned int cpu;
+       u64 delay_us;
 
        for_each_cpu(cpu, mask) {
-               if (cpu == smp_processor_id())
+               if (cpu == smp_processor_id()) {
                        handle_backtrace_ipi(NULL);
-               else
-                       smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
-       }
+                       continue;
+               }
 
-       for_each_cpu(cpu, mask) {
-               struct paca_struct *p = paca_ptrs[cpu];
+               delay_us = 5 * USEC_PER_SEC;
+
+               if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+                       // Now wait up to 5s for the other CPU to do its backtrace
+                       while (cpumask_test_cpu(cpu, mask) && delay_us) {
+                               udelay(1);
+                               delay_us--;
+                       }
+
+                       // Other CPU cleared itself from the mask
+                       if (delay_us)
+                               continue;
+               }
+
+               p = paca_ptrs[cpu];
 
                cpumask_clear_cpu(cpu, mask);
 
index a552c9e68d7e69d68236e055c825c359863c91be..bf4ae0f0e36c30385d185672075a3807152aee71 100644 (file)
@@ -114,7 +114,8 @@ SYSCALL_DEFINE0(switch_endian)
 {
        struct thread_info *ti;
 
-       current->thread.regs->msr ^= MSR_LE;
+       regs_set_return_msr(current->thread.regs,
+                               current->thread.regs->msr ^ MSR_LE);
 
        /*
         * Set TIF_RESTOREALL so that r3 isn't clobbered on return to
index 2e08640bb3b4b61ea2ab93197148a8a01e483f76..5ff0e55d0db1008d3f629b798c89c3d5bcc184de 100644 (file)
@@ -843,14 +843,14 @@ static int register_cpu_online(unsigned int cpu)
 #ifdef HAS_PPC_PMC_IBM
        case PPC_PMC_IBM:
                attrs = ibm_common_attrs;
-               nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(ibm_common_attrs);
                pmc_attrs = classic_pmc_attrs;
                break;
 #endif /* HAS_PPC_PMC_IBM */
 #ifdef HAS_PPC_PMC_G4
        case PPC_PMC_G4:
                attrs = g4_common_attrs;
-               nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(g4_common_attrs);
                pmc_attrs = classic_pmc_attrs;
                break;
 #endif /* HAS_PPC_PMC_G4 */
@@ -858,7 +858,7 @@ static int register_cpu_online(unsigned int cpu)
        case PPC_PMC_PA6T:
                /* PA Semi starts counting at PMC0 */
                attrs = pa6t_attrs;
-               nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(pa6t_attrs);
                pmc_attrs = NULL;
                break;
 #endif
@@ -940,14 +940,14 @@ static int unregister_cpu_online(unsigned int cpu)
 #ifdef HAS_PPC_PMC_IBM
        case PPC_PMC_IBM:
                attrs = ibm_common_attrs;
-               nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(ibm_common_attrs);
                pmc_attrs = classic_pmc_attrs;
                break;
 #endif /* HAS_PPC_PMC_IBM */
 #ifdef HAS_PPC_PMC_G4
        case PPC_PMC_G4:
                attrs = g4_common_attrs;
-               nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(g4_common_attrs);
                pmc_attrs = classic_pmc_attrs;
                break;
 #endif /* HAS_PPC_PMC_G4 */
@@ -955,7 +955,7 @@ static int unregister_cpu_online(unsigned int cpu)
        case PPC_PMC_PA6T:
                /* PA Semi starts counting at PMC0 */
                attrs = pa6t_attrs;
-               nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+               nattrs = ARRAY_SIZE(pa6t_attrs);
                pmc_attrs = NULL;
                break;
 #endif
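
ARRAY_SIZE() is the stock helper from include/linux/kernel.h; it performs the same division but adds a compile-time check that the argument is a real array rather than a decayed pointer:

	#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))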
index 6c31af7f4fa82a77fdf1cf87c9e8b3a5a77ea93e..b9a047d92ec08fc75f0b4238d3f7a75d5a147da5 100644 (file)
@@ -201,7 +201,7 @@ static int __init TAU_init(void)
        tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
                         !strcmp(cur_cpu_spec->platform, "ppc750");
 
-       tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
+       tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1);
        if (!tau_workq)
                return -ENOMEM;
 
index da995c5fb97d93b5fea0c7b5bfb20c5a3bd101cc..e45ce427bffb1f51ab52c801cf51184378a3e1ef 100644 (file)
@@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb)
 void notrace accumulate_stolen_time(void)
 {
        u64 sst, ust;
-       unsigned long save_irq_soft_mask = irq_soft_mask_return();
        struct cpu_accounting_data *acct = &local_paca->accounting;
 
-       /* We are called early in the exception entry, before
-        * soft/hard_enabled are sync'ed to the expected state
-        * for the exception. We are hard disabled but the PACA
-        * needs to reflect that so various debug stuff doesn't
-        * complain
-        */
-       irq_soft_mask_set(IRQS_DISABLED);
-
        sst = scan_dispatch_log(acct->starttime_user);
        ust = scan_dispatch_log(acct->starttime);
        acct->stime -= sst;
        acct->utime -= ust;
        acct->steal_time += ust + sst;
-
-       irq_soft_mask_set(save_irq_soft_mask);
 }
 
 static inline u64 calculate_stolen_time(u64 stop_tb)
index ffe9537195aa33d348b027a6e2e4c868c105f0d2..d89c5df4f20620c5911b5c224e820ffb3ecb688c 100644 (file)
@@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
        addr = ppc_function_entry((void *)addr);
 
        /* if (link) set op to 'bl' else 'b' */
-       create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0);
+       create_branch(&op, (u32 *)ip, addr, link ? 1 : 0);
 
        return op;
 }
@@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
        }
 
        /* replace the text with the new text */
-       if (patch_instruction((struct ppc_inst *)ip, new))
+       if (patch_instruction((u32 *)ip, new))
                return -EPERM;
 
        return 0;
@@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr)
        addr = ppc_function_entry((void *)addr);
 
        /* use the create_branch to verify that this offset can be branched */
-       return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0;
+       return create_branch(&op, (u32 *)ip, addr, 0) == 0;
 }
 
 static int is_bl_op(struct ppc_inst op)
@@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL
        /* When using -mprofile-kernel there is no load to jump over */
-       pop = ppc_inst(PPC_INST_NOP);
+       pop = ppc_inst(PPC_RAW_NOP());
 
        if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
                pr_err("Fetching instruction at %lx failed.\n", ip - 4);
@@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod,
        }
 
        /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
-       if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
+       if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
            !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
                pr_err("Unexpected instruction %s around bl _mcount\n",
                       ppc_inst_as_str(op));
@@ -203,12 +203,12 @@ __ftrace_make_nop(struct module *mod,
        }
 
        if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
-               pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
+               pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
                return -EINVAL;
        }
 #endif /* CONFIG_MPROFILE_KERNEL */
 
-       if (patch_instruction((struct ppc_inst *)ip, pop)) {
+       if (patch_instruction((u32 *)ip, pop)) {
                pr_err("Patching NOP failed.\n");
                return -EPERM;
        }
@@ -278,9 +278,9 @@ __ftrace_make_nop(struct module *mod,
                return -EINVAL;
        }
 
-       op = ppc_inst(PPC_INST_NOP);
+       op = ppc_inst(PPC_RAW_NOP());
 
-       if (patch_instruction((struct ppc_inst *)ip, op))
+       if (patch_instruction((u32 *)ip, op))
                return -EPERM;
 
        return 0;
@@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
                return -1;
        }
 
-       if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) {
+       if (patch_branch((u32 *)tramp, ptr, 0)) {
                pr_debug("REL24 out of range!\n");
                return -1;
        }
@@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
                }
        }
 
-       if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) {
+       if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
                pr_err("Patching NOP failed.\n");
                return -EPERM;
        }
@@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod,
        if (test_24bit_addr(ip, addr)) {
                /* within range */
                old = ftrace_call_replace(ip, addr, 1);
-               new = ppc_inst(PPC_INST_NOP);
+               new = ppc_inst(PPC_RAW_NOP());
                return ftrace_modify_code(ip, old, new);
        } else if (core_kernel_text(ip))
                return __ftrace_make_nop_kernel(rec, addr);
@@ -510,7 +510,7 @@ static int
 expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
 {
        /* look for patched "NOP" on ppc64 with -mprofile-kernel */
-       if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP)))
+       if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
                return 0;
        return 1;
 }
@@ -589,14 +589,14 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
        int err;
        struct ppc_inst op;
-       unsigned long ip = rec->ip;
+       u32 *ip = (u32 *)rec->ip;
 
        /* read where this goes */
-       if (copy_inst_from_kernel_nofault(&op, (void *)ip))
+       if (copy_inst_from_kernel_nofault(&op, ip))
                return -EFAULT;
 
        /* It should be pointing to a nop */
-       if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_NOP))) {
+       if (!ppc_inst_equal(op,  ppc_inst(PPC_RAW_NOP()))) {
                pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
                return -EINVAL;
        }
@@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
        }
 
        /* create the branch to the trampoline */
-       err = create_branch(&op, (struct ppc_inst *)ip,
-                           rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+       err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
        if (err) {
                pr_err("REL24 out of range!\n");
                return -EINVAL;
@@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
        pr_devel("write to %lx\n", rec->ip);
 
-       if (patch_instruction((struct ppc_inst *)ip, op))
+       if (patch_instruction(ip, op))
                return -EPERM;
 
        return 0;
@@ -653,7 +652,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
                return -EFAULT;
        }
 
-       if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
+       if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
                pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
                return -EINVAL;
        }
@@ -684,7 +683,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
         */
        if (test_24bit_addr(ip, addr)) {
                /* within range */
-               old = ppc_inst(PPC_INST_NOP);
+               old = ppc_inst(PPC_RAW_NOP());
                new = ftrace_call_replace(ip, addr, 1);
                return ftrace_modify_code(ip, old, new);
        } else if (core_kernel_text(ip))
@@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
        /* The new target may be within range */
        if (test_24bit_addr(ip, addr)) {
                /* within range */
-               if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) {
+               if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
                        pr_err("REL24 out of range!\n");
                        return -EINVAL;
                }
@@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
        }
 
        /* Ensure branch is within 24 bits */
-       if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) {
+       if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) {
                pr_err("Branch out of range\n");
                return -EINVAL;
        }
 
-       if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) {
+       if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
                pr_err("REL24 out of range!\n");
                return -EINVAL;
        }
@@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
        struct ppc_inst old, new;
        int ret;
 
-       old = ppc_inst_read((struct ppc_inst *)&ftrace_call);
+       old = ppc_inst_read((u32 *)&ftrace_call);
        new = ftrace_call_replace(ip, (unsigned long)func, 1);
        ret = ftrace_modify_code(ip, old, new);
 
@@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
        /* Also update the regs callback function */
        if (!ret) {
                ip = (unsigned long)(&ftrace_regs_call);
-               old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call);
+               old = ppc_inst_read((u32 *)&ftrace_regs_call);
                new = ftrace_call_replace(ip, (unsigned long)func, 1);
                ret = ftrace_modify_code(ip, old, new);
        }
index b4ab95c9e94a89c2f94cd2ea2b68720580492e99..dfbce527c98ed2632252a2e7b453c6d006109ca5 100644 (file)
@@ -67,6 +67,7 @@
 #include <asm/kprobes.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
+#include <asm/disassemble.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
 int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -427,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
        return;
 
 nonrecoverable:
-       regs->msr &= ~MSR_RI;
+       regs_set_return_msr(regs, regs->msr & ~MSR_RI);
 #endif
 }
 DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
@@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs)
                 * For the debug message, we look at the preceding
                 * load or store.
                 */
-               if (*nip == PPC_INST_NOP)
+               if (*nip == PPC_RAW_NOP())
                        nip -= 2;
-               else if (*nip == PPC_INST_ISYNC)
+               else if (*nip == PPC_RAW_ISYNC())
                        --nip;
-               if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
+               if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
                        unsigned int rb;
 
                        --nip;
@@ -549,8 +550,8 @@ static inline int check_io_access(struct pt_regs *regs)
                        printk(KERN_DEBUG "%s bad port %lx at %p\n",
                               (*nip & 0x100)? "OUT to": "IN from",
                               regs->gpr[rb] - _IO_BASE, nip);
-                       regs->msr |= MSR_RI;
-                       regs->nip = extable_fixup(entry);
+                       regs_set_return_msr(regs, regs->msr | MSR_RI);
+                       regs_set_return_ip(regs, extable_fixup(entry));
                        return 1;
                }
        }
@@ -586,8 +587,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_BOUNDARY                SRR1_BOUNDARY
 
 #define single_stepping(regs)  ((regs)->msr & MSR_SE)
-#define clear_single_step(regs)        ((regs)->msr &= ~MSR_SE)
-#define clear_br_trace(regs)   ((regs)->msr &= ~MSR_BE)
+#define clear_single_step(regs)        (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
+#define clear_br_trace(regs)   (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
 #endif
 
 #define inst_length(reason)    (((reason) & REASON_PREFIXED) ? 8 : 4)
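Every direct write to regs->msr and regs->nip in this file is being routed through accessors. A sketch of the assumed accessor shape (not the exact kernel definition): the helper stores the value and also marks the cached SRR0/SRR1 as stale, so the fast interrupt-exit path knows it must reload them.

    static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
    {
            regs->msr = msr;
            regs_set_return_regs_changed(regs); /* invalidate cached SRR0/SRR1 */
    }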
@@ -1031,7 +1032,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
 #endif /* !__LITTLE_ENDIAN__ */
 
        /* Go to next instruction */
-       regs->nip += 4;
+       regs_add_return_ip(regs, 4);
 }
 #endif /* CONFIG_VSX */
 
@@ -1476,7 +1477,7 @@ static void do_program_check(struct pt_regs *regs)
 
                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
-                       regs->nip += 4;
+                       regs_add_return_ip(regs, 4);
                        return;
                }
                _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
@@ -1538,7 +1539,7 @@ static void do_program_check(struct pt_regs *regs)
        if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
                switch (emulate_instruction(regs)) {
                case 0:
-                       regs->nip += 4;
+                       regs_add_return_ip(regs, 4);
                        emulate_single_step(regs);
                        return;
                case -EFAULT:
@@ -1566,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
  */
 DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
 {
-       regs->msr |= REASON_ILLEGAL;
+       regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
        do_program_check(regs);
 }
 
@@ -1593,7 +1594,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
 
        if (fixed == 1) {
                /* skip over emulated instruction */
-               regs->nip += inst_length(reason);
+               regs_add_return_ip(regs, inst_length(reason));
                emulate_single_step(regs);
                return;
        }
@@ -1659,7 +1660,7 @@ static void tm_unavailable(struct pt_regs *regs)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        if (user_mode(regs)) {
                current->thread.load_tm++;
-               regs->msr |= MSR_TM;
+               regs_set_return_msr(regs, regs->msr | MSR_TM);
                tm_enable();
                tm_restore_sprs(&current->thread);
                return;
@@ -1751,7 +1752,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
                                pr_err("DSCR based mfspr emulation failed\n");
                                return;
                        }
-                       regs->nip += 4;
+                       regs_add_return_ip(regs, 4);
                        emulate_single_step(regs);
                }
                return;
@@ -1948,7 +1949,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
         */
        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
                               current->thread.debug.dbcr1))
-               regs->msr |= MSR_DE;
+               regs_set_return_msr(regs, regs->msr | MSR_DE);
        else
                /* Make sure the IDM flag is off */
                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -1969,7 +1970,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
         * instead of stopping here when hitting a BT
         */
        if (debug_status & DBSR_BT) {
-               regs->msr &= ~MSR_DE;
+               regs_set_return_msr(regs, regs->msr & ~MSR_DE);
 
                /* Disable BT */
                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
@@ -1980,7 +1981,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
                if (user_mode(regs)) {
                        current->thread.debug.dbcr0 &= ~DBCR0_BT;
                        current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
-                       regs->msr |= MSR_DE;
+                       regs_set_return_msr(regs, regs->msr | MSR_DE);
                        return;
                }
 
@@ -1994,7 +1995,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
                if (debugger_sstep(regs))
                        return;
        } else if (debug_status & DBSR_IC) {    /* Instruction complete */
-               regs->msr &= ~MSR_DE;
+               regs_set_return_msr(regs, regs->msr & ~MSR_DE);
 
                /* Disable instruction completion */
                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
@@ -2016,7 +2017,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
                        current->thread.debug.dbcr0 &= ~DBCR0_IC;
                        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
                                               current->thread.debug.dbcr1))
-                               regs->msr |= MSR_DE;
+                               regs_set_return_msr(regs, regs->msr | MSR_DE);
                        else
                                /* Make sure the IDM bit is off */
                                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -2044,7 +2045,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
        PPC_WARN_EMULATED(altivec, regs);
        err = emulate_altivec(regs);
        if (err == 0) {
-               regs->nip += 4;         /* skip emulated instruction */
+               regs_add_return_ip(regs, 4); /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }
@@ -2109,7 +2110,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
 
        err = do_spe_mathemu(regs);
        if (err == 0) {
-               regs->nip += 4;         /* skip emulated instruction */
+               regs_add_return_ip(regs, 4); /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }
@@ -2140,10 +2141,10 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
                giveup_spe(current);
        preempt_enable();
 
-       regs->nip -= 4;
+       regs_add_return_ip(regs, -4);
        err = speround_handler(regs);
        if (err == 0) {
-               regs->nip += 4;         /* skip emulated instruction */
+               regs_add_return_ip(regs, 4); /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }
index 9356b60d6030f28112791dbfb6bb6c049e67be9b..8513aa49614e6ca282b29b8cc7d5b1d7595b01cd 100644 (file)
@@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void)
 }
 
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT
+
+#define UDBG_UART_MW_ADDR      ((void __iomem *)0xc0002000)
+
+static u8 udbg_uart_in_isa300_rm(unsigned int reg)
+{
+       uint64_t msr = mfmsr();
+       uint8_t  c;
+
+       mtmsr(msr & ~(MSR_EE|MSR_DR));
+       isync();
+       eieio();
+       c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2));
+       mtmsr(msr);
+       isync();
+       return c;
+}
+
+static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val)
+{
+       uint64_t msr = mfmsr();
+
+       mtmsr(msr & ~(MSR_EE|MSR_DR));
+       isync();
+       eieio();
+       __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2));
+       mtmsr(msr);
+       isync();
+}
+
+void __init udbg_init_debug_microwatt(void)
+{
+       udbg_uart_in = udbg_uart_in_isa300_rm;
+       udbg_uart_out = udbg_uart_out_isa300_rm;
+       udbg_use_uart();
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */
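Two details worth noting in the accessors above: clearing MSR_DR (along with MSR_EE) makes data accesses bypass translation so the UART is usable before the MMU is configured, and the reg << 2 shift assumes Microwatt exposes its 16550-compatible registers at a 4-byte stride. A standalone illustration of the resulting address math (register index from the standard 16550 layout):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uintptr_t base = 0xc0002000;
            unsigned int lsr = 5; /* 16550 line status register index */

            /* 4-byte stride: register 5 sits at byte offset 5 << 2 = 0x14 */
            printf("0x%lx\n", (unsigned long)(base + (lsr << 2)));
            return 0;
    }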
index 186f69b11e94d2e70518ca9aa761e43aac04a322..c6975467d9ffdcad81b9a56a7a96a75b9ca0c3c6 100644 (file)
@@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
                return -EINVAL;
 
        if (cpu_has_feature(CPU_FTR_ARCH_31) &&
-           ppc_inst_prefixed(auprobe->insn) &&
+           ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) &&
            (addr & 0x3f) == 60) {
                pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
                return -EINVAL;
@@ -62,7 +62,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
        autask->saved_trap_nr = current->thread.trap_nr;
        current->thread.trap_nr = UPROBE_TRAP_NR;
-       regs->nip = current->utask->xol_vaddr;
+       regs_set_return_ip(regs, current->utask->xol_vaddr);
 
        user_enable_single_step(current);
        return 0;
@@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
         * support doesn't exist and have to fix-up the next instruction
         * to be executed.
         */
-       regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn);
+       regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn));
 
        user_disable_single_step(current);
        return 0;
@@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
         * emulate_step() returns 1 if the insn was successfully emulated.
         * For all other cases, we need to single-step in hardware.
         */
-       ret = emulate_step(regs, ppc_inst_read(&auprobe->insn));
+       ret = emulate_step(regs, ppc_inst_read(auprobe->insn));
        if (ret > 0)
                return true;
 
index f5a52f444e3604b26a90b22fc6cc7621a4bed36f..fc120fac19104a78d0ba2a6238946cc3612dc957 100644 (file)
@@ -73,6 +73,10 @@ _GLOBAL(load_up_altivec)
        addi    r5,r4,THREAD            /* Get THREAD */
        oris    r12,r12,MSR_VEC@h
        std     r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+#endif
 #endif
        li      r4,1
        stb     r4,THREAD_LOAD_VEC(r5)
@@ -131,7 +135,9 @@ _GLOBAL(load_up_vsx)
        /* enable use of VSX after return */
        oris    r12,r12,MSR_VSX@h
        std     r12,_MSR(r1)
-       b       fast_interrupt_return
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+       b       fast_interrupt_return_srr
 
 #endif /* CONFIG_VSX */
 
index 72fa3c00229a56ebfab492c563a2678c8fbc8bc2..40bdefe9caa73529aba81c522fef5c40fdb8fe87 100644 (file)
@@ -9,6 +9,22 @@
 #define EMITS_PT_NOTE
 #define RO_EXCEPTION_TABLE_ALIGN       0
 
+#define SOFT_MASK_TABLE(align)                                         \
+       . = ALIGN(align);                                               \
+       __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \
+               __start___soft_mask_table = .;                          \
+               KEEP(*(__soft_mask_table))                              \
+               __stop___soft_mask_table = .;                           \
+       }
+
+#define RESTART_TABLE(align)                                           \
+       . = ALIGN(align);                                               \
+       __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) {     \
+               __start___restart_table = .;                            \
+               KEEP(*(__restart_table))                                \
+               __stop___restart_table = .;                             \
+       }
+
 #include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
@@ -124,6 +140,9 @@ SECTIONS
        RO_DATA(PAGE_SIZE)
 
 #ifdef CONFIG_PPC64
+       SOFT_MASK_TABLE(8)
+       RESTART_TABLE(8)
+
        . = ALIGN(8);
        __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
                __start___stf_entry_barrier_fixup = .;
index c9a8f4781a1018afcb4fe15cba3129eb1447b6e5..a165635fd2148fc1feb564a529ad3728c754584b 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched/debug.h>
 #include <linux/delay.h>
+#include <linux/processor.h>
 #include <linux/smp.h>
 
 #include <asm/interrupt.h>
index 0196d0c211aca76648a138f9406512c5060a8bed..10f997e6bb95cc59c7ded71bbd00cdac57913380 100644 (file)
@@ -105,8 +105,8 @@ void crash_ipi_callback(struct pt_regs *regs)
 static void crash_kexec_prepare_cpus(int cpu)
 {
        unsigned int msecs;
-       unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-       int tries = 0;
+       volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+       volatile int tries = 0;
        int (*old_handler)(struct pt_regs *regs);
 
        printk(KERN_EMERG "Sending IPI to other CPUs\n");
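The volatile qualifiers are assumed to relate to the setjmp/longjmp use in the crash/xmon paths: a local modified between setjmp() and a longjmp() back has an indeterminate value afterwards unless it is declared volatile. A standalone illustration of the C rule (not kernel code):

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    int main(void)
    {
            volatile int tries = 0; /* without volatile, may be stale below */

            if (setjmp(env) == 0) {
                    tries = 1;
                    longjmp(env, 1);
            }
            printf("%d\n", tries); /* reliably prints 1 only due to volatile */
            return 0;
    }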
index e8e7b2c530d1b0eb9bb3957759e95f8e2c83d1f0..4b3a8d80cfa354c0720bbd8b2ef760b8e33e6ea1 100644 (file)
@@ -353,9 +353,6 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
        preempt_enable();
 }
 
-/* From mm/mmu_context_hash32.c */
-#define CTX_TO_VSID(c, id)     ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
-
 int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
index 260e860d53a2d64d181acfc528f370ba1b67d0b4..1d1fcc290fca4aa53a49a10115911da45b5c19e5 100644 (file)
@@ -4626,6 +4626,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        atomic_dec(&kvm->arch.vcpus_running);
+
+       srr_regs_clobbered();
+
        return r;
 }
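Running a guest clobbers SRR0/SRR1, so before returning to the host the cached values are flagged stale. A sketch of what srr_regs_clobbered() is assumed to do, in line with the srr_valid bookkeeping this series adds to the paca:

    static inline void srr_regs_clobbered(void)
    {
            local_paca->srr_valid = 0;
            local_paca->hsrr_valid = 0;
    }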
 
index 71bcb014046139684149ce26e0e04a58ef3b505f..6bc9425acb322768e7c1b75f88314d83b295341a 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <linux/uaccess.h>
+#include <asm/interrupt.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -1848,6 +1849,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
        /* Make sure we save the guest TAR/EBB/DSCR state */
        kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 
+       srr_regs_clobbered();
 out:
        vcpu->mode = OUTSIDE_GUEST_MODE;
        return ret;
index cc1a8a0f311e7a9b2e082f5e733334b8ca34315b..99a7c9132422ccdfc0958c0ab94b0f57d43ed6f5 100644 (file)
@@ -39,7 +39,7 @@ extra-$(CONFIG_PPC64) += crtsavres.o
 endif
 
 obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
-                              memcpy_power7.o
+                              memcpy_power7.o restart_table.o
 
 obj64-y        += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
           memcpy_64.o copy_mc_64.o
index 870b30d9be2f85ca00c73fd7baf27804386bae41..f9a3019e37b43cfffb66e23f882d6314dfd2ae39 100644 (file)
@@ -18,8 +18,7 @@
 #include <asm/setup.h>
 #include <asm/inst.h>
 
-static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
-                              struct ppc_inst *patch_addr)
+static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr)
 {
        if (!ppc_inst_prefixed(instr)) {
                u32 val = ppc_inst_val(instr);
@@ -40,7 +39,7 @@ failed:
        return -EFAULT;
 }
 
-int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+int raw_patch_instruction(u32 *addr, struct ppc_inst instr)
 {
        return __patch_instruction(addr, instr, addr);
 }
@@ -70,22 +69,16 @@ static int text_area_cpu_down(unsigned int cpu)
 }
 
 /*
- * Run as a late init call. This allows all the boot time patching to be done
- * simply by patching the code, and then we're called here prior to
- * mark_rodata_ro(), which happens after all init calls are run. Although
- * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
- * it as being preferable to a kernel that will crash later when someone tries
- * to use patch_instruction().
+ * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
+ * we judge it as being preferable to a kernel that will crash later when
+ * someone tries to use patch_instruction().
  */
-static int __init setup_text_poke_area(void)
+void __init poking_init(void)
 {
        BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                "powerpc/text_poke:online", text_area_cpu_up,
                text_area_cpu_down));
-
-       return 0;
 }
-late_initcall(setup_text_poke_area);
 
 /*
  * This can be called for kernel text or a module.
@@ -148,10 +141,10 @@ static inline int unmap_patch_area(unsigned long addr)
        return 0;
 }
 
-static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
 {
        int err;
-       struct ppc_inst *patch_addr = NULL;
+       u32 *patch_addr = NULL;
        unsigned long flags;
        unsigned long text_poke_addr;
        unsigned long kaddr = (unsigned long)addr;
@@ -172,7 +165,7 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
                goto out;
        }
 
-       patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK));
+       patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK));
 
        __patch_instruction(addr, instr, patch_addr);
 
@@ -187,14 +180,14 @@ out:
 }
 #else /* !CONFIG_STRICT_KERNEL_RWX */
 
-static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
 {
        return raw_patch_instruction(addr, instr);
 }
 
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
-int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+int patch_instruction(u32 *addr, struct ppc_inst instr)
 {
        /* Make sure we aren't patching a freed init section */
        if (init_mem_is_free && init_section_contains(addr, 4)) {
@@ -205,7 +198,7 @@ int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
 }
 NOKPROBE_SYMBOL(patch_instruction);
 
-int patch_branch(struct ppc_inst *addr, unsigned long target, int flags)
+int patch_branch(u32 *addr, unsigned long target, int flags)
 {
        struct ppc_inst instr;
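With the retyped API a patch site is addressed directly as a u32 *. A hypothetical usage sketch (site and target are illustrative names), relying on create_branch() returning non-zero when the branch cannot be encoded:

    struct ppc_inst insn;

    if (!create_branch(&insn, site, (unsigned long)target, BRANCH_SET_LINK))
            patch_instruction(site, insn);
    else
            pr_err("branch target out of range\n");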
 
@@ -257,8 +250,7 @@ bool is_conditional_branch(struct ppc_inst instr)
 }
 NOKPROBE_SYMBOL(is_conditional_branch);
 
-int create_branch(struct ppc_inst *instr,
-                 const struct ppc_inst *addr,
+int create_branch(struct ppc_inst *instr, const u32 *addr,
                  unsigned long target, int flags)
 {
        long offset;
@@ -278,7 +270,7 @@ int create_branch(struct ppc_inst *instr,
        return 0;
 }
 
-int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
                       unsigned long target, int flags)
 {
        long offset;
@@ -325,39 +317,39 @@ int instr_is_relative_link_branch(struct ppc_inst instr)
        return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
 }
 
-static unsigned long branch_iform_target(const struct ppc_inst *instr)
+static unsigned long branch_iform_target(const u32 *instr)
 {
        signed long imm;
 
-       imm = ppc_inst_val(*instr) & 0x3FFFFFC;
+       imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
 
        /* If the top bit of the immediate value is set this is negative */
        if (imm & 0x2000000)
                imm -= 0x4000000;
 
-       if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0)
+       if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
                imm += (unsigned long)instr;
 
        return (unsigned long)imm;
 }
 
-static unsigned long branch_bform_target(const struct ppc_inst *instr)
+static unsigned long branch_bform_target(const u32 *instr)
 {
        signed long imm;
 
-       imm = ppc_inst_val(*instr) & 0xFFFC;
+       imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
 
        /* If the top bit of the immediate value is set this is negative */
        if (imm & 0x8000)
                imm -= 0x10000;
 
-       if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0)
+       if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
                imm += (unsigned long)instr;
 
        return (unsigned long)imm;
 }
 
-unsigned long branch_target(const struct ppc_inst *instr)
+unsigned long branch_target(const u32 *instr)
 {
        if (instr_is_branch_iform(ppc_inst_read(instr)))
                return branch_iform_target(instr);
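The target computation above sign-extends the 26-bit (word-aligned) I-form displacement by hand. A standalone worked example: 0x4bfffffc encodes "b .-4", and the same arithmetic recovers the -4 offset.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t insn = 0x4bfffffc; /* b .-4 */
            long imm = insn & 0x3FFFFFC;

            if (imm & 0x2000000)  /* top bit of the field: negative */
                    imm -= 0x4000000;
            printf("%ld\n", imm); /* prints -4 */
            return 0;
    }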
@@ -367,17 +359,7 @@ unsigned long branch_target(const struct ppc_inst *instr)
        return 0;
 }
 
-int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr)
-{
-       if (instr_is_branch_iform(ppc_inst_read(instr)) ||
-           instr_is_branch_bform(ppc_inst_read(instr)))
-               return branch_target(instr) == addr;
-
-       return 0;
-}
-
-int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest,
-                    const struct ppc_inst *src)
+int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src)
 {
        unsigned long target;
        target = branch_target(src);
@@ -404,12 +386,21 @@ void __patch_exception(int exc, unsigned long addr)
         * instruction of the exception, not the first one
         */
 
-       patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0);
+       patch_branch(ibase + (exc / 4) + 1, addr, 0);
 }
 #endif
 
 #ifdef CONFIG_CODE_PATCHING_SELFTEST
 
+static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+       if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+           instr_is_branch_bform(ppc_inst_read(instr)))
+               return branch_target(instr) == addr;
+
+       return 0;
+}
+
 static void __init test_trampoline(void)
 {
        asm ("nop;\n");
@@ -422,9 +413,9 @@ static void __init test_branch_iform(void)
 {
        int err;
        struct ppc_inst instr;
-       unsigned long addr;
-
-       addr = (unsigned long)&instr;
+       u32 tmp[2];
+       u32 *iptr = tmp;
+       unsigned long addr = (unsigned long)tmp;
 
        /* The simplest case, branch to self, no flags */
        check(instr_is_branch_iform(ppc_inst(0x48000000)));
@@ -445,63 +436,68 @@ static void __init test_branch_iform(void)
        check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
 
        /* Absolute branch to 0x100 */
-       instr = ppc_inst(0x48000103);
-       check(instr_is_branch_to_addr(&instr, 0x100));
+       patch_instruction(iptr, ppc_inst(0x48000103));
+       check(instr_is_branch_to_addr(iptr, 0x100));
        /* Absolute branch to 0x420fc */
-       instr = ppc_inst(0x480420ff);
-       check(instr_is_branch_to_addr(&instr, 0x420fc));
+       patch_instruction(iptr, ppc_inst(0x480420ff));
+       check(instr_is_branch_to_addr(iptr, 0x420fc));
        /* Maximum positive relative branch, + 20MB - 4B */
-       instr = ppc_inst(0x49fffffc);
-       check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
+       patch_instruction(iptr, ppc_inst(0x49fffffc));
+       check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
        /* Smallest negative relative branch, - 4B */
-       instr = ppc_inst(0x4bfffffc);
-       check(instr_is_branch_to_addr(&instr, addr - 4));
+       patch_instruction(iptr, ppc_inst(0x4bfffffc));
+       check(instr_is_branch_to_addr(iptr, addr - 4));
        /* Largest negative relative branch, - 32 MB */
-       instr = ppc_inst(0x4a000000);
-       check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+       patch_instruction(iptr, ppc_inst(0x4a000000));
+       check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
 
        /* Branch to self, with link */
-       err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK);
-       check(instr_is_branch_to_addr(&instr, addr));
+       err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr));
 
        /* Branch to self - 0x100, with link */
-       err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK);
-       check(instr_is_branch_to_addr(&instr, addr - 0x100));
+       err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr - 0x100));
 
        /* Branch to self + 0x100, no link */
-       err = create_branch(&instr, &instr, addr + 0x100, 0);
-       check(instr_is_branch_to_addr(&instr, addr + 0x100));
+       err = create_branch(&instr, iptr, addr + 0x100, 0);
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr + 0x100));
 
        /* Maximum relative negative offset, - 32 MB */
-       err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK);
-       check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+       err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
 
        /* Out of range relative negative offset, - 32 MB + 4*/
-       err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK);
+       err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
        check(err);
 
        /* Out of range relative positive offset, + 32 MB */
-       err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK);
+       err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
        check(err);
 
        /* Unaligned target */
-       err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK);
+       err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
        check(err);
 
        /* Check flags are masked correctly */
-       err = create_branch(&instr, &instr, addr, 0xFFFFFFFC);
-       check(instr_is_branch_to_addr(&instr, addr));
+       err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr));
        check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
 }
 
 static void __init test_create_function_call(void)
 {
-       struct ppc_inst *iptr;
+       u32 *iptr;
        unsigned long dest;
        struct ppc_inst instr;
 
        /* Check we can create a function call */
-       iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline);
+       iptr = (u32 *)ppc_function_entry(test_trampoline);
        dest = ppc_function_entry(test_create_function_call);
        create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
        patch_instruction(iptr, instr);
@@ -512,10 +508,11 @@ static void __init test_branch_bform(void)
 {
        int err;
        unsigned long addr;
-       struct ppc_inst *iptr, instr;
+       struct ppc_inst instr;
+       u32 tmp[2];
+       u32 *iptr = tmp;
        unsigned int flags;
 
-       iptr = &instr;
        addr = (unsigned long)iptr;
 
        /* The simplest case, branch to self, no flags */
@@ -528,39 +525,43 @@ static void __init test_branch_bform(void)
        check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
 
        /* Absolute conditional branch to 0x100 */
-       instr = ppc_inst(0x43ff0103);
-       check(instr_is_branch_to_addr(&instr, 0x100));
+       patch_instruction(iptr, ppc_inst(0x43ff0103));
+       check(instr_is_branch_to_addr(iptr, 0x100));
        /* Absolute conditional branch to 0x20fc */
-       instr = ppc_inst(0x43ff20ff);
-       check(instr_is_branch_to_addr(&instr, 0x20fc));
+       patch_instruction(iptr, ppc_inst(0x43ff20ff));
+       check(instr_is_branch_to_addr(iptr, 0x20fc));
        /* Maximum positive relative conditional branch, + 32 KB - 4B */
-       instr = ppc_inst(0x43ff7ffc);
-       check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
+       patch_instruction(iptr, ppc_inst(0x43ff7ffc));
+       check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
        /* Smallest negative relative conditional branch, - 4B */
-       instr = ppc_inst(0x43fffffc);
-       check(instr_is_branch_to_addr(&instr, addr - 4));
+       patch_instruction(iptr, ppc_inst(0x43fffffc));
+       check(instr_is_branch_to_addr(iptr, addr - 4));
        /* Largest negative relative conditional branch, - 32 KB */
-       instr = ppc_inst(0x43ff8000);
-       check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+       patch_instruction(iptr, ppc_inst(0x43ff8000));
+       check(instr_is_branch_to_addr(iptr, addr - 0x8000));
 
        /* All condition code bits set & link */
        flags = 0x3ff000 | BRANCH_SET_LINK;
 
        /* Branch to self */
        err = create_cond_branch(&instr, iptr, addr, flags);
-       check(instr_is_branch_to_addr(&instr, addr));
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr));
 
        /* Branch to self - 0x100 */
        err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
-       check(instr_is_branch_to_addr(&instr, addr - 0x100));
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr - 0x100));
 
        /* Branch to self + 0x100 */
        err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
-       check(instr_is_branch_to_addr(&instr, addr + 0x100));
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr + 0x100));
 
        /* Maximum relative negative offset, - 32 KB */
        err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
-       check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr - 0x8000));
 
        /* Out of range relative negative offset, - 32 KB + 4*/
        err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
@@ -576,7 +577,8 @@ static void __init test_branch_bform(void)
 
        /* Check flags are masked correctly */
        err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
-       check(instr_is_branch_to_addr(&instr, addr));
+       patch_instruction(iptr, instr);
+       check(instr_is_branch_to_addr(iptr, addr));
        check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
 }
 
@@ -715,9 +717,9 @@ static void __init test_prefixed_patching(void)
        extern unsigned int code_patching_test1_expected[];
        extern unsigned int end_code_patching_test1[];
 
-       __patch_instruction((struct ppc_inst *)code_patching_test1,
+       __patch_instruction(code_patching_test1,
                            ppc_inst_prefix(OP_PREFIX << 26, 0x00000000),
-                           (struct ppc_inst *)code_patching_test1);
+                           code_patching_test1);
 
        check(!memcmp(code_patching_test1,
                      code_patching_test1_expected,
index 407b992fb02fd5fff78db6f590e3462963beb2cd..e834079d2b5c284987a5b8313f8a89b8f393f48b 100644 (file)
@@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs)
         * function in the kernel/module, captured on a kprobe. We don't need
         * to worry about 32-bit userspace on a 64-bit kernel.
         */
-       regs->nip = regs->link;
+       regs_set_return_ip(regs, regs->link);
 }
 NOKPROBE_SYMBOL(override_function_with_return);
index fe26f2fa0f3f8b81fcefc87e092b24725ea50a67..cda17bee5afea451d5814f16ad32db629a7e0ae2 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/stop_machine.h>
 #include <asm/cputable.h>
 #include <asm/code-patching.h>
+#include <asm/interrupt.h>
 #include <asm/page.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -33,26 +34,25 @@ struct fixup_entry {
        long            alt_end_off;
 };
 
-static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset)
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
 {
        /*
         * We store the offset to the code as a negative offset from
         * the start of the alt_entry, to support the VDSO. This
         * routine converts that back into an actual address.
         */
-       return (struct ppc_inst *)((unsigned long)fcur + offset);
+       return (u32 *)((unsigned long)fcur + offset);
 }
 
-static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest,
-                                struct ppc_inst *alt_start, struct ppc_inst *alt_end)
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
 {
        int err;
        struct ppc_inst instr;
 
        instr = ppc_inst_read(src);
 
-       if (instr_is_relative_branch(*src)) {
-               struct ppc_inst *target = (struct ppc_inst *)branch_target(src);
+       if (instr_is_relative_branch(ppc_inst_read(src))) {
+               u32 *target = (u32 *)branch_target(src);
 
                /* Branch within the section doesn't need translating */
                if (target < alt_start || target > alt_end) {
@@ -69,7 +69,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest,
 
 static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
 {
-       struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop;
+       u32 *start, *end, *alt_start, *alt_end, *src, *dest;
 
        start = calc_addr(fcur, fcur->start_off);
        end = calc_addr(fcur, fcur->end_off);
@@ -91,9 +91,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
                        return 1;
        }
 
-       nop = ppc_inst(PPC_INST_NOP);
-       for (; dest < end; dest = ppc_inst_next(dest, &nop))
-               raw_patch_instruction(dest, nop);
+       for (; dest < end; dest++)
+               raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
 
        return 0;
 }
@@ -128,21 +127,21 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
        start = PTRRELOC(&__start___stf_entry_barrier_fixup);
        end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
 
-       instrs[0] = 0x60000000; /* nop */
-       instrs[1] = 0x60000000; /* nop */
-       instrs[2] = 0x60000000; /* nop */
+       instrs[0] = PPC_RAW_NOP();
+       instrs[1] = PPC_RAW_NOP();
+       instrs[2] = PPC_RAW_NOP();
 
        i = 0;
        if (types & STF_BARRIER_FALLBACK) {
-               instrs[i++] = 0x7d4802a6; /* mflr r10           */
-               instrs[i++] = 0x60000000; /* branch patched below */
-               instrs[i++] = 0x7d4803a6; /* mtlr r10           */
+               instrs[i++] = PPC_RAW_MFLR(_R10);
+               instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+               instrs[i++] = PPC_RAW_MTLR(_R10);
        } else if (types & STF_BARRIER_EIEIO) {
-               instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+               instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
        } else if (types & STF_BARRIER_SYNC_ORI) {
-               instrs[i++] = 0x7c0004ac; /* hwsync             */
-               instrs[i++] = 0xe94d0000; /* ld r10,0(r13)      */
-               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = PPC_RAW_SYNC();
+               instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
+               instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
        }
 
        for (i = 0; start < end; start++, i++) {
@@ -152,14 +151,14 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 
                // See comment in do_entry_flush_fixups() RE order of patching
                if (types & STF_BARRIER_FALLBACK) {
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_branch((struct ppc_inst *)(dest + 1),
+                       patch_instruction(dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_branch(dest + 1,
                                     (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
                } else {
-                       patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 1, ppc_inst(instrs[1]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_instruction(dest, ppc_inst(instrs[0]));
                }
        }
 
@@ -180,32 +179,31 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
        start = PTRRELOC(&__start___stf_exit_barrier_fixup);
        end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
 
-       instrs[0] = 0x60000000; /* nop */
-       instrs[1] = 0x60000000; /* nop */
-       instrs[2] = 0x60000000; /* nop */
-       instrs[3] = 0x60000000; /* nop */
-       instrs[4] = 0x60000000; /* nop */
-       instrs[5] = 0x60000000; /* nop */
+       instrs[0] = PPC_RAW_NOP();
+       instrs[1] = PPC_RAW_NOP();
+       instrs[2] = PPC_RAW_NOP();
+       instrs[3] = PPC_RAW_NOP();
+       instrs[4] = PPC_RAW_NOP();
+       instrs[5] = PPC_RAW_NOP();
 
        i = 0;
        if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
                if (cpu_has_feature(CPU_FTR_HVMODE)) {
-                       instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
-                       instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+                       instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
+                       instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
                } else {
-                       instrs[i++] = 0x7db243a6; /* mtsprg 2,r13       */
-                       instrs[i++] = 0x7db142a6; /* mfsprg r13,1    */
+                       instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+                       instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
                }
-               instrs[i++] = 0x7c0004ac; /* hwsync             */
-               instrs[i++] = 0xe9ad0000; /* ld r13,0(r13)      */
-               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
-               if (cpu_has_feature(CPU_FTR_HVMODE)) {
-                       instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
-               } else {
-                       instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
-               }
+               instrs[i++] = PPC_RAW_SYNC();
+               instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
+               instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+               if (cpu_has_feature(CPU_FTR_HVMODE))
+                       instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
+               else
+                       instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
        } else if (types & STF_BARRIER_EIEIO) {
-               instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+               instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
        }
 
        for (i = 0; start < end; start++, i++) {
@@ -213,12 +211,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-               patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-               patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
-               patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4]));
-               patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5]));
+               patch_instruction(dest, ppc_inst(instrs[0]));
+               patch_instruction(dest + 1, ppc_inst(instrs[1]));
+               patch_instruction(dest + 2, ppc_inst(instrs[2]));
+               patch_instruction(dest + 3, ppc_inst(instrs[3]));
+               patch_instruction(dest + 4, ppc_inst(instrs[4]));
+               patch_instruction(dest + 5, ppc_inst(instrs[5]));
        }
        printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
                (types == STF_BARRIER_NONE)                  ? "no" :
@@ -228,6 +226,9 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
                                                           : "unknown");
 }
 
+static bool stf_exit_reentrant = false;
+static bool rfi_exit_reentrant = false;
+
 static int __do_stf_barrier_fixups(void *data)
 {
        enum stf_barrier_type *types = data;
@@ -242,11 +243,27 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
 {
        /*
         * The call to the fallback entry flush, and the fallback/sync-ori exit
-        * flush can not be safely patched in/out while other CPUs are executing
-        * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
-        * spin in the stop machine core with interrupts hard disabled.
+        * flush cannot be safely patched in/out while other CPUs are
+        * executing them. So call __do_stf_barrier_fixups() on one CPU while
+        * all other CPUs spin in the stop machine core with interrupts hard
+        * disabled.
+        *
+        * The branch to mark interrupt exits non-reentrant is enabled first,
+        * then stop_machine runs which will ensure all CPUs are out of the
+        * low level interrupt exit code before patching. After the patching,
+        * if allowed, then flip the branch to allow fast exits.
         */
+       static_branch_enable(&interrupt_exit_not_reentrant);
+
        stop_machine(__do_stf_barrier_fixups, &types, NULL);
+
+       if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+               stf_exit_reentrant = false;
+       else
+               stf_exit_reentrant = true;
+
+       if (stf_exit_reentrant && rfi_exit_reentrant)
+               static_branch_disable(&interrupt_exit_not_reentrant);
 }
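The sequence above generalises to a pattern for live-patching a fast path, sketched below with hypothetical names: force every CPU onto the safe exit first, patch under stop_machine() so no CPU is mid-sequence, then restore the fast exit only if the newly patched variant tolerates re-entry.

    static_branch_enable(&slow_exit);      /* all CPUs take the safe path */
    stop_machine(do_patch, &args, NULL);   /* nobody inside the patch site */
    if (variant_is_reentrant)
            static_branch_disable(&slow_exit);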
 
 void do_uaccess_flush_fixups(enum l1d_flush_type types)
@@ -258,35 +275,35 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
        start = PTRRELOC(&__start___uaccess_flush_fixup);
        end = PTRRELOC(&__stop___uaccess_flush_fixup);
 
-       instrs[0] = 0x60000000; /* nop */
-       instrs[1] = 0x60000000; /* nop */
-       instrs[2] = 0x60000000; /* nop */
-       instrs[3] = 0x4e800020; /* blr */
+       instrs[0] = PPC_RAW_NOP();
+       instrs[1] = PPC_RAW_NOP();
+       instrs[2] = PPC_RAW_NOP();
+       instrs[3] = PPC_RAW_BLR();
 
        i = 0;
        if (types == L1D_FLUSH_FALLBACK) {
-               instrs[3] = 0x60000000; /* nop */
+               instrs[3] = PPC_RAW_NOP();
                /* fallthrough to fallback flush */
        }
 
        if (types & L1D_FLUSH_ORI) {
-               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
-               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+               instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+               instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
        }
 
        if (types & L1D_FLUSH_MTTRIG)
-               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+               instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
 
        for (i = 0; start < end; start++, i++) {
                dest = (void *)start + *start;
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               patch_instruction(dest, ppc_inst(instrs[0]));
 
-               patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-               patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
+               patch_instruction(dest + 1, ppc_inst(instrs[1]));
+               patch_instruction(dest + 2, ppc_inst(instrs[2]));
+               patch_instruction(dest + 3, ppc_inst(instrs[3]));
        }
 
        printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
@@ -306,24 +323,24 @@ static int __do_entry_flush_fixups(void *data)
        long *start, *end;
        int i;
 
-       instrs[0] = 0x60000000; /* nop */
-       instrs[1] = 0x60000000; /* nop */
-       instrs[2] = 0x60000000; /* nop */
+       instrs[0] = PPC_RAW_NOP();
+       instrs[1] = PPC_RAW_NOP();
+       instrs[2] = PPC_RAW_NOP();
 
        i = 0;
        if (types == L1D_FLUSH_FALLBACK) {
-               instrs[i++] = 0x7d4802a6; /* mflr r10           */
-               instrs[i++] = 0x60000000; /* branch patched below */
-               instrs[i++] = 0x7d4803a6; /* mtlr r10           */
+               instrs[i++] = PPC_RAW_MFLR(_R10);
+               instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+               instrs[i++] = PPC_RAW_MTLR(_R10);
        }
 
        if (types & L1D_FLUSH_ORI) {
-               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
-               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+               instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+               instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
        }
 
        if (types & L1D_FLUSH_MTTRIG)
-               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+               instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
 
        /*
         * If we're patching in or out the fallback flush we need to be careful about the
@@ -358,14 +375,14 @@ static int __do_entry_flush_fixups(void *data)
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
                if (types == L1D_FLUSH_FALLBACK) {
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_branch((struct ppc_inst *)(dest + 1),
+                       patch_instruction(dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_branch(dest + 1,
                                     (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
                } else {
-                       patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 1, ppc_inst(instrs[1]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_instruction(dest, ppc_inst(instrs[0]));
                }
        }
 
@@ -377,14 +394,14 @@ static int __do_entry_flush_fixups(void *data)
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
                if (types == L1D_FLUSH_FALLBACK) {
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_branch((struct ppc_inst *)(dest + 1),
+                       patch_instruction(dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_branch(dest + 1,
                                     (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
                } else {
-                       patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
-                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction(dest + 1, ppc_inst(instrs[1]));
+                       patch_instruction(dest + 2, ppc_inst(instrs[2]));
+                       patch_instruction(dest, ppc_inst(instrs[0]));
                }
        }
 
@@ -412,8 +429,9 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
        stop_machine(__do_entry_flush_fixups, &types, NULL);
 }
 
-void do_rfi_flush_fixups(enum l1d_flush_type types)
+static int __do_rfi_flush_fixups(void *data)
 {
+       enum l1d_flush_type types = *(enum l1d_flush_type *)data;
        unsigned int instrs[3], *dest;
        long *start, *end;
        int i;
@@ -421,31 +439,31 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
        start = PTRRELOC(&__start___rfi_flush_fixup);
        end = PTRRELOC(&__stop___rfi_flush_fixup);
 
-       instrs[0] = 0x60000000; /* nop */
-       instrs[1] = 0x60000000; /* nop */
-       instrs[2] = 0x60000000; /* nop */
+       instrs[0] = PPC_RAW_NOP();
+       instrs[1] = PPC_RAW_NOP();
+       instrs[2] = PPC_RAW_NOP();
 
        if (types & L1D_FLUSH_FALLBACK)
                /* b .+16 to fallback flush */
-               instrs[0] = 0x48000010;
+               instrs[0] = PPC_INST_BRANCH | 16;
 
        i = 0;
        if (types & L1D_FLUSH_ORI) {
-               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
-               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+               instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+               instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
        }
 
        if (types & L1D_FLUSH_MTTRIG)
-               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+               instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
 
        for (i = 0; start < end; start++, i++) {
                dest = (void *)start + *start;
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-               patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+               patch_instruction(dest, ppc_inst(instrs[0]));
+               patch_instruction(dest + 1, ppc_inst(instrs[1]));
+               patch_instruction(dest + 2, ppc_inst(instrs[2]));
        }
 
        printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
@@ -456,6 +474,29 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
                                                        : "ori type" :
                (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
                                                : "unknown");
+
+       return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+       /*
+        * stop_machine gets all CPUs out of the interrupt exit handler, the
+        * same as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+        * without stop_machine, so this could be achieved with a broadcast
+        * IPI instead, but this matches the stf sequence.
+        */
+       static_branch_enable(&interrupt_exit_not_reentrant);
+
+       stop_machine(__do_rfi_flush_fixups, &types, NULL);
+
+       if (types & L1D_FLUSH_FALLBACK)
+               rfi_exit_reentrant = false;
+       else
+               rfi_exit_reentrant = true;
+
+       if (stf_exit_reentrant && rfi_exit_reentrant)
+               static_branch_disable(&interrupt_exit_not_reentrant);
 }
 
 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
@@ -467,18 +508,18 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
        start = fixup_start;
        end = fixup_end;
 
-       instr = 0x60000000; /* nop */
+       instr = PPC_RAW_NOP();
 
        if (enable) {
                pr_info("barrier-nospec: using ORI speculation barrier\n");
-               instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
        }
 
        for (i = 0; start < end; start++, i++) {
                dest = (void *)start + *start;
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instr));
+               patch_instruction(dest, ppc_inst(instr));
        }
 
        printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
@@ -508,21 +549,21 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
        start = fixup_start;
        end = fixup_end;
 
-       instr[0] = PPC_INST_NOP;
-       instr[1] = PPC_INST_NOP;
+       instr[0] = PPC_RAW_NOP();
+       instr[1] = PPC_RAW_NOP();
 
        if (enable) {
                pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
-               instr[0] = PPC_INST_ISYNC;
-               instr[1] = PPC_INST_SYNC;
+               instr[0] = PPC_RAW_ISYNC();
+               instr[1] = PPC_RAW_SYNC();
        }
 
        for (i = 0; start < end; start++, i++) {
                dest = (void *)start + *start;
 
                pr_devel("patching dest %lx\n", (unsigned long)dest);
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0]));
-               patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1]));
+               patch_instruction(dest, ppc_inst(instr[0]));
+               patch_instruction(dest + 1, ppc_inst(instr[1]));
        }
 
        printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
@@ -536,7 +577,7 @@ static void patch_btb_flush_section(long *curr)
        end = (void *)curr + *(curr + 1);
        for (; start < end; start++) {
                pr_devel("patching dest %lx\n", (unsigned long)start);
-               patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP));
+               patch_instruction(start, ppc_inst(PPC_RAW_NOP()));
        }
 }
 
@@ -555,7 +596,7 @@ void do_btb_flush_fixups(void)
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
        long *start, *end;
-       struct ppc_inst *dest;
+       u32 *dest;
 
        if (!(value & CPU_FTR_LWSYNC))
                return ;
@@ -572,13 +613,14 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 static void do_final_fixups(void)
 {
 #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
-       struct ppc_inst inst, *src, *dest, *end;
+       struct ppc_inst inst;
+       u32 *src, *dest, *end;
 
        if (PHYSICAL_START == 0)
                return;
 
-       src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START);
-       dest = (struct ppc_inst *)KERNELBASE;
+       src = (u32 *)(KERNELBASE + PHYSICAL_START);
+       dest = (u32 *)KERNELBASE;
        end = (void *)src + (__end_interrupts - _stext);
 
        while (src < end) {
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644 (file)
index 0000000..bccb662
--- /dev/null
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+struct soft_mask_table_entry {
+       unsigned long start;
+       unsigned long end;
+};
+
+struct restart_table_entry {
+       unsigned long start;
+       unsigned long end;
+       unsigned long fixup;
+};
+
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/* Given an address, look for it in the soft mask table */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+       struct soft_mask_table_entry *smte = __start___soft_mask_table;
+
+       while (smte < __stop___soft_mask_table) {
+               unsigned long start = smte->start;
+               unsigned long end = smte->end;
+
+               if (addr >= start && addr < end)
+                       return true;
+
+               smte++;
+       }
+       return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/* Given an address, look for it in the kernel restart table */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+       struct restart_table_entry *rte = __start___restart_table;
+
+       while (rte < __stop___restart_table) {
+               unsigned long start = rte->start;
+               unsigned long end = rte->end;
+               unsigned long fixup = rte->fixup;
+
+               if (addr >= start && addr < end)
+                       return fixup;
+
+               rte++;
+       }
+       return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
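A hypothetical sketch of how these tables are meant to be consumed: when an NMI or masked interrupt lands inside a marked critical window, the handler redirects the interrupted context to the window's fixup so the exit sequence restarts from a safe point (regs_set_return_ip() as introduced earlier in this series):

    unsigned long fixup = search_kernel_restart_table(regs->nip);

    if (fixup)
            regs_set_return_ip(regs, fixup); /* restart the exit sequence */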
index 45bda25207557796cdd76f7b0d6e8eeb0815a489..d8d5f901cee1ca29e07c32672d63fc41ddf0a3e9 100644 (file)
@@ -1700,6 +1700,28 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        op->val = regs->ccr & imm;
                        goto compute_done;
 
+               case 128:       /* setb */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
+                       /*
+                        * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+                        * Since each CR field is 4 bits wide, masking off the
+                        * bottom two bits of 'ra' leaves bfa * 4, the bit
+                        * offset of the first bit of CR field (bfa).
+                        */
+                       ra = ra & ~0x3;
+                       /* 'val' stores bits of the CR field (bfa) */
+                       val = regs->ccr >> (CR0_SHIFT - ra);
+                       /* checks if the LT bit of CR field (bfa) is set */
+                       if (val & 8)
+                               op->val = -1;
+                       /* checks if the GT bit of CR field (bfa) is set */
+                       else if (val & 4)
+                               op->val = 1;
+                       else
+                               op->val = 0;
+                       goto compute_done;
+
                case 144:       /* mtcrf */
                        op->type = COMPUTE + SETCC;
                        imm = 0xf0000000UL;
@@ -3203,7 +3225,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
        default:
                WARN_ON_ONCE(1);
        }
-       regs->nip = next_pc;
+       regs_set_return_ip(regs, next_pc);
 }
 NOKPROBE_SYMBOL(emulate_update_regs);
 
@@ -3541,7 +3563,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
                        /* can't step mtmsr[d] that would clear MSR_RI */
                        return -1;
                /* here op.val is the mask of bits to change */
-               regs->msr = (regs->msr & ~op.val) | (val & op.val);
+               regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
                goto instr_done;
 
 #ifdef CONFIG_PPC64
@@ -3554,7 +3576,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
                if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) &&
                                cpu_has_feature(CPU_FTR_REAL_LE) &&
                                regs->gpr[0] == 0x1ebe) {
-                       regs->msr ^= MSR_LE;
+                       regs_set_return_msr(regs, regs->msr ^ MSR_LE);
                        goto instr_done;
                }
                regs->gpr[9] = regs->gpr[13];
@@ -3562,8 +3584,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
                regs->gpr[11] = regs->nip + 4;
                regs->gpr[12] = regs->msr & MSR_MASK;
                regs->gpr[13] = (unsigned long) get_paca();
-               regs->nip = (unsigned long) &system_call_common;
-               regs->msr = MSR_KERNEL;
+               regs_set_return_ip(regs, (unsigned long) &system_call_common);
+               regs_set_return_msr(regs, MSR_KERNEL);
                return 1;
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -3573,8 +3595,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
                regs->gpr[11] = regs->nip + 4;
                regs->gpr[12] = regs->msr & MSR_MASK;
                regs->gpr[13] = (unsigned long) get_paca();
-               regs->nip = (unsigned long) &system_call_vectored_emulate;
-               regs->msr = MSR_KERNEL;
+               regs_set_return_ip(regs, (unsigned long) &system_call_vectored_emulate);
+               regs_set_return_msr(regs, MSR_KERNEL);
                return 1;
 #endif
 
@@ -3585,7 +3607,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
        return 0;
 
  instr_done:
-       regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type));
+       regs_set_return_ip(regs,
+               truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
        return 1;
 }
 NOKPROBE_SYMBOL(emulate_step);
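
For reference, the setb emulation added above implements the ISA 3.0
semantics: RT = -1 if the LT bit of CR field BFA is set, 1 if the GT bit is
set, 0 otherwise. A standalone sketch of the same computation, mirroring
the case 128 logic (CR field 0 occupies the four most significant bits of
the CR, hence the shift down from bit 28):

	static long setb_result(unsigned long ccr, unsigned int bfa)
	{
		unsigned long field = (ccr >> (28 - 4 * bfa)) & 0xf;

		if (field & 8)		/* LT */
			return -1;
		if (field & 4)		/* GT */
			return 1;
		return 0;
	}
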
index 783d1b85ecfe3fd7055d3d2323004e3d99dcbb59..8b4f6b3e96c4ee307b06ac90120ef7ce20707cbb 100644 (file)
@@ -53,6 +53,8 @@
        ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
                        PPC_RAW_ADDI(t, a, i))
 
+#define TEST_SETB(t, bfa)       ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2))
+
 
 static void __init init_pt_regs(struct pt_regs *regs)
 {
@@ -824,8 +826,7 @@ static void __init test_plxvp_pstxvp(void)
         * XTp = 32xTX + 2xTp
         * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1
         */
-       instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32,
-                       PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff);
+       instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
 
        stepped = emulate_step(&regs, instr);
        if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
@@ -853,8 +854,7 @@ static void __init test_plxvp_pstxvp(void)
         * XSp = 32xSX + 2xSp
         * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
         */
-       instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32,
-                       PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff);
+       instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
 
        stepped = emulate_step(&regs, instr);
 
@@ -922,13 +922,40 @@ static struct compute_test compute_tests[] = {
                .subtests = {
                        {
                                .descr = "R0 = LONG_MAX",
-                               .instr = ppc_inst(PPC_INST_NOP),
+                               .instr = ppc_inst(PPC_RAW_NOP()),
                                .regs = {
                                        .gpr[0] = LONG_MAX,
                                }
                        }
                }
        },
+       {
+               .mnemonic = "setb",
+               .cpu_feature = CPU_FTR_ARCH_300,
+               .subtests = {
+                       {
+                               .descr = "BFA = 1, CR = GT",
+                               .instr = TEST_SETB(20, 1),
+                               .regs = {
+                                       .ccr = 0x4000000,
+                               }
+                       },
+                       {
+                               .descr = "BFA = 4, CR = LT",
+                               .instr = TEST_SETB(20, 4),
+                               .regs = {
+                                       .ccr = 0x8000,
+                               }
+                       },
+                       {
+                               .descr = "BFA = 5, CR = EQ",
+                               .instr = TEST_SETB(20, 5),
+                               .regs = {
+                                       .ccr = 0x200,
+                               }
+                       }
+               }
+       },
        {
                .mnemonic = "add",
                .subtests = {
@@ -1582,6 +1609,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
        if (!regs || !ppc_inst_val(instr))
                return -EINVAL;
 
+       /* These are not return frame regs */
        regs->nip = patch_site_addr(&patch__exec_instr);
 
        analysed = analyse_instr(&op, regs, instr);
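
The .ccr constants in the setb subtests above each set a single CR bit: CR
field n occupies bits 31 - 4n down to 28 - 4n, holding LT, GT, EQ and SO
from high to low. A hypothetical helper (not in the patch) makes the
decoding explicit:

	/* bit: SO = 0, EQ = 1, GT = 2, LT = 3 */
	#define CR_FIELD_BIT(bfa, bit)	(1UL << (28 - 4 * (bfa) + (bit)))

	/* CR_FIELD_BIT(1, 2) == 0x4000000 -> GT set in CR1 */
	/* CR_FIELD_BIT(4, 3) == 0x8000    -> LT set in CR4 */
	/* CR_FIELD_BIT(5, 1) == 0x200     -> EQ set in CR5 */
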
index 327165f26ca6a66f732e5ac2af0c59904054f1de..36761bd00f385d2d1256ec24e40b4f5ebe766f9b 100644 (file)
@@ -453,7 +453,7 @@ do_mathemu(struct pt_regs *regs)
                break;
        }
 
-       regs->nip += 4;
+       regs_add_return_ip(regs, 4);
        return 0;
 
 illegal:
index 0a05e51964c1098e169f69412da07edec4d6363a..39b84e7452e1b4560f3d9b5347ec85326e446285 100644 (file)
@@ -710,7 +710,7 @@ update_regs:
 illegal:
        if (have_e500_cpu_a005_erratum) {
                /* according to e500 cpu a005 erratum, reissue efp inst */
-               regs->nip -= 4;
+               regs_add_return_ip(regs, -4);
                pr_debug("re-issue efp inst: %08lx\n", speinsn);
                return 0;
        }
index 2ffcf540f08bfd94709843e99d772812ba1d7bff..eae4ec2988fc5cfa84c69297c4fae8a20384de10 100644 (file)
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
 
-obj-y                          := fault.o mem.o pgtable.o mmap.o maccess.o \
+obj-y                          := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
                                   init_$(BITS).o pgtable_$(BITS).o \
                                   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
                                   init-common.o mmu_context.o drmem.o \
index 7f0c8a78ba0c08f38eba1cc2fb04a88a89c07be5..15f4773643d2198f85757fda5fe653b51473a6ac 100644 (file)
@@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o
 obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
 obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
 obj-$(CONFIG_PPC_KUEP) += kuep.o
+obj-$(CONFIG_PPC_KUAP) += kuap.o
index fb4233a5bdf7d791650cb79a15ca99e2e557ba2b..6925ce9985571e2fb9d138fe5a2e86ea48f26f07 100644 (file)
 #include <asm/code-patching-asm.h>
 
 #ifdef CONFIG_PTE_64BIT
+#define PTE_T_SIZE             8
 #define PTE_FLAGS_OFFSET       4       /* offset of PTE flags, in bytes */
 #else
+#define PTE_T_SIZE             4
 #define PTE_FLAGS_OFFSET       0
 #endif
 
@@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages)
        bne     2f
        ble     cr1,19f
        addi    r4,r4,0x1000
-       addi    r5,r5,PTE_SIZE
+       addi    r5,r5,PTE_T_SIZE
        addi    r6,r6,-1
        b       1b
 
@@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages)
 
 8:     ble     cr1,9f                  /* if all ptes checked */
 81:    addi    r6,r6,-1
-       addi    r5,r5,PTE_SIZE
+       addi    r5,r5,PTE_T_SIZE
        addi    r4,r4,0x1000
        lwz     r0,0(r5)                /* check next pte */
        cmpwi   cr1,r6,1
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
new file mode 100644 (file)
index 0000000..0f920f0
--- /dev/null
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
+void kuap_lock_all_ool(void)
+{
+       kuap_lock_all();
+}
+EXPORT_SYMBOL(kuap_lock_all_ool);
+
+void kuap_unlock_all_ool(void)
+{
+       kuap_unlock_all();
+}
+EXPORT_SYMBOL(kuap_unlock_all_ool);
+
+void setup_kuap(bool disabled)
+{
+       if (!disabled)
+               kuap_lock_all_ool();
+
+       if (smp_processor_id() != boot_cpuid)
+               return;
+
+       if (disabled)
+               static_branch_enable(&disable_kuap_key);
+       else
+               pr_info("Activating Kernel Userspace Access Protection\n");
+}
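
The static key keeps the common (KUAP enabled) case free of runtime
checks: the inline helpers in the headers test it and compile down to a
straight-line path once the key is patched. A sketch of the accessor
pattern this relies on, assuming a kuap_is_disabled()-style helper in
book3s/32/kup.h:

	static __always_inline bool kuap_is_disabled(void)
	{
		return static_branch_unlikely(&disable_kuap_key);
	}
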
index 8ed1b8634839736bd9e8c4a7359c245ee87c9bb2..c20733d6e02cb6013b847f2a7ba315b480984582 100644 (file)
@@ -1,40 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <asm/kup.h>
-#include <asm/reg.h>
-#include <asm/task_size_32.h>
-#include <asm/mmu.h>
+#include <asm/smp.h>
 
-#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do {          \
-       if (TASK_SIZE > ((n) << 28))                    \
-               mtsr(val1, (n) << 28);                  \
-       if (TASK_SIZE > (((n) + 1) << 28))              \
-               mtsr(val2, ((n) + 1) << 28);            \
-       val1 = (val1 + 0x222) & 0xf0ffffff;             \
-       val2 = (val2 + 0x222) & 0xf0ffffff;             \
-} while (0)
+struct static_key_false disable_kuep_key;
 
-static __always_inline void kuep_update(u32 val)
+void setup_kuep(bool disabled)
 {
-       int val1 = val;
-       int val2 = (val + 0x111) & 0xf0ffffff;
+       if (!disabled)
+               kuep_lock();
 
-       KUEP_UPDATE_TWO_USER_SEGMENTS(0);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(2);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(4);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(6);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(8);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(10);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(12);
-       KUEP_UPDATE_TWO_USER_SEGMENTS(14);
-}
+       if (smp_processor_id() != boot_cpuid)
+               return;
 
-void kuep_lock(void)
-{
-       kuep_update(mfsr(0) | SR_NX);
-}
-
-void kuep_unlock(void)
-{
-       kuep_update(mfsr(0) & ~SR_NX);
+       if (disabled)
+               static_branch_enable(&disable_kuep_key);
+       else
+               pr_info("Activating Kernel Userspace Execution Prevention\n");
 }
index 159930351d9f960b456279beaffa45ce7418a84d..27061583a0107008670d2cc7a300347621654a4d 100644 (file)
@@ -445,26 +445,6 @@ void __init print_system_hash_info(void)
                pr_info("Hash_mask         = 0x%lx\n", Hash_mask);
 }
 
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
-       pr_info("Activating Kernel Userspace Execution Prevention\n");
-
-       if (disabled)
-               pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-void __init setup_kuap(bool disabled)
-{
-       pr_info("Activating Kernel Userspace Access Protection\n");
-
-       if (disabled)
-               pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
 void __init early_init_mmu(void)
 {
 }
index 218996e40a8eca33d06c33731159e19e4f721573..e2708e387dc39351aa4ef0ff07cf70cf08f6ee02 100644 (file)
 
 #include <asm/mmu_context.h>
 
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page tables.
+ */
+void *abatron_pteptrs[2];
+
 /*
  * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
  * (virtual segment identifiers) for each context.  Although the
 #define LAST_CONTEXT           32767
 #define FIRST_CONTEXT          1
 
-/*
- * This function defines the mapping from contexts to VSIDs (virtual
- * segment IDs).  We use a skew on both the context and the high 4 bits
- * of the 32-bit virtual address (the "effective segment ID") in order
- * to spread out the entries in the MMU hash table.  Note, if this
- * function is changed then arch/ppc/mm/hashtable.S will have to be
- * changed to correspond.
- *
- *
- * CTX_TO_VSID(ctx, va)        (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
- *                              & 0xffffff)
- */
-
 static unsigned long next_mmu_context;
 static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
 
@@ -111,3 +104,32 @@ void __init mmu_context_init(void)
        context_map[0] = (1 << FIRST_CONTEXT) - 1;
        next_mmu_context = FIRST_CONTEXT;
 }
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+       long id = next->context.id;
+       unsigned long val;
+
+       if (id < 0)
+               panic("mm_struct %p has no context ID", next);
+
+       isync();
+
+       val = CTX_TO_VSID(id, 0);
+       if (!kuep_is_disabled())
+               val |= SR_NX;
+       if (!kuap_is_disabled())
+               val |= SR_KS;
+
+       update_user_segments(val);
+
+       if (IS_ENABLED(CONFIG_BDI_SWITCH))
+               abatron_pteptrs[1] = next->pgd;
+
+       if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+               mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff);
+
+       mb();   /* sync */
+       isync();
+}
+EXPORT_SYMBOL(switch_mmu_context);
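
CTX_TO_VSID() here is the mapping that was documented in the comment
removed earlier in this file:

	/* CTX_TO_VSID(ctx, va) = (((ctx) * (897 * 16) +
	 *                          ((va) >> 28) * 0x111) & 0xffffff)
	 *
	 * Worked example: id = 5, segment 0 gives
	 * (5 * 14352) & 0xffffff = 0x11850; each subsequent 256MB segment
	 * adds the 0x111 skew, which update_user_segments() presumably
	 * applies across the 16 user segments.
	 */
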
index 96d9aa1640073cc191ed495fad014807f5c32124..ac5720371c0d994bc4f002836ab74c9da99a472c 100644 (file)
@@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
-DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
+DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
 {
        unsigned long ea = regs->dar;
        unsigned long dsisr = regs->dsisr;
@@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
        unsigned int region_id;
        long err;
 
+       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+               hash__do_page_fault(regs);
+               return;
+       }
+
        region_id = get_region_id(ea);
        if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
                mm = &init_mm;
@@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
                        bad_page_fault(regs, SIGBUS);
                }
                err = 0;
-       }
 
-       return err;
+       } else if (err) {
+               hash__do_page_fault(regs);
+       }
 }
 
 /*
@@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
  */
 DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 {
-       unsigned long dsisr = regs->dsisr;
-
-       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
-               hash__do_page_fault(regs);
-               return 0;
-       }
-
        /*
         * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
         * don't call hash_page, just fail the fault. This is required to
@@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
                return 0;
        }
 
-       if (__do_hash_fault(regs))
-               hash__do_page_fault(regs);
+       __do_hash_fault(regs);
 
        return 0;
 }
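
The net effect of this refactor is that the DSISR-based routing and the
error fallback both live inside __do_hash_fault(), leaving the raw handler
to deal only with the NMI case. The resulting flow, roughly:

	/* do_hash_fault (raw): fail the fault when in an NMI, else:   */
	/*   __do_hash_fault:                                          */
	/*     bad DSISR bits          -> hash__do_page_fault(regs)    */
	/*     hash_page_mm() fails    -> hash__do_page_fault(regs)    */
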
index 318ec4f336611257bc38f7e12640e33ed882d930..ae12fb5559cbbd0bf0d2e07a2d4cd6e890a39dd4 100644 (file)
@@ -354,22 +354,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
 /*
  * We use 128 sets in radix mode and 256 sets in hpt mode.
  */
-static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 {
        int set;
 
        asm volatile("ptesync": : :"memory");
 
-       /*
-        * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
-        * also flush the entire Page Walk Cache.
-        */
-       __tlbiel_pid(pid, 0, ric);
+       switch (ric) {
+       case RIC_FLUSH_PWC:
 
-       /* For PWC, only one flush is needed */
-       if (ric == RIC_FLUSH_PWC) {
+               /* For PWC, only one flush is needed */
+               __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
                ppc_after_tlbiel_barrier();
                return;
+       case RIC_FLUSH_TLB:
+               __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
+               break;
+       case RIC_FLUSH_ALL:
+       default:
+               /*
+                * Flush the first set of the TLB, and if
+                * we're doing a RIC_FLUSH_ALL, also flush
+                * the entire Page Walk Cache.
+                */
+               __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
        }
 
        if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -1290,7 +1298,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
        }
 }
 
-static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                int psize, bool also_pwc)
 {
index 743e11384deab8b1fa8bc66d40d8351164abd200..9d13143b8be496a8625f5b3ba7a76e86d2d15d71 100644 (file)
@@ -70,10 +70,10 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
         */
        pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
 
-       err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+       err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot);
        if (err)
                return NULL;
-       ioremap_bot -= size;
+       ioremap_bot -= size + PAGE_SIZE;
 
        return (void __iomem *)ioremap_bot + offset;
 }
index ba5cbb0d66bd6bc366ba5a3949cb82fb15f71bd6..3acece00b33e87680d83b37f9c16f69a5940e621 100644 (file)
@@ -38,7 +38,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
                return NULL;
 
        ret = (void __iomem *)ioremap_bot + offset;
-       ioremap_bot += size;
+       ioremap_bot += size + PAGE_SIZE;
 
        return ret;
 }
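
Both hunks above (the two ioremap variants) leave an unmapped PAGE_SIZE gap
between successive ioremap allocations, the same way vmalloc leaves guard
pages, so an overrun off the end of one mapping faults instead of silently
scribbling on the neighbouring mapping:

	/* before: [mapping A][mapping B]      - overrun of A lands in B */
	/* after:  [mapping A][gap][mapping B] - overrun of A faults     */
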
index a3c30a8840768a6edc8d39d7c7cf6cb6a7bbe7f2..aad7c47e0030905891988f5e95ce28cd19780522 100644 (file)
@@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
        return is_kernel_addr((unsigned long)unsafe_src);
 }
 
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
 {
        unsigned int val, suffix;
        int err;
@@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
        if (err)
                return err;
        if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
-               err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
+               err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix));
                *inst = ppc_inst_prefix(val, suffix);
        } else {
                *inst = ppc_inst(val);
index c5e520c6f13b5c90bf7f4aebbfb906420d60f284..ad198b4392224e8c716be33c59c9a6f3842bb029 100644 (file)
@@ -28,6 +28,9 @@
 unsigned long long memory_limit;
 bool init_mem_is_free;
 
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
 {
@@ -299,6 +302,10 @@ void __init mem_init(void)
                        ioremap_bot, IOREMAP_TOP);
        pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
                VMALLOC_START, VMALLOC_END);
+#ifdef MODULES_VADDR
+       pr_info("  * 0x%08lx..0x%08lx  : modules\n",
+               MODULES_VADDR, MODULES_END);
+#endif
 #endif /* CONFIG_PPC32 */
 }
 
index 3d6ae7c72412db31a6513251b766faa86650e1fe..e079f26b267ec19be8eb176e06fc97e38bb8e359 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <asm/code-patching.h>
+#include <asm/smp.h>
 
 #include <mm/mmu_decl.h>
 
@@ -239,3 +240,19 @@ void __init mmu_init_secondary(int cpu)
        }
 }
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
+{
+       if (smp_processor_id() != boot_cpuid)
+               return;
+
+       if (disabled)
+               patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
+       else
+               pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+       if (IS_ENABLED(CONFIG_PPC_47x) && disabled)
+               patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP()));
+}
+#endif
index 71bfdbedacee81af869887f8d5329f7a35e2b099..60780e08911817a0983ce9ccf9f8f11d6e879538 100644 (file)
@@ -212,37 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
        memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
 }
 
-/*
- * Set up to use a given MMU context.
- * id is context number, pgd is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-void set_context(unsigned long id, pgd_t *pgd)
-{
-       s16 offset = (s16)(__pa(swapper_pg_dir));
-
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is passed as second argument.
-        */
-       if (IS_ENABLED(CONFIG_BDI_SWITCH))
-               abatron_pteptrs[1] = pgd;
-
-       /* Register M_TWB will contain base address of level 1 table minus the
-        * lower part of the kernel PGDIR base address, so that all accesses to
-        * level 1 table are done relative to lower part of kernel PGDIR base
-        * address.
-        */
-       mtspr(SPRN_M_TWB, __pa(pgd) - offset);
-
-       /* Update context */
-       mtspr(SPRN_M_CASID, id - 1);
-       /* sync */
-       mb();
-}
-
 #ifdef CONFIG_PPC_KUEP
 void __init setup_kuep(bool disabled)
 {
@@ -256,12 +225,17 @@ void __init setup_kuep(bool disabled)
 #endif
 
 #ifdef CONFIG_PPC_KUAP
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
 void __init setup_kuap(bool disabled)
 {
-       pr_info("Activating Kernel Userspace Access Protection\n");
+       if (disabled) {
+               static_branch_enable(&disable_kuap_key);
+               return;
+       }
 
-       if (disabled)
-               pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+       pr_info("Activating Kernel Userspace Access Protection\n");
 
        mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
index aac81c9f84a5c5b8b62c49ba3d3aed8c1aad3d05..44b2b5e7cabe94eb84c559e36a7010237b6aa96b 100644 (file)
  *     also clear mm->cpu_vm_mask bits when processes are migrated
  */
 
-//#define DEBUG_MAP_CONSISTENCY
-//#define DEBUG_CLAMP_LAST_CONTEXT   31
-//#define DEBUG_HARDER
-
-/* We don't use DEBUG because it tends to be compiled in always nowadays
- * and this would generate way too much output
- */
-#ifdef DEBUG_HARDER
-#define pr_hard(args...)       printk(KERN_DEBUG args)
-#define pr_hardcont(args...)   printk(KERN_CONT args)
-#else
-#define pr_hard(args...)       do { } while(0)
-#define pr_hardcont(args...)   do { } while(0)
-#endif
-
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
+#include <asm/smp.h>
 
 #include <mm/mmu_decl.h>
 
+/*
+ * Room for two PTE table pointers, usually the kernel and current user
+ * pointers to their respective root page tables (pgdir).
+ */
+void *abatron_pteptrs[2];
+
 /*
  * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
  * A better way would be to keep track of tasks that own contexts, and implement
@@ -68,9 +60,7 @@
  * -- BenH
  */
 #define FIRST_CONTEXT 1
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
-#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
-#elif defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_8xx)
 #define LAST_CONTEXT 16
 #elif defined(CONFIG_PPC_47x)
 #define LAST_CONTEXT 65535
@@ -80,9 +70,7 @@
 
 static unsigned int next_context, nr_free_contexts;
 static unsigned long *context_map;
-#ifdef CONFIG_SMP
 static unsigned long *stale_map[NR_CPUS];
-#endif
 static struct mm_struct **context_mm;
 static DEFINE_RAW_SPINLOCK(context_lock);
 
@@ -105,7 +93,6 @@ static DEFINE_RAW_SPINLOCK(context_lock);
  * the stale map as we can just flush the local CPU
  *  -- benh
  */
-#ifdef CONFIG_SMP
 static unsigned int steal_context_smp(unsigned int id)
 {
        struct mm_struct *mm;
@@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id)
                                id = FIRST_CONTEXT;
                        continue;
                }
-               pr_hardcont(" | steal %d from 0x%p", id, mm);
 
                /* Mark this mm as having no context anymore */
                mm->context.id = MMU_NO_CONTEXT;
@@ -158,34 +144,25 @@ static unsigned int steal_context_smp(unsigned int id)
        /* This will cause the caller to try again */
        return MMU_NO_CONTEXT;
 }
-#endif  /* CONFIG_SMP */
 
 static unsigned int steal_all_contexts(void)
 {
        struct mm_struct *mm;
-#ifdef CONFIG_SMP
        int cpu = smp_processor_id();
-#endif
        unsigned int id;
 
        for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
                /* Pick up the victim mm */
                mm = context_mm[id];
 
-               pr_hardcont(" | steal %d from 0x%p", id, mm);
-
                /* Mark this mm as having no context anymore */
                mm->context.id = MMU_NO_CONTEXT;
                if (id != FIRST_CONTEXT) {
                        context_mm[id] = NULL;
                        __clear_bit(id, context_map);
-#ifdef DEBUG_MAP_CONSISTENCY
-                       mm->context.active = 0;
-#endif
                }
-#ifdef CONFIG_SMP
-               __clear_bit(id, stale_map[cpu]);
-#endif
+               if (IS_ENABLED(CONFIG_SMP))
+                       __clear_bit(id, stale_map[cpu]);
        }
 
        /* Flush the TLB for all contexts (not to be used on SMP) */
@@ -204,15 +181,11 @@ static unsigned int steal_all_contexts(void)
 static unsigned int steal_context_up(unsigned int id)
 {
        struct mm_struct *mm;
-#ifdef CONFIG_SMP
        int cpu = smp_processor_id();
-#endif
 
        /* Pick up the victim mm */
        mm = context_mm[id];
 
-       pr_hardcont(" | steal %d from 0x%p", id, mm);
-
        /* Flush the TLB for that context */
        local_flush_tlb_mm(mm);
 
@@ -220,81 +193,64 @@ static unsigned int steal_context_up(unsigned int id)
        mm->context.id = MMU_NO_CONTEXT;
 
        /* XXX This clear should ultimately be part of local_flush_tlb_mm */
-#ifdef CONFIG_SMP
-       __clear_bit(id, stale_map[cpu]);
-#endif
+       if (IS_ENABLED(CONFIG_SMP))
+               __clear_bit(id, stale_map[cpu]);
 
        return id;
 }
 
-#ifdef DEBUG_MAP_CONSISTENCY
-static void context_check_map(void)
+static void set_context(unsigned long id, pgd_t *pgd)
 {
-       unsigned int id, nrf, nact;
+       if (IS_ENABLED(CONFIG_PPC_8xx)) {
+               s16 offset = (s16)(__pa(swapper_pg_dir));
+
+               /*
+                * Register M_TWB will contain base address of level 1 table minus the
+                * lower part of the kernel PGDIR base address, so that all accesses to
+                * level 1 table are done relative to lower part of kernel PGDIR base
+                * address.
+                */
+               mtspr(SPRN_M_TWB, __pa(pgd) - offset);
 
-       nrf = nact = 0;
-       for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
-               int used = test_bit(id, context_map);
-               if (!used)
-                       nrf++;
-               if (used != (context_mm[id] != NULL))
-                       pr_err("MMU: Context %d is %s and MM is %p !\n",
-                              id, used ? "used" : "free", context_mm[id]);
-               if (context_mm[id] != NULL)
-                       nact += context_mm[id]->context.active;
-       }
-       if (nrf != nr_free_contexts) {
-               pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
-                      nr_free_contexts, nrf);
-               nr_free_contexts = nrf;
+               /* Update context */
+               mtspr(SPRN_M_CASID, id - 1);
+
+               /* sync */
+               mb();
+       } else {
+               if (IS_ENABLED(CONFIG_40x))
+                       mb();   /* sync */
+
+               mtspr(SPRN_PID, id);
+               isync();
        }
-       if (nact > num_online_cpus())
-               pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
-                      nact, num_online_cpus());
-       if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
-               pr_err("MMU: Context 0 has been freed !!!\n");
 }
-#else
-static void context_check_map(void) { }
-#endif
 
 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
                        struct task_struct *tsk)
 {
        unsigned int id;
-#ifdef CONFIG_SMP
        unsigned int i, cpu = smp_processor_id();
-#endif
        unsigned long *map;
 
        /* No lockless fast path .. yet */
        raw_spin_lock(&context_lock);
 
-       pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
-               cpu, next, next->context.active, next->context.id);
-
-#ifdef CONFIG_SMP
-       /* Mark us active and the previous one not anymore */
-       next->context.active++;
-       if (prev) {
-               pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
-               WARN_ON(prev->context.active < 1);
-               prev->context.active--;
+       if (IS_ENABLED(CONFIG_SMP)) {
+               /* Mark us active and the previous one not anymore */
+               next->context.active++;
+               if (prev) {
+                       WARN_ON(prev->context.active < 1);
+                       prev->context.active--;
+               }
        }
 
  again:
-#endif /* CONFIG_SMP */
 
        /* If we already have a valid assigned context, skip all that */
        id = next->context.id;
-       if (likely(id != MMU_NO_CONTEXT)) {
-#ifdef DEBUG_MAP_CONSISTENCY
-               if (context_mm[id] != next)
-                       pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
-                              next, id, id, context_mm[id]);
-#endif
+       if (likely(id != MMU_NO_CONTEXT))
                goto ctxt_ok;
-       }
 
        /* We really don't have a context, let's try to acquire one */
        id = next_context;
@@ -304,14 +260,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 
        /* No more free contexts, let's try to steal one */
        if (nr_free_contexts == 0) {
-#ifdef CONFIG_SMP
                if (num_online_cpus() > 1) {
                        id = steal_context_smp(id);
                        if (id == MMU_NO_CONTEXT)
                                goto again;
                        goto stolen;
                }
-#endif /* CONFIG_SMP */
                if (IS_ENABLED(CONFIG_PPC_8xx))
                        id = steal_all_contexts();
                else
@@ -330,20 +284,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
        next_context = id + 1;
        context_mm[id] = next;
        next->context.id = id;
-       pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
 
-       context_check_map();
  ctxt_ok:
 
        /* If that context got marked stale on this CPU, then flush the
         * local TLB for it and unmark it before we use it
         */
-#ifdef CONFIG_SMP
-       if (test_bit(id, stale_map[cpu])) {
-               pr_hardcont(" | stale flush %d [%d..%d]",
-                           id, cpu_first_thread_sibling(cpu),
-                           cpu_last_thread_sibling(cpu));
-
+       if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
                local_flush_tlb_mm(next);
 
                /* XXX This clear should ultimately be part of local_flush_tlb_mm */
@@ -353,10 +300,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
                                __clear_bit(id, stale_map[i]);
                }
        }
-#endif
 
        /* Flick the MMU and release lock */
-       pr_hardcont(" -> %d\n", id);
+       if (IS_ENABLED(CONFIG_BDI_SWITCH))
+               abatron_pteptrs[1] = next->pgd;
        set_context(id, next->pgd);
        raw_spin_unlock(&context_lock);
 }
@@ -366,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
  */
 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 {
-       pr_hard("initing context for mm @%p\n", mm);
-
        /*
         * We have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
@@ -401,16 +346,12 @@ void destroy_context(struct mm_struct *mm)
        if (id != MMU_NO_CONTEXT) {
                __clear_bit(id, context_map);
                mm->context.id = MMU_NO_CONTEXT;
-#ifdef DEBUG_MAP_CONSISTENCY
-               mm->context.active = 0;
-#endif
                context_mm[id] = NULL;
                nr_free_contexts++;
        }
        raw_spin_unlock_irqrestore(&context_lock, flags);
 }
 
-#ifdef CONFIG_SMP
 static int mmu_ctx_cpu_prepare(unsigned int cpu)
 {
        /* We don't touch CPU 0 map, it's allocated at boot and kept
@@ -419,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu)
        if (cpu == boot_cpuid)
                return 0;
 
-       pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
        stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
        return 0;
 }
@@ -430,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
        if (cpu == boot_cpuid)
                return 0;
 
-       pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
        kfree(stale_map[cpu]);
        stale_map[cpu] = NULL;
 
@@ -440,8 +379,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
        return 0;
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Initialize the context management stuff.
  */
@@ -465,16 +402,16 @@ void __init mmu_context_init(void)
        if (!context_mm)
                panic("%s: Failed to allocate %zu bytes\n", __func__,
                      sizeof(void *) * (LAST_CONTEXT + 1));
-#ifdef CONFIG_SMP
-       stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
-       if (!stale_map[boot_cpuid])
-               panic("%s: Failed to allocate %zu bytes\n", __func__,
-                     CTX_MAP_SIZE);
-
-       cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
-                                 "powerpc/mmu/ctx:prepare",
-                                 mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
-#endif
+       if (IS_ENABLED(CONFIG_SMP)) {
+               stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+               if (!stale_map[boot_cpuid])
+                       panic("%s: Failed to allocate %zu bytes\n", __func__,
+                             CTX_MAP_SIZE);
+
+               cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+                                         "powerpc/mmu/ctx:prepare",
+                                         mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+       }
 
        printk(KERN_INFO
               "MMU: Allocated %zu bytes of context maps for %d contexts\n",
index 68797e072f55fddc0e65c57acf2ee743894970c8..4613bf8e9aae66d80aa5fdfc50d03b30f4f590f5 100644 (file)
@@ -360,19 +360,6 @@ _GLOBAL(_tlbivax_bcast)
        sync
        wrtee   r10
        blr
-
-_GLOBAL(set_context)
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is the second parameter.
-        */
-       lis     r5, abatron_pteptrs@h
-       ori     r5, r5, abatron_pteptrs@l
-       stw     r4, 0x4(r5)
-#endif
-       mtspr   SPRN_PID,r3
-       isync                   /* Force context change */
-       blr
 #else
 #error Unsupported processor type !
 #endif
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644 (file)
index 0000000..0876216
--- /dev/null
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+/*
+ * Updates the attributes of a page in three steps:
+ *
+ * 1. invalidate the page table entry
+ * 2. flush the TLB
+ * 3. install the new entry with the updated attributes
+ *
+ * Because the PTE is invalidated for the duration of the update, there are
+ * situations where this will not work even though in theory it should.
+ * For example:
+ * - removing write from page whilst it is being executed
+ * - setting a page read-only whilst it is being read by another CPU
+ *
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+       long action = (long)data;
+       pte_t pte;
+
+       spin_lock(&init_mm.page_table_lock);
+
+       /* invalidate the PTE so it's safe to modify */
+       pte = ptep_get_and_clear(&init_mm, addr, ptep);
+       flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+       /* modify the PTE bits as desired, then apply */
+       switch (action) {
+       case SET_MEMORY_RO:
+               pte = pte_wrprotect(pte);
+               break;
+       case SET_MEMORY_RW:
+               pte = pte_mkwrite(pte_mkdirty(pte));
+               break;
+       case SET_MEMORY_NX:
+               pte = pte_exprotect(pte);
+               break;
+       case SET_MEMORY_X:
+               pte = pte_mkexec(pte);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
+       }
+
+       set_pte_at(&init_mm, addr, ptep, pte);
+
+       /* See ptesync comment in radix__set_pte_at() */
+       if (radix_enabled())
+               asm volatile("ptesync": : :"memory");
+       spin_unlock(&init_mm.page_table_lock);
+
+       return 0;
+}
+
+int change_memory_attr(unsigned long addr, int numpages, long action)
+{
+       unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+       unsigned long size = numpages * PAGE_SIZE;
+
+       if (!numpages)
+               return 0;
+
+       if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
+                        is_vm_area_hugepages((void *)addr)))
+               return -EINVAL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * On hash, the linear mapping is not in the Linux page table so
+        * apply_to_existing_page_range() will have no effect. If in the future
+        * the set_memory_* functions are used on the linear map this will need
+        * to be updated.
+        */
+       if (!radix_enabled()) {
+               int region = get_region_id(addr);
+
+               if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
+                       return -EINVAL;
+       }
+#endif
+
+       return apply_to_existing_page_range(&init_mm, start, size,
+                                           change_page_attr, (void *)action);
+}
+
+/*
+ * Set the attributes of a page:
+ *
+ * This function is used by PPC32 at the end of init to set final kernel memory
+ * protection. It includes changing the mapping of the page it is executing from
+ * and data pages it is using.
+ */
+static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+       pgprot_t prot = __pgprot((unsigned long)data);
+
+       spin_lock(&init_mm.page_table_lock);
+
+       set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
+       flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+       spin_unlock(&init_mm.page_table_lock);
+
+       return 0;
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
+{
+       unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+       unsigned long sz = numpages * PAGE_SIZE;
+
+       if (numpages <= 0)
+               return 0;
+
+       return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
+                                           (void *)pgprot_val(prot));
+                                           (void *)pgprot_val(prot));
+}
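
change_memory_attr() is the backend for the generic set_memory_*() helpers
that ARCH_HAS_SET_MEMORY enables. A minimal usage sketch, assuming the
standard linux/set_memory.h wrappers that forward SET_MEMORY_RO and friends
to this function:

	#include <linux/set_memory.h>

	/* Make a page-aligned allocation read-only, then executable */
	static int lock_down_page(unsigned long addr)
	{
		int err = set_memory_ro(addr, 1);

		return err ? err : set_memory_x(addr, 1);
	}
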
index 3546119401183ed02cc09b77b5fec21d4902fad9..cd16b407f47e1a76b060ea9ef76866285884c560 100644 (file)
 #include <asm/hugetlb.h>
 #include <asm/pte-walk.h>
 
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
+
 static inline int is_exec_fault(void)
 {
        return current->thread.regs && TRAP(current->thread.regs) == 0x400;
index e0ec67a1688795799bbaa0aa6e05830a93096dde..dcf5ecca19d9912a52eb423ccbb141b1a65ed9fc 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/highmem.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
+#include <linux/set_memory.h>
 
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
@@ -132,64 +133,20 @@ void __init mapin_ram(void)
        }
 }
 
-static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
-{
-       pte_t *kpte;
-       unsigned long address;
-
-       BUG_ON(PageHighMem(page));
-       address = (unsigned long)page_address(page);
-
-       if (v_block_mapped(address))
-               return 0;
-       kpte = virt_to_kpte(address);
-       if (!kpte)
-               return -EINVAL;
-       __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
-
-       return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
- */
-static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
-       int i, err = 0;
-       unsigned long flags;
-       struct page *start = page;
-
-       local_irq_save(flags);
-       for (i = 0; i < numpages; i++, page++) {
-               err = __change_page_attr_noflush(page, prot);
-               if (err)
-                       break;
-       }
-       wmb();
-       local_irq_restore(flags);
-       flush_tlb_kernel_range((unsigned long)page_address(start),
-                              (unsigned long)page_address(page));
-       return err;
-}
-
 void mark_initmem_nx(void)
 {
-       struct page *page = virt_to_page(_sinittext);
        unsigned long numpages = PFN_UP((unsigned long)_einittext) -
                                 PFN_DOWN((unsigned long)_sinittext);
 
        if (v_block_mapped((unsigned long)_sinittext))
                mmu_mark_initmem_nx();
        else
-               change_page_attr(page, numpages, PAGE_KERNEL);
+               set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mark_rodata_ro(void)
 {
-       struct page *page;
        unsigned long numpages;
 
        if (v_block_mapped((unsigned long)_stext + 1)) {
@@ -198,20 +155,18 @@ void mark_rodata_ro(void)
                return;
        }
 
-       page = virt_to_page(_stext);
        numpages = PFN_UP((unsigned long)_etext) -
                   PFN_DOWN((unsigned long)_stext);
 
-       change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+       set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
        /*
         * mark .rodata as read only. Use __init_begin rather than __end_rodata
         * to cover NOTES and EXCEPTION_TABLE.
         */
-       page = virt_to_page(__start_rodata);
        numpages = PFN_UP((unsigned long)__init_begin) -
                   PFN_DOWN((unsigned long)__start_rodata);
 
-       change_page_attr(page, numpages, PAGE_KERNEL_RO);
+       set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
 
        // mark_initmem_nx() should have already run by now
        ptdump_check_wx();
@@ -221,9 +176,14 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_DEBUG_PAGEALLOC
 void __kernel_map_pages(struct page *page, int numpages, int enable)
 {
+       unsigned long addr = (unsigned long)page_address(page);
+
        if (PageHighMem(page))
                return;
 
-       change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+       if (enable)
+               set_memory_attr(addr, numpages, PAGE_KERNEL);
+       else
+               set_memory_attr(addr, numpages, __pgprot(0));
 }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
index aca354fb670b13394b7747a13648ce610a1e4839..5062c58b1e5b40e71c3f23277ae7f7d73824804b 100644 (file)
@@ -58,8 +58,6 @@ struct pg_state {
        const struct addr_marker *marker;
        unsigned long start_address;
        unsigned long start_pa;
-       unsigned long last_pa;
-       unsigned long page_size;
        unsigned int level;
        u64 current_flags;
        bool check_wx;
@@ -163,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
 
 static void dump_addr(struct pg_state *st, unsigned long addr)
 {
-       unsigned long delta;
-
 #ifdef CONFIG_PPC64
 #define REG            "0x%016lx"
 #else
@@ -172,14 +168,8 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
 #endif
 
        pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
-       if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) {
-               pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
-               delta = st->page_size >> 10;
-       } else {
-               pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
-               delta = (addr - st->start_address) >> 10;
-       }
-       pt_dump_size(st->seq, delta);
+       pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
+       pt_dump_size(st->seq, (addr - st->start_address) >> 10);
 }
 
 static void note_prot_wx(struct pg_state *st, unsigned long addr)
@@ -208,7 +198,6 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
        st->current_flags = flag;
        st->start_address = addr;
        st->start_pa = pa;
-       st->page_size = page_size;
 
        while (addr >= st->marker[1].start_address) {
                st->marker++;
@@ -220,7 +209,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
               unsigned int level, u64 val, unsigned long page_size)
 {
        u64 flag = val & pg_level[level].mask;
-       u64 pa = val & PTE_RPN_MASK;
 
        /* At first no level is set */
        if (!st->level) {
@@ -232,12 +220,9 @@ static void note_page(struct pg_state *st, unsigned long addr,
         *   - we change levels in the tree.
         *   - the address is in a different section of memory and is thus
         *   used for a different purpose, regardless of the flags.
-        *   - the pa of this page is not adjacent to the last inspected page
         */
        } else if (flag != st->current_flags || level != st->level ||
-                  addr >= st->marker[1].start_address ||
-                  (pa != st->last_pa + st->page_size &&
-                   (pa != st->start_pa || st->start_pa != st->last_pa))) {
+                  addr >= st->marker[1].start_address) {
 
                /* Check the PTE flags */
                if (st->current_flags) {
@@ -259,7 +244,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
                 */
                note_page_update_state(st, addr, level, val, page_size);
        }
-       st->last_pa = pa;
 }
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
index 798ac4350a82c564e6e2e944b67b2fdc61af16e6..53aefee3fe70be6526699687ad7136e9e69b0c38 100644 (file)
@@ -237,6 +237,7 @@ skip_codegen_passes:
        fp->jited_len = alloclen;
 
        bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+       bpf_jit_binary_lock_ro(bpf_hdr);
        if (!fp->is_func || extra_pass) {
                bpf_prog_fill_jited_linfo(fp, addrs);
 out_addrs:
@@ -257,15 +258,3 @@ out:
 
        return fp;
 }
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
-       unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-       struct bpf_binary_header *bpf_hdr = (void *)addr;
-
-       if (fp->jited)
-               bpf_jit_binary_free(bpf_hdr);
-
-       bpf_prog_unlock_free(fp);
-}
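
With the image locked read-only at JIT time, the generic bpf_jit_free()
(which frees through bpf_jit_binary_free(), exactly as the removed override
did) can be used as-is. For reference, the generic lock helper of this era
was roughly the following (a sketch from memory, see
include/linux/filter.h):

	static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
	{
		set_vm_flush_reset_perms(hdr);
		set_memory_ro((unsigned long)hdr, hdr->pages);
		set_memory_x((unsigned long)hdr, hdr->pages);
	}
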
index bbb16099e8c7fa384922f768b5ca3734670cc28b..cbe5b399ed869dc9e547bbf406168f9c6c37b6ad 100644 (file)
@@ -108,20 +108,20 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
        int i;
 
        /* First arg comes in as a 32-bit pointer. */
-       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3));
+       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3));
        EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0));
-       EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx)));
+       EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
 
        /*
         * Initialize tail_call_cnt in stack frame if we do tail calls.
         * Otherwise, put in NOPs so that it can be skipped when we are
         * invoked through a tail call.
         */
-       if (ctx->seen & SEEN_TAILCALL) {
-               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
-       } else {
+       if (ctx->seen & SEEN_TAILCALL)
+               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1,
+                                bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+       else
                EMIT(PPC_RAW_NOP());
-       }
 
 #define BPF_TAILCALL_PROLOGUE_SIZE     16
 
@@ -130,30 +130,30 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
         * save/restore LR unless we call other functions
         */
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_MFLR(__REG_R0));
+               EMIT(PPC_RAW_MFLR(_R0));
 
        /*
         * Back up non-volatile regs -- registers r18-r31
         */
        for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
                if (bpf_is_seen_register(ctx, i))
-                       EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+                       EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
 
        /* If needed, retrieve arguments 9 and 10, i.e. the 5th 64-bit arg. */
        if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
-               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8);
-               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12);
+               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8);
+               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12);
        }
 
        /* Setup frame pointer to point to the bpf stack area */
        if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
                EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
-               EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+               EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1,
                                  STACK_FRAME_MIN_SIZE + ctx->stack_size));
        }
 
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
 }
 
 static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -163,24 +163,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
        /* Restore NVRs */
        for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
                if (bpf_is_seen_register(ctx, i))
-                       EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+                       EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
 }
 
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 {
-       EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+       EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0)));
 
        bpf_jit_emit_common_epilogue(image, ctx);
 
        /* Tear down our stack frame */
 
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
 
-       EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+       EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
 
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_MTLR(__REG_R0));
+               EMIT(PPC_RAW_MTLR(_R0));
 
        EMIT(PPC_RAW_BLR());
 }
@@ -193,10 +193,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
                PPC_BL_ABS(func);
        } else {
                /* Load function address into r0 */
-               EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
-               EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
-               EMIT(PPC_RAW_MTLR(__REG_R0));
-               EMIT(PPC_RAW_BLRL());
+               EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+               EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func)));
+               EMIT(PPC_RAW_MTCTR(_R0));
+               EMIT(PPC_RAW_BCTRL());
        }
 }
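
The mtlr/blrl to mtctr/bctrl switch above changes only how the indirect
call is made: blrl jumps via LR while simultaneously overwriting it, which
can unbalance the CPU's link-stack return predictor, whereas bctrl jumps
via CTR and sets LR to the return address like a normal call:

	/* before: mtlr  r0 ; blrl   - LR used for both target and return */
	/* after:  mtctr r0 ; bctrl  - CTR holds target, LR the return    */
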
 
@@ -215,47 +215,47 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
         * if (index >= array->map.max_entries)
         *   goto out;
         */
-       EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
-       EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
-       EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+       EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+       EMIT(PPC_RAW_CMPLW(b2p_index, _R0));
+       EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
        PPC_BCC(COND_GE, out);
 
        /*
         * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
         *   goto out;
         */
-       EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+       EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT));
        /* tail_call_cnt++; */
-       EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+       EMIT(PPC_RAW_ADDIC(_R0, _R0, 1));
        PPC_BCC(COND_GT, out);
 
        /* prog = array->ptrs[index]; */
-       EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
-       EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
-       EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
-       EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+       EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
+       EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
+       EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
+       EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
 
        /*
         * if (prog == NULL)
         *   goto out;
         */
-       EMIT(PPC_RAW_CMPLWI(__REG_R3, 0));
+       EMIT(PPC_RAW_CMPLWI(_R3, 0));
        PPC_BCC(COND_EQ, out);
 
        /* goto *(prog->bpf_func + prologue_size); */
-       EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func)));
+       EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
 
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
 
-       EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE));
+       EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
 
        if (ctx->seen & SEEN_FUNC)
-               EMIT(PPC_RAW_MTLR(__REG_R0));
+               EMIT(PPC_RAW_MTLR(_R0));
 
-       EMIT(PPC_RAW_MTCTR(__REG_R3));
+       EMIT(PPC_RAW_MTCTR(_R3));
 
-       EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1)));
+       EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1)));
 
        /* tear down stack, restore NVRs, ... */
        bpf_jit_emit_common_epilogue(image, ctx);
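
Taken together, the sequence above implements the standard BPF tail-call guards before jumping into the target program. Roughly, in C terms (a sketch of the semantics with stand-in types, not the JIT itself):

    #include <stdint.h>
    #include <stddef.h>

    #define MAX_TAIL_CALL_CNT 32   /* kernel value at the time (assumed) */

    /* Flattened stand-ins for the kernel's bpf_prog/bpf_array. */
    struct bpf_prog  { void *bpf_func; };
    struct bpf_array { uint32_t max_entries; struct bpf_prog *ptrs[]; };

    /* Sketch of the guards the emitted tail-call sequence performs. */
    static void *tail_call_target(struct bpf_array *array, uint32_t index,
                                  uint32_t *tail_call_cnt,
                                  size_t prologue_size)
    {
            if (index >= array->max_entries)
                    return NULL;                          /* out */
            if ((*tail_call_cnt)++ > MAX_TAIL_CALL_CNT)   /* pre-increment test */
                    return NULL;                          /* out */

            struct bpf_prog *prog = array->ptrs[index];
            if (!prog)
                    return NULL;                          /* out */

            /* Jump past the prologue of the target program. */
            return (uint8_t *)prog->bpf_func + prologue_size;
    }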
@@ -352,8 +352,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        if (imm >= -32768 && imm < 32768) {
                                EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm));
                        } else {
-                               PPC_LI32(__REG_R0, imm);
-                               EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0));
+                               PPC_LI32(_R0, imm);
+                               EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0));
                        }
                        if (imm >= 0)
                                EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h));
@@ -362,11 +362,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        break;
                case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
                        bpf_set_seen_register(ctx, tmp_reg);
-                       EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h));
+                       EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h));
                        EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg));
                        EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg));
                        EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
-                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
                        EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
                        break;
                case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
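
The four 32-bit multiplies above compose the low 64 bits of the product from a register pair. A self-contained sketch of the same decomposition:

    #include <stdint.h>

    /* 64x64 -> low-64 multiply from 32-bit halves, mirroring the emitted
     * mulw/mulhwu/add sequence:
     *   hi = a_hi*b_lo + a_lo*b_hi + high32(a_lo*b_lo), lo = a_lo*b_lo
     */
    static uint64_t mul64(uint32_t a_hi, uint32_t a_lo,
                          uint32_t b_hi, uint32_t b_lo)
    {
            uint32_t r0    = a_lo * b_hi;                               /* mulw   */
            uint32_t hi    = a_hi * b_lo;                               /* mulw   */
            uint32_t carry = (uint32_t)(((uint64_t)a_lo * b_lo) >> 32); /* mulhwu */
            uint32_t lo    = a_lo * b_lo;                               /* mulw   */

            hi += r0;                                                   /* add    */
            hi += carry;                                                /* add    */
            return ((uint64_t)hi << 32) | lo;
    }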
@@ -376,8 +376,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        if (imm >= -32768 && imm < 32768) {
                                EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm));
                        } else {
-                               PPC_LI32(__REG_R0, imm);
-                               EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0));
+                               PPC_LI32(_R0, imm);
+                               EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0));
                        }
                        break;
                case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
@@ -398,17 +398,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg));
                        if (imm < 0)
                                EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg));
-                       EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg));
+                       EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg));
                        EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg));
-                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
                        break;
                case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
                        EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
                        break;
                case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
-                       EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg));
-                       EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0));
-                       EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg));
+                       EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
+                       EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0));
                        break;
                case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
                        return -EOPNOTSUPP;
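
Since there is no 32-bit remainder instruction here, the modulo case above derives the remainder from the quotient. In C terms (assuming, as the JIT does, that a zero divisor has already been ruled out):

    #include <stdint.h>

    /* u32 modulo via divide/multiply/subtract, as emitted:
     * r0 = dst / src; r0 = src * r0; dst -= r0  =>  dst = dst % src
     */
    static uint32_t mod32(uint32_t dst, uint32_t src)
    {
            uint32_t r0 = dst / src;   /* divwu */
            r0 = src * r0;             /* mulw  */
            return dst - r0;           /* sub   */
    }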
@@ -420,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        if (imm == 1)
                                break;
 
-                       PPC_LI32(__REG_R0, imm);
-                       EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0));
+                       PPC_LI32(_R0, imm);
+                       EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0));
                        break;
                case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
                        if (!imm)
@@ -430,9 +430,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        if (!is_power_of_2((u32)imm)) {
                                bpf_set_seen_register(ctx, tmp_reg);
                                PPC_LI32(tmp_reg, imm);
-                               EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg));
-                               EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0));
-                               EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+                               EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg));
+                               EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
+                               EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0));
                                break;
                        }
                        if (imm == 1)
@@ -503,8 +503,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                                EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0,
                                                    32 - fls(imm), 32 - ffs(imm)));
                        } else {
-                               PPC_LI32(__REG_R0, imm);
-                               EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0));
+                               PPC_LI32(_R0, imm);
+                               EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0));
                        }
                        break;
                case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
@@ -555,12 +555,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        break;
                case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
                        bpf_set_seen_register(ctx, tmp_reg);
-                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
                        EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
                        EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
-                       EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0));
                        EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
-                       EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0));
                        EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
                        EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
                        break;
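
The subfic/srw/slw sequence above shifts a 64-bit value held in a 32-bit register pair. A sketch for shift counts 0 < n < 32; for n >= 32 the PowerPC shift instructions return zero for amounts of 32 and above, which makes the extra carry terms vanish so the same instruction sequence stays correct:

    #include <stdint.h>

    /* 64-bit left shift on a 32-bit register pair, for 0 < n < 32. */
    static void lsh64(uint32_t *hi, uint32_t *lo, unsigned int n)
    {
            uint32_t carry = *lo >> (32 - n);   /* srw r0, lo, 32-n */
            *hi = (*hi << n) | carry;           /* slw, or          */
            *lo <<= n;                          /* slw              */
    }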
@@ -591,12 +591,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        break;
                case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
                        bpf_set_seen_register(ctx, tmp_reg);
-                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
                        EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
                        EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
-                       EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0));
                        EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
-                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
                        EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
                        EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
                        break;
@@ -627,15 +627,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        break;
                case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
                        bpf_set_seen_register(ctx, tmp_reg);
-                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
                        EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
-                       EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0));
                        EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
-                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
-                       EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+                       EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26));
                        EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
                        EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
-                       EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+                       EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0));
                        EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
                        break;
                case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
@@ -702,24 +702,24 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                                 * 2 bytes are already in their final position
                                 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
                                 */
-                               EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31));
+                               EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31));
                                /* Rotate 24 bits and insert byte 1 */
-                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7));
+                               EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7));
                                /* Rotate 24 bits and insert byte 3 */
-                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23));
-                               EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+                               EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23));
+                               EMIT(PPC_RAW_MR(dst_reg, _R0));
                                break;
                        case 64:
                                bpf_set_seen_register(ctx, tmp_reg);
                                EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31));
-                               EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31));
+                               EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31));
                                /* Rotate 24 bits and insert byte 1 */
                                EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7));
-                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7));
+                               EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7));
                                /* Rotate 24 bits and insert byte 3 */
                                EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23));
-                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23));
-                               EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+                               EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23));
+                               EMIT(PPC_RAW_MR(dst_reg, _R0));
                                EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
                                break;
                        }
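
The rlwinm/rlwimi pattern above byte-swaps a word in three rotates: rotating left by 8 already places two of the four bytes, and the two rotate-and-insert steps place the remaining two from a rotate by 24. The equivalent C, with an explicit rotate helper:

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, unsigned int n)
    {
            return (x << n) | (x >> (32 - n));
    }

    /* bswap32 via rotate-and-insert, mirroring rlwinm/rlwimi; the masks
     * correspond to bytes 1 and 3 in IBM (MSB-first) bit numbering.
     */
    static uint32_t bswap32(uint32_t x)
    {
            uint32_t r = rotl32(x, 8);                              /* rlwinm r0,x,8,0,31  */
            r = (r & ~0xff000000u) | (rotl32(x, 24) & 0xff000000u); /* rlwimi ...,24,0,7   */
            r = (r & ~0x0000ff00u) | (rotl32(x, 24) & 0x0000ff00u); /* rlwimi ...,24,16,23 */
            return r;
    }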
@@ -744,32 +744,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
                        break;
                case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
-                       PPC_LI32(__REG_R0, imm);
-                       EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off));
+                       PPC_LI32(_R0, imm);
+                       EMIT(PPC_RAW_STB(_R0, dst_reg, off));
                        break;
                case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
                        EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
                        break;
                case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
-                       PPC_LI32(__REG_R0, imm);
-                       EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off));
+                       PPC_LI32(_R0, imm);
+                       EMIT(PPC_RAW_STH(_R0, dst_reg, off));
                        break;
                case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
                        EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
                        break;
                case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
-                       PPC_LI32(__REG_R0, imm);
-                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+                       PPC_LI32(_R0, imm);
+                       EMIT(PPC_RAW_STW(_R0, dst_reg, off));
                        break;
                case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
                        EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
                        EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
                        break;
                case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
-                       PPC_LI32(__REG_R0, imm);
-                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4));
-                       PPC_EX32(__REG_R0, imm);
-                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+                       PPC_LI32(_R0, imm);
+                       EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4));
+                       PPC_EX32(_R0, imm);
+                       EMIT(PPC_RAW_STW(_R0, dst_reg, off));
                        break;
 
                /*
@@ -780,11 +780,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        /* Get offset into TMP_REG */
                        EMIT(PPC_RAW_LI(tmp_reg, off));
                        /* load value from memory into r0 */
-                       EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0));
+                       EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
                        /* add value from src_reg into this */
-                       EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg));
+                       EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
                        /* store result back */
-                       EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg));
+                       EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
                        /* we're done if this succeeded */
                        PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
                        break;
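
The lwarx/stwcx. pair above is PowerPC's load-reserve/store-conditional: if another CPU touches the reservation granule between the load and the store, the store fails and the loop retries. The same loop in inline assembly, essentially the kernel's classic atomic-add pattern (builds only on powerpc; portable code would use __atomic_fetch_add instead):

    #include <stdint.h>

    static void atomic_add32(uint32_t *p, uint32_t v)
    {
            uint32_t t;

            __asm__ __volatile__(
    "1:     lwarx   %0,0,%2\n"      /* load word and reserve   */
    "       add     %0,%0,%3\n"     /* add value from src      */
    "       stwcx.  %0,0,%2\n"      /* store if still reserved */
    "       bne-    1b\n"           /* lost reservation: retry */
            : "=&r" (t), "+m" (*p)
            : "r" (p), "r" (v)
            : "cc");
    }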
@@ -852,14 +852,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                                return ret;
 
                        if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
-                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8));
-                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12));
+                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8));
+                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12));
                        }
 
                        bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 
-                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3));
-                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4));
+                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3));
+                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4));
                        break;
 
                /*
@@ -967,12 +967,12 @@ cond_branch:
                                EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
                                break;
                        case BPF_JMP | BPF_JSET | BPF_X:
-                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h));
+                               EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h));
                                PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
-                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+                               EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
                                break;
                        case BPF_JMP32 | BPF_JSET | BPF_X: {
-                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+                               EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
                                break;
                        case BPF_JMP | BPF_JNE | BPF_K:
                        case BPF_JMP | BPF_JEQ | BPF_K:
@@ -990,11 +990,11 @@ cond_branch:
                                        EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
                                } else {
                                        /* sign-extending load ... but unsigned comparison */
-                                       PPC_EX32(__REG_R0, imm);
-                                       EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0));
-                                       PPC_LI32(__REG_R0, imm);
+                                       PPC_EX32(_R0, imm);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0));
+                                       PPC_LI32(_R0, imm);
                                        PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
-                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
                                }
                                break;
                        case BPF_JMP32 | BPF_JNE | BPF_K:
@@ -1006,8 +1006,8 @@ cond_branch:
                                if (imm >= 0 && imm < 65536) {
                                        EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
                                } else {
-                                       PPC_LI32(__REG_R0, imm);
-                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                                       PPC_LI32(_R0, imm);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
                                }
                                break;
                        }
@@ -1022,9 +1022,9 @@ cond_branch:
                                } else {
                                        /* sign-extending load */
                                        EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
-                                       PPC_LI32(__REG_R0, imm);
+                                       PPC_LI32(_R0, imm);
                                        PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
-                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
                                }
                                break;
                        case BPF_JMP32 | BPF_JSGT | BPF_K:
@@ -1039,32 +1039,32 @@ cond_branch:
                                        EMIT(PPC_RAW_CMPWI(dst_reg, imm));
                                } else {
                                        /* sign-extending load */
-                                       PPC_LI32(__REG_R0, imm);
-                                       EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0));
+                                       PPC_LI32(_R0, imm);
+                                       EMIT(PPC_RAW_CMPW(dst_reg, _R0));
                                }
                                break;
                        case BPF_JMP | BPF_JSET | BPF_K:
                                /* andi does not sign-extend the immediate */
                                if (imm >= 0 && imm < 32768) {
                                        /* PPC_ANDI is _only/always_ dot-form */
-                                       EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+                                       EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
                                } else {
-                                       PPC_LI32(__REG_R0, imm);
+                                       PPC_LI32(_R0, imm);
                                        if (imm < 0) {
                                                EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
                                                PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
                                        }
-                                       EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+                                       EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
                                }
                                break;
                        case BPF_JMP32 | BPF_JSET | BPF_K:
                                /* andi does not sign-extend the immediate */
                                if (imm >= -32768 && imm < 32768) {
                                        /* PPC_ANDI is _only/always_ dot-form */
-                                       EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+                                       EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
                                } else {
-                                       PPC_LI32(__REG_R0, imm);
-                                       EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+                                       PPC_LI32(_R0, imm);
+                                       EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
                                }
                                break;
                        }
index 57a8c1153851a05d9b40607a84bc11c96159dc47..5cad5b5a7e97745b19391a2df0f803dc360bc0ac 100644 (file)
@@ -94,7 +94,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
                 * save/restore LR unless we call other functions
                 */
                if (ctx->seen & SEEN_FUNC) {
-                       EMIT(PPC_INST_MFLR | __PPC_RT(R0));
+                       EMIT(PPC_RAW_MFLR(_R0));
                        PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
                }
 
@@ -153,8 +153,8 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
        PPC_LI64(b2p[TMP_REG_2], func);
        /* Load actual entry point from function descriptor */
        PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-       /* ... and move it to LR */
-       EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
+       /* ... and move it to CTR */
+       EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));
        /*
         * Load TOC from function descriptor at offset 8.
         * We can clobber r2 since we get called through a
@@ -165,9 +165,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
 #else
        /* We can clobber r12 */
        PPC_FUNC_ADDR(12, func);
-       EMIT(PPC_RAW_MTLR(12));
+       EMIT(PPC_RAW_MTCTR(12));
 #endif
-       EMIT(PPC_RAW_BLRL());
+       EMIT(PPC_RAW_BCTRL());
 }
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
@@ -202,8 +202,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
        PPC_BPF_LL(12, 12, 0);
 #endif
 
-       EMIT(PPC_RAW_MTLR(12));
-       EMIT(PPC_RAW_BLRL());
+       EMIT(PPC_RAW_MTCTR(12));
+       EMIT(PPC_RAW_BCTRL());
 }
 
 static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
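
The ELFv1 path above goes through a function descriptor rather than calling the address directly: the entry point sits at offset 0 (moved to CTR) and the callee's TOC pointer at offset 8 (loaded into r2). A sketch of the assumed descriptor layout:

    #include <stdint.h>

    /* The usual three-doubleword ELFv1 function descriptor (assumed
     * layout; see the ABI document for the authoritative definition).
     */
    struct func_desc {
            uint64_t entry;   /* code address   -> mtctr           */
            uint64_t toc;     /* callee's TOC   -> r2 (offset 8)   */
            uint64_t env;     /* environment pointer, unused here  */
    };

On ELFv2 (the #else branches) there is no descriptor: the JIT loads the target address straight into r12, which the ELFv2 calling convention expects to hold the entry address so the callee can derive its own TOC.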
index c02854dea2b20e8e33d10ffb986f9c3d9e8fc701..2f46e31c76129799eb13378b31498ae76cbe5611 100644 (file)
@@ -1,9 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_PERF_EVENTS)      += callchain.o callchain_$(BITS).o perf_regs.o
-ifdef CONFIG_COMPAT
-obj-$(CONFIG_PERF_EVENTS)      += callchain_32.o
-endif
+obj-y                          += callchain.o callchain_$(BITS).o perf_regs.o
+obj-$(CONFIG_COMPAT)           += callchain_32.o
 
 obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += ppc970-pmu.o power5-pmu.o \
index 6c028ee513c0d7774a0194076011f208ca1edaaf..082f6d0308a472916365d203e2cc19e1bd4563db 100644 (file)
@@ -40,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
        return 0;
 }
 
-void
+void __no_sanitize_address
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
        unsigned long sp, next_sp;
index 51622411a7ccd80585b909cc701de1d8862fafaa..bb0ee716de912c92e89ca9ea1646fe0f644f097c 100644 (file)
@@ -460,7 +460,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
                                sizeof(instr)))
                        return 0;
 
-               return branch_target((struct ppc_inst *)&instr);
+               return branch_target(&instr);
        }
 
        /* Userspace: need copy instruction here then translate it */
@@ -468,7 +468,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
                        sizeof(instr)))
                return 0;
 
-       target = branch_target((struct ppc_inst *)&instr);
+       target = branch_target(&instr);
        if ((!target) || (instr & BRANCH_ABSOLUTE))
                return target;
 
index eb8a6aaf4cc14b3420a7537aa3cafa3b9f4fed19..695975227e60fdd2927ec8d827450fd502b58b4c 100644 (file)
  *
  *        28        24        20        16        12         8         4         0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *                                 [ pmc ]   [unit ]   [ ]   m   [    pmcxsel    ]
- *                                                     |     |
- *                                                     |     *- mark
- *                                                     |
- *                                                     |
- *                                                     *- combine
- *
- * Below uses IBM bit numbering.
- *
- * MMCR1[x:y] = unit    (PMCxUNIT)
- * MMCR1[24]   = pmc1combine[0]
- * MMCR1[25]   = pmc1combine[1]
- * MMCR1[26]   = pmc2combine[0]
- * MMCR1[27]   = pmc2combine[1]
- * MMCR1[28]   = pmc3combine[0]
- * MMCR1[29]   = pmc3combine[1]
- * MMCR1[30]   = pmc4combine[0]
- * MMCR1[31]   = pmc4combine[1]
- *
+ *                                 [ pmc ]                       [    pmcxsel    ]
  */
 
 /*
- * Some power9 event codes.
+ * Event codes defined in ISA v3.0B
  */
 #define EVENT(_name, _code)    _name = _code,
 
 enum {
-EVENT(PM_CYC,                                  0x0001e)
-EVENT(PM_INST_CMPL,                            0x00002)
+       /* Cycles, alternate code */
+       EVENT(PM_CYC_ALT,                       0x100f0)
+       /* One or more instructions completed in a cycle */
+       EVENT(PM_CYC_INST_CMPL,                 0x100f2)
+       /* Floating-point instruction completed */
+       EVENT(PM_FLOP_CMPL,                     0x100f4)
+       /* Instruction ERAT/L1-TLB miss */
+       EVENT(PM_L1_ITLB_MISS,                  0x100f6)
+       /* All instructions completed and none available */
+       EVENT(PM_NO_INST_AVAIL,                 0x100f8)
+       /* A load-type instruction completed (ISA v3.0+) */
+       EVENT(PM_LD_CMPL,                       0x100fc)
+       /* Instruction completed, alternate code (ISA v3.0+) */
+       EVENT(PM_INST_CMPL_ALT,                 0x100fe)
+       /* A store-type instruction completed */
+       EVENT(PM_ST_CMPL,                       0x200f0)
+       /* Instruction Dispatched */
+       EVENT(PM_INST_DISP,                     0x200f2)
+       /* Run_cycles */
+       EVENT(PM_RUN_CYC,                       0x200f4)
+       /* Data ERAT/L1-TLB miss/reload */
+       EVENT(PM_L1_DTLB_RELOAD,                0x200f6)
+       /* Taken branch completed */
+       EVENT(PM_BR_TAKEN_CMPL,                 0x200fa)
+       /* Demand iCache Miss */
+       EVENT(PM_L1_ICACHE_MISS,                0x200fc)
+       /* L1 Dcache reload from memory */
+       EVENT(PM_L1_RELOAD_FROM_MEM,            0x200fe)
+       /* L1 Dcache store miss */
+       EVENT(PM_ST_MISS_L1,                    0x300f0)
+       /* Alternate code for PM_INST_DISP */
+       EVENT(PM_INST_DISP_ALT,                 0x300f2)
+       /* Branch direction or target mispredicted */
+       EVENT(PM_BR_MISPREDICT,                 0x300f6)
+       /* Data TLB miss/reload */
+       EVENT(PM_DTLB_MISS,                     0x300fc)
+       /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+       EVENT(PM_DATA_FROM_L3MISS,              0x300fe)
+       /* L1 Dcache load miss */
+       EVENT(PM_LD_MISS_L1,                    0x400f0)
+       /* Cycle when instruction(s) dispatched */
+       EVENT(PM_CYC_INST_DISP,                 0x400f2)
+       /* Branch or branch target mispredicted */
+       EVENT(PM_BR_MPRED_CMPL,                 0x400f6)
+       /* Instructions completed with run latch set */
+       EVENT(PM_RUN_INST_CMPL,                 0x400fa)
+       /* Instruction TLB miss/reload */
+       EVENT(PM_ITLB_MISS,                     0x400fc)
+       /* Load data not cached */
+       EVENT(PM_LD_NOT_CACHED,                 0x400fe)
+       /* Instructions */
+       EVENT(PM_INST_CMPL,                     0x500fa)
+       /* Cycles */
+       EVENT(PM_CYC,                           0x600f4)
 };
 
 #undef EVENT
 
+/* Table of alternatives, sorted in increasing order of column 0 */
+/* Note that in each row, column 0 must be the smallest */
+static const unsigned int generic_event_alternatives[][MAX_ALT] = {
+       { PM_CYC_ALT,                   PM_CYC },
+       { PM_INST_CMPL_ALT,             PM_INST_CMPL },
+       { PM_INST_DISP,                 PM_INST_DISP_ALT },
+};
+
+static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+       int num_alt = 0;
+
+       num_alt = isa207_get_alternatives(event, alt,
+                                         ARRAY_SIZE(generic_event_alternatives), flags,
+                                         generic_event_alternatives);
+
+       return num_alt;
+}
+
 GENERIC_EVENT_ATTR(cpu-cycles,                 PM_CYC);
 GENERIC_EVENT_ATTR(instructions,               PM_INST_CMPL);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,    PM_NO_INST_AVAIL);
+GENERIC_EVENT_ATTR(branch-misses,              PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-misses,               PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses,                PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,       PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,                PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(LLC-load-misses,              PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(branch-load-misses,           PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses,             PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,             PM_ITLB_MISS);
 
 static struct attribute *generic_compat_events_attr[] = {
        GENERIC_EVENT_PTR(PM_CYC),
        GENERIC_EVENT_PTR(PM_INST_CMPL),
+       GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
+       GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+       GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+       CACHE_EVENT_PTR(PM_LD_MISS_L1),
+       CACHE_EVENT_PTR(PM_ST_MISS_L1),
+       CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+       CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+       CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+       CACHE_EVENT_PTR(PM_DTLB_MISS),
+       CACHE_EVENT_PTR(PM_ITLB_MISS),
        NULL
 };
 
@@ -63,17 +137,11 @@ static struct attribute_group generic_compat_pmu_events_group = {
 
 PMU_FORMAT_ATTR(event,         "config:0-19");
 PMU_FORMAT_ATTR(pmcxsel,       "config:0-7");
-PMU_FORMAT_ATTR(mark,          "config:8");
-PMU_FORMAT_ATTR(combine,       "config:10-11");
-PMU_FORMAT_ATTR(unit,          "config:12-15");
 PMU_FORMAT_ATTR(pmc,           "config:16-19");
 
 static struct attribute *generic_compat_pmu_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_pmcxsel.attr,
-       &format_attr_mark.attr,
-       &format_attr_combine.attr,
-       &format_attr_unit.attr,
        &format_attr_pmc.attr,
        NULL,
 };
@@ -92,6 +160,9 @@ static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
 static int compat_generic_events[] = {
        [PERF_COUNT_HW_CPU_CYCLES] =                    PM_CYC,
        [PERF_COUNT_HW_INSTRUCTIONS] =                  PM_INST_CMPL,
+       [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =       PM_NO_INST_AVAIL,
+       [PERF_COUNT_HW_BRANCH_MISSES] =                 PM_BR_MPRED_CMPL,
+       [PERF_COUNT_HW_CACHE_MISSES] =                  PM_LD_MISS_L1,
 };
 
 #define C(x)   PERF_COUNT_HW_CACHE_##x
@@ -105,11 +176,11 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(L1D) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
@@ -119,7 +190,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(L1I) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
@@ -133,7 +204,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(LL) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
@@ -147,7 +218,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(DTLB) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_DTLB_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
@@ -161,7 +232,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(ITLB) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_ITLB_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
@@ -175,7 +246,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(BPU) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
-                       [ C(RESULT_MISS)   ] = 0,
+                       [ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
@@ -204,13 +275,30 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
 #undef C
 
+/*
+ * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for
+ * PM_INST_CMPL and PM_CYC.
+ */
+static int generic_compute_mmcr(u64 event[], int n_ev,
+                               unsigned int hwc[], struct mmcr_regs *mmcr,
+                               struct perf_event *pevents[], u32 flags)
+{
+       int ret;
+
+       ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+       if (!ret)
+               mmcr->mmcr0 |= MMCR0_C56RUN;
+       return ret;
+}
+
 static struct power_pmu generic_compat_pmu = {
        .name                   = "GENERIC_COMPAT",
        .n_counter              = MAX_PMU_COUNTERS,
        .add_fields             = ISA207_ADD_FIELDS,
        .test_adder             = ISA207_TEST_ADDER,
-       .compute_mmcr           = isa207_compute_mmcr,
+       .compute_mmcr           = generic_compute_mmcr,
        .get_constraint         = isa207_get_constraint,
+       .get_alternatives       = generic_get_alternatives,
        .disable_pmc            = isa207_disable_pmc,
        .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(compat_generic_events),
@@ -223,6 +311,16 @@ int init_generic_compat_pmu(void)
 {
        int rc = 0;
 
+       /*
+        * From ISA v2.07 on, PMU features are architected;
+        * we require >= v3.0 because (a) that has PM_LD_CMPL and
+        * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
+        * (b) we don't expect any non-IBM Power ISA
+        * implementations that conform to v2.07 but not v3.0.
+        */
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               return -ENODEV;
+
        rc = register_power_pmu(&generic_compat_pmu);
        if (rc)
                return rc;
index 8c0d324f657e1bba975fc6ec8fcacffd456e9425..3823df235f251c1096fd47e174846ed3086c54ea 100644 (file)
@@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
                if (ret)
                        break;
                /* fall through and return the timeout */
+               fallthrough;
 
        case WDIOC_GETTIMEOUT:
                /* we need to round here as to avoid e.g. the following
index 87f524e4b09ce2ed2754b474995be4159271cdaf..8a7e55acf090fdc6a87284663755284e9dd33e2b 100644 (file)
@@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr)
 
        /* Setup fake reset vector to call __secondary_start_mpc86xx. */
        target = (unsigned long) __secondary_start_mpc86xx;
-       patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+       patch_branch(vector, target, BRANCH_SET_LINK);
 
        /* Kick that CPU */
        smp_86xx_release_core(nr);
@@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr)
                mdelay(1);
 
        /* Restore the exception vector */
-       patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+       patch_instruction(vector, ppc_inst(save_vector));
 
        local_irq_restore(flags);
 
index 7a5e8f4541e3fd299d312ba899f7b0aaaa40903f..e02d29a9d12ff63a134c924a71f54a4ee967fa76 100644 (file)
@@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
 
 config KVM_GUEST
        bool "KVM Guest support"
@@ -49,6 +51,7 @@ config PPC_NATIVE
 config PPC_OF_BOOT_TRAMPOLINE
        bool "Support booting from Open Firmware or yaboot"
        depends on PPC_BOOK3S_32 || PPC64
+       select RELOCATABLE if PPC64
        default y
        help
          Support booting from Open Firmware or yaboot using an
index 7d271de8fcbdbbb8b5b4b877fa0c4c0974154e72..0e1bb1cedd13a6be7ce7cc99050a29156d9f8546 100644 (file)
@@ -61,6 +61,7 @@ config 44x
        select 4xx_SOC
        select HAVE_PCI
        select PHYS_64BIT
+       select PPC_HAVE_KUEP
 
 endchoice
 
@@ -390,7 +391,7 @@ config PPC_HAVE_KUEP
 config PPC_KUEP
        bool "Kernel Userspace Execution Prevention"
        depends on PPC_HAVE_KUEP
-       default y if !PPC_BOOK3S_32
+       default y
        help
          Enable support for Kernel Userspace Execution Prevention (KUEP)
 
@@ -402,7 +403,7 @@ config PPC_HAVE_KUAP
 config PPC_KUAP
        bool "Kernel Userspace Access Protection"
        depends on PPC_HAVE_KUAP
-       default y if !PPC_BOOK3S_32
+       default y
        help
          Enable support for Kernel Userspace Access Protection (KUAP)
 
@@ -425,10 +426,6 @@ config PPC_MMU_NOHASH
        def_bool y
        depends on !PPC_BOOK3S
 
-config PPC_MMU_NOHASH_32
-       def_bool y
-       depends on PPC_MMU_NOHASH && PPC32
-
 config PPC_BOOK3E_MMU
        def_bool y
        depends on FSL_BOOKE || PPC_BOOK3E
@@ -477,9 +474,9 @@ config SMP
          If you don't know what to do here, say N.
 
 config NR_CPUS
-       int "Maximum number of CPUs (2-8192)"
-       range 2 8192
-       depends on SMP
+       int "Maximum number of CPUs (2-8192)" if SMP
+       range 2 8192 if SMP
+       default "1" if !SMP
        default "32" if PPC64
        default "4"
 
index 143d4417f6cccf65486126f508d0a7db48de4801..94470fb27c99b44e2a5779f74091b78e6b5f2dec 100644 (file)
@@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL)                += cell/
 obj-$(CONFIG_PPC_PS3)          += ps3/
 obj-$(CONFIG_EMBEDDED6xx)      += embedded6xx/
 obj-$(CONFIG_AMIGAONE)         += amigaone/
+obj-$(CONFIG_PPC_BOOK3S)       += book3s/
+obj-$(CONFIG_PPC_MICROWATT)    += microwatt/
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644 (file)
index 0000000..34c9315
--- /dev/null
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+       bool "IBM Virtual Accelerator Switchboard (VAS)"
+       depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+       default y
+       help
+         This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+         VAS devices are found in POWER9-based and later systems; they
+         provide access to accelerator coprocessors such as NX-GZIP and
+         NX-842. This config allows the kernel to use NX-842 accelerators,
+         and enables the user-mode API for the NX-GZIP accelerator on
+         POWER9 PowerNV and POWER10 PowerVM platforms.
+
+         If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644 (file)
index 0000000..e790f19
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS)  += vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644 (file)
index 0000000..30172e5
--- /dev/null
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ *     fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ *     rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ *     paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL);
+ *     vas_copy(&crb, 0, 1);
+ *     vas_paste(paste_addr, 0, 1);
+ *     close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to user space directly, so refer to the NX
+ * hardware documentation for exact copy/paste usage and
+ * completion / error conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+       struct cdev cdev;
+       struct device *device;
+       char *name;
+       dev_t devt;
+       struct class *class;
+       enum vas_cop_type cop_type;
+       const struct vas_user_win_ops *vops;
+} coproc_device;
+
+struct coproc_instance {
+       struct coproc_dev *coproc;
+       struct vas_window *txwin;
+};
+
+static char *coproc_devnode(struct device *dev, umode_t *mode)
+{
+       return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+       /*
+        * A window opened by a child thread may not be closed when
+        * the thread exits, so take a reference to its pid and release
+        * it when the window is freed by the parent thread.
+        * Acquiring a reference to the task's pid makes sure the
+        * pid will not be re-used - needed only for multithreaded
+        * applications.
+        */
+       task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+       /*
+        * Acquire a reference to the task's mm.
+        */
+       task_ref->mm = get_task_mm(current);
+       if (!task_ref->mm) {
+               put_pid(task_ref->pid);
+               pr_err("VAS: pid(%d): mm_struct is not found\n",
+                               current->pid);
+               return -EPERM;
+       }
+
+       mmgrab(task_ref->mm);
+       mmput(task_ref->mm);
+       /*
+        * A process closes its windows during exit. In a
+        * multithreaded application, a child thread can open a
+        * window and exit without closing it, so take a tgid
+        * reference until the window is closed to make sure the
+        * tgid is not reused.
+        */
+       task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+       return 0;
+}
+
+/*
+ * On successful return, the caller must release the task reference
+ * with put_task_struct().
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+                         struct task_struct **tskp, struct pid **pidp)
+{
+       struct task_struct *tsk;
+       struct pid *pid;
+
+       pid = task_ref->pid;
+       tsk = get_pid_task(pid, PIDTYPE_PID);
+       if (!tsk) {
+               pid = task_ref->tgid;
+               tsk = get_pid_task(pid, PIDTYPE_PID);
+               /*
+                * The parent thread (tgid) will close the window when
+                * it exits, so we should not get here.
+                */
+               if (WARN_ON_ONCE(!tsk))
+                       return false;
+       }
+
+       /* Return if the task is exiting. */
+       if (tsk->flags & PF_EXITING) {
+               put_task_struct(tsk);
+               return false;
+       }
+
+       *tskp = tsk;
+       *pidp = pid;
+
+       return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on the CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates the CSB.
+ * Whereas if NX encounters a page fault, the kernel will handle the
+ * fault and update the CSB with a translation error.
+ *
+ * If we are unable to update the CSB (meaning copy_to_user failed due
+ * to an invalid csb_addr), send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+                   struct vas_user_win_ref *task_ref)
+{
+       struct coprocessor_status_block csb;
+       struct kernel_siginfo info;
+       struct task_struct *tsk;
+       void __user *csb_addr;
+       struct pid *pid;
+       int rc;
+
+       /*
+        * NX user space windows cannot be opened when task->mm is
+        * NULL, and faults will not be generated for kernel requests.
+        */
+       if (WARN_ON_ONCE(!task_ref->mm))
+               return;
+
+       csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+       memset(&csb, 0, sizeof(csb));
+       csb.cc = CSB_CC_FAULT_ADDRESS;
+       csb.ce = CSB_CE_TERMINATION;
+       csb.cs = 0;
+       csb.count = 0;
+
+       /*
+        * NX operates on and returns data in BE format, as defined by
+        * the CRB struct. So save fault_storage_addr in BE, as NX
+        * delivered it in the fault FIFO, and expect user space to
+        * convert it to CPU format.
+        */
+       csb.address = crb->stamp.nx.fault_storage_addr;
+       csb.flags = 0;
+
+       /*
+        * A process closes its send window after all pending NX
+        * requests are completed. In multi-threaded applications, a
+        * child thread can open a window and exit without closing it;
+        * some requests may still be pending, or the window may be
+        * used by other threads later. We should handle faults if NX
+        * encounters page faults on these requests: update the CSB
+        * with a translation error and the fault address. If the
+        * csb_addr passed by user space is invalid, send a SEGV signal
+        * to the pid saved in the window. If the child thread is not
+        * running, send the signal to the tgid.
+        * The parent thread (tgid) will close this window upon its
+        * exit.
+        *
+        * pid and mm references are taken when the window is opened by
+        * the process (pid). So the tgid is used only when a child
+        * thread opens a window and exits without closing it.
+        */
+
+       if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+               return;
+
+       kthread_use_mm(task_ref->mm);
+       rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+       /*
+        * User space polls on csb.flags (the first byte). So add a
+        * barrier, then copy the first byte with the csb flags update.
+        */
+       if (!rc) {
+               csb.flags = CSB_V;
+               /* Make sure update to csb.flags is visible now */
+               smp_mb();
+               rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+       }
+       kthread_unuse_mm(task_ref->mm);
+       put_task_struct(tsk);
+
+       /* Success */
+       if (!rc)
+               return;
+
+       pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+                       csb_addr, pid_vnr(pid));
+
+       clear_siginfo(&info);
+       info.si_signo = SIGSEGV;
+       info.si_errno = EFAULT;
+       info.si_code = SEGV_MAPERR;
+       info.si_addr = csb_addr;
+       /*
+        * The process will be polling on csb.flags after the request
+        * is sent to NX, so the CSB update should generally not fail
+        * except when an application passes an invalid csb_addr. An
+        * error message is displayed, and it is left to user space
+        * whether to ignore or handle this signal.
+        */
+       rcu_read_lock();
+       rc = kill_pid_info(SIGSEGV, &info, pid);
+       rcu_read_unlock();
+
+       pr_devel("%s(): pid %d kill_pid_info() rc %d\n", __func__,
+                       pid_vnr(pid), rc);
+}
+
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+       struct data_descriptor_entry *dde;
+       struct nx_fault_stamp *nx;
+
+       dde = &crb->source;
+       pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+               be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+               dde->count, dde->index, dde->flags);
+
+       dde = &crb->target;
+       pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+               be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+               dde->count, dde->index, dde->flags);
+
+       nx = &crb->stamp.nx;
+       pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+               be32_to_cpu(nx->pswid),
+               be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+               nx->flags, nx->fault_status);
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+       struct coproc_instance *cp_inst;
+
+       cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
+       if (!cp_inst)
+               return -ENOMEM;
+
+       cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+                                       cdev);
+       fp->private_data = cp_inst;
+
+       return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+       void __user *uptr = (void __user *)arg;
+       struct vas_tx_win_open_attr uattr;
+       struct coproc_instance *cp_inst;
+       struct vas_window *txwin;
+       int rc;
+
+       cp_inst = fp->private_data;
+
+       /*
+        * One window per file descriptor
+        */
+       if (cp_inst->txwin)
+               return -EEXIST;
+
+       rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+       if (rc) {
+               pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
+               return -EFAULT;
+       }
+
+       if (uattr.version != 1) {
+               pr_err("Invalid window open API version\n");
+               return -EINVAL;
+       }
+
+       if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
+               pr_err("VAS API is not registered\n");
+               return -EACCES;
+       }
+
+       txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+                                               cp_inst->coproc->cop_type);
+       if (IS_ERR(txwin)) {
+               pr_err("%s() VAS window open failed, %ld\n", __func__,
+                               PTR_ERR(txwin));
+               return PTR_ERR(txwin);
+       }
+
+       cp_inst->txwin = txwin;
+
+       return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+       struct coproc_instance *cp_inst = fp->private_data;
+       int rc;
+
+       if (cp_inst->txwin) {
+               if (cp_inst->coproc->vops &&
+                       cp_inst->coproc->vops->close_win) {
+                       rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+                       if (rc)
+                               return rc;
+               }
+               cp_inst->txwin = NULL;
+       }
+
+       kfree(cp_inst);
+       fp->private_data = NULL;
+
+       /*
+        * We don't know here if the user has other receive windows
+        * open, so we can't really call clear_thread_tidr().
+        * So, once the process calls set_thread_tidr(), the
+        * TIDR value sticks around until the process exits, resulting
+        * in an extra copy in restore_sprs().
+        */
+
+       return 0;
+}
+
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+       struct coproc_instance *cp_inst = fp->private_data;
+       struct vas_window *txwin;
+       unsigned long pfn;
+       u64 paste_addr;
+       pgprot_t prot;
+       int rc;
+
+       txwin = cp_inst->txwin;
+
+       if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+               pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
+                               (vma->vm_end - vma->vm_start), PAGE_SIZE);
+               return -EINVAL;
+       }
+
+       /* Ensure instance has an open send window */
+       if (!txwin) {
+               pr_err("%s(): No send window open?\n", __func__);
+               return -EINVAL;
+       }
+
+       if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+               pr_err("%s(): VAS API is not registered\n", __func__);
+               return -EACCES;
+       }
+
+       paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+       if (!paste_addr) {
+               pr_err("%s(): Window paste address failed\n", __func__);
+               return -EINVAL;
+       }
+
+       pfn = paste_addr >> PAGE_SHIFT;
+
+       /* flags, page_prot from cxl_mmap(), except we want cacheable */
+       vma->vm_flags |= VM_IO | VM_PFNMAP;
+       vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+       prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+       rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+                       vma->vm_end - vma->vm_start, prot);
+
+       pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
+                       paste_addr, vma->vm_start, rc);
+
+       return rc;
+}
+
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case VAS_TX_WIN_OPEN:
+               return coproc_ioc_tx_win_open(fp, arg);
+       default:
+               return -EINVAL;
+       }
+}
+
+static struct file_operations coproc_fops = {
+       .open = coproc_open,
+       .release = coproc_release,
+       .mmap = coproc_mmap,
+       .unlocked_ioctl = coproc_ioctl,
+};
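
The fops above define the userspace contract: open() the device node, issue VAS_TX_WIN_OPEN to open one send window per descriptor, mmap() a single page to obtain the paste address, and close() to tear the window down via coproc_release(). A hypothetical userspace sketch of that flow (assuming the /dev/crypto/nx-gzip node documented in the original powernv driver and a uapi struct vas_tx_win_open_attr carrying version/vas_id/flags fields; error handling trimmed):

```c
/* Hypothetical userspace sketch; assumes <asm/vas-api.h> provides
 * VAS_TX_WIN_OPEN and struct vas_tx_win_open_attr (version/vas_id/flags),
 * and that the driver registered the "nx-gzip" node under /dev/crypto/. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <asm/vas-api.h>

int main(void)
{
	struct vas_tx_win_open_attr attr;
	long page = sysconf(_SC_PAGESIZE);
	void *paste;
	int fd;

	fd = open("/dev/crypto/nx-gzip", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&attr, 0, sizeof(attr));
	attr.version = 1;	/* anything else is rejected with -EINVAL */
	attr.vas_id = -1;	/* assumption: let the kernel pick an instance */

	if (ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0) {
		perror("VAS_TX_WIN_OPEN");	/* coproc_ioc_tx_win_open() */
		return 1;
	}

	/* coproc_mmap() rejects requests larger than one page. */
	paste = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (paste == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* copy/paste CRBs to 'paste' here (copy-paste.h helpers). */

	munmap(paste, page);
	close(fd);	/* coproc_release() closes the window */
	return 0;
}
```
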
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * can be extended to other coprocessor types later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+                           const char *name,
+                           const struct vas_user_win_ops *vops)
+{
+       int rc = -EINVAL;
+       dev_t devno;
+
+       rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+       if (rc) {
+               pr_err("Unable to allocate coproc major number: %i\n", rc);
+               return rc;
+       }
+
+       pr_devel("%s device allocated, dev [%i,%i]\n", name,
+                       MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+       coproc_device.class = class_create(mod, name);
+       if (IS_ERR(coproc_device.class)) {
+               rc = PTR_ERR(coproc_device.class);
+               pr_err("Unable to create %s class %d\n", name, rc);
+               goto err_class;
+       }
+       coproc_device.class->devnode = coproc_devnode;
+       coproc_device.cop_type = cop_type;
+       coproc_device.vops = vops;
+
+       coproc_fops.owner = mod;
+       cdev_init(&coproc_device.cdev, &coproc_fops);
+
+       devno = MKDEV(MAJOR(coproc_device.devt), 0);
+       rc = cdev_add(&coproc_device.cdev, devno, 1);
+       if (rc) {
+               pr_err("cdev_add() failed %d\n", rc);
+               goto err_cdev;
+       }
+
+       coproc_device.device = device_create(coproc_device.class, NULL,
+                       devno, NULL, name, MINOR(devno));
+       if (IS_ERR(coproc_device.device)) {
+               rc = PTR_ERR(coproc_device.device);
+               pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+               goto err;
+       }
+
+       pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
+                       MINOR(devno));
+
+       return 0;
+
+err:
+       cdev_del(&coproc_device.cdev);
+err_cdev:
+       class_destroy(coproc_device.class);
+err_class:
+       unregister_chrdev_region(coproc_device.devt, 1);
+       return rc;
+}
+
+void vas_unregister_coproc_api(void)
+{
+       dev_t devno;
+
+       cdev_del(&coproc_device.cdev);
+       devno = MKDEV(MAJOR(coproc_device.devt), 0);
+       device_destroy(coproc_device.class, devno);
+
+       class_destroy(coproc_device.class);
+       unregister_chrdev_region(coproc_device.devt, 1);
+}
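
On the provider side, a platform plugs an accelerator into this API by handing vas_register_coproc_api() a vas_user_win_ops. A minimal sketch of what an nx-gzip-style caller could look like (nx_open_win()/nx_paste_addr()/nx_close_win() are hypothetical names; the callback signatures follow the vops calls made in this file):

```c
/* Sketch only: hypothetical platform callbacks wired into the common
 * chardev API above; signatures follow how this file invokes vops. */
static struct vas_window *nx_open_win(int vas_id, u64 flags,
				      enum vas_cop_type cop_type)
{
	return ERR_PTR(-ENODEV);	/* platform-specific window open */
}

static u64 nx_paste_addr(struct vas_window *win)
{
	return 0;			/* platform-specific paste address */
}

static int nx_close_win(struct vas_window *win)
{
	return 0;			/* platform-specific window close */
}

static const struct vas_user_win_ops nx_vops = {
	.open_win	= nx_open_win,
	.paste_addr	= nx_paste_addr,
	.close_win	= nx_close_win,
};

static int __init nx_uapi_init(void)
{
	return vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP,
				       "nx-gzip", &nx_vops);
}

static void __exit nx_uapi_exit(void)
{
	vas_unregister_coproc_api();
}
```
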
index 93ea41680f5447a71c0cde6e4c12ee21dbcd7636..a1c293f42a1fb131d06d6cfd51fe91e229325e2e 100644 (file)
@@ -25,10 +25,9 @@ struct spiderpci_iowa_private {
 static void spiderpci_io_flush(struct iowa_bus *bus)
 {
        struct spiderpci_iowa_private *priv;
-       u32 val;
 
        priv = bus->private;
-       val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+       in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
        iosync();
 }
 
index d56b4e3241cd4a2d4eead57a87ed590ac8f7c212..b41e81b22fdc0ead1e6567803ad3c715c34b20d2 100644 (file)
@@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
 static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
 {
        struct spu_problem __iomem *prob = spu->problem;
-       u32 dummy = 0;
 
        /* Restore, Step 66:
         *     If CSA.MB_Stat[P]=0 (mailbox empty) then
         *     read from the PPU_MB register.
         */
        if ((csa->prob.mb_stat_R & 0xFF) == 0) {
-               dummy = in_be32(&prob->pu_mb_R);
+               in_be32(&prob->pu_mb_R);
                eieio();
        }
 }
@@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
 static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
 {
        struct spu_priv2 __iomem *priv2 = spu->priv2;
-       u64 dummy = 0UL;
 
        /* Restore, Step 66:
         *     If CSA.MB_Stat[I]=0 (mailbox empty) then
         *     read from the PPUINT_MB register.
         */
        if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
-               dummy = in_be64(&priv2->puint_mb_R);
+               in_be64(&priv2->puint_mb_R);
                eieio();
                spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
                eieio();
index 53065d564161d7b44076a8de5b6320e2fcadbd86..85521b3e7098406b501191472560d5517b367f1a 100644 (file)
@@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs)
        /* Are we prepared to handle this fault */
        if ((entry = search_exception_tables(regs->nip)) != NULL) {
                tsi108_clear_pci_cfg_error();
-               regs->msr |= MSR_RI;
-               regs->nip = extable_fixup(entry);
+               regs_set_return_msr(regs, regs->msr | MSR_RI);
+               regs_set_return_ip(regs, extable_fixup(entry));
                return 1;
        }
        return 0;
index 5565647dc879e8ad1b103b93ab8f5537e7dc6ed2..d8da6a483e59101eb92eb006c2da15215c4490eb 100644 (file)
@@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
        /* Are we prepared to handle this fault */
        if ((entry = search_exception_tables(regs->nip)) != NULL) {
                tsi108_clear_pci_cfg_error();
-               regs->msr |= MSR_RI;
-               regs->nip = extable_fixup(entry);
+               regs_set_return_msr(regs, regs->msr | MSR_RI);
+               regs_set_return_ip(regs, extable_fixup(entry));
                return 1;
        }
        return 0;
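
Both hunks above swap direct writes to regs->nip/regs->msr for accessors. The point of the indirection is that the Book3S-64 interrupt-return rework caches SRR0/SRR1 validity, so anything editing the saved return state must invalidate that cache. A sketch of the helpers this series introduces (simplified; see asm/ptrace.h for the real definitions):

```c
/* Sketch, simplified from asm/ptrace.h: editing the saved return state
 * must also mark the cached SRR/HSRR contents stale so the interrupt
 * exit path reloads them. */
static inline void set_return_regs_changed(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
	local_paca->hsrr_valid = 0;
	local_paca->srr_valid = 0;
#endif
}

static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
{
	regs->nip = ip;
	set_return_regs_changed();
}

static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
{
	regs->msr = msr;
	set_return_regs_changed();
}
```
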
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
new file mode 100644 (file)
index 0000000..8f6a819
--- /dev/null
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MICROWATT
+       depends on PPC_BOOK3S_64 && !SMP
+       bool "Microwatt SoC platform"
+       select PPC_XICS
+       select PPC_ICS_NATIVE
+       select PPC_ICP_NATIVE
+       select PPC_NATIVE
+       select PPC_UDBG_16550
+       select ARCH_RANDOM
+       help
+          This option enables support for FPGA-based Microwatt implementations.
+
diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile
new file mode 100644 (file)
index 0000000..116d6d3
--- /dev/null
@@ -0,0 +1 @@
+obj-y  += setup.o rng.o
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
new file mode 100644 (file)
index 0000000..3d8ee6e
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/platforms/powernv/rng.c, which is:
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)    "microwatt-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+int microwatt_get_random_darn(unsigned long *v)
+{
+       unsigned long val;
+
+       /* Using DARN with L=1 - 64-bit conditioned random number */
+       asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+       if (val == DARN_ERR)
+               return 0;
+
+       *v = val;
+
+       return 1;
+}
+
+static __init int rng_init(void)
+{
+       unsigned long val;
+       int i;
+
+       for (i = 0; i < 10; i++) {
+               if (microwatt_get_random_darn(&val)) {
+                       ppc_md.get_random_seed = microwatt_get_random_darn;
+                       return 0;
+               }
+       }
+
+       pr_warn("Unable to use DARN for get_random_seed()\n");
+
+       return -EIO;
+}
+machine_subsys_initcall(microwatt, rng_init);
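
Once the initcall installs the hook, the generic powerpc archrandom glue consumes it; roughly (a simplified sketch of asm/archrandom.h, not the literal header):

```c
/* Sketch: how ppc_md.get_random_seed is consumed (simplified from
 * asm/archrandom.h); on Microwatt the hook is microwatt_get_random_darn. */
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
	if (ppc_md.get_random_seed)
		return ppc_md.get_random_seed(v);

	return false;
}
```
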
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
new file mode 100644 (file)
index 0000000..0b02603
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Microwatt FPGA-based SoC platform setup code.
+ *
+ * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/xics.h>
+#include <asm/udbg.h>
+
+static void __init microwatt_init_IRQ(void)
+{
+       xics_init();
+}
+
+static int __init microwatt_probe(void)
+{
+       return of_machine_is_compatible("microwatt-soc");
+}
+
+static int __init microwatt_populate(void)
+{
+       return of_platform_default_populate(NULL, NULL, NULL);
+}
+machine_arch_initcall(microwatt, microwatt_populate);
+
+define_machine(microwatt) {
+       .name                   = "microwatt",
+       .probe                  = microwatt_probe,
+       .init_IRQ               = microwatt_init_IRQ,
+       .progress               = udbg_progress,
+       .calibrate_decr         = generic_calibrate_decr,
+};
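
define_machine(microwatt) places this machdep_calls descriptor in the .machine.desc linker section; early boot walks that section and keeps the first machine whose probe() hook matches, which for Microwatt is the "microwatt-soc" compatible check above. Roughly (a simplified sketch of probe_machine() in setup-common.c; details elided):

```c
/* Sketch, simplified: how the "microwatt" machine gets selected at boot. */
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;

static void __init probe_machine_sketch(void)
{
	struct machdep_calls **machine_id;

	for (machine_id = (struct machdep_calls **)&__machine_desc_start;
	     machine_id < (struct machdep_calls **)&__machine_desc_end;
	     machine_id++) {
		memcpy(&ppc_md, *machine_id, sizeof(ppc_md));
		if (ppc_md.probe())	/* e.g. microwatt_probe() above */
			break;
	}
}
```
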
index 1c954c90b0f43920a5422669feccf5d1a95198c4..9b88e3cded7d2dc07d165444688071d9c51a3b95 100644 (file)
@@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
         */
 
        if (regs->msr & SRR1_WAKEMASK)
-               regs->nip = regs->link;
+               regs_set_return_ip(regs, regs->link);
 
        switch (regs->msr & SRR1_WAKEMASK) {
        case SRR1_WAKEDEC:
@@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
        restore_astate(hard_smp_processor_id());
 
        /* everything handled */
-       regs->msr |= MSR_RI;
+       regs_set_return_msr(regs, regs->msr | MSR_RI);
        return 1;
 }
 
index 9d4ecd292255b4736854d3a573b008e8d9ea656c..d20ef35e6d9da18ebbf0b7627f81182b82314c8d 100644 (file)
@@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi)
        bootx_printf("\nframe buffer at  : 0x%x", bi->dispDeviceBase);
        bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase);
        bootx_printf(" (log)");
-       bootx_printf("\nklimit           : 0x%x",(unsigned long)klimit);
+       bootx_printf("\nklimit           : 0x%x",(unsigned long)_end);
        bootx_printf("\nboot_info at     : 0x%x", bi);
        __asm__ __volatile__ ("mfmsr %0" : "=r" (flags));
        bootx_printf("\nMSR              : 0x%x", flags);
index adae2a6712e11f92860ea8d52b317cdef512e44b..bdfea6d6ab69a2aabe1a092207a30ef3ed0f2471 100644 (file)
@@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr)
         *   b __secondary_start_pmac_0 + nr*8
         */
        target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
-       patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+       patch_branch(vector, target, BRANCH_SET_LINK);
 
        /* Put some life in our friend */
        pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
@@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr)
        mdelay(1);
 
        /* Restore our exception vector */
-       patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+       patch_instruction(vector, ppc_inst(save_vector));
 
        local_irq_restore(flags);
        if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
index 619b093a0657bac262ae2c794f74e29f8e06ddf2..043eefbbdd286f8f8175c9689415b269a2c89ac5 100644 (file)
@@ -33,20 +33,6 @@ config PPC_MEMTRACE
          Enabling this option allows for runtime allocation of memory (RAM)
          for hardware tracing.
 
-config PPC_VAS
-       bool "IBM Virtual Accelerator Switchboard (VAS)"
-       depends on PPC_POWERNV && PPC_64K_PAGES
-       default y
-       help
-         This enables support for IBM Virtual Accelerator Switchboard (VAS).
-
-         VAS allows accelerators in co-processors like NX-GZIP and NX-842
-         to be accessible to kernel subsystems and user processes.
-
-         VAS adapters are found in POWER9 based systems.
-
-         If unsure, say N.
-
 config SCOM_DEBUGFS
        bool "Expose SCOM controllers via debugfs"
        depends on DEBUG_FS
index be2546b968165e639612eeb0860fc64f71556992..dc7b37c23b60dccf7c87d31ca9d8bb2aa7d61143 100644 (file)
@@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE)  += opal-memory-errors.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)     += memtrace.o
-obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o
+obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o vas-fault.o
 obj-$(CONFIG_OCXL_BASE)        += ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
 obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
index 01401e3da7ca146f39db6078f0a496a5b9244840..f812c74c61e5fc4eda6ba380f2f628ec86c84f1f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/percpu.h>
 #include <linux/jump_label.h>
+#include <asm/interrupt.h>
 #include <asm/opal-api.h>
 #include <asm/trace.h>
 #include <asm/asm-prototypes.h>
@@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
        bool mmu = (msr & (MSR_IR|MSR_DR));
        int64_t ret;
 
+       /* OPAL call / firmware may use SRR and/or HSRR */
+       srr_regs_clobbered();
+
        msr &= ~MSR_EE;
 
        if (unlikely(!mmu))
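
srr_regs_clobbered() pairs with the same SRR-validity tracking as regs_set_return_{ip,msr}(): OPAL or firmware may return via SRR/HSRR, so the cached values can no longer be trusted afterwards. A sketch of the helper (simplified from asm/interrupt.h; it compiles away on non-Book3S-64):

```c
/* Sketch, simplified from asm/interrupt.h: flag the SRR/HSRR pairs as
 * clobbered so interrupt exit rewrites them instead of reusing them. */
static inline void srr_regs_clobbered(void)
{
	local_paca->srr_valid = 0;
	local_paca->hsrr_valid = 0;
}
```
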
index 303d7c775740308af7651e2036c2545569106ebd..2835376e61a495c1f8c22b160fadfb232bc0055b 100644 (file)
@@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
         * Setup regs->nip to rfi into fixup address.
         */
        if (recover_addr)
-               regs->nip = recover_addr;
+               regs_set_return_ip(regs, recover_addr);
 
 out:
        return !!recover_addr;
index b18468dc31ff5be29fc9d18d48ca14c2b3787563..6bb3c52633fb73f62d62d58b29ffe626370215c1 100644 (file)
@@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
        return PCIBIOS_SUCCESSFUL;
 }
 
-#if CONFIG_EEH
+#ifdef CONFIG_EEH
 static bool pnv_pci_cfg_check(struct pci_dn *pdn)
 {
        struct eeh_dev *edev = NULL;
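
The one-character fix above matters because bool Kconfig symbols are simply absent from the preprocessor namespace when the option is off, and the kernel builds with -Wundef. `#if CONFIG_EEH` therefore warns (and silently evaluates to 0) in !EEH configs, whereas `#ifdef` tests definedness. A minimal standalone illustration (not kernel code):

```c
/* Minimal illustration, not kernel code; try: gcc -Wundef -c ifdef_demo.c
 * Assume Kconfig left CONFIG_EEH undefined (option disabled). */

#ifdef CONFIG_EEH			/* correct: asks "is the macro defined?" */
int eeh_only_code;
#endif

#if CONFIG_EEH				/* -Wundef warns: "CONFIG_EEH" is not
					 * defined, evaluates to 0 -- and the
					 * branch silently vanishes */
int eeh_only_code_too;
#endif

int keep_translation_unit_nonempty;
```
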
index 73207b53dc2b2333bac19727966c7903f83e81b9..7e98b00ea2e843939da0a1b97fa18b749622d984 100644 (file)
@@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0)
        }
 }
 
+static inline void update_power8_hid0(unsigned long hid0)
+{
+       /*
+        *  The HID0 update on Power8 should at the very least be
+        *  preceded by a SYNC instruction followed by an ISYNC
+        *  instruction
+        */
+       asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
+}
+
 static void unsplit_core(void)
 {
        u64 hid0, mask;
diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c
deleted file mode 100644 (file)
index 98ed5d8..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * VAS user space API for its accelerators (Only NX-GZIP is supported now)
- * Copyright (C) 2019 Haren Myneni, IBM Corp
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <asm/vas.h>
-#include <uapi/asm/vas-api.h>
-#include "vas.h"
-
-/*
- * The driver creates the device node that can be used as follows:
- * For NX-GZIP
- *
- *     fd = open("/dev/crypto/nx-gzip", O_RDWR);
- *     rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
- *     paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
- *     vas_copy(&crb, 0, 1);
- *     vas_paste(paste_addr, 0, 1);
- *     close(fd) or exit process to close window.
- *
- * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
- * copy/paste returns to the user space directly. So refer NX hardware
- * documententation for exact copy/paste usage and completion / error
- * conditions.
- */
-
-/*
- * Wrapper object for the nx-gzip device - there is just one instance of
- * this node for the whole system.
- */
-static struct coproc_dev {
-       struct cdev cdev;
-       struct device *device;
-       char *name;
-       dev_t devt;
-       struct class *class;
-       enum vas_cop_type cop_type;
-} coproc_device;
-
-struct coproc_instance {
-       struct coproc_dev *coproc;
-       struct vas_window *txwin;
-};
-
-static char *coproc_devnode(struct device *dev, umode_t *mode)
-{
-       return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
-}
-
-static int coproc_open(struct inode *inode, struct file *fp)
-{
-       struct coproc_instance *cp_inst;
-
-       cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
-       if (!cp_inst)
-               return -ENOMEM;
-
-       cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
-                                       cdev);
-       fp->private_data = cp_inst;
-
-       return 0;
-}
-
-static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
-{
-       void __user *uptr = (void __user *)arg;
-       struct vas_tx_win_attr txattr = {};
-       struct vas_tx_win_open_attr uattr;
-       struct coproc_instance *cp_inst;
-       struct vas_window *txwin;
-       int rc, vasid;
-
-       cp_inst = fp->private_data;
-
-       /*
-        * One window for file descriptor
-        */
-       if (cp_inst->txwin)
-               return -EEXIST;
-
-       rc = copy_from_user(&uattr, uptr, sizeof(uattr));
-       if (rc) {
-               pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
-               return -EFAULT;
-       }
-
-       if (uattr.version != 1) {
-               pr_err("Invalid version\n");
-               return -EINVAL;
-       }
-
-       vasid = uattr.vas_id;
-
-       vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type);
-
-       txattr.lpid = mfspr(SPRN_LPID);
-       txattr.pidr = mfspr(SPRN_PID);
-       txattr.user_win = true;
-       txattr.rsvd_txbuf_count = false;
-       txattr.pswid = false;
-
-       pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
-                               mfspr(SPRN_PID));
-
-       txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr);
-       if (IS_ERR(txwin)) {
-               pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__,
-                                       PTR_ERR(txwin));
-               return PTR_ERR(txwin);
-       }
-
-       cp_inst->txwin = txwin;
-
-       return 0;
-}
-
-static int coproc_release(struct inode *inode, struct file *fp)
-{
-       struct coproc_instance *cp_inst = fp->private_data;
-
-       if (cp_inst->txwin) {
-               vas_win_close(cp_inst->txwin);
-               cp_inst->txwin = NULL;
-       }
-
-       kfree(cp_inst);
-       fp->private_data = NULL;
-
-       /*
-        * We don't know here if user has other receive windows
-        * open, so we can't really call clear_thread_tidr().
-        * So, once the process calls set_thread_tidr(), the
-        * TIDR value sticks around until process exits, resulting
-        * in an extra copy in restore_sprs().
-        */
-
-       return 0;
-}
-
-static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-       struct coproc_instance *cp_inst = fp->private_data;
-       struct vas_window *txwin;
-       unsigned long pfn;
-       u64 paste_addr;
-       pgprot_t prot;
-       int rc;
-
-       txwin = cp_inst->txwin;
-
-       if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-               pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
-                               (vma->vm_end - vma->vm_start), PAGE_SIZE);
-               return -EINVAL;
-       }
-
-       /* Ensure instance has an open send window */
-       if (!txwin) {
-               pr_err("%s(): No send window open?\n", __func__);
-               return -EINVAL;
-       }
-
-       vas_win_paste_addr(txwin, &paste_addr, NULL);
-       pfn = paste_addr >> PAGE_SHIFT;
-
-       /* flags, page_prot from cxl_mmap(), except we want cachable */
-       vma->vm_flags |= VM_IO | VM_PFNMAP;
-       vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
-
-       prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
-
-       rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
-                       vma->vm_end - vma->vm_start, prot);
-
-       pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
-                       paste_addr, vma->vm_start, rc);
-
-       return rc;
-}
-
-static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-{
-       switch (cmd) {
-       case VAS_TX_WIN_OPEN:
-               return coproc_ioc_tx_win_open(fp, arg);
-       default:
-               return -EINVAL;
-       }
-}
-
-static struct file_operations coproc_fops = {
-       .open = coproc_open,
-       .release = coproc_release,
-       .mmap = coproc_mmap,
-       .unlocked_ioctl = coproc_ioctl,
-};
-
-/*
- * Supporting only nx-gzip coprocessor type now, but this API code
- * extended to other coprocessor types later.
- */
-int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
-                               const char *name)
-{
-       int rc = -EINVAL;
-       dev_t devno;
-
-       rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
-       if (rc) {
-               pr_err("Unable to allocate coproc major number: %i\n", rc);
-               return rc;
-       }
-
-       pr_devel("%s device allocated, dev [%i,%i]\n", name,
-                       MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
-
-       coproc_device.class = class_create(mod, name);
-       if (IS_ERR(coproc_device.class)) {
-               rc = PTR_ERR(coproc_device.class);
-               pr_err("Unable to create %s class %d\n", name, rc);
-               goto err_class;
-       }
-       coproc_device.class->devnode = coproc_devnode;
-       coproc_device.cop_type = cop_type;
-
-       coproc_fops.owner = mod;
-       cdev_init(&coproc_device.cdev, &coproc_fops);
-
-       devno = MKDEV(MAJOR(coproc_device.devt), 0);
-       rc = cdev_add(&coproc_device.cdev, devno, 1);
-       if (rc) {
-               pr_err("cdev_add() failed %d\n", rc);
-               goto err_cdev;
-       }
-
-       coproc_device.device = device_create(coproc_device.class, NULL,
-                       devno, NULL, name, MINOR(devno));
-       if (IS_ERR(coproc_device.device)) {
-               rc = PTR_ERR(coproc_device.device);
-               pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
-               goto err;
-       }
-
-       pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
-                       MINOR(devno));
-
-       return 0;
-
-err:
-       cdev_del(&coproc_device.cdev);
-err_cdev:
-       class_destroy(coproc_device.class);
-err_class:
-       unregister_chrdev_region(coproc_device.devt, 1);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(vas_register_coproc_api);
-
-void vas_unregister_coproc_api(void)
-{
-       dev_t devno;
-
-       cdev_del(&coproc_device.cdev);
-       devno = MKDEV(MAJOR(coproc_device.devt), 0);
-       device_destroy(coproc_device.class, devno);
-
-       class_destroy(coproc_device.class);
-       unregister_chrdev_region(coproc_device.devt, 1);
-}
-EXPORT_SYMBOL_GPL(vas_unregister_coproc_api);
index 41fa90d2f4abdfecbfde352f1d4fb5253eed83e4..3ce89a4b54be0d81a6057a1831984b495140b8ae 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <asm/vas.h>
 #include "vas.h"
 
 static struct dentry *vas_debugfs;
@@ -28,7 +29,7 @@ static char *cop_to_str(int cop)
 
 static int info_show(struct seq_file *s, void *private)
 {
-       struct vas_window *window = s->private;
+       struct pnv_vas_window *window = s->private;
 
        mutex_lock(&vas_mutex);
 
@@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private)
        if (!window->hvwc_map)
                goto unlock;
 
-       seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+       seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop),
                                        window->tx_win ? "Send" : "Receive");
-       seq_printf(s, "Pid : %d\n", vas_window_pid(window));
+       seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win));
 
 unlock:
        mutex_unlock(&vas_mutex);
@@ -47,7 +48,7 @@ unlock:
 
 DEFINE_SHOW_ATTRIBUTE(info);
 
-static inline void print_reg(struct seq_file *s, struct vas_window *win,
+static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win,
                        char *name, u32 reg)
 {
        seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
@@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win,
 
 static int hvwc_show(struct seq_file *s, void *private)
 {
-       struct vas_window *window = s->private;
+       struct pnv_vas_window *window = s->private;
 
        mutex_lock(&vas_mutex);
 
@@ -103,8 +104,10 @@ unlock:
 
 DEFINE_SHOW_ATTRIBUTE(hvwc);
 
-void vas_window_free_dbgdir(struct vas_window *window)
+void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win)
 {
+       struct vas_window *window =  &pnv_win->vas_win;
+
        if (window->dbgdir) {
                debugfs_remove_recursive(window->dbgdir);
                kfree(window->dbgname);
@@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window)
        }
 }
 
-void vas_window_init_dbgdir(struct vas_window *window)
+void vas_window_init_dbgdir(struct pnv_vas_window *window)
 {
        struct dentry *d;
 
        if (!window->vinst->dbgdir)
                return;
 
-       window->dbgname = kzalloc(16, GFP_KERNEL);
-       if (!window->dbgname)
+       window->vas_win.dbgname = kzalloc(16, GFP_KERNEL);
+       if (!window->vas_win.dbgname)
                return;
 
-       snprintf(window->dbgname, 16, "w%d", window->winid);
+       snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid);
 
-       d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
-       window->dbgdir = d;
+       d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir);
+       window->vas_win.dbgdir = d;
 
        debugfs_create_file("info", 0444, d, window, &info_fops);
        debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
index 3d21fce254b741c3b7bb52edcea631108d42aa9c..a7aabc18039eb4bd2bbe6e8876d482701d12341c 100644 (file)
  */
 #define VAS_FAULT_WIN_FIFO_SIZE        (4 << 20)
 
-static void dump_crb(struct coprocessor_request_block *crb)
-{
-       struct data_descriptor_entry *dde;
-       struct nx_fault_stamp *nx;
-
-       dde = &crb->source;
-       pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
-               be64_to_cpu(dde->address), be32_to_cpu(dde->length),
-               dde->count, dde->index, dde->flags);
-
-       dde = &crb->target;
-       pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
-               be64_to_cpu(dde->address), be32_to_cpu(dde->length),
-               dde->count, dde->index, dde->flags);
-
-       nx = &crb->stamp.nx;
-       pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
-               be32_to_cpu(nx->pswid),
-               be64_to_cpu(crb->stamp.nx.fault_storage_addr),
-               nx->flags, nx->fault_status);
-}
-
-/*
- * Update the CSB to indicate a translation error.
- *
- * User space will be polling on CSB after the request is issued.
- * If NX can handle the request without any issues, it updates CSB.
- * Whereas if NX encounters page fault, the kernel will handle the
- * fault and update CSB with translation error.
- *
- * If we are unable to update the CSB means copy_to_user failed due to
- * invalid csb_addr, send a signal to the process.
- */
-static void update_csb(struct vas_window *window,
-                       struct coprocessor_request_block *crb)
-{
-       struct coprocessor_status_block csb;
-       struct kernel_siginfo info;
-       struct task_struct *tsk;
-       void __user *csb_addr;
-       struct pid *pid;
-       int rc;
-
-       /*
-        * NX user space windows can not be opened for task->mm=NULL
-        * and faults will not be generated for kernel requests.
-        */
-       if (WARN_ON_ONCE(!window->mm || !window->user_win))
-               return;
-
-       csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
-
-       memset(&csb, 0, sizeof(csb));
-       csb.cc = CSB_CC_FAULT_ADDRESS;
-       csb.ce = CSB_CE_TERMINATION;
-       csb.cs = 0;
-       csb.count = 0;
-
-       /*
-        * NX operates and returns in BE format as defined CRB struct.
-        * So saves fault_storage_addr in BE as NX pastes in FIFO and
-        * expects user space to convert to CPU format.
-        */
-       csb.address = crb->stamp.nx.fault_storage_addr;
-       csb.flags = 0;
-
-       pid = window->pid;
-       tsk = get_pid_task(pid, PIDTYPE_PID);
-       /*
-        * Process closes send window after all pending NX requests are
-        * completed. In multi-thread applications, a child thread can
-        * open a window and can exit without closing it. May be some
-        * requests are pending or this window can be used by other
-        * threads later. We should handle faults if NX encounters
-        * pages faults on these requests. Update CSB with translation
-        * error and fault address. If csb_addr passed by user space is
-        * invalid, send SEGV signal to pid saved in window. If the
-        * child thread is not running, send the signal to tgid.
-        * Parent thread (tgid) will close this window upon its exit.
-        *
-        * pid and mm references are taken when window is opened by
-        * process (pid). So tgid is used only when child thread opens
-        * a window and exits without closing it.
-        */
-       if (!tsk) {
-               pid = window->tgid;
-               tsk = get_pid_task(pid, PIDTYPE_PID);
-               /*
-                * Parent thread (tgid) will be closing window when it
-                * exits. So should not get here.
-                */
-               if (WARN_ON_ONCE(!tsk))
-                       return;
-       }
-
-       /* Return if the task is exiting. */
-       if (tsk->flags & PF_EXITING) {
-               put_task_struct(tsk);
-               return;
-       }
-
-       kthread_use_mm(window->mm);
-       rc = copy_to_user(csb_addr, &csb, sizeof(csb));
-       /*
-        * User space polls on csb.flags (first byte). So add barrier
-        * then copy first byte with csb flags update.
-        */
-       if (!rc) {
-               csb.flags = CSB_V;
-               /* Make sure update to csb.flags is visible now */
-               smp_mb();
-               rc = copy_to_user(csb_addr, &csb, sizeof(u8));
-       }
-       kthread_unuse_mm(window->mm);
-       put_task_struct(tsk);
-
-       /* Success */
-       if (!rc)
-               return;
-
-       pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
-                       csb_addr, pid_vnr(pid));
-
-       clear_siginfo(&info);
-       info.si_signo = SIGSEGV;
-       info.si_errno = EFAULT;
-       info.si_code = SEGV_MAPERR;
-       info.si_addr = csb_addr;
-
-       /*
-        * process will be polling on csb.flags after request is sent to
-        * NX. So generally CSB update should not fail except when an
-        * application passes invalid csb_addr. So an error message will
-        * be displayed and leave it to user space whether to ignore or
-        * handle this signal.
-        */
-       rcu_read_lock();
-       rc = kill_pid_info(SIGSEGV, &info, pid);
-       rcu_read_unlock();
-
-       pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
-                       pid_vnr(pid), rc);
-}
-
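
The deleted update_csb() moves into common code as vas_update_csb() (called further down in vas_fault_thread_fn()), but its handshake is worth spelling out: the kernel fills in the whole CSB, issues smp_mb(), and only then sets the valid bit in the flags byte, because userspace polls that byte. A hypothetical consumer-side sketch (field names and layout here are illustrative and abbreviated; the real layout is struct coprocessor_status_block and the valid bit is CSB_V):

```c
/* Hypothetical consumer-side sketch of the CSB polling contract: poll
 * the flags byte the producer sets last, then fence before reading the
 * rest of the block. Layout abbreviated; not the real uapi struct. */
#include <stdatomic.h>
#include <stdint.h>

struct csb_sketch {
	uint8_t flags;			/* producer sets the valid bit here last */
	uint8_t cs, cc, ce;
	/* ... remaining CSB fields elided ... */
};

static int wait_for_csb(volatile struct csb_sketch *csb, uint8_t valid_bit)
{
	while (!(csb->flags & valid_bit))
		;	/* spin; a real consumer would bound or yield this */

	/* Pairs with the producer's smp_mb() before the flags update. */
	atomic_thread_fence(memory_order_acquire);

	return csb->cc;	/* completion code is now safe to read */
}
```
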
 static void dump_fifo(struct vas_instance *vinst, void *entry)
 {
        unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
@@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
        struct vas_instance *vinst = data;
        struct coprocessor_request_block *crb, *entry;
        struct coprocessor_request_block buf;
-       struct vas_window *window;
+       struct pnv_vas_window *window;
        unsigned long flags;
        void *fifo;
 
@@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
                                vinst->vas_id, vinst->fault_fifo, fifo,
                                vinst->fault_crbs);
 
-               dump_crb(crb);
+               vas_dump_crb(crb);
                window = vas_pswid_to_window(vinst,
                                be32_to_cpu(crb->stamp.nx.pswid));
 
@@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
 
                        WARN_ON_ONCE(1);
                } else {
-                       update_csb(window, crb);
+                       /*
+                        * NX sees faults only with user space windows.
+                        */
+                       if (window->user_win)
+                               vas_update_csb(crb, &window->vas_win.task_ref);
+                       else
+                               WARN_ON_ONCE(!window->user_win);
+
                        /*
                         * Return credit for send window after processing
                         * fault CRB.
@@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id)
 int vas_setup_fault_window(struct vas_instance *vinst)
 {
        struct vas_rx_win_attr attr;
+       struct vas_window *win;
 
        vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
        vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
@@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst)
        attr.lnotify_pid = mfspr(SPRN_PID);
        attr.lnotify_tid = mfspr(SPRN_PID);
 
-       vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
-                                       &attr);
-
-       if (IS_ERR(vinst->fault_win)) {
-               pr_err("VAS: Error %ld opening FaultWin\n",
-                       PTR_ERR(vinst->fault_win));
+       win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr);
+       if (IS_ERR(win)) {
+               pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win));
                kfree(vinst->fault_fifo);
-               return PTR_ERR(vinst->fault_win);
+               return PTR_ERR(win);
        }
 
+       vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win);
+
        pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
-                       vinst->fault_win->winid, attr.lnotify_lpid,
+                       vinst->fault_win->vas_win.winid, attr.lnotify_lpid,
                        attr.lnotify_pid, attr.lnotify_tid);
 
        return 0;
index a449b9f0c12efbebd67bc484d69c5c4d6c96489c..ca2e08f2ddc0a51d0e0f9070a3e1f4fb78aee2dc 100644 (file)
@@ -80,7 +80,7 @@ TRACE_EVENT(  vas_tx_win_open,
 TRACE_EVENT(   vas_paste_crb,
 
                TP_PROTO(struct task_struct *tsk,
-                       struct vas_window *win),
+                       struct pnv_vas_window *win),
 
                TP_ARGS(tsk, win),
 
@@ -96,7 +96,7 @@ TRACE_EVENT(  vas_paste_crb,
                TP_fast_assign(
                        __entry->pid = tsk->pid;
                        __entry->vasid = win->vinst->vas_id;
-                       __entry->winid = win->winid;
+                       __entry->winid = win->vas_win.winid;
                        __entry->paste_kaddr = (unsigned long)win->paste_kaddr
                ),
 
index 5f5fe63a3d1cebbde434d87cd8b9026b8d9b8dea..0f8d39fbf2b21a04e6c8b35db0bbdf975e3ab1aa 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <asm/switch_to.h>
 #include <asm/ppc-opcode.h>
+#include <asm/vas.h>
 #include "vas.h"
 #include "copy-paste.h"
 
  * Compute the paste address region for the window @window using the
  * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
  */
-void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
+void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
 {
        int winid;
        u64 base, shift;
 
        base = window->vinst->paste_base_addr;
        shift = window->vinst->paste_win_id_shift;
-       winid = window->winid;
+       winid = window->vas_win.winid;
 
        *addr  = base + (winid << shift);
        if (len)
@@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
        pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
 }
 
-static inline void get_hvwc_mmio_bar(struct vas_window *window,
+static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
                        u64 *start, int *len)
 {
        u64 pbaddr;
 
        pbaddr = window->vinst->hvwc_bar_start;
-       *start = pbaddr + window->winid * VAS_HVWC_SIZE;
+       *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE;
        *len = VAS_HVWC_SIZE;
 }
 
-static inline void get_uwc_mmio_bar(struct vas_window *window,
+static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
                        u64 *start, int *len)
 {
        u64 pbaddr;
 
        pbaddr = window->vinst->uwc_bar_start;
-       *start = pbaddr + window->winid * VAS_UWC_SIZE;
+       *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE;
        *len = VAS_UWC_SIZE;
 }
 
@@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window,
  * space. Unlike MMIO regions (map_mmio_region() below), paste region must
  * be mapped cache-able and is only applicable to send windows.
  */
-static void *map_paste_region(struct vas_window *txwin)
+static void *map_paste_region(struct pnv_vas_window *txwin)
 {
        int len;
        void *map;
@@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin)
        u64 start;
 
        name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
-                               txwin->winid);
+                               txwin->vas_win.winid);
        if (!name)
                goto free_name;
 
@@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len)
 /*
  * Unmap the paste address region for a window.
  */
-static void unmap_paste_region(struct vas_window *window)
+static void unmap_paste_region(struct pnv_vas_window *window)
 {
        int len;
        u64 busaddr_start;
@@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window)
  * path, just minimize the time we hold the mutex for now. We can add
  * a per-instance mutex later if necessary.
  */
-static void unmap_winctx_mmio_bars(struct vas_window *window)
+static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
 {
        int len;
        void *uwc_map;
@@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window)
  * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
  * Map these bus addresses and save the mapped kernel addresses in @window.
  */
-static int map_winctx_mmio_bars(struct vas_window *window)
+static int map_winctx_mmio_bars(struct pnv_vas_window *window)
 {
        int len;
        u64 start;
@@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window)
  *      registers are not sequential. And, we can only write to offsets
  *      with valid registers.
  */
-static void reset_window_regs(struct vas_window *window)
+static void reset_window_regs(struct pnv_vas_window *window)
 {
        write_hvwc_reg(window, VREG(LPID), 0ULL);
        write_hvwc_reg(window, VREG(PID), 0ULL);
@@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window)
  * want to add fields to vas_winctx and move the initialization to
  * init_vas_winctx_regs().
  */
-static void init_xlate_regs(struct vas_window *window, bool user_win)
+static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
 {
        u64 lpcr, val;
 
@@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win)
  *
  * TODO: Reserved (aka dedicated) send buffers are not supported yet.
  */
-static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
                                struct vas_winctx *winctx)
 {
        write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
@@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin,
  *     as a one-time task? That could work for NX but what about other
  *     receivers?  Let the receivers tell us the rx-fifo buffers for now.
  */
-static void init_winctx_regs(struct vas_window *window,
+static void init_winctx_regs(struct pnv_vas_window *window,
                             struct vas_winctx *winctx)
 {
        u64 val;
@@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida)
        return winid;
 }
 
-static void vas_window_free(struct vas_window *window)
+static void vas_window_free(struct pnv_vas_window *window)
 {
-       int winid = window->winid;
        struct vas_instance *vinst = window->vinst;
+       int winid = window->vas_win.winid;
 
        unmap_winctx_mmio_bars(window);
 
@@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window)
        vas_release_window_id(&vinst->ida, winid);
 }
 
-static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
 {
        int winid;
-       struct vas_window *window;
+       struct pnv_vas_window *window;
 
        winid = vas_assign_window_id(&vinst->ida);
        if (winid < 0)
@@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
                goto out_free;
 
        window->vinst = vinst;
-       window->winid = winid;
+       window->vas_win.winid = winid;
 
        if (map_winctx_mmio_bars(window))
                goto out_free;
@@ -562,7 +563,7 @@ out_free:
        return ERR_PTR(-ENOMEM);
 }
 
-static void put_rx_win(struct vas_window *rxwin)
+static void put_rx_win(struct pnv_vas_window *rxwin)
 {
        /* Better not be a send window! */
        WARN_ON_ONCE(rxwin->tx_win);
@@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin)
  *
  * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
  */
-static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
+                                            u32 pswid)
 {
        int vasid, winid;
-       struct vas_window *rxwin;
+       struct pnv_vas_window *rxwin;
 
        decode_pswid(pswid, &vasid, &winid);
 
@@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
 
        rxwin = vinst->windows[winid];
 
-       if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+       if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW)
                return ERR_PTR(-EINVAL);
 
        return rxwin;
@@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
  *
  * See also function header of set_vinst_win().
  */
-static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
                        enum vas_cop_type cop, u32 pswid)
 {
-       struct vas_window *rxwin;
+       struct pnv_vas_window *rxwin;
 
        mutex_lock(&vinst->mutex);
 
@@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
  * window, we also save the window in the ->rxwin[] table.
  */
 static void set_vinst_win(struct vas_instance *vinst,
-                       struct vas_window *window)
+                       struct pnv_vas_window *window)
 {
-       int id = window->winid;
+       int id = window->vas_win.winid;
 
        mutex_lock(&vinst->mutex);
 
@@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst,
         * unless its a user (FTW) window.
         */
        if (!window->user_win && !window->tx_win) {
-               WARN_ON_ONCE(vinst->rxwin[window->cop]);
-               vinst->rxwin[window->cop] = window;
+               WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
+               vinst->rxwin[window->vas_win.cop] = window;
        }
 
        WARN_ON_ONCE(vinst->windows[id] != NULL);
@@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst,
  * Clear this window from the table(s) of windows for this VAS instance.
  * See also function header of set_vinst_win().
  */
-static void clear_vinst_win(struct vas_window *window)
+static void clear_vinst_win(struct pnv_vas_window *window)
 {
-       int id = window->winid;
+       int id = window->vas_win.winid;
        struct vas_instance *vinst = window->vinst;
 
        mutex_lock(&vinst->mutex);
 
        if (!window->user_win && !window->tx_win) {
-               WARN_ON_ONCE(!vinst->rxwin[window->cop]);
-               vinst->rxwin[window->cop] = NULL;
+               WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]);
+               vinst->rxwin[window->vas_win.cop] = NULL;
        }
 
        WARN_ON_ONCE(vinst->windows[id] != window);
@@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window)
        mutex_unlock(&vinst->mutex);
 }
 
-static void init_winctx_for_rxwin(struct vas_window *rxwin,
+static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
                        struct vas_rx_win_attr *rxattr,
                        struct vas_winctx *winctx)
 {
@@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
 
        winctx->rx_fifo = rxattr->rx_fifo;
        winctx->rx_fifo_size = rxattr->rx_fifo_size;
-       winctx->wcreds_max = rxwin->wcreds_max;
+       winctx->wcreds_max = rxwin->vas_win.wcreds_max;
        winctx->pin_win = rxattr->pin_win;
 
        winctx->nx_win = rxattr->nx_win;
@@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
 struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
                        struct vas_rx_win_attr *rxattr)
 {
-       struct vas_window *rxwin;
+       struct pnv_vas_window *rxwin;
        struct vas_winctx winctx;
        struct vas_instance *vinst;
 
@@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
        rxwin = vas_window_alloc(vinst);
        if (IS_ERR(rxwin)) {
                pr_devel("Unable to allocate memory for Rx window\n");
-               return rxwin;
+               return (struct vas_window *)rxwin;
        }
 
        rxwin->tx_win = false;
        rxwin->nx_win = rxattr->nx_win;
        rxwin->user_win = rxattr->user_win;
-       rxwin->cop = cop;
-       rxwin->wcreds_max = rxattr->wcreds_max;
+       rxwin->vas_win.cop = cop;
+       rxwin->vas_win.wcreds_max = rxattr->wcreds_max;
 
        init_winctx_for_rxwin(rxwin, rxattr, &winctx);
        init_winctx_regs(rxwin, &winctx);
 
        set_vinst_win(vinst, rxwin);
 
-       return rxwin;
+       return &rxwin->vas_win;
 }
 EXPORT_SYMBOL_GPL(vas_rx_win_open);
 
@@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
 }
 EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
 
-static void init_winctx_for_txwin(struct vas_window *txwin,
+static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
                        struct vas_tx_win_attr *txattr,
                        struct vas_winctx *winctx)
 {
@@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
         */
        memset(winctx, 0, sizeof(struct vas_winctx));
 
-       winctx->wcreds_max = txwin->wcreds_max;
+       winctx->wcreds_max = txwin->vas_win.wcreds_max;
 
        winctx->user_win = txattr->user_win;
        winctx->nx_win = txwin->rxwin->nx_win;
@@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
 
        winctx->lpid = txattr->lpid;
        winctx->pidr = txattr->pidr;
-       winctx->rx_win_id = txwin->rxwin->winid;
+       winctx->rx_win_id = txwin->rxwin->vas_win.winid;
        /*
         * IRQ and fault window setup is successful. Set the fault window
         * for the send window so that it is ready to handle faults.
         */
        if (txwin->vinst->virq)
-               winctx->fault_win_id = txwin->vinst->fault_win->winid;
+               winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
 
        winctx->dma_type = VAS_DMA_TYPE_INJECT;
        winctx->tc_mode = txattr->tc_mode;
@@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
                winctx->irq_port = txwin->vinst->irq_port;
 
        winctx->pswid = txattr->pswid ? txattr->pswid :
-                       encode_pswid(txwin->vinst->vas_id, txwin->winid);
+                       encode_pswid(txwin->vinst->vas_id,
+                       txwin->vas_win.winid);
 }
 
 static bool tx_win_args_valid(enum vas_cop_type cop,
@@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                        struct vas_tx_win_attr *attr)
 {
        int rc;
-       struct vas_window *txwin;
-       struct vas_window *rxwin;
+       struct pnv_vas_window *txwin;
+       struct pnv_vas_window *rxwin;
        struct vas_winctx winctx;
        struct vas_instance *vinst;
 
@@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
        rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
        if (IS_ERR(rxwin)) {
                pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
-               return rxwin;
+               return (struct vas_window *)rxwin;
        }
 
        txwin = vas_window_alloc(vinst);
@@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                goto put_rxwin;
        }
 
-       txwin->cop = cop;
+       txwin->vas_win.cop = cop;
        txwin->tx_win = 1;
        txwin->rxwin = rxwin;
        txwin->nx_win = txwin->rxwin->nx_win;
        txwin->user_win = attr->user_win;
-       txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+       txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
        init_winctx_for_txwin(txwin, attr, &winctx);
 
@@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                        rc = -ENODEV;
                        goto free_window;
                }
-
-               /*
-                * Window opened by a child thread may not be closed when
-                * it exits. So take reference to its pid and release it
-                * when the window is free by parent thread.
-                * Acquire a reference to the task's pid to make sure
-                * pid will not be re-used - needed only for multithread
-                * applications.
-                */
-               txwin->pid = get_task_pid(current, PIDTYPE_PID);
-               /*
-                * Acquire a reference to the task's mm.
-                */
-               txwin->mm = get_task_mm(current);
-
-               if (!txwin->mm) {
-                       put_pid(txwin->pid);
-                       pr_err("VAS: pid(%d): mm_struct is not found\n",
-                                       current->pid);
-                       rc = -EPERM;
+               rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+               if (rc)
                        goto free_window;
-               }
 
-               mmgrab(txwin->mm);
-               mmput(txwin->mm);
-               mm_context_add_vas_window(txwin->mm);
-               /*
-                * Process closes window during exit. In the case of
-                * multithread application, the child thread can open
-                * window and can exit without closing it. Expects parent
-                * thread to use and close the window. So do not need
-                * to take pid reference for parent thread.
-                */
-               txwin->tgid = find_get_pid(task_tgid_vnr(current));
-               /*
-                * Even a process that has no foreign real address mapping can
-                * use an unpaired COPY instruction (to no real effect). Issue
-                * CP_ABORT to clear any pending COPY and prevent a covert
-                * channel.
-                *
-                * __switch_to() will issue CP_ABORT on future context switches
-                * if process / thread has any open VAS window (Use
-                * current->mm->context.vas_windows).
-                */
-               asm volatile(PPC_CP_ABORT);
+               vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
        }
 
        set_vinst_win(vinst, txwin);
 
-       return txwin;
+       return &txwin->vas_win;
 
 free_window:
        vas_window_free(txwin);
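
The pid/mm/tgid bookkeeping deleted above becomes the common get_vas_user_win_ref()/vas_user_win_add_mm_context() helpers so PowerVM can reuse them. A sketch of what the reference-taking half does, reconstructed from the removed lines (simplified):

```c
/* Sketch reconstructed from the deleted block above (simplified). */
int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
{
	/* Pin the opener's pid so it cannot be reused by another task. */
	task_ref->pid = get_task_pid(current, PIDTYPE_PID);

	task_ref->mm = get_task_mm(current);
	if (!task_ref->mm) {
		put_pid(task_ref->pid);
		pr_err("VAS: pid(%d): mm_struct is not found\n", current->pid);
		return -EPERM;
	}

	mmgrab(task_ref->mm);	/* keep the mm_struct itself alive... */
	mmput(task_ref->mm);	/* ...but drop the user count taken above */

	/* Parent (tgid) closes windows a child thread left open on exit. */
	task_ref->tgid = find_get_pid(task_tgid_vnr(current));

	return 0;
}
```
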
@@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset)
 EXPORT_SYMBOL_GPL(vas_copy_crb);
 
 #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
-int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
 {
+       struct pnv_vas_window *txwin;
        int rc;
        void *addr;
        uint64_t val;
 
+       txwin = container_of(vwin, struct pnv_vas_window, vas_win);
        trace_vas_paste_crb(current, txwin);
 
        /*
@@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
        else
                rc = -EINVAL;
 
-       pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+       pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
                        read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
 
        return rc;
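
This hunk shows the conversion pattern used throughout the file: struct vas_window keeps only the platform-independent state and is embedded in the powernv-private struct pnv_vas_window, so exported entry points take the common type and use container_of() to recover the private one. In outline (a sketch of the layering; fields elided, and to_pnv_win() is an illustrative helper, not a kernel function):

```c
/* Sketch of the embedding this series introduces (fields elided). */
struct vas_window {
	u32 winid;
	enum vas_cop_type cop;
	struct vas_user_win_ref task_ref;
	/* ... other common fields ... */
};

struct pnv_vas_window {
	struct vas_window vas_win;	/* embedded common part */
	struct vas_instance *vinst;
	bool tx_win, nx_win, user_win;
	/* ... other powernv-only fields ... */
};

/* Exported API takes the common type; powernv recovers its own. */
static inline struct pnv_vas_window *to_pnv_win(struct vas_window *vwin)
{
	return container_of(vwin, struct pnv_vas_window, vas_win);
}
```
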
@@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb);
  *     user space. (NX-842 driver waits for CSB and Fast thread-wakeup
  *     doesn't use credit checking).
  */
-static void poll_window_credits(struct vas_window *window)
+static void poll_window_credits(struct pnv_vas_window *window)
 {
        u64 val;
        int creds, mode;
@@ -1217,7 +1182,7 @@ retry:
         *       and issue CRB Kill to stop all pending requests. Need only
         *       if there is a bug in NX or fault handling in kernel.
         */
-       if (creds < window->wcreds_max) {
+       if (creds < window->vas_win.wcreds_max) {
                val = 0;
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(msecs_to_jiffies(10));
@@ -1228,7 +1193,8 @@ retry:
                 */
                if (!(count % 1000))
                        pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
-                               vas_window_pid(window), window->winid,
+                               vas_window_pid(&window->vas_win),
+                               window->vas_win.winid,
                                creds, count);
 
                goto retry;
@@ -1240,7 +1206,7 @@ retry:
  * short time to queue a CRB, so window should not be busy for too long.
  * Trying 5ms intervals.
  */
-static void poll_window_busy_state(struct vas_window *window)
+static void poll_window_busy_state(struct pnv_vas_window *window)
 {
        int busy;
        u64 val;
@@ -1260,7 +1226,8 @@ retry:
                 */
                if (!(count % 1000))
                        pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
-                               vas_window_pid(window), window->winid, count);
+                               vas_window_pid(&window->vas_win),
+                               window->vas_win.winid, count);
 
                goto retry;
        }
@@ -1282,7 +1249,7 @@ retry:
  *     casting out becomes necessary we should consider offloading the
  *     job to a worker thread, so the window close can proceed quickly.
  */
-static void poll_window_castout(struct vas_window *window)
+static void poll_window_castout(struct pnv_vas_window *window)
 {
        /* stub for now */
 }
@@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window)
  * Unpin and close a window so no new requests are accepted and the
  * hardware can evict this window from cache if necessary.
  */
-static void unpin_close_window(struct vas_window *window)
+static void unpin_close_window(struct pnv_vas_window *window)
 {
        u64 val;
 
@@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window)
  *
  * Besides the hardware, the kernel has some bookkeeping of course.
  */
-int vas_win_close(struct vas_window *window)
+int vas_win_close(struct vas_window *vwin)
 {
-       if (!window)
+       struct pnv_vas_window *window;
+
+       if (!vwin)
                return 0;
 
+       window = container_of(vwin, struct pnv_vas_window, vas_win);
+
        if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
                pr_devel("Attempting to close an active Rx window!\n");
                WARN_ON_ONCE(1);
@@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window)
        /* if send window, drop reference to matching receive window */
        if (window->tx_win) {
                if (window->user_win) {
-                       /* Drop references to pid and mm */
-                       put_pid(window->pid);
-                       if (window->mm) {
-                               mm_context_remove_vas_window(window->mm);
-                               mmdrop(window->mm);
-                       }
+                       put_vas_user_win_ref(&vwin->task_ref);
+                       mm_context_remove_vas_window(vwin->task_ref.mm);
                }
                put_rx_win(window->rxwin);
        }
@@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close);
  * - The kernel will return credit on fault window after reading entry
  *   from fault FIFO.
  */
-void vas_return_credit(struct vas_window *window, bool tx)
+void vas_return_credit(struct pnv_vas_window *window, bool tx)
 {
        uint64_t val;
 
@@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx)
        }
 }
 
-struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
                uint32_t pswid)
 {
-       struct vas_window *window;
+       struct pnv_vas_window *window;
        int winid;
 
        if (!pswid) {
@@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
         * by NX).
         */
        if (!window->tx_win || !window->user_win || !window->nx_win ||
-                       window->cop == VAS_COP_TYPE_FAULT ||
-                       window->cop == VAS_COP_TYPE_FTW) {
+                       window->vas_win.cop == VAS_COP_TYPE_FAULT ||
+                       window->vas_win.cop == VAS_COP_TYPE_FTW) {
                pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
                        winid, window->tx_win, window->user_win,
-                       window->nx_win, window->cop);
+                       window->nx_win, window->vas_win.cop);
                WARN_ON(1);
        }
 
        return window;
 }
+
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+                               enum vas_cop_type cop_type)
+{
+       struct vas_tx_win_attr txattr = {};
+
+       vas_init_tx_win_attr(&txattr, cop_type);
+
+       txattr.lpid = mfspr(SPRN_LPID);
+       txattr.pidr = mfspr(SPRN_PID);
+       txattr.user_win = true;
+       txattr.rsvd_txbuf_count = false;
+       txattr.pswid = false;
+
+       pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
+                               mfspr(SPRN_PID));
+
+       return vas_tx_win_open(vas_id, cop_type, &txattr);
+}
+
+static u64 vas_user_win_paste_addr(struct vas_window *txwin)
+{
+       struct pnv_vas_window *win;
+       u64 paste_addr;
+
+       win = container_of(txwin, struct pnv_vas_window, vas_win);
+       vas_win_paste_addr(win, &paste_addr, NULL);
+
+       return paste_addr;
+}
+
+static int vas_user_win_close(struct vas_window *txwin)
+{
+       vas_win_close(txwin);
+
+       return 0;
+}
+
+static const struct vas_user_win_ops vops = {
+       .open_win       =       vas_user_win_open,
+       .paste_addr     =       vas_user_win_paste_addr,
+       .close_win      =       vas_user_win_close,
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported now, but this API
+ * code can be extended to other coprocessor types later.
+ */
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+                            const char *name)
+{
+       return vas_register_coproc_api(mod, cop_type, name, &vops);
+}
+EXPORT_SYMBOL_GPL(vas_register_api_powernv);
+
+void vas_unregister_api_powernv(void)
+{
+       vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);
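
For context, a sketch of how a coprocessor driver might consume this registration API; the "nx-gzip" name and VAS_COP_TYPE_GZIP value are illustrative assumptions here, since the actual NX driver wiring is outside this excerpt:

	/* Hypothetical module init/exit for an NX coprocessor driver */
	static int __init nx_gzip_example_init(void)
	{
		/* Exposes a user-space interface backed by the vops above */
		return vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
						"nx-gzip");
	}

	static void __exit nx_gzip_example_exit(void)
	{
		vas_unregister_api_powernv();
	}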
index c7db3190baca4976e0053de0b8c09f44168c828f..8bb08e395de059a16813c59fed51a52da95e5c09 100644 (file)
@@ -334,11 +334,11 @@ struct vas_instance {
        int fifo_in_progress;   /* To wake up thread or return IRQ_HANDLED */
        spinlock_t fault_lock;  /* Protects fifo_in_progress update */
        void *fault_fifo;
-       struct vas_window *fault_win; /* Fault window */
+       struct pnv_vas_window *fault_win; /* Fault window */
 
        struct mutex mutex;
-       struct vas_window *rxwin[VAS_COP_TYPE_MAX];
-       struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+       struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX];
+       struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP];
 
        char *name;
        char *dbgname;
@@ -346,32 +346,24 @@ struct vas_instance {
 };
 
 /*
- * In-kernel state a VAS window. One per window.
+ * In-kernel state of a VAS window on PowerNV. One per window.
  */
-struct vas_window {
+struct pnv_vas_window {
+       struct vas_window vas_win;
        /* Fields common to send and receive windows */
        struct vas_instance *vinst;
-       int winid;
        bool tx_win;            /* True if send window */
        bool nx_win;            /* True if NX window */
        bool user_win;          /* True if user space window */
        void *hvwc_map;         /* HV window context */
        void *uwc_map;          /* OS/User window context */
-       struct pid *pid;        /* Linux process id of owner */
-       struct pid *tgid;       /* Thread group ID of owner */
-       struct mm_struct *mm;   /* Linux process mm_struct */
-       int wcreds_max;         /* Window credits */
-
-       char *dbgname;
-       struct dentry *dbgdir;
 
        /* Fields applicable only to send windows */
        void *paste_kaddr;
        char *paste_addr_name;
-       struct vas_window *rxwin;
+       struct pnv_vas_window *rxwin;
 
-       /* Feilds applicable only to receive windows */
-       enum vas_cop_type cop;
+       /* Fields applicable only to receive windows */
        atomic_t num_txwins;
 };
 
@@ -430,32 +422,32 @@ extern struct mutex vas_mutex;
 extern struct vas_instance *find_vas_instance(int vasid);
 extern void vas_init_dbgdir(void);
 extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
-extern void vas_window_init_dbgdir(struct vas_window *win);
-extern void vas_window_free_dbgdir(struct vas_window *win);
+extern void vas_window_init_dbgdir(struct pnv_vas_window *win);
+extern void vas_window_free_dbgdir(struct pnv_vas_window *win);
 extern int vas_setup_fault_window(struct vas_instance *vinst);
 extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
 extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
-extern void vas_return_credit(struct vas_window *window, bool tx);
-extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+extern void vas_return_credit(struct pnv_vas_window *window, bool tx);
+extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
                                                uint32_t pswid);
-extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
-                                       int *len);
+extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr,
+                               int *len);
 
 static inline int vas_window_pid(struct vas_window *window)
 {
-       return pid_vnr(window->pid);
+       return pid_vnr(window->task_ref.pid);
 }
 
-static inline void vas_log_write(struct vas_window *win, char *name,
+static inline void vas_log_write(struct pnv_vas_window *win, char *name,
                        void *regptr, u64 val)
 {
        if (val)
                pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
-                               win->tx_win ? "Tx" : "Rx", win->winid, name,
-                               regptr, val);
+                               win->tx_win ? "Tx" : "Rx", win->vas_win.winid,
+                               name, regptr, val);
 }
 
-static inline void write_uwc_reg(struct vas_window *win, char *name,
+static inline void write_uwc_reg(struct pnv_vas_window *win, char *name,
                        s32 reg, u64 val)
 {
        void *regptr;
@@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name,
        out_be64(regptr, val);
 }
 
-static inline void write_hvwc_reg(struct vas_window *win, char *name,
+static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name,
                        s32 reg, u64 val)
 {
        void *regptr;
@@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name,
        out_be64(regptr, val);
 }
 
-static inline u64 read_hvwc_reg(struct vas_window *win,
+static inline u64 read_hvwc_reg(struct pnv_vas_window *win,
                        char *name __maybe_unused, s32 reg)
 {
        return in_be64(win->hvwc_map+reg);
index 4d0535cc7946f810c49dc1a8765cbf0af31ed3c3..a4048b8c8c507cfeff218e77896a677074a15754 100644 (file)
@@ -86,6 +86,15 @@ config PS3_SYS_MANAGER
          This support is required for PS3 system control.  In
          general, all users will say Y or M.
 
+config PS3_VERBOSE_RESULT
+       bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED
+       depends on PPC_PS3
+       help
+         Enables more verbose log messages for LV1 hypercall results.
+
+         If in doubt, say N here and reduce the size of the kernel by a
+         small amount.
+
 config PS3_REPOSITORY_WRITE
        bool "PS3 Repository write support" if PS3_ADVANCED
        depends on PPC_PS3
index d094321964fb035ce74f8ebbf87b95c092c284c8..a81eac35d9009d20d0fd96e8df7f90eadc9d1b90 100644 (file)
@@ -6,6 +6,7 @@
  *  Copyright 2006 Sony Corp.
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
@@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
        enum ps3_dma_region_type region_type, void *addr, unsigned long len)
 {
        unsigned long lpar_addr;
+       int result;
 
        lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0;
 
@@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
                r->offset -= map.r1.offset;
        r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
 
+       dev->core.dma_mask = &r->dma_mask;
+
+       result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32));
+
+       if (result < 0) {
+               dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n",
+                       __func__, __LINE__, result);
+               return result;
+       }
+
        switch (dev->dev_type) {
        case PS3_DEVICE_TYPE_SB:
                r->region_ops =  (USE_DYNAMIC_DMA)
index e9ae5dd03593e256cc5770312607332567132875..3de9145c20bc0bb5d189208fcab1fae21f41cce9 100644 (file)
@@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex);
 EXPORT_SYMBOL_GPL(ps3_gpu_mutex);
 
 static union ps3_firmware_version ps3_firmware_version;
+static char ps3_firmware_version_str[16];
 
 void ps3_get_firmware_version(union ps3_firmware_version *v)
 {
@@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
        return lv1_set_dabr(dabr, dabrx) ? -1 : 0;
 }
 
+static ssize_t ps3_fw_version_show(struct kobject *kobj,
+       struct kobj_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%s", ps3_firmware_version_str);
+}
+
+static int __init ps3_setup_sysfs(void)
+{
+       static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO,
+               ps3_fw_version_show, NULL);
+       static struct kobject *kobj;
+       int result;
+
+       kobj = kobject_create_and_add("ps3", firmware_kobj);
+
+       if (!kobj) {
+               pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__,
+                       __LINE__);
+               return -ENOMEM;
+       }
+
+       result = sysfs_create_file(kobj, &attr.attr);
+
+       if (result) {
+               pr_warn("%s:%d: sysfs_create_file failed.\n", __func__,
+                       __LINE__);
+               kobject_put(kobj);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+core_initcall(ps3_setup_sysfs);
+
 static void __init ps3_setup_arch(void)
 {
        u64 tmp;
@@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void)
 
        lv1_get_version_info(&ps3_firmware_version.raw, &tmp);
 
-       printk(KERN_INFO "PS3 firmware version %u.%u.%u\n",
-              ps3_firmware_version.major, ps3_firmware_version.minor,
-              ps3_firmware_version.rev);
+       snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str),
+               "%u.%u.%u", ps3_firmware_version.major,
+               ps3_firmware_version.minor, ps3_firmware_version.rev);
+
+       printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str);
 
        ps3_spu_set_platform();
 
index b431f41c6cb53abe2dd256c07e97c865903b51f1..1a5665875165b7138282b109489c369eedd7b926 100644 (file)
@@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev)
        result = lv1_open_device(dev->bus_id, dev->dev_id, 0);
 
        if (result) {
-               pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__,
-                       __LINE__, ps3_result(result));
-                       result = -EPERM;
+               pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n",
+                       __func__, __LINE__, dev->match_id, dev->match_sub_id,
+                       dev_name(&dev->core), ps3_result(result));
+               result = -EPERM;
        }
 
 done:
@@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev)
        result = lv1_gpu_open(0);
 
        if (result) {
-               pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__,
+               pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__,
                        __LINE__, ps3_result(result));
                        result = -EPERM;
        }
index c8a2b0b05ac008aafc69c5d2a9be5f70bd565583..4cda0ef87be00405609b22bc3983a315378e8fee 100644 (file)
@@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM)         += svm.o
 obj-$(CONFIG_FA_DUMP)          += rtas-fadump.o
 
 obj-$(CONFIG_SUSPEND)          += suspend.o
+obj-$(CONFIG_PPC_VAS)          += vas.o
index 3ac70790ec7aa54a2f15418b989b3436dbbafe06..b1f01ac0c29e3f670a386c1d43b7c2a6054907d0 100644 (file)
@@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index)
 {
        int dr_status, rc;
 
-       rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-                      DR_ENTITY_SENSE, drc_index);
+       rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
        if (rc || dr_status != DR_ENTITY_UNUSABLE)
                return -1;
 
@@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index)
 {
        int dr_status, rc;
 
-       rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-                      DR_ENTITY_SENSE, drc_index);
+       rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
        if (rc || dr_status != DR_ENTITY_PRESENT)
                return -1;
 
@@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index)
 {
        int dr_status, rc;
 
-       rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-                               DR_ENTITY_SENSE, drc_index);
+       rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
        if (rc || dr_status != DR_ENTITY_PRESENT)
                return -1;
 
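
The three hunks above replace an open-coded rtas_call() sequence with the rtas_get_sensor() helper, which centralizes the token lookup and the RTAS busy-retry handling. Roughly, as a simplified sketch (the real helper also maps RTAS status codes to errnos):

	static int rtas_get_sensor_sketch(int sensor, int index, int *state)
	{
		int token = rtas_token("get-sensor-state");
		int rc;

		if (token == RTAS_UNKNOWN_SERVICE)
			return -ENOENT;

		do {
			rc = rtas_call(token, 2, 2, state, sensor, index);
		} while (rtas_busy_delay(rc));	/* sleeps and retries on busy */

		return rc;
	}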
index 8377f1f7c78edd68ff2cd08e77d06fb719348df8..377d852f5a9aa9e7fce8b969590d336a76740900 100644 (file)
@@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np)
 
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
-       if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+       if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+               !(lmb->flags & DRCONF_MEM_ASSIGNED))
                return false;
 
 #ifdef CONFIG_FA_DUMP
@@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 {
        struct drmem_lmb *lmb;
-       int lmbs_removed = 0;
+       int lmbs_reserved = 0;
        int lmbs_available = 0;
        int rc;
 
@@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
                 */
                drmem_mark_lmb_reserved(lmb);
 
-               lmbs_removed++;
-               if (lmbs_removed == lmbs_to_remove)
+               lmbs_reserved++;
+               if (lmbs_reserved == lmbs_to_remove)
                        break;
        }
 
-       if (lmbs_removed != lmbs_to_remove) {
+       if (lmbs_reserved != lmbs_to_remove) {
                pr_err("Memory hot-remove failed, adding LMB's back\n");
 
                for_each_drmem_lmb(lmb) {
@@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
                                       lmb->drc_index);
 
                        drmem_remove_lmb_reservation(lmb);
+
+                       lmbs_reserved--;
+                       if (lmbs_reserved == 0)
+                               break;
                }
 
                rc = -EINVAL;
@@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
                                lmb->base_addr);
 
                        drmem_remove_lmb_reservation(lmb);
+
+                       lmbs_reserved--;
+                       if (lmbs_reserved == 0)
+                               break;
                }
                rc = 0;
        }
@@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
 static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
        struct drmem_lmb *lmb, *start_lmb, *end_lmb;
-       int lmbs_available = 0;
        int rc;
 
        pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
@@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
        if (rc)
                return -EINVAL;
 
-       /* Validate that there are enough LMBs to satisfy the request */
+       /*
+        * Validate that all LMBs in range are not reserved. Note that it
+        * is ok if they are !ASSIGNED since our goal here is to remove the
+        * LMB range, regardless of whether some LMBs were already removed
+        * for any other reason.
+        *
+        * This is in contrast to what is done in remove_by_count(), where we
+        * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+        * because we want to remove a fixed amount of LMBs in that function.
+        */
        for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
-               if (lmb->flags & DRCONF_MEM_RESERVED)
-                       break;
-
-               lmbs_available++;
+               if (lmb->flags & DRCONF_MEM_RESERVED) {
+                       pr_err("Memory at %llx (drc index %x) is reserved\n",
+                               lmb->base_addr, lmb->drc_index);
+                       return -EINVAL;
+               }
        }
 
-       if (lmbs_available < lmbs_to_remove)
-               return -EINVAL;
-
        for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+               /*
+                * dlpar_remove_lmb() will error out if the LMB is already
+                * !ASSIGNED, but this case is a no-op for us.
+                */
                if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
                        continue;
 
@@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
                        if (!drmem_lmb_reserved(lmb))
                                continue;
 
+                       /*
+                        * Setting the isolation state of an UNISOLATED/CONFIGURED
+                        * device to UNISOLATE is a no-op, but the hypervisor can
+                        * use it as a hint that the LMB removal failed.
+                        */
+                       dlpar_unisolate_drc(lmb->drc_index);
+
                        rc = dlpar_add_lmb(lmb);
                        if (rc)
                                pr_err("Failed to add LMB, drc index %x\n",
@@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np)
 {
        return 0;
 }
-static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
-{
-       return -EOPNOTSUPP;
-}
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
        return -EOPNOTSUPP;
@@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 {
        struct drmem_lmb *lmb;
        int lmbs_available = 0;
-       int lmbs_added = 0;
+       int lmbs_reserved = 0;
        int rc;
 
        pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
@@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 
        /* Validate that there are enough LMBs to satisfy the request */
        for_each_drmem_lmb(lmb) {
+               if (lmb->flags & DRCONF_MEM_RESERVED)
+                       continue;
+
                if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
                        lmbs_available++;
 
@@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
                 * requested LMBs cannot be added.
                 */
                drmem_mark_lmb_reserved(lmb);
-
-               lmbs_added++;
-               if (lmbs_added == lmbs_to_add)
+               lmbs_reserved++;
+               if (lmbs_reserved == lmbs_to_add)
                        break;
        }
 
-       if (lmbs_added != lmbs_to_add) {
+       if (lmbs_reserved != lmbs_to_add) {
                pr_err("Memory hot-add failed, removing any added LMBs\n");
 
                for_each_drmem_lmb(lmb) {
@@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
                                dlpar_release_drc(lmb->drc_index);
 
                        drmem_remove_lmb_reservation(lmb);
+                       lmbs_reserved--;
+
+                       if (lmbs_reserved == 0)
+                               break;
                }
                rc = -EINVAL;
        } else {
@@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
                        pr_debug("Memory at %llx (drc index %x) was hot-added\n",
                                 lmb->base_addr, lmb->drc_index);
                        drmem_remove_lmb_reservation(lmb);
+                       lmbs_reserved--;
+
+                       if (lmbs_reserved == 0)
+                               break;
                }
                rc = 0;
        }
@@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index)
 static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 {
        struct drmem_lmb *lmb, *start_lmb, *end_lmb;
-       int lmbs_available = 0;
        int rc;
 
        pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
@@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 
        /* Validate that the LMBs in this range are not reserved */
        for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
-               if (lmb->flags & DRCONF_MEM_RESERVED)
-                       break;
-
-               lmbs_available++;
+               /* Fail immediately if the whole range can't be hot-added */
+               if (lmb->flags & DRCONF_MEM_RESERVED) {
+                       pr_err("Memory at %llx (drc index %x) is reserved\n",
+                                       lmb->base_addr, lmb->drc_index);
+                       return -EINVAL;
+               }
        }
 
-       if (lmbs_available < lmbs_to_add)
-               return -EINVAL;
-
        for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
                if (lmb->flags & DRCONF_MEM_ASSIGNED)
                        continue;
index 8a2b8d64265bc2c252b8c526f8932bcccf6d03e7..ab9fc65068617b570d881bd85ed5880e76aae13a 100644 (file)
@@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace)
        mfcr    r0
        stw     r0,8(r1)
        HVSC                            /* invoke the hypervisor */
+
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
        blr                             /* return r3 = status */
@@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets)
        HCALL_BRANCH(plpar_hcall_norets_trace)
        HVSC                            /* invoke the hypervisor */
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
        blr                             /* return r3 = status */
@@ -129,6 +136,10 @@ plpar_hcall_norets_trace:
        HCALL_INST_PRECALL(R4)
        HVSC
        HCALL_INST_POSTCALL_NORETS
+
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
        blr
@@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall)
        std     r6, 16(r12)
        std     r7, 24(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
@@ -188,6 +202,9 @@ plpar_hcall_trace:
 
        HCALL_INST_POSTCALL(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
@@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw)
        std     r6, 16(r12)
        std     r7, 24(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
@@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9)
        std     r11,56(r12)
        std     r0, 64(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
@@ -300,6 +323,9 @@ plpar_hcall9_trace:
 
        HCALL_INST_POSTCALL(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
@@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw)
        std     r11,56(r12)
        std     r0, 64(r12)
 
+       li      r4,0
+       stb     r4,PACASRR_VALID(r13)
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
index ef26fe40efb03cfafdb86c1a20fc3f194e208489..f48e87ac89c9b201c2ab93da1d9b5e534433a45f 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/papr_pdsm.h>
 #include <asm/mce.h>
+#include <asm/unaligned.h>
 
 #define BIND_ANY_ADDR (~0ul)
 
@@ -114,6 +115,9 @@ struct papr_scm_priv {
        /* Health information for the dimm */
        u64 health_bitmap;
 
+       /* Holds the last known dirty shutdown counter value */
+       u64 dirty_shutdown_counter;
+
        /* length of the stat buffer as expected by phyp */
        size_t stat_buffer_len;
 };
@@ -260,7 +264,7 @@ err_out:
  * Query the Dimm performance stats from PHYP and copy them (if returned) to
 * provided struct papr_scm_perf_stats instance 'stats' that can hold at least
  * (num_stats + header) bytes.
- * - If buff_stats == NULL the return value is the size in byes of the buffer
+ * - If buff_stats == NULL the return value is the size in bytes of the buffer
  * needed to hold all supported performance-statistics.
  * - If buff_stats != NULL and num_stats == 0 then we copy all known
  * performance-statistics to 'buff_stat' and expect to be large enough to
@@ -310,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
                dev_err(&p->pdev->dev,
                        "Unknown performance stats, Err:0x%016lX\n", ret[0]);
                return -ENOENT;
+       } else if (rc == H_AUTHORITY) {
+               dev_info(&p->pdev->dev,
+                        "Permission denied while accessing performance stats");
+               return -EPERM;
+       } else if (rc == H_UNSUPPORTED) {
+               dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+               return -EOPNOTSUPP;
        } else if (rc != H_SUCCESS) {
                dev_err(&p->pdev->dev,
                        "Failed to query performance stats, Err:%lld\n", rc);
@@ -596,6 +607,16 @@ free_stats:
        return rc;
 }
 
+/* Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+                        union nd_pdsm_payload *payload)
+{
+       payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+       payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+       return sizeof(struct nd_papr_pdsm_health);
+}
+
 /* Fetch the DIMM health info and populate it in provided package. */
 static int papr_pdsm_health(struct papr_scm_priv *p,
                            union nd_pdsm_payload *payload)
@@ -639,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
 
        /* Populate the fuel gauge meter in the payload */
        papr_pdsm_fuel_gauge(p, payload);
+       /* Populate the dirty-shutdown-counter field */
+       papr_pdsm_dsc(p, payload);
 
        rc = sizeof(struct nd_papr_pdsm_health);
 
@@ -900,15 +923,41 @@ static ssize_t flags_show(struct device *dev,
 }
 DEVICE_ATTR_RO(flags);
 
+static ssize_t dirty_shutdown_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *dimm = to_nvdimm(dev);
+       struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+       return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+                                        struct attribute *attr, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
+
+       /* If perf-stats are not available, hide the perf_stats sysfs attribute */
+       if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
+               return 0;
+
+       return attr->mode;
+}
+
 /* papr_scm specific dimm attributes */
 static struct attribute *papr_nd_attributes[] = {
        &dev_attr_flags.attr,
        &dev_attr_perf_stats.attr,
+       &dev_attr_dirty_shutdown.attr,
        NULL,
 };
 
 static struct attribute_group papr_nd_attribute_group = {
        .name = "papr",
+       .is_visible = papr_nd_attribute_visible,
        .attrs = papr_nd_attributes,
 };
 
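
With the group named "papr" and the is_visible hook wired up, the counter appears per dimm under sysfs. A user-space sketch; nmem0 is an illustrative device instance, and the exact path depends on nvdimm enumeration:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[32] = "";	/* zero-filled; read() won't NUL-terminate */
		int fd = open("/sys/bus/nd/devices/nmem0/papr/dirty_shutdown",
			      O_RDONLY);

		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf) - 1) > 0)
			printf("dirty shutdowns: %s", buf);
		close(fd);
		return 0;
	}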
@@ -924,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        struct nd_region_desc ndr_desc;
        unsigned long dimm_flags;
        int target_nid, online_nid;
-       ssize_t stat_size;
 
        p->bus_desc.ndctl = papr_scm_ndctl;
        p->bus_desc.module = THIS_MODULE;
@@ -1009,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        list_add_tail(&p->region_list, &papr_nd_regions);
        mutex_unlock(&papr_ndr_lock);
 
-       /* Try retriving the stat buffer and see if its supported */
-       stat_size = drc_pmem_query_stats(p, NULL, 0);
-       if (stat_size > 0) {
-               p->stat_buffer_len = stat_size;
-               dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
-                       p->stat_buffer_len);
-       } else {
-               dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
-       }
-
        return 0;
 
 err:   nvdimm_bus_unregister(p->bus);
@@ -1094,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev)
        u32 drc_index, metadata_size;
        u64 blocks, block_size;
        struct papr_scm_priv *p;
+       u8 uuid_raw[UUID_SIZE];
        const char *uuid_str;
-       u64 uuid[2];
+       ssize_t stat_size;
+       uuid_t uuid;
        int rc;
 
        /* check we have all the required DT properties */
@@ -1137,17 +1177,28 @@ static int papr_scm_probe(struct platform_device *pdev)
        p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
        p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
 
+       if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+                                &p->dirty_shutdown_counter))
+               p->dirty_shutdown_counter = 0;
+
        /* We just need to ensure that set cookies are unique across */
-       uuid_parse(uuid_str, (uuid_t *) uuid);
+       uuid_parse(uuid_str, &uuid);
+
        /*
-        * cookie1 and cookie2 are not really little endian
-        * we store a little endian representation of the
-        * uuid str so that we can compare this with the label
-        * area cookie irrespective of the endian config with which
-        * the kernel is built.
+        * The cookie1 and cookie2 are not really little endian.
+        * We store a raw buffer representation of the
+        * uuid string so that we can compare this with the label
+        * area cookie irrespective of the endian configuration
+        * with which the kernel is built.
+        *
+        * Historically we stored the cookie in the below format.
+        * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+        *      cookie1 was 0xfd423b0b671b5172
+        *      cookie2 was 0xaabce8cae35b1d8d
         */
-       p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
-       p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
+       export_uuid(uuid_raw, &uuid);
+       p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+       p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
 
        /* might be zero */
        p->metadata_size = metadata_size;
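
A worked sketch of the new derivation, checked against the historical values quoted in the comment above; export_uuid() yields the raw bytes of the uuid string in order, and reading each half little-endian reproduces the old cookies:

	#include <linux/uuid.h>
	#include <asm/unaligned.h>

	static void cookie_example(void)
	{
		u8 raw[UUID_SIZE];
		uuid_t uuid;
		u64 cookie1, cookie2;

		if (uuid_parse("72511b67-0b3b-42fd-8d1d-5be3cae8bcaa", &uuid))
			return;

		export_uuid(raw, &uuid);
		cookie1 = get_unaligned_le64(&raw[0]);	/* 0xfd423b0b671b5172 */
		cookie2 = get_unaligned_le64(&raw[8]);	/* 0xaabce8cae35b1d8d */
		(void)cookie1;
		(void)cookie2;
	}

This keeps the label-area cookie stable regardless of the endianness the kernel is built with, which is the point of the change.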
@@ -1172,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev)
        p->res.name  = pdev->name;
        p->res.flags = IORESOURCE_MEM;
 
+       /* Try retrieving the stat buffer and see if it's supported */
+       stat_size = drc_pmem_query_stats(p, NULL, 0);
+       if (stat_size > 0) {
+               p->stat_buffer_len = stat_size;
+               dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+                       p->stat_buffer_len);
+       }
+
        rc = papr_scm_nvdimm_init(p);
        if (rc)
                goto err2;
index 9d4ef65da7f395f6aab3e7a9e544f27c2117f0e5..167f2e1b8d39cd0d94d89c09ba247f1b7280d342 100644 (file)
@@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
        if ((be64_to_cpu(regs->msr) &
                        (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
                         MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
-               regs->nip = be64_to_cpu((__be64)regs->nip);
-               regs->msr = 0;
+               regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+               regs_set_return_msr(regs, 0);
        }
 #endif
 
@@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
                mce_err.severity = MCE_SEV_SEVERE;
        else if (severity == RTAS_SEVERITY_ERROR)
                mce_err.severity = MCE_SEV_SEVERE;
-       else if (severity == RTAS_SEVERITY_FATAL)
-               mce_err.severity = MCE_SEV_FATAL;
        else
                mce_err.severity = MCE_SEV_FATAL;
 
index 754e493b7c05bbbe4d35df7f5f247aa0ed85375c..631a0d57b6cdf54b49809419033010d00ef8bf3d 100644 (file)
@@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
        if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
                security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
 
+       if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+       if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+       if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+               security_ftr_clear(SEC_FTR_STF_BARRIER);
+
        if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
index c70b4be9f0a5496a8f16a633d09aee3193e2076a..096629f545769fe97729ae5cef8e4a1590859241 100644 (file)
@@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void)
        if (!cpu_has_feature(CPU_FTR_SMT))
                return;
 
-       if (check_kvm_guest()) {
+       check_kvm_guest();
+
+       if (is_kvm_guest()) {
                /*
                 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
                 * faults to the hypervisor which then reads the instruction
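
check_kvm_guest() now only performs the one-time detection, while is_kvm_guest() answers the query; in this era of the kernel the query is backed by a static key, so the hot path costs a patched branch. A hedged sketch of the pattern (the example_-prefixed names are illustrative, not the kernel's definitions):

	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(example_kvm_guest);

	/* Called once at boot, after the device tree is available */
	static void example_check_kvm_guest(bool dt_reports_kvm)
	{
		if (dt_reports_kvm)
			static_branch_enable(&example_kvm_guest);
	}

	/* Cheap enough to call from any hot path */
	static bool example_is_kvm_guest(void)
	{
		return static_branch_unlikely(&example_kvm_guest);
	}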
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644 (file)
index 0000000..b5c1cf1
--- /dev/null
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+#define VAS_INVALID_WIN_ADDRESS        0xFFFFFFFFFFFFFFFFul
+#define VAS_DEFAULT_DOMAIN_ID  0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS          1
+
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+static DEFINE_MUTEX(vas_pseries_mutex);
+
+static long hcall_return_busy_check(long rc)
+{
+       /* Check if we are stalled for some time */
+       if (H_IS_LONG_BUSY(rc)) {
+               msleep(get_longbusy_msecs(rc));
+               rc = H_BUSY;
+       } else if (rc == H_BUSY) {
+               cond_resched();
+       }
+
+       return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+                                    u8 wintype, u16 credits)
+{
+       long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+       long rc;
+
+       do {
+               rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+                                 credits, domain[0], domain[1], domain[2],
+                                 domain[3], domain[4], domain[5]);
+
+               rc = hcall_return_busy_check(rc);
+       } while (rc == H_BUSY);
+
+       if (rc == H_SUCCESS) {
+               win->vas_win.winid = retbuf[0];
+               win->win_addr = retbuf[1];
+               win->complete_irq = retbuf[2];
+               win->fault_irq = retbuf[3];
+
+               /*
+                * The hypervisor returns an invalid paste address when
+                * user space COPY/PASTE is not supported.
+                */
+               if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
+                       pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+                       return -ENOTSUPP;
+               }
+               return 0;
+       }
+
+       pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+               rc, wintype, credits);
+
+       return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+       long rc;
+
+       do {
+               rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+
+               rc = hcall_return_busy_check(rc);
+       } while (rc == H_BUSY);
+
+       if (rc == H_SUCCESS)
+               return 0;
+
+       pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+               rc, winid);
+       return -EIO;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+       long rc;
+       u32 lpid = mfspr(SPRN_PID);
+
+       /*
+        * AMR value is not supported in Linux VAS implementation.
+        * The hypervisor ignores it if 0 is passed.
+        */
+       do {
+               rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+                                       win->vas_win.winid, lpid, 0,
+                                       VAS_MOD_WIN_FLAGS, 0);
+
+               rc = hcall_return_busy_check(rc);
+       } while (rc == H_BUSY);
+
+       if (rc == H_SUCCESS)
+               return 0;
+
+       pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
+                       rc, win->vas_win.winid, lpid);
+       return -EIO;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ *             capabilities, which list all available feature(s).
+ *             Query again with a specific feature type to get that
+ *             feature's capabilities.
+ *             Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ *                     and VAS GZIP Default capabilities.
+ *                     H_QUERY_NX_CAPABILITIES provides NX GZIP
+ *                     capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+       long rc;
+
+       rc = plpar_hcall_norets(hcall, query_type, result);
+
+       if (rc == H_SUCCESS)
+               return 0;
+
+       pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+                       hcall, rc, query_type, result);
+       return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+       long rc;
+
+       rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+       if (rc == H_SUCCESS)
+               return 0;
+
+       pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+               rc, winid, buffer);
+       return -EIO;
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ *     So there is one fault CRB to process per H_GET_NX_FAULT hcall.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+       struct pseries_vas_window *txwin = data;
+       struct coprocessor_request_block crb;
+       struct vas_user_win_ref *tsk_ref;
+       int rc;
+
+       rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+       if (!rc) {
+               tsk_ref = &txwin->vas_win.task_ref;
+               vas_dump_crb(&crb);
+               vas_update_csb(&crb, tsk_ref);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+                                u64 *domain, u8 wintype)
+{
+       int rc;
+
+       rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+       if (rc)
+               return rc;
+       /*
+        * On PowerVM, the hypervisor sets up and forwards the fault
+        * interrupt per window, so IRQ setup and fault handling
+        * are done for each open window separately.
+        */
+       txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+       if (!txwin->fault_virq) {
+               pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+               rc = -EINVAL;
+               goto out_win;
+       }
+
+       txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+                               txwin->vas_win.winid);
+       if (!txwin->name) {
+               rc = -ENOMEM;
+               goto out_irq;
+       }
+
+       rc = request_threaded_irq(txwin->fault_virq, NULL,
+                                 pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+                                 txwin->name, txwin);
+       if (rc) {
+               pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+                      txwin->vas_win.winid, txwin->fault_virq, rc);
+               goto out_free;
+       }
+
+       txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+       return 0;
+out_free:
+       kfree(txwin->name);
+out_irq:
+       irq_dispose_mapping(txwin->fault_virq);
+out_win:
+       h_deallocate_vas_window(txwin->vas_win.winid);
+       return rc;
+}
+
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+       free_irq(txwin->fault_virq, txwin);
+       kfree(txwin->name);
+       irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+                                             enum vas_cop_type cop_type)
+{
+       long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+       struct vas_cop_feat_caps *cop_feat_caps;
+       struct vas_caps *caps;
+       struct pseries_vas_window *txwin;
+       int rc;
+
+       txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+       if (!txwin)
+               return ERR_PTR(-ENOMEM);
+
+       /*
+        * A VAS window can have many credits, which means that many
+        * requests can be issued simultaneously. But the hypervisor
+        * currently restricts each window to one credit.
+        * The hypervisor introduces 2 different types of credits:
+        * Default credit type (Uses normal priority FIFO):
+        *      A limited number of credits are assigned to partitions
+        *      based on processor entitlement. But these credits may be
+        *      over-committed on a system depends on whether the CPUs
+        *      are in shared or dedicated modes - that is, more requests
+        *      may be issued across the system than NX can service at
+        *      once which can result in paste command failure (RMA_busy).
+        *      Then the process has to resend requests or fall-back to
+        *      SW compression.
+        * Quality of Service (QoS) credit type (Uses high priority FIFO):
+        *      To avoid NX HW contention, the system admins can assign
+        *      QoS credits for each LPAR so that this partition is
+        *      guaranteed access to NX resources. These credits are
+        *      assigned to partitions via the HMC.
+        *      Refer to PAPR for more information.
+        *
+        * Allocate window with QoS credits if user requested. Otherwise
+        * default credits are used.
+        */
+       if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+               caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+       else
+               caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+       cop_feat_caps = &caps->caps;
+
+       if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
+                       atomic_read(&cop_feat_caps->target_lpar_creds)) {
+               pr_err("Credits are not available to allocate window\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (vas_id == -1) {
+               /*
+                * The user space is requesting to allocate a window on
+                * a VAS instance where the process is executing.
+                * On PowerVM, domain values are passed to the hypervisor
+                * to select VAS instance. Useful if the process is
+                * affinity to NUMA node.
+                * The hypervisor selects VAS instance if
+                * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+                * The h_allocate_vas_window hcall is defined to take a
+                * domain values as specified by h_home_node_associativity,
+                * So no unpacking needs to be done.
+                */
+               rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+                                 VPHN_FLAG_VCPU, smp_processor_id());
+               if (rc != H_SUCCESS) {
+                       pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+                       goto out;
+               }
+       }
+
+       /*
+        * Allocate / Deallocate window hcalls and setup / free IRQs
+        * have to be protected with a mutex.
+        * Open VAS window: Allocate window hcall and set up IRQ
+        * Close VAS window: Deallocate window hcall and free IRQ
+        *      The hypervisor waits until all NX requests are
+        *      completed before closing the window, and expects the OS
+        *      to handle NX faults, which means the IRQ can be freed
+        *      only after the deallocate window hcall returns.
+        * If the window is closed with the deallocate hcall before the
+        * IRQ is freed, the hypervisor can hand the same fault IRQ to
+        * a new allocate hcall, and IRQ setup for the new window can
+        * then fail because the OS has not yet freed that IRQ.
+        */
+       mutex_lock(&vas_pseries_mutex);
+       rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+                                  cop_feat_caps->win_type);
+       mutex_unlock(&vas_pseries_mutex);
+       if (rc)
+               goto out;
+
+       /*
+        * Modify window and it is ready to use.
+        */
+       rc = h_modify_vas_window(txwin);
+       if (!rc)
+               rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+       if (rc)
+               goto out_free;
+
+       vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+       txwin->win_type = cop_feat_caps->win_type;
+       mutex_lock(&vas_pseries_mutex);
+       list_add(&txwin->win_list, &caps->list);
+       mutex_unlock(&vas_pseries_mutex);
+
+       return &txwin->vas_win;
+
+out_free:
+       /*
+        * Window is not operational. Free IRQ before closing
+        * window so that do not have to hold mutex.
+        */
+       free_irq_setup(txwin);
+       h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+       atomic_dec(&cop_feat_caps->used_lpar_creds);
+       kfree(txwin);
+       return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+       struct pseries_vas_window *win;
+
+       win = container_of(vwin, struct pseries_vas_window, vas_win);
+       return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+       int rc = 0;
+
+       /*
+        * The hypervisor waits for all requests, including faults,
+        * to be processed before closing the window, which means all
+        * credits have to be returned. For a fault request, a credit
+        * is returned after the OS issues the H_GET_NX_FAULT hcall.
+        * So free the IRQ only after the H_DEALLOCATE_VAS_WINDOW
+        * hcall completes.
+        */
+       rc = h_deallocate_vas_window(win->vas_win.winid);
+       if (!rc)
+               free_irq_setup(win);
+
+       return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+       struct pseries_vas_window *win;
+       struct vas_cop_feat_caps *caps;
+       int rc = 0;
+
+       if (!vwin)
+               return -EINVAL;
+
+       win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+       /* Should not happen */
+       if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+               pr_err("Window (%u): Invalid window type %u\n",
+                               vwin->winid, win->win_type);
+               return -EINVAL;
+       }
+
+       caps = &vascaps[win->win_type].caps;
+       mutex_lock(&vas_pseries_mutex);
+       rc = deallocate_free_window(win);
+       if (rc) {
+               mutex_unlock(&vas_pseries_mutex);
+               return rc;
+       }
+
+       list_del(&win->win_list);
+       atomic_dec(&caps->used_lpar_creds);
+       mutex_unlock(&vas_pseries_mutex);
+
+       put_vas_user_win_ref(&vwin->task_ref);
+       mm_context_remove_vas_window(vwin->task_ref.mm);
+
+       kfree(win);
+       return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+       .open_win       = vas_allocate_window,  /* Open and configure window */
+       .paste_addr     = vas_paste_address,    /* To do copy/paste */
+       .close_win      = vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported now, but this API
+ * code can be extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+                            const char *name)
+{
+       int rc;
+
+       if (!copypaste_feat)
+               return -ENOTSUPP;
+
+       rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+       vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+                               struct hv_vas_cop_feat_caps *hv_caps)
+{
+       struct vas_cop_feat_caps *caps;
+       struct vas_caps *vcaps;
+       int rc = 0;
+
+       vcaps = &vascaps[type];
+       memset(vcaps, 0, sizeof(*vcaps));
+       INIT_LIST_HEAD(&vcaps->list);
+
+       caps = &vcaps->caps;
+
+       rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+                                         (u64)virt_to_phys(hv_caps));
+       if (rc)
+               return rc;
+
+       caps->user_mode = hv_caps->user_mode;
+       if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+               pr_err("User space COPY/PASTE is not supported\n");
+               return -ENOTSUPP;
+       }
+
+       caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+       caps->win_type = hv_caps->win_type;
+       if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+               pr_err("Unsupported window type %u\n", caps->win_type);
+               return -EINVAL;
+       }
+       caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+       caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+       atomic_set(&caps->target_lpar_creds,
+                  be16_to_cpu(hv_caps->target_lpar_creds));
+       if (feat == VAS_GZIP_DEF_FEAT) {
+               caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+               if (caps->max_win_creds < DEF_WIN_CREDS) {
+                       pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+                              DEF_WIN_CREDS, caps->max_win_creds);
+                       return -EINVAL;
+               }
+       }
+
+       copypaste_feat = true;
+
+       return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+       struct hv_vas_cop_feat_caps *hv_cop_caps;
+       struct hv_vas_all_caps *hv_caps;
+       int rc;
+
+       /*
+        * Linux supports user space COPY/PASTE only with Radix
+        */
+       if (!radix_enabled()) {
+               pr_err("API is supported only with radix page tables\n");
+               return -ENOTSUPP;
+       }
+
+       hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+       if (!hv_caps)
+               return -ENOMEM;
+       /*
+        * Get the overall VAS capabilities by passing 0 as the feature type.
+        */
+       rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+                                         (u64)virt_to_phys(hv_caps));
+       if (rc)
+               goto out;
+
+       caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+       caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+       hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+       if (!hv_cop_caps) {
+               rc = -ENOMEM;
+               goto out;
+       }
+       /*
+        * QOS capabilities available
+        */
+       if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+               rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+                                         VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+               if (rc)
+                       goto out_cop;
+       }
+       /*
+        * Default capabilities available
+        */
+       if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+               rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+                                         VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+               if (rc)
+                       goto out_cop;
+       }
+
+       pr_info("GZIP feature is available\n");
+
+out_cop:
+       kfree(hv_cop_caps);
+out:
+       kfree(hv_caps);
+       return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
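
Both platform backends now plug into the common layer purely through vas_user_win_ops. A hedged sketch of the lifecycle the common code is expected to drive; the coproc device and mmap plumbing themselves are not part of this excerpt:

	static int example_window_lifecycle(const struct vas_user_win_ops *ops,
					    int vas_id, u64 flags,
					    enum vas_cop_type cop_type)
	{
		struct vas_window *win;
		u64 paste_addr;

		win = ops->open_win(vas_id, flags, cop_type);
		if (IS_ERR(win))
			return PTR_ERR(win);

		/* The common layer maps this address for user-space paste */
		paste_addr = ops->paste_addr(win);
		(void)paste_addr;

		return ops->close_win(win);
	}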
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644 (file)
index 0000000..4ecb3fc
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags
+ */
+#define VAS_MOD_WIN_CLOSE      PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL  PPC_BIT(1)
+#define VAS_MOD_WIN_DR         PPC_BIT(3)
+#define VAS_MOD_WIN_PR         PPC_BIT(4)
+#define VAS_MOD_WIN_SF         PPC_BIT(5)
+#define VAS_MOD_WIN_TA         PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS      (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+                               VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE         0x0
+#define VAS_WIN_CLOSED         0x1
+#define VAS_WIN_INACTIVE       0x2     /* Inactive due to HW failure */
+/* In the process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS 0x3
+
+#define VAS_COPY_PASTE_USER_MODE       0x00000001
+#define VAS_COP_OP_USER_MODE           0x00000010
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows
+ */
+enum vas_cop_feat_type {
+       VAS_GZIP_QOS_FEAT_TYPE,
+       VAS_GZIP_DEF_FEAT_TYPE,
+       VAS_MAX_FEAT_TYPE,
+};
+
+/*
+ * Used to get feature-specific capabilities from the
+ * hypervisor.
+ */
+struct hv_vas_cop_feat_caps {
+       __be64  descriptor;
+       u8      win_type;               /* Default or QoS type */
+       u8      user_mode;
+       __be16  max_lpar_creds;
+       __be16  max_win_creds;
+       union {
+               __be16  reserved;
+               __be16  def_lpar_creds; /* Used for default capabilities */
+       };
+       __be16  target_lpar_creds;
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities.
+ */
+struct vas_cop_feat_caps {
+       u64             descriptor;
+       u8              win_type;       /* Default or QoS type */
+       u8              user_mode;      /* User mode copy/paste or COP HCALL */
+       u16             max_lpar_creds; /* Max credits available in LPAR */
+       /* Max credits can be assigned per window */
+       u16             max_win_creds;
+       union {
+               u16     reserved;       /* Used for QoS credit type */
+               u16     def_lpar_creds; /* Used for default credit type */
+       };
+       /*
+        * Total LPAR available credits. Can be different from max LPAR
+        * credits due to a DLPAR operation.
+        */
+       atomic_t        target_lpar_creds;
+       atomic_t        used_lpar_creds; /* Used credits so far */
+       u16             avail_lpar_creds; /* Remaining available credits */
+};
+
+/*
+ * Per-feature (QoS or default) structure to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+       struct vas_cop_feat_caps caps;
+       struct list_head list;  /* List of open windows */
+};
+
+/*
+ * To get window information from the hypervisor.
+ */
+struct hv_vas_win_lpar {
+       __be16  version;
+       u8      win_type;
+       u8      status;
+       __be16  credits;        /* Number of credits assigned to this window */
+       __be16  reserved;
+       __be32  pid;            /* LPAR Process ID */
+       __be32  tid;            /* LPAR Thread ID */
+       __be64  win_addr;       /* Paste address */
+       __be32  interrupt;      /* Interrupt when NX request completes */
+       __be32  fault;          /* Interrupt when NX sees fault */
+       /*
+        * Associativity Domain Identifiers as returned by
+        * H_HOME_NODE_ASSOCIATIVITY.
+        */
+       __be64  domain[6];
+       __be64  win_util;       /* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window {
+       struct vas_window vas_win;
+       u64 win_addr;           /* Physical paste address */
+       u8 win_type;            /* QoS or Default window */
+       u32 complete_irq;       /* Completion interrupt */
+       u32 fault_irq;          /* Fault interrupt */
+       u64 domain[6];          /* Associativity domain IDs of the node */
+                               /* on which this window is allocated */
+       u64 util;
+
+       /* List of windows opened which is used for LPM */
+       struct list_head win_list;
+       u64 flags;
+       char *name;
+       int fault_virq;
+};
+#endif /* _VAS_H */
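
The window-modify flags above are built with PPC_BIT(), which counts from the most-significant bit of a 64-bit word: PPC_BIT(n) == 1UL << (63 - n). A small illustration of the values this produces (DEMO_PPC_BIT is a stand-in, not a kernel macro):

    /* Illustration only: expansion of the VAS_MOD_WIN_* flags above. */
    #define DEMO_PPC_BIT(n)   (1UL << (63 - (n)))

    /* VAS_MOD_WIN_CLOSE     == DEMO_PPC_BIT(0) == 0x8000000000000000UL */
    /* VAS_MOD_WIN_JOBS_KILL == DEMO_PPC_BIT(1) == 0x4000000000000000UL */
    /* VAS_MOD_WIN_DR        == DEMO_PPC_BIT(3) == 0x1000000000000000UL */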
index 69af73765783b061a5ba2263773b5d95f4c103bc..b8f76f3fd99418091c1cd8b61d7feccb632fdd08 100644 (file)
@@ -1072,7 +1072,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
                        ret = get_kernel_nofault(inst, (void *)regs->nip);
 
                if (!ret && mcheck_handle_load(regs, inst)) {
-                       regs->nip += 4;
+                       regs_add_return_ip(regs, 4);
                        return 1;
                }
        }
index 07c164f7f8cfe7b77d0a42d563377b3fa65b4fe5..5a95b8ea23d8c3c130704d15daa161a87ad20abf 100644 (file)
@@ -108,8 +108,8 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs)
                                 __func__);
                        out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
                                 0);
-                       regs->msr |= MSR_RI;
-                       regs->nip = extable_fixup(entry);
+                       regs_set_return_msr(regs, regs->msr | MSR_RI);
+                       regs_set_return_ip(regs, extable_fixup(entry));
                        return 1;
                }
        }
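
Both hunks here are part of the series-wide switch from writing regs->nip and regs->msr directly to the regs_set_return_*() accessors. Beyond the assignment, the accessors also note that the saved return state changed (on 64s, by invalidating the cached SRR/HSRR values) so the optimised interrupt-return path reloads the registers. A conceptual sketch, using a made-up bookkeeping field rather than the real paca machinery:

    /*
     * Conceptual sketch of the new accessor (not the exact kernel
     * definition): update the saved register and remember that the
     * interrupt-exit path must reload it.
     */
    struct demo_pt_regs {
            unsigned long nip;
            unsigned long msr;
            int return_regs_changed;        /* hypothetical flag */
    };

    static inline void demo_regs_set_return_ip(struct demo_pt_regs *regs,
                                               unsigned long ip)
    {
            regs->nip = ip;
            regs->return_regs_changed = 1;
    }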
index 304614c920aa0bc68ba2216f48477e5011016f98..063d9195891fb0910b097ed24ae93a6896adfd57 100644 (file)
@@ -12,3 +12,6 @@ config PPC_ICP_HV
 
 config PPC_ICS_RTAS
        def_bool n
+
+config PPC_ICS_NATIVE
+       def_bool n
index ba1e3117b1c09818f98963679d9cb6bfc663e29e..747063927c6cac505e2067d36dad08254059082c 100644 (file)
@@ -4,4 +4,5 @@ obj-y                           += xics-common.o
 obj-$(CONFIG_PPC_ICP_NATIVE)   += icp-native.o
 obj-$(CONFIG_PPC_ICP_HV)       += icp-hv.o
 obj-$(CONFIG_PPC_ICS_RTAS)     += ics-rtas.o
+obj-$(CONFIG_PPC_ICS_NATIVE)   += ics-native.o
 obj-$(CONFIG_PPC_POWERNV)      += ics-opal.o icp-opal.o
diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c
new file mode 100644 (file)
index 0000000..d450502
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for native (memory-mapped) interrupt sources.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+//#define DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/list.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+struct ics_native {
+       struct ics              ics;
+       struct device_node      *node;
+       void __iomem            *base;
+       u32                     ibase;
+       u32                     icount;
+};
+#define to_ics_native(_ics)     container_of(_ics, struct ics_native, ics)
+
+static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec)
+{
+       return in->base + 0x800 + ((vec - in->ibase) << 2);
+}
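
Each interrupt source owns a 4-byte XIVE register, laid out consecutively starting at offset 0x800 of the mapped region; hence the shift by 2. A worked example with hypothetical values:

    /*
     * Worked example: with in->ibase = 0x10 and vec = 0x13, the
     * register is at
     *     in->base + 0x800 + ((0x13 - 0x10) << 2) = in->base + 0x80c
     */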
+
+static void ics_native_unmask_irq(struct irq_data *d)
+{
+       unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+       struct ics *ics = irq_data_get_irq_chip_data(d);
+       struct ics_native *in = to_ics_native(ics);
+       unsigned int server;
+
+       pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec);
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return;
+
+       server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+       out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY);
+}
+
+static unsigned int ics_native_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+       /*
+        * The generic MSI code returns with the interrupt disabled on the
+        * card, using the MSI mask bits. Firmware doesn't appear to unmask
+        * at that level, so we do it here by hand.
+        */
+       if (irq_data_get_msi_desc(d))
+               pci_msi_unmask_irq(d);
+#endif
+
+       /* unmask it */
+       ics_native_unmask_irq(d);
+       return 0;
+}
+
+static void ics_native_do_mask(struct ics_native *in, unsigned int vec)
+{
+       out_be32(ics_native_xive(in, vec), 0xff);
+}
+
+static void ics_native_mask_irq(struct irq_data *d)
+{
+       unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+       struct ics *ics = irq_data_get_irq_chip_data(d);
+       struct ics_native *in = to_ics_native(ics);
+
+       pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec);
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return;
+       ics_native_do_mask(in, vec);
+}
+
+static int ics_native_set_affinity(struct irq_data *d,
+                                  const struct cpumask *cpumask,
+                                  bool force)
+{
+       unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+       struct ics *ics = irq_data_get_irq_chip_data(d);
+       struct ics_native *in = to_ics_native(ics);
+       int server;
+       u32 xive;
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return -EINVAL;
+
+       server = xics_get_irq_server(d->irq, cpumask, 1);
+       if (server == -1) {
+               pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+                       __func__, cpumask_pr_args(cpumask), d->irq);
+               return -1;
+       }
+
+       xive = in_be32(ics_native_xive(in, vec));
+       xive = (xive & 0xff) | (server << 8);
+       out_be32(ics_native_xive(in, vec), xive);
+
+       return IRQ_SET_MASK_OK;
+}
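
The 32-bit XIVE word written here packs the server number into bits 8 and up and the priority into the low byte, with priority 0xff meaning masked (as ics_native_do_mask() above writes). Minimal helpers spelling out the layout (sketch only; the names are made up):

    #include <stdint.h>

    /* Sketch of the XIVE word layout used throughout this file. */
    static inline uint32_t demo_xive_pack(uint32_t server, uint8_t prio)
    {
            return (server << 8) | prio;    /* prio 0xff == masked */
    }

    static inline uint32_t demo_xive_server(uint32_t xive)
    {
            return (xive >> 8) & 0xfff;     /* as in ics_native_get_server() */
    }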
+
+static struct irq_chip ics_native_irq_chip = {
+       .name = "ICS",
+       .irq_startup            = ics_native_startup,
+       .irq_mask               = ics_native_mask_irq,
+       .irq_unmask             = ics_native_unmask_irq,
+       .irq_eoi                = NULL, /* Patched at init time */
+       .irq_set_affinity       = ics_native_set_affinity,
+       .irq_set_type           = xics_set_irq_type,
+       .irq_retrigger          = xics_retrigger,
+};
+
+static int ics_native_map(struct ics *ics, unsigned int virq)
+{
+       unsigned int vec = (unsigned int)virq_to_hw(virq);
+       struct ics_native *in = to_ics_native(ics);
+
+       pr_devel("%s: vec=0x%x\n", __func__, vec);
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return -EINVAL;
+
+       irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq);
+       irq_set_chip_data(virq, ics);
+
+       return 0;
+}
+
+static void ics_native_mask_unknown(struct ics *ics, unsigned long vec)
+{
+       struct ics_native *in = to_ics_native(ics);
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return;
+
+       ics_native_do_mask(in, vec);
+}
+
+static long ics_native_get_server(struct ics *ics, unsigned long vec)
+{
+       struct ics_native *in = to_ics_native(ics);
+       u32 xive;
+
+       if (vec < in->ibase || vec >= (in->ibase + in->icount))
+               return -EINVAL;
+
+       xive = in_be32(ics_native_xive(in, vec));
+       return (xive >> 8) & 0xfff;
+}
+
+static int ics_native_host_match(struct ics *ics, struct device_node *node)
+{
+       struct ics_native *in = to_ics_native(ics);
+
+       return in->node == node;
+}
+
+static struct ics ics_native_template = {
+       .map            = ics_native_map,
+       .mask_unknown   = ics_native_mask_unknown,
+       .get_server     = ics_native_get_server,
+       .host_match     = ics_native_host_match,
+};
+
+static int __init ics_native_add_one(struct device_node *np)
+{
+       struct ics_native *ics;
+       u32 ranges[2];
+       int rc, count;
+
+       ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL);
+       if (!ics)
+               return -ENOMEM;
+       ics->node = of_node_get(np);
+       memcpy(&ics->ics, &ics_native_template, sizeof(struct ics));
+
+       ics->base = of_iomap(np, 0);
+       if (!ics->base) {
+               pr_err("Failed to map %pOFP\n", np);
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       count = of_property_count_u32_elems(np, "interrupt-ranges");
+       if (count < 2 || count & 1) {
+               pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+               rc = -EINVAL;
+               goto fail;
+       }
+       if (count > 2) {
+               pr_warn("ICS %pOFP has %d ranges, only one supported\n",
+                       np, count >> 1);
+       }
+       rc = of_property_read_u32_array(np, "interrupt-ranges",
+                                       ranges, 2);
+       if (rc) {
+               pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+               goto fail;
+       }
+       ics->ibase = ranges[0];
+       ics->icount = ranges[1];
+
+       pr_info("ICS native initialized for sources %d..%d\n",
+               ics->ibase, ics->ibase + ics->icount - 1);
+
+       /* Register ourselves */
+       xics_register_ics(&ics->ics);
+
+       return 0;
+fail:
+       of_node_put(ics->node);
+       kfree(ics);
+       return rc;
+}
+
+int __init ics_native_init(void)
+{
+       struct device_node *ics;
+       bool found_one = false;
+
+       /*
+        * We need to patch our irq chip's EOI to point to the
+        * right ICP.
+        */
+       ics_native_irq_chip.irq_eoi = icp_ops->eoi;
+
+       /* Find native ICS in the device-tree */
+       for_each_compatible_node(ics, NULL, "openpower,xics-sources") {
+               if (ics_native_add_one(ics) == 0)
+                       found_one = true;
+       }
+
+       if (found_one)
+               pr_info("ICS native backend registered\n");
+
+       return found_one ? 0 : -ENODEV;
+}
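
The backend binds to nodes compatible with "openpower,xics-sources" and reads the two-cell "interrupt-ranges" property as a <base count> pair. A stand-alone sketch of the resulting range arithmetic, using a hypothetical property value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical property: interrupt-ranges = <0x10 0x40> */
            uint32_t base = 0x10, count = 0x40;

            /* Matches the pr_info() format in ics_native_add_one(). */
            printf("ICS native initialized for sources %u..%u\n",
                   base, base + count - 1);         /* 16..79 */
            return 0;
    }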
index fdf8db4444b67d8a15ae75dd18815c477c0eb7b0..b14c502e56a8832c49bad5d1e1c1d5311dd469cb 100644 (file)
@@ -476,6 +476,8 @@ void __init xics_init(void)
        rc = ics_rtas_init();
        if (rc < 0)
                rc = ics_opal_init();
+       if (rc < 0)
+               rc = ics_native_init();
        if (rc < 0)
                pr_warn("XICS: Cannot find a Source Controller !\n");
 
index 84de2d7c2f40c527d223f97ad80d97884ccf9612..da4d7f225a409bc03b0cf84a67fea7724833bed3 100644 (file)
@@ -70,6 +70,9 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
 static unsigned long xmon_taken = 1;
 static int xmon_owner;
 static int xmon_gate;
+static int xmon_batch;
+static unsigned long xmon_batch_start_cpu;
+static cpumask_t xmon_batch_cpus = CPU_MASK_NONE;
 #else
 #define xmon_owner 0
 #endif /* CONFIG_SMP */
@@ -100,7 +103,7 @@ static long *xmon_fault_jmp[NR_CPUS];
 /* Breakpoint stuff */
 struct bpt {
        unsigned long   address;
-       struct ppc_inst *instr;
+       u32             *instr;
        atomic_t        ref_count;
        int             enabled;
        unsigned long   pad;
@@ -133,6 +136,12 @@ static void prdump(unsigned long, long);
 static int ppc_inst_dump(unsigned long, long, int);
 static void dump_log_buf(void);
 
+#ifdef CONFIG_SMP
+static int xmon_switch_cpu(unsigned long);
+static int xmon_batch_next_cpu(void);
+static int batch_cmds(struct pt_regs *);
+#endif
+
 #ifdef CONFIG_PPC_POWERNV
 static void dump_opal_msglog(void);
 #else
@@ -216,7 +225,8 @@ Commands:\n\
 #ifdef CONFIG_SMP
   "\
   c    print cpus stopped in xmon\n\
-  c#   try to switch to cpu number h (in hex)\n"
+  c#   try to switch to cpu number h (in hex)\n\
+  c# $ run command '$' (one of 'r','S' or 't') on all cpus in xmon\n"
 #endif
   "\
   C    checksum\n\
@@ -489,10 +499,10 @@ static void xmon_touch_watchdogs(void)
        touch_nmi_watchdog();
 }
 
-static int xmon_core(struct pt_regs *regs, int fromipi)
+static int xmon_core(struct pt_regs *regs, volatile int fromipi)
 {
-       int cmd = 0;
-       struct bpt *bp;
+       volatile int cmd = 0;
+       struct bpt *volatile bp;
        long recurse_jmp[JMP_BUF_LEN];
        bool locked_down;
        unsigned long offset;
@@ -514,7 +524,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
        bp = in_breakpoint_table(regs->nip, &offset);
        if (bp != NULL) {
-               regs->nip = bp->address + offset;
+               regs_set_return_ip(regs, bp->address + offset);
                atomic_dec(&bp->ref_count);
        }
 
@@ -644,7 +654,12 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                        spin_cpu_relax();
                        touch_nmi_watchdog();
                } else {
-                       if (!locked_down)
+                       cmd = 1;
+#ifdef CONFIG_SMP
+                       if (xmon_batch)
+                               cmd = batch_cmds(regs);
+#endif
+                       if (!locked_down && cmd)
                                cmd = cmds(regs);
                        if (locked_down || cmd != 0) {
                                /* exiting xmon */
@@ -702,7 +717,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
        if (regs->msr & MSR_DE) {
                bp = at_breakpoint(regs->nip);
                if (bp != NULL) {
-                       regs->nip = (unsigned long) &bp->instr[0];
+                       regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);
                        atomic_inc(&bp->ref_count);
                }
        }
@@ -712,7 +727,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                if (bp != NULL) {
                        int stepped = emulate_step(regs, ppc_inst_read(bp->instr));
                        if (stepped == 0) {
-                               regs->nip = (unsigned long) &bp->instr[0];
+                               regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);
                                atomic_inc(&bp->ref_count);
                        } else if (stepped < 0) {
                                printf("Couldn't single-step %s instruction\n",
@@ -766,7 +781,7 @@ static int xmon_bpt(struct pt_regs *regs)
        /* Are we at the trap at bp->instr[1] for some bp? */
        bp = in_breakpoint_table(regs->nip, &offset);
        if (bp != NULL && (offset == 4 || offset == 8)) {
-               regs->nip = bp->address + offset;
+               regs_set_return_ip(regs, bp->address + offset);
                atomic_dec(&bp->ref_count);
                return 1;
        }
@@ -836,7 +851,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
        if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
                bp = in_breakpoint_table(regs->nip, &offset);
                if (bp != NULL) {
-                       regs->nip = bp->address + offset;
+                       regs_set_return_ip(regs, bp->address + offset);
                        atomic_dec(&bp->ref_count);
                }
        }
@@ -857,7 +872,7 @@ static inline void force_enable_xmon(void)
 static struct bpt *at_breakpoint(unsigned long pc)
 {
        int i;
-       struct bpt *bp;
+       struct bpt *volatile bp;
 
        bp = bpts;
        for (i = 0; i < NBPTS; ++i, ++bp)
@@ -946,11 +961,11 @@ static void insert_bpts(void)
                }
 
                patch_instruction(bp->instr, instr);
-               patch_instruction(ppc_inst_next(bp->instr, &instr),
+               patch_instruction(ppc_inst_next(bp->instr, bp->instr),
                                  ppc_inst(bpinstr));
                if (bp->enabled & BP_CIABR)
                        continue;
-               if (patch_instruction((struct ppc_inst *)bp->address,
+               if (patch_instruction((u32 *)bp->address,
                                      ppc_inst(bpinstr)) != 0) {
                        printf("Couldn't write instruction at %lx, "
                               "disabling breakpoint there\n", bp->address);
@@ -992,7 +1007,7 @@ static void remove_bpts(void)
                if (mread_instr(bp->address, &instr)
                    && ppc_inst_equal(instr, ppc_inst(bpinstr))
                    && patch_instruction(
-                       (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0)
+                       (u32 *)bp->address, ppc_inst_read(bp->instr)) != 0)
                        printf("Couldn't remove breakpoint at %lx\n",
                               bp->address);
        }
@@ -1188,7 +1203,7 @@ cmds(struct pt_regs *excp)
 #ifdef CONFIG_BOOKE
 static int do_step(struct pt_regs *regs)
 {
-       regs->msr |= MSR_DE;
+       regs_set_return_msr(regs, regs->msr | MSR_DE);
        mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
        return 1;
 }
@@ -1221,7 +1236,7 @@ static int do_step(struct pt_regs *regs)
                        }
                }
        }
-       regs->msr |= MSR_SE;
+       regs_set_return_msr(regs, regs->msr | MSR_SE);
        return 1;
 }
 #endif
@@ -1243,11 +1258,112 @@ static void bootcmds(void)
        }
 }
 
+#ifdef CONFIG_SMP
+static int xmon_switch_cpu(unsigned long cpu)
+{
+       int timeout;
+
+       xmon_taken = 0;
+       mb();
+       xmon_owner = cpu;
+       timeout = 10000000;
+       while (!xmon_taken) {
+               if (--timeout == 0) {
+                       if (test_and_set_bit(0, &xmon_taken))
+                               break;
+                       /* take control back */
+                       mb();
+                       xmon_owner = smp_processor_id();
+                       printf("cpu 0x%lx didn't take control\n", cpu);
+                       return 0;
+               }
+               barrier();
+       }
+       return 1;
+}
+
+static int xmon_batch_next_cpu(void)
+{
+       unsigned long cpu;
+
+       while (!cpumask_empty(&xmon_batch_cpus)) {
+               cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus,
+                                       xmon_batch_start_cpu, true);
+               if (cpu == nr_cpumask_bits)
+                       break;
+               if (xmon_batch_start_cpu == -1)
+                       xmon_batch_start_cpu = cpu;
+               if (xmon_switch_cpu(cpu))
+                       return 0;
+               cpumask_clear_cpu(cpu, &xmon_batch_cpus);
+       }
+
+       xmon_batch = 0;
+       printf("%x:mon> \n", smp_processor_id());
+       return 1;
+}
+
+static int batch_cmds(struct pt_regs *excp)
+{
+       int cmd;
+
+       /* simulate command entry */
+       cmd = xmon_batch;
+       termch = '\n';
+
+       last_cmd = NULL;
+       xmon_regs = excp;
+
+       printf("%x:", smp_processor_id());
+       printf("mon> ");
+       printf("%c\n", (char)cmd);
+
+       switch (cmd) {
+       case 'r':
+               prregs(excp);   /* print regs */
+               break;
+       case 'S':
+               super_regs();
+               break;
+       case 't':
+               backtrace(excp);
+               break;
+       }
+
+       cpumask_clear_cpu(smp_processor_id(), &xmon_batch_cpus);
+
+       return xmon_batch_next_cpu();
+}
+
 static int cpu_cmd(void)
 {
-#ifdef CONFIG_SMP
        unsigned long cpu, first_cpu, last_cpu;
-       int timeout;
+
+       cpu = skipbl();
+       if (cpu == '#') {
+               xmon_batch = skipbl();
+               if (xmon_batch) {
+                       switch (xmon_batch) {
+                       case 'r':
+                       case 'S':
+                       case 't':
+                               cpumask_copy(&xmon_batch_cpus, &cpus_in_xmon);
+                               if (cpumask_weight(&xmon_batch_cpus) <= 1) {
+                                       printf("There are no other cpus in xmon\n");
+                                       break;
+                               }
+                               xmon_batch_start_cpu = -1;
+                               if (!xmon_batch_next_cpu())
+                                       return 1;
+                               break;
+                       default:
+                               printf("c# only supports 'r', 'S' and 't' commands\n");
+                       }
+                       xmon_batch = 0;
+                       return 0;
+               }
+       }
+       termch = cpu;
 
        if (!scanhex(&cpu)) {
                /* print cpus waiting or in xmon */
@@ -1279,27 +1395,15 @@ static int cpu_cmd(void)
 #endif
                return 0;
        }
-       xmon_taken = 0;
-       mb();
-       xmon_owner = cpu;
-       timeout = 10000000;
-       while (!xmon_taken) {
-               if (--timeout == 0) {
-                       if (test_and_set_bit(0, &xmon_taken))
-                               break;
-                       /* take control back */
-                       mb();
-                       xmon_owner = smp_processor_id();
-                       printf("cpu 0x%lx didn't take control\n", cpu);
-                       return 0;
-               }
-               barrier();
-       }
-       return 1;
+
+       return xmon_switch_cpu(cpu);
+}
 #else
+static int cpu_cmd(void)
+{
        return 0;
-#endif /* CONFIG_SMP */
 }
+#endif /* CONFIG_SMP */
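
Putting the xmon pieces together: 'c# $' snapshots the CPUs currently stopped in xmon, hops to each one with xmon_switch_cpu(), and batch_cmds() on that CPU prints a simulated prompt and runs the single 'r', 'S' or 't' command before moving on. An abridged, illustrative session (output approximate):

    0:mon> c# t
    1:mon> t
     ... backtrace for cpu 0x1 ...
    2:mon> t
     ... backtrace for cpu 0x2 ...
    0:mon>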
 
 static unsigned short fcstab[256] = {
        0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
@@ -2214,7 +2318,7 @@ mread_instr(unsigned long adrs, struct ppc_inst *instr)
        if (setjmp(bus_error_jmp) == 0) {
                catch_memory_errors = 1;
                sync();
-               *instr = ppc_inst_read((struct ppc_inst *)adrs);
+               *instr = ppc_inst_read((u32 *)adrs);
                sync();
                /* wait a little while to see if we get a machine check */
                __delay(200);
@@ -2975,7 +3079,7 @@ static void
 dump_log_buf(void)
 {
        struct kmsg_dump_iter iter;
-       unsigned char buf[128];
+       static unsigned char buf[1024];
        size_t len;
 
        if (setjmp(bus_error_jmp) != 0) {
@@ -3005,7 +3109,7 @@ static void dump_opal_msglog(void)
 {
        unsigned char buf[128];
        ssize_t res;
-       loff_t pos = 0;
+       volatile loff_t pos = 0;
 
        if (!firmware_has_feature(FW_FEATURE_OPAL)) {
                printf("Machine is not running OPAL firmware.\n");
@@ -3160,7 +3264,7 @@ memzcan(void)
                printf("%.8lx\n", a - mskip);
 }
 
-static void show_task(struct task_struct *tsk)
+static void show_task(struct task_struct *volatile tsk)
 {
        unsigned int p_state = READ_ONCE(tsk->__state);
        char state;
@@ -3205,7 +3309,7 @@ static void format_pte(void *ptep, unsigned long pte)
 static void show_pte(unsigned long addr)
 {
        unsigned long tskv = 0;
-       struct task_struct *tsk = NULL;
+       struct task_struct *volatile tsk = NULL;
        struct mm_struct *mm;
        pgd_t *pgdp;
        p4d_t *p4dp;
@@ -3300,7 +3404,7 @@ static void show_pte(unsigned long addr)
 static void show_tasks(void)
 {
        unsigned long tskv;
-       struct task_struct *tsk = NULL;
+       struct task_struct *volatile tsk = NULL;
 
        printf("     task_struct     ->thread.ksp    ->thread.regs    PID   PPID S  P CMD\n");
 
@@ -3623,7 +3727,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
                              const char *after)
 {
        char *modname;
-       const char *name = NULL;
+       const char *volatile name = NULL;
        unsigned long offset, size;
 
        printf(REG, address);
@@ -4055,7 +4159,7 @@ void xmon_register_spus(struct list_head *list)
 static void stop_spus(void)
 {
        struct spu *spu;
-       int i;
+       volatile int i;
        u64 tmp;
 
        for (i = 0; i < XMON_NUM_SPUS; i++) {
@@ -4096,7 +4200,7 @@ static void stop_spus(void)
 static void restart_spus(void)
 {
        struct spu *spu;
-       int i;
+       volatile int i;
 
        for (i = 0; i < XMON_NUM_SPUS; i++) {
                if (!spu_info[i].spu)
index 23e3d0160e67b2606813225a6e758e8a504d8cd4..2a35e0e785bd4df65d6dcdcad9ab4acb2fe9e306 100644 (file)
@@ -29,6 +29,7 @@ if CRYPTO_DEV_NX_COMPRESS
 config CRYPTO_DEV_NX_COMPRESS_PSERIES
        tristate "Compression acceleration support on pSeries platform"
        depends on PPC_PSERIES && IBMVIO
+       depends on PPC_VAS
        default y
        help
          Support for PowerPC Nest (NX) compression acceleration. This
index bc89a20e5d9d82f0149ccd5cca49360ecd811fdd..d00181a26dd652ee39bcdf6a3c30dd893a754bd3 100644 (file)
@@ -14,5 +14,5 @@ nx-crypto-objs := nx.o \
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o
 nx-compress-objs := nx-842.o
-nx-compress-pseries-objs := nx-842-pseries.o
+nx-compress-pseries-objs := nx-common-pseries.o
 nx-compress-powernv-objs := nx-common-powernv.o
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
deleted file mode 100644 (file)
index 1491cbf..0000000
+++ /dev/null
@@ -1,1130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Driver for IBM Power 842 compression accelerator
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
- *          Seth Jennings <sjenning@linux.vnet.ibm.com>
- */
-
-#include <asm/vio.h>
-
-#include "nx-842.h"
-#include "nx_csbcpb.h" /* struct nx_csbcpb */
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
-MODULE_ALIAS_CRYPTO("842");
-MODULE_ALIAS_CRYPTO("842-nx");
-
-static struct nx842_constraints nx842_pseries_constraints = {
-       .alignment =    DDE_BUFFER_ALIGN,
-       .multiple =     DDE_BUFFER_LAST_MULT,
-       .minimum =      DDE_BUFFER_LAST_MULT,
-       .maximum =      PAGE_SIZE, /* dynamic, max_sync_size */
-};
-
-static int check_constraints(unsigned long buf, unsigned int *len, bool in)
-{
-       if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
-               pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
-                        in ? "input" : "output", buf,
-                        nx842_pseries_constraints.alignment);
-               return -EINVAL;
-       }
-       if (*len % nx842_pseries_constraints.multiple) {
-               pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
-                        in ? "input" : "output", *len,
-                        nx842_pseries_constraints.multiple);
-               if (in)
-                       return -EINVAL;
-               *len = round_down(*len, nx842_pseries_constraints.multiple);
-       }
-       if (*len < nx842_pseries_constraints.minimum) {
-               pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
-                        in ? "input" : "output", *len,
-                        nx842_pseries_constraints.minimum);
-               return -EINVAL;
-       }
-       if (*len > nx842_pseries_constraints.maximum) {
-               pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
-                        in ? "input" : "output", *len,
-                        nx842_pseries_constraints.maximum);
-               if (in)
-                       return -EINVAL;
-               *len = nx842_pseries_constraints.maximum;
-       }
-       return 0;
-}
-
-/* I assume we need to align the CSB? */
-#define WORKMEM_ALIGN  (256)
-
-struct nx842_workmem {
-       /* scatterlist */
-       char slin[4096];
-       char slout[4096];
-       /* coprocessor status/parameter block */
-       struct nx_csbcpb csbcpb;
-
-       char padding[WORKMEM_ALIGN];
-} __aligned(WORKMEM_ALIGN);
-
-/* Macros for fields within nx_csbcpb */
-/* Check the valid bit within the csbcpb valid field */
-#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
-
-/* CE macros operate on the completion_extension field bits in the csbcpb.
- * CE0 0=full completion, 1=partial completion
- * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
- * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
-#define NX842_CSBCPB_CE0(x)    (x & BIT_MASK(7))
-#define NX842_CSBCPB_CE1(x)    (x & BIT_MASK(6))
-#define NX842_CSBCPB_CE2(x)    (x & BIT_MASK(5))
-
-/* The NX unit accepts data only on 4K page boundaries */
-#define NX842_HW_PAGE_SIZE     (4096)
-#define NX842_HW_PAGE_MASK     (~(NX842_HW_PAGE_SIZE-1))
-
-struct ibm_nx842_counters {
-       atomic64_t comp_complete;
-       atomic64_t comp_failed;
-       atomic64_t decomp_complete;
-       atomic64_t decomp_failed;
-       atomic64_t swdecomp;
-       atomic64_t comp_times[32];
-       atomic64_t decomp_times[32];
-};
-
-static struct nx842_devdata {
-       struct vio_dev *vdev;
-       struct device *dev;
-       struct ibm_nx842_counters *counters;
-       unsigned int max_sg_len;
-       unsigned int max_sync_size;
-       unsigned int max_sync_sg;
-} __rcu *devdata;
-static DEFINE_SPINLOCK(devdata_mutex);
-
-#define NX842_COUNTER_INC(_x) \
-static inline void nx842_inc_##_x( \
-       const struct nx842_devdata *dev) { \
-       if (dev) \
-               atomic64_inc(&dev->counters->_x); \
-}
-NX842_COUNTER_INC(comp_complete);
-NX842_COUNTER_INC(comp_failed);
-NX842_COUNTER_INC(decomp_complete);
-NX842_COUNTER_INC(decomp_failed);
-NX842_COUNTER_INC(swdecomp);
-
-#define NX842_HIST_SLOTS 16
-
-static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
-{
-       int bucket = fls(time);
-
-       if (bucket)
-               bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
-
-       atomic64_inc(&times[bucket]);
-}
-
-/* NX unit operation flags */
-#define NX842_OP_COMPRESS      0x0
-#define NX842_OP_CRC           0x1
-#define NX842_OP_DECOMPRESS    0x2
-#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
-#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
-#define NX842_OP_ASYNC         (1<<23)
-#define NX842_OP_NOTIFY                (1<<22)
-#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8)
-
-static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
-{
-       /* No use of DMA mappings within the driver. */
-       return 0;
-}
-
-struct nx842_slentry {
-       __be64 ptr; /* Real address (use __pa()) */
-       __be64 len;
-};
-
-/* pHyp scatterlist entry */
-struct nx842_scatterlist {
-       int entry_nr; /* number of slentries */
-       struct nx842_slentry *entries; /* ptr to array of slentries */
-};
-
-/* Does not include sizeof(entry_nr) in the size */
-static inline unsigned long nx842_get_scatterlist_size(
-                               struct nx842_scatterlist *sl)
-{
-       return sl->entry_nr * sizeof(struct nx842_slentry);
-}
-
-static int nx842_build_scatterlist(unsigned long buf, int len,
-                       struct nx842_scatterlist *sl)
-{
-       unsigned long entrylen;
-       struct nx842_slentry *entry;
-
-       sl->entry_nr = 0;
-
-       entry = sl->entries;
-       while (len) {
-               entry->ptr = cpu_to_be64(nx842_get_pa((void *)buf));
-               entrylen = min_t(int, len,
-                                LEN_ON_SIZE(buf, NX842_HW_PAGE_SIZE));
-               entry->len = cpu_to_be64(entrylen);
-
-               len -= entrylen;
-               buf += entrylen;
-
-               sl->entry_nr++;
-               entry++;
-       }
-
-       return 0;
-}
-
-static int nx842_validate_result(struct device *dev,
-       struct cop_status_block *csb)
-{
-       /* The csb must be valid after returning from vio_h_cop_sync */
-       if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
-               dev_err(dev, "%s: cspcbp not valid upon completion.\n",
-                               __func__);
-               dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
-                               csb->valid,
-                               csb->crb_seq_number,
-                               csb->completion_code,
-                               csb->completion_extension);
-               dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
-                               be32_to_cpu(csb->processed_byte_count),
-                               (unsigned long)be64_to_cpu(csb->address));
-               return -EIO;
-       }
-
-       /* Check return values from the hardware in the CSB */
-       switch (csb->completion_code) {
-       case 0: /* Completed without error */
-               break;
-       case 64: /* Compression ok, but output larger than input */
-               dev_dbg(dev, "%s: output size larger than input size\n",
-                                       __func__);
-               break;
-       case 13: /* Output buffer too small */
-               dev_dbg(dev, "%s: Out of space in output buffer\n",
-                                       __func__);
-               return -ENOSPC;
-       case 65: /* Calculated CRC doesn't match the passed value */
-               dev_dbg(dev, "%s: CRC mismatch for decompression\n",
-                                       __func__);
-               return -EINVAL;
-       case 66: /* Input data contains an illegal template field */
-       case 67: /* Template indicates data past the end of the input stream */
-               dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
-                                       __func__, csb->completion_code);
-               return -EINVAL;
-       default:
-               dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
-                                       __func__, csb->completion_code);
-               return -EIO;
-       }
-
-       /* Hardware sanity check */
-       if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
-               dev_err(dev, "%s: No error returned by hardware, but "
-                               "data returned is unusable, contact support.\n"
-                               "(Additional info: csbcbp->processed bytes "
-                               "does not specify processed bytes for the "
-                               "target buffer.)\n", __func__);
-               return -EIO;
-       }
-
-       return 0;
-}
-
-/**
- * nx842_pseries_compress - Compress data using the 842 algorithm
- *
- * Compression provide by the NX842 coprocessor on IBM Power systems.
- * The input buffer is compressed and the result is stored in the
- * provided output buffer.
- *
- * Upon return from this function @outlen contains the length of the
- * compressed data.  If there is an error then @outlen will be 0 and an
- * error will be specified by the return code from this function.
- *
- * @in: Pointer to input buffer
- * @inlen: Length of input buffer
- * @out: Pointer to output buffer
- * @outlen: Length of output buffer
- * @wmem: ptr to buffer for working memory, size determined by
- *        nx842_pseries_driver.workmem_size
- *
- * Returns:
- *   0         Success, output of length @outlen stored in the buffer at @out
- *   -ENOMEM   Unable to allocate internal buffers
- *   -ENOSPC   Output buffer is to small
- *   -EIO      Internal error
- *   -ENODEV   Hardware unavailable
- */
-static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
-                                 unsigned char *out, unsigned int *outlen,
-                                 void *wmem)
-{
-       struct nx842_devdata *local_devdata;
-       struct device *dev = NULL;
-       struct nx842_workmem *workmem;
-       struct nx842_scatterlist slin, slout;
-       struct nx_csbcpb *csbcpb;
-       int ret = 0;
-       unsigned long inbuf, outbuf;
-       struct vio_pfo_op op = {
-               .done = NULL,
-               .handle = 0,
-               .timeout = 0,
-       };
-       unsigned long start = get_tb();
-
-       inbuf = (unsigned long)in;
-       if (check_constraints(inbuf, &inlen, true))
-               return -EINVAL;
-
-       outbuf = (unsigned long)out;
-       if (check_constraints(outbuf, outlen, false))
-               return -EINVAL;
-
-       rcu_read_lock();
-       local_devdata = rcu_dereference(devdata);
-       if (!local_devdata || !local_devdata->dev) {
-               rcu_read_unlock();
-               return -ENODEV;
-       }
-       dev = local_devdata->dev;
-
-       /* Init scatterlist */
-       workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
-       slin.entries = (struct nx842_slentry *)workmem->slin;
-       slout.entries = (struct nx842_slentry *)workmem->slout;
-
-       /* Init operation */
-       op.flags = NX842_OP_COMPRESS_CRC;
-       csbcpb = &workmem->csbcpb;
-       memset(csbcpb, 0, sizeof(*csbcpb));
-       op.csbcpb = nx842_get_pa(csbcpb);
-
-       if ((inbuf & NX842_HW_PAGE_MASK) ==
-           ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
-               /* Create direct DDE */
-               op.in = nx842_get_pa((void *)inbuf);
-               op.inlen = inlen;
-       } else {
-               /* Create indirect DDE (scatterlist) */
-               nx842_build_scatterlist(inbuf, inlen, &slin);
-               op.in = nx842_get_pa(slin.entries);
-               op.inlen = -nx842_get_scatterlist_size(&slin);
-       }
-
-       if ((outbuf & NX842_HW_PAGE_MASK) ==
-           ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
-               /* Create direct DDE */
-               op.out = nx842_get_pa((void *)outbuf);
-               op.outlen = *outlen;
-       } else {
-               /* Create indirect DDE (scatterlist) */
-               nx842_build_scatterlist(outbuf, *outlen, &slout);
-               op.out = nx842_get_pa(slout.entries);
-               op.outlen = -nx842_get_scatterlist_size(&slout);
-       }
-
-       dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n",
-               __func__, (unsigned long)op.in, (long)op.inlen,
-               (unsigned long)op.out, (long)op.outlen);
-
-       /* Send request to pHyp */
-       ret = vio_h_cop_sync(local_devdata->vdev, &op);
-
-       /* Check for pHyp error */
-       if (ret) {
-               dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
-                       __func__, ret, op.hcall_err);
-               ret = -EIO;
-               goto unlock;
-       }
-
-       /* Check for hardware error */
-       ret = nx842_validate_result(dev, &csbcpb->csb);
-       if (ret)
-               goto unlock;
-
-       *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count);
-       dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen);
-
-unlock:
-       if (ret)
-               nx842_inc_comp_failed(local_devdata);
-       else {
-               nx842_inc_comp_complete(local_devdata);
-               ibm_nx842_incr_hist(local_devdata->counters->comp_times,
-                       (get_tb() - start) / tb_ticks_per_usec);
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * nx842_pseries_decompress - Decompress data using the 842 algorithm
- *
- * Decompression provide by the NX842 coprocessor on IBM Power systems.
- * The input buffer is decompressed and the result is stored in the
- * provided output buffer.  The size allocated to the output buffer is
- * provided by the caller of this function in @outlen.  Upon return from
- * this function @outlen contains the length of the decompressed data.
- * If there is an error then @outlen will be 0 and an error will be
- * specified by the return code from this function.
- *
- * @in: Pointer to input buffer
- * @inlen: Length of input buffer
- * @out: Pointer to output buffer
- * @outlen: Length of output buffer
- * @wmem: ptr to buffer for working memory, size determined by
- *        nx842_pseries_driver.workmem_size
- *
- * Returns:
- *   0         Success, output of length @outlen stored in the buffer at @out
- *   -ENODEV   Hardware decompression device is unavailable
- *   -ENOMEM   Unable to allocate internal buffers
- *   -ENOSPC   Output buffer is to small
- *   -EINVAL   Bad input data encountered when attempting decompress
- *   -EIO      Internal error
- */
-static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
-                                   unsigned char *out, unsigned int *outlen,
-                                   void *wmem)
-{
-       struct nx842_devdata *local_devdata;
-       struct device *dev = NULL;
-       struct nx842_workmem *workmem;
-       struct nx842_scatterlist slin, slout;
-       struct nx_csbcpb *csbcpb;
-       int ret = 0;
-       unsigned long inbuf, outbuf;
-       struct vio_pfo_op op = {
-               .done = NULL,
-               .handle = 0,
-               .timeout = 0,
-       };
-       unsigned long start = get_tb();
-
-       /* Ensure page alignment and size */
-       inbuf = (unsigned long)in;
-       if (check_constraints(inbuf, &inlen, true))
-               return -EINVAL;
-
-       outbuf = (unsigned long)out;
-       if (check_constraints(outbuf, outlen, false))
-               return -EINVAL;
-
-       rcu_read_lock();
-       local_devdata = rcu_dereference(devdata);
-       if (!local_devdata || !local_devdata->dev) {
-               rcu_read_unlock();
-               return -ENODEV;
-       }
-       dev = local_devdata->dev;
-
-       workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
-
-       /* Init scatterlist */
-       slin.entries = (struct nx842_slentry *)workmem->slin;
-       slout.entries = (struct nx842_slentry *)workmem->slout;
-
-       /* Init operation */
-       op.flags = NX842_OP_DECOMPRESS_CRC;
-       csbcpb = &workmem->csbcpb;
-       memset(csbcpb, 0, sizeof(*csbcpb));
-       op.csbcpb = nx842_get_pa(csbcpb);
-
-       if ((inbuf & NX842_HW_PAGE_MASK) ==
-           ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
-               /* Create direct DDE */
-               op.in = nx842_get_pa((void *)inbuf);
-               op.inlen = inlen;
-       } else {
-               /* Create indirect DDE (scatterlist) */
-               nx842_build_scatterlist(inbuf, inlen, &slin);
-               op.in = nx842_get_pa(slin.entries);
-               op.inlen = -nx842_get_scatterlist_size(&slin);
-       }
-
-       if ((outbuf & NX842_HW_PAGE_MASK) ==
-           ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
-               /* Create direct DDE */
-               op.out = nx842_get_pa((void *)outbuf);
-               op.outlen = *outlen;
-       } else {
-               /* Create indirect DDE (scatterlist) */
-               nx842_build_scatterlist(outbuf, *outlen, &slout);
-               op.out = nx842_get_pa(slout.entries);
-               op.outlen = -nx842_get_scatterlist_size(&slout);
-       }
-
-       dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n",
-               __func__, (unsigned long)op.in, (long)op.inlen,
-               (unsigned long)op.out, (long)op.outlen);
-
-       /* Send request to pHyp */
-       ret = vio_h_cop_sync(local_devdata->vdev, &op);
-
-       /* Check for pHyp error */
-       if (ret) {
-               dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
-                       __func__, ret, op.hcall_err);
-               goto unlock;
-       }
-
-       /* Check for hardware error */
-       ret = nx842_validate_result(dev, &csbcpb->csb);
-       if (ret)
-               goto unlock;
-
-       *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count);
-
-unlock:
-       if (ret)
-               /* decompress fail */
-               nx842_inc_decomp_failed(local_devdata);
-       else {
-               nx842_inc_decomp_complete(local_devdata);
-               ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
-                       (get_tb() - start) / tb_ticks_per_usec);
-       }
-
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * nx842_OF_set_defaults -- Set default (disabled) values for devdata
- *
- * @devdata: struct nx842_devdata to update
- *
- * Returns:
- *  0 on success
- *  -ENOENT if @devdata ptr is NULL
- */
-static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
-{
-       if (devdata) {
-               devdata->max_sync_size = 0;
-               devdata->max_sync_sg = 0;
-               devdata->max_sg_len = 0;
-               return 0;
-       } else
-               return -ENOENT;
-}
-
-/**
- * nx842_OF_upd_status -- Check the device info from OF status prop
- *
- * The status property indicates if the accelerator is enabled.  If the
- * device is in the OF tree it indicates that the hardware is present.
- * The status field indicates if the device is enabled when the status
- * is 'okay'.  Otherwise the device driver will be disabled.
- *
- * @devdata: struct nx842_devdata to use for dev_info
- * @prop: struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 - Device is available
- *  -ENODEV - Device is not available
- */
-static int nx842_OF_upd_status(struct nx842_devdata *devdata,
-                              struct property *prop)
-{
-       const char *status = (const char *)prop->value;
-
-       if (!strncmp(status, "okay", (size_t)prop->length))
-               return 0;
-       if (!strncmp(status, "disabled", (size_t)prop->length))
-               return -ENODEV;
-       dev_info(devdata->dev, "%s: unknown status '%s'\n", __func__, status);
-
-       return -EINVAL;
-}
-
-/**
- * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
- *
- * Definition of the 'ibm,max-sg-len' OF property:
- *  This field indicates the maximum byte length of a scatter list
- *  for the platform facility. It is a single cell encoded as with encode-int.
- *
- * Example:
- *  # od -x ibm,max-sg-len
- *  0000000 0000 0ff0
- *
- *  In this example, the maximum byte length of a scatter list is
- *  0x0ff0 (4,080).
- *
- * @devdata: struct nx842_devdata to update
- * @prop: struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 on success
- *  -EINVAL on failure
- */
-static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
-                                       struct property *prop) {
-       int ret = 0;
-       const unsigned int maxsglen = of_read_number(prop->value, 1);
-
-       if (prop->length != sizeof(maxsglen)) {
-               dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
-               dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
-                               prop->length, sizeof(maxsglen));
-               ret = -EINVAL;
-       } else {
-               devdata->max_sg_len = min_t(unsigned int,
-                                           maxsglen, NX842_HW_PAGE_SIZE);
-       }
-
-       return ret;
-}
-
-/**
- * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
- *
- * Definition of the 'ibm,max-sync-cop' OF property:
- *  Two series of cells.  The first series of cells represents the maximums
- *  that can be synchronously compressed. The second series of cells
- *  represents the maximums that can be synchronously decompressed.
- *  1. The first cell in each series contains the count of the number of
- *     data length, scatter list elements pairs that follow – each being
- *     of the form
- *    a. One cell data byte length
- *    b. One cell total number of scatter list elements
- *
- * Example:
- *  # od -x ibm,max-sync-cop
- *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
- *  0000020 0000 1000 0000 01fe
- *
- *  In this example, compression supports 0x1000 (4,096) data byte length
- *  and 0x1fe (510) total scatter list elements.  Decompression supports
- *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
- *  elements.
- *
- * @devdata: struct nx842_devdata to update
- * @prop: struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 on success
- *  -EINVAL on failure
- */
-static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
-                                       struct property *prop) {
-       int ret = 0;
-       unsigned int comp_data_limit, decomp_data_limit;
-       unsigned int comp_sg_limit, decomp_sg_limit;
-       const struct maxsynccop_t {
-               __be32 comp_elements;
-               __be32 comp_data_limit;
-               __be32 comp_sg_limit;
-               __be32 decomp_elements;
-               __be32 decomp_data_limit;
-               __be32 decomp_sg_limit;
-       } *maxsynccop;
-
-       if (prop->length != sizeof(*maxsynccop)) {
-               dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
-               dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
-                               sizeof(*maxsynccop));
-               ret = -EINVAL;
-               goto out;
-       }
-
-       maxsynccop = (const struct maxsynccop_t *)prop->value;
-       comp_data_limit = be32_to_cpu(maxsynccop->comp_data_limit);
-       comp_sg_limit = be32_to_cpu(maxsynccop->comp_sg_limit);
-       decomp_data_limit = be32_to_cpu(maxsynccop->decomp_data_limit);
-       decomp_sg_limit = be32_to_cpu(maxsynccop->decomp_sg_limit);
-
-       /* Use one limit rather than separate limits for compression and
-        * decompression. Set a maximum for this so as not to exceed the
-        * size that the header can support and round the value down to
-        * the hardware page size (4K) */
-       devdata->max_sync_size = min(comp_data_limit, decomp_data_limit);
-
-       devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
-                                       65536);
-
-       if (devdata->max_sync_size < 4096) {
-               dev_err(devdata->dev, "%s: hardware max data size (%u) is "
-                               "less than the driver minimum, unable to use "
-                               "the hardware device\n",
-                               __func__, devdata->max_sync_size);
-               ret = -EINVAL;
-               goto out;
-       }
-
-       nx842_pseries_constraints.maximum = devdata->max_sync_size;
-
-       devdata->max_sync_sg = min(comp_sg_limit, decomp_sg_limit);
-       if (devdata->max_sync_sg < 1) {
-               dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
-                               "less than the driver minimum, unable to use "
-                               "the hardware device\n",
-                               __func__, devdata->max_sync_sg);
-               ret = -EINVAL;
-               goto out;
-       }
-
-out:
-       return ret;
-}
-
-/**
- * nx842_OF_upd -- Handle OF properties updates for the device.
- *
- * Set all properties from the OF tree.  Optionally, a new property
- * can be provided by the @new_prop pointer to overwrite an existing value.
- * The device will remain disabled until all values are valid, this function
- * will return an error for updates unless all values are valid.
- *
- * @new_prop: If not NULL, this property is being updated.  If NULL, update
- *  all properties from the current values in the OF tree.
- *
- * Returns:
- *  0 - Success
- *  -ENOMEM - Could not allocate memory for new devdata structure
- *  -EINVAL - property value not found, new_prop is not a recognized
- *     property for the device or property value is not valid.
- *  -ENODEV - Device is not available
- */
-static int nx842_OF_upd(struct property *new_prop)
-{
-       struct nx842_devdata *old_devdata = NULL;
-       struct nx842_devdata *new_devdata = NULL;
-       struct device_node *of_node = NULL;
-       struct property *status = NULL;
-       struct property *maxsglen = NULL;
-       struct property *maxsyncop = NULL;
-       int ret = 0;
-       unsigned long flags;
-
-       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-       if (!new_devdata)
-               return -ENOMEM;
-
-       spin_lock_irqsave(&devdata_mutex, flags);
-       old_devdata = rcu_dereference_check(devdata,
-                       lockdep_is_held(&devdata_mutex));
-       if (old_devdata)
-               of_node = old_devdata->dev->of_node;
-
-       if (!old_devdata || !of_node) {
-               pr_err("%s: device is not available\n", __func__);
-               spin_unlock_irqrestore(&devdata_mutex, flags);
-               kfree(new_devdata);
-               return -ENODEV;
-       }
-
-       memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
-       new_devdata->counters = old_devdata->counters;
-
-       /* Set ptrs for existing properties */
-       status = of_find_property(of_node, "status", NULL);
-       maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
-       maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
-       if (!status || !maxsglen || !maxsyncop) {
-               dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
-               ret = -EINVAL;
-               goto error_out;
-       }
-
-       /*
-        * If this is a property update, there are only certain properties that
-        * we care about. Bail if it isn't in the below list
-        */
-       if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
-                        strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
-                        strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
-               goto out;
-
-       /* Perform property updates */
-       ret = nx842_OF_upd_status(new_devdata, status);
-       if (ret)
-               goto error_out;
-
-       ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
-       if (ret)
-               goto error_out;
-
-       ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
-       if (ret)
-               goto error_out;
-
-out:
-       dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
-                       __func__, new_devdata->max_sync_size,
-                       old_devdata->max_sync_size);
-       dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
-                       __func__, new_devdata->max_sync_sg,
-                       old_devdata->max_sync_sg);
-       dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
-                       __func__, new_devdata->max_sg_len,
-                       old_devdata->max_sg_len);
-
-       rcu_assign_pointer(devdata, new_devdata);
-       spin_unlock_irqrestore(&devdata_mutex, flags);
-       synchronize_rcu();
-       dev_set_drvdata(new_devdata->dev, new_devdata);
-       kfree(old_devdata);
-       return 0;
-
-error_out:
-       if (new_devdata) {
-               dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
-               nx842_OF_set_defaults(new_devdata);
-               rcu_assign_pointer(devdata, new_devdata);
-               spin_unlock_irqrestore(&devdata_mutex, flags);
-               synchronize_rcu();
-               dev_set_drvdata(new_devdata->dev, new_devdata);
-               kfree(old_devdata);
-       } else {
-               dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
-               spin_unlock_irqrestore(&devdata_mutex, flags);
-       }
-
-       if (!ret)
-               ret = -EINVAL;
-       return ret;
-}
-
-/**
- * nx842_OF_notifier - Process updates to OF properties for the device
- *
- * @np: notifier block
- * @action: notifier action
- * @data: struct of_reconfig_data pointer
- *
- * Returns:
- *     NOTIFY_OK on success
- *     NOTIFY_BAD encoded with error number on failure, use
- *             notifier_to_errno() to decode this value
- */
-static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
-                            void *data)
-{
-       struct of_reconfig_data *upd = data;
-       struct nx842_devdata *local_devdata;
-       struct device_node *node = NULL;
-
-       rcu_read_lock();
-       local_devdata = rcu_dereference(devdata);
-       if (local_devdata)
-               node = local_devdata->dev->of_node;
-
-       if (local_devdata &&
-                       action == OF_RECONFIG_UPDATE_PROPERTY &&
-                       !strcmp(upd->dn->name, node->name)) {
-               rcu_read_unlock();
-               nx842_OF_upd(upd->prop);
-       } else
-               rcu_read_unlock();
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block nx842_of_nb = {
-       .notifier_call = nx842_OF_notifier,
-};
-
-#define nx842_counter_read(_name)                                      \
-static ssize_t nx842_##_name##_show(struct device *dev,                \
-               struct device_attribute *attr,                          \
-               char *buf) {                                            \
-       struct nx842_devdata *local_devdata;                    \
-       int p = 0;                                                      \
-       rcu_read_lock();                                                \
-       local_devdata = rcu_dereference(devdata);                       \
-       if (local_devdata)                                              \
-               p = snprintf(buf, PAGE_SIZE, "%lld\n",                  \
-                      atomic64_read(&local_devdata->counters->_name)); \
-       rcu_read_unlock();                                              \
-       return p;                                                       \
-}
-
-#define NX842DEV_COUNTER_ATTR_RO(_name)                                        \
-       nx842_counter_read(_name);                                      \
-       static struct device_attribute dev_attr_##_name = __ATTR(_name, \
-                                               0444,                   \
-                                               nx842_##_name##_show,\
-                                               NULL);
-
-NX842DEV_COUNTER_ATTR_RO(comp_complete);
-NX842DEV_COUNTER_ATTR_RO(comp_failed);
-NX842DEV_COUNTER_ATTR_RO(decomp_complete);
-NX842DEV_COUNTER_ATTR_RO(decomp_failed);
-NX842DEV_COUNTER_ATTR_RO(swdecomp);
-
-static ssize_t nx842_timehist_show(struct device *,
-               struct device_attribute *, char *);
-
-static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
-               nx842_timehist_show, NULL);
-static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
-               0444, nx842_timehist_show, NULL);
-
-static ssize_t nx842_timehist_show(struct device *dev,
-               struct device_attribute *attr, char *buf) {
-       char *p = buf;
-       struct nx842_devdata *local_devdata;
-       atomic64_t *times;
-       int bytes_remain = PAGE_SIZE;
-       int bytes;
-       int i;
-
-       rcu_read_lock();
-       local_devdata = rcu_dereference(devdata);
-       if (!local_devdata) {
-               rcu_read_unlock();
-               return 0;
-       }
-
-       if (attr == &dev_attr_comp_times)
-               times = local_devdata->counters->comp_times;
-       else if (attr == &dev_attr_decomp_times)
-               times = local_devdata->counters->decomp_times;
-       else {
-               rcu_read_unlock();
-               return 0;
-       }
-
-       for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
-               bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n",
-                              i ? (2<<(i-1)) : 0, (2<<i)-1,
-                              atomic64_read(&times[i]));
-               bytes_remain -= bytes;
-               p += bytes;
-       }
-       /* The last bucket holds everything over
-        * 2<<(NX842_HIST_SLOTS - 2) us */
-       bytes = snprintf(p, bytes_remain, "%uus - :\t%lld\n",
-                       2<<(NX842_HIST_SLOTS - 2),
-                       atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
-       p += bytes;
-
-       rcu_read_unlock();
-       return p - buf;
-}
-
-static struct attribute *nx842_sysfs_entries[] = {
-       &dev_attr_comp_complete.attr,
-       &dev_attr_comp_failed.attr,
-       &dev_attr_decomp_complete.attr,
-       &dev_attr_decomp_failed.attr,
-       &dev_attr_swdecomp.attr,
-       &dev_attr_comp_times.attr,
-       &dev_attr_decomp_times.attr,
-       NULL,
-};
-
-static struct attribute_group nx842_attribute_group = {
-       .name = NULL,           /* put in device directory */
-       .attrs = nx842_sysfs_entries,
-};
-
-static struct nx842_driver nx842_pseries_driver = {
-       .name =         KBUILD_MODNAME,
-       .owner =        THIS_MODULE,
-       .workmem_size = sizeof(struct nx842_workmem),
-       .constraints =  &nx842_pseries_constraints,
-       .compress =     nx842_pseries_compress,
-       .decompress =   nx842_pseries_decompress,
-};
-
-static int nx842_pseries_crypto_init(struct crypto_tfm *tfm)
-{
-       return nx842_crypto_init(tfm, &nx842_pseries_driver);
-}
-
-static struct crypto_alg nx842_pseries_alg = {
-       .cra_name               = "842",
-       .cra_driver_name        = "842-nx",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
-       .cra_ctxsize            = sizeof(struct nx842_crypto_ctx),
-       .cra_module             = THIS_MODULE,
-       .cra_init               = nx842_pseries_crypto_init,
-       .cra_exit               = nx842_crypto_exit,
-       .cra_u                  = { .compress = {
-       .coa_compress           = nx842_crypto_compress,
-       .coa_decompress         = nx842_crypto_decompress } }
-};
-
-static int nx842_probe(struct vio_dev *viodev,
-                      const struct vio_device_id *id)
-{
-       struct nx842_devdata *old_devdata, *new_devdata = NULL;
-       unsigned long flags;
-       int ret = 0;
-
-       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-       if (!new_devdata)
-               return -ENOMEM;
-
-       new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
-                       GFP_NOFS);
-       if (!new_devdata->counters) {
-               kfree(new_devdata);
-               return -ENOMEM;
-       }
-
-       spin_lock_irqsave(&devdata_mutex, flags);
-       old_devdata = rcu_dereference_check(devdata,
-                       lockdep_is_held(&devdata_mutex));
-
-       if (old_devdata && old_devdata->vdev != NULL) {
-               dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
-               ret = -1;
-               goto error_unlock;
-       }
-
-       dev_set_drvdata(&viodev->dev, NULL);
-
-       new_devdata->vdev = viodev;
-       new_devdata->dev = &viodev->dev;
-       nx842_OF_set_defaults(new_devdata);
-
-       rcu_assign_pointer(devdata, new_devdata);
-       spin_unlock_irqrestore(&devdata_mutex, flags);
-       synchronize_rcu();
-       kfree(old_devdata);
-
-       of_reconfig_notifier_register(&nx842_of_nb);
-
-       ret = nx842_OF_upd(NULL);
-       if (ret)
-               goto error;
-
-       ret = crypto_register_alg(&nx842_pseries_alg);
-       if (ret) {
-               dev_err(&viodev->dev, "could not register comp alg: %d\n", ret);
-               goto error;
-       }
-
-       rcu_read_lock();
-       dev_set_drvdata(&viodev->dev, rcu_dereference(devdata));
-       rcu_read_unlock();
-
-       if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
-               dev_err(&viodev->dev, "could not create sysfs device attributes\n");
-               ret = -1;
-               goto error;
-       }
-
-       return 0;
-
-error_unlock:
-       spin_unlock_irqrestore(&devdata_mutex, flags);
-       if (new_devdata)
-               kfree(new_devdata->counters);
-       kfree(new_devdata);
-error:
-       return ret;
-}
-
-static void nx842_remove(struct vio_dev *viodev)
-{
-       struct nx842_devdata *old_devdata;
-       unsigned long flags;
-
-       pr_info("Removing IBM Power 842 compression device\n");
-       sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
-
-       crypto_unregister_alg(&nx842_pseries_alg);
-
-       spin_lock_irqsave(&devdata_mutex, flags);
-       old_devdata = rcu_dereference_check(devdata,
-                       lockdep_is_held(&devdata_mutex));
-       of_reconfig_notifier_unregister(&nx842_of_nb);
-       RCU_INIT_POINTER(devdata, NULL);
-       spin_unlock_irqrestore(&devdata_mutex, flags);
-       synchronize_rcu();
-       dev_set_drvdata(&viodev->dev, NULL);
-       if (old_devdata)
-               kfree(old_devdata->counters);
-       kfree(old_devdata);
-}
-
-static const struct vio_device_id nx842_vio_driver_ids[] = {
-       {"ibm,compression-v1", "ibm,compression"},
-       {"", ""},
-};
-MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids);
-
-static struct vio_driver nx842_vio_driver = {
-       .name = KBUILD_MODNAME,
-       .probe = nx842_probe,
-       .remove = nx842_remove,
-       .get_desired_dma = nx842_get_desired_dma,
-       .id_table = nx842_vio_driver_ids,
-};
-
-static int __init nx842_pseries_init(void)
-{
-       struct nx842_devdata *new_devdata;
-       int ret;
-
-       if (!of_find_compatible_node(NULL, NULL, "ibm,compression"))
-               return -ENODEV;
-
-       RCU_INIT_POINTER(devdata, NULL);
-       new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
-       if (!new_devdata)
-               return -ENOMEM;
-
-       RCU_INIT_POINTER(devdata, new_devdata);
-
-       ret = vio_register_driver(&nx842_vio_driver);
-       if (ret) {
-               pr_err("Could not register VIO driver %d\n", ret);
-
-               kfree(new_devdata);
-               return ret;
-       }
-
-       return 0;
-}
-
-module_init(nx842_pseries_init);
-
-static void __exit nx842_pseries_exit(void)
-{
-       struct nx842_devdata *old_devdata;
-       unsigned long flags;
-
-       crypto_unregister_alg(&nx842_pseries_alg);
-
-       spin_lock_irqsave(&devdata_mutex, flags);
-       old_devdata = rcu_dereference_check(devdata,
-                       lockdep_is_held(&devdata_mutex));
-       RCU_INIT_POINTER(devdata, NULL);
-       spin_unlock_irqrestore(&devdata_mutex, flags);
-       synchronize_rcu();
-       if (old_devdata && old_devdata->dev)
-               dev_set_drvdata(old_devdata->dev, NULL);
-       kfree(old_devdata);
-       vio_unregister_driver(&nx842_vio_driver);
-}
-
-module_exit(nx842_pseries_exit);
-
index 655361ba91070e47c655cf0f4eeb54680d82b1b0..32a036ada5d0ac25ed56a9f990477f13919b37c2 100644 (file)
@@ -1092,8 +1092,8 @@ static __init int nx_compress_powernv_init(void)
                 * normal FIFO priority is assigned for userspace.
                 * 842 compression is supported only in kernel.
                 */
-               ret = vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP,
-                                               "nx-gzip");
+               ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
+                                              "nx-gzip");
 
                /*
                 * GZIP is not supported in kernel right now.
@@ -1129,7 +1129,7 @@ static void __exit nx_compress_powernv_exit(void)
         * use. So delete this API use for GZIP engine.
         */
        if (!nx842_ct)
-               vas_unregister_coproc_api();
+               vas_unregister_api_powernv();
 
        crypto_unregister_alg(&nx842_powernv_alg);
 
diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
new file mode 100644 (file)
index 0000000..4e304f6
--- /dev/null
@@ -0,0 +1,1268 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver for IBM Power 842 compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ */
+
+#include <asm/vio.h>
+#include <asm/hvcall.h>
+#include <asm/vas.h>
+
+#include "nx-842.h"
+#include "nx_csbcpb.h" /* struct nx_csbcpb */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-nx");
+
+/*
+ * Coprocessor type specific capabilities from the hypervisor.
+ */
+struct hv_nx_cop_caps {
+       __be64  descriptor;
+       __be64  req_max_processed_len;  /* Max bytes in one GZIP request */
+       __be64  min_compress_len;       /* Min compression size in bytes */
+       __be64  min_decompress_len;     /* Min decompression size in bytes */
+} __packed __aligned(0x1000);
+
+/*
+ * Coprocessor type specific capabilities.
+ */
+struct nx_cop_caps {
+       u64     descriptor;
+       u64     req_max_processed_len;  /* Max bytes in one GZIP request */
+       u64     min_compress_len;       /* Min compression in bytes */
+       u64     min_decompress_len;     /* Min decompression in bytes */
+};
+
+static u64 caps_feat;
+static struct nx_cop_caps nx_cop_caps;
+
+static struct nx842_constraints nx842_pseries_constraints = {
+       .alignment =    DDE_BUFFER_ALIGN,
+       .multiple =     DDE_BUFFER_LAST_MULT,
+       .minimum =      DDE_BUFFER_LAST_MULT,
+       .maximum =      PAGE_SIZE, /* dynamic, max_sync_size */
+};
+
+static int check_constraints(unsigned long buf, unsigned int *len, bool in)
+{
+       if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
+               pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
+                        in ? "input" : "output", buf,
+                        nx842_pseries_constraints.alignment);
+               return -EINVAL;
+       }
+       if (*len % nx842_pseries_constraints.multiple) {
+               pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
+                        in ? "input" : "output", *len,
+                        nx842_pseries_constraints.multiple);
+               if (in)
+                       return -EINVAL;
+               *len = round_down(*len, nx842_pseries_constraints.multiple);
+       }
+       if (*len < nx842_pseries_constraints.minimum) {
+               pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
+                        in ? "input" : "output", *len,
+                        nx842_pseries_constraints.minimum);
+               return -EINVAL;
+       }
+       if (*len > nx842_pseries_constraints.maximum) {
+               pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
+                        in ? "input" : "output", *len,
+                        nx842_pseries_constraints.maximum);
+               if (in)
+                       return -EINVAL;
+               *len = nx842_pseries_constraints.maximum;
+       }
+       return 0;
+}
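+
+/*
+ * Behaviour sketch (editorial illustration, not driver code): inputs
+ * are only ever rejected, while outputs may be adjusted to fit:
+ *   - buffer misaligned    -> -EINVAL for input and output
+ *   - len not a multiple   -> -EINVAL for input, rounded down for output
+ *   - len below minimum    -> -EINVAL for input and output
+ *   - len above maximum    -> -EINVAL for input, clamped for output
+ */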
+
+/* I assume we need to align the CSB? */
+#define WORKMEM_ALIGN  (256)
+
+struct nx842_workmem {
+       /* scatterlist */
+       char slin[4096];
+       char slout[4096];
+       /* coprocessor status/parameter block */
+       struct nx_csbcpb csbcpb;
+
+       char padding[WORKMEM_ALIGN];
+} __aligned(WORKMEM_ALIGN);
+
+/* Macros for fields within nx_csbcpb */
+/* Check the valid bit within the csbcpb valid field */
+#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
+
+/* CE macros operate on the completion_extension field bits in the csbcpb.
+ * CE0 0=full completion, 1=partial completion
+ * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
+ * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
+#define NX842_CSBCPB_CE0(x)    (x & BIT_MASK(7))
+#define NX842_CSBCPB_CE1(x)    (x & BIT_MASK(6))
+#define NX842_CSBCPB_CE2(x)    (x & BIT_MASK(5))
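+
+/*
+ * Worked example of the CE bits (illustration only): BIT_MASK(7) is
+ * 0x80, so these macros test bits 7, 6 and 5 of completion_extension.
+ * For a value of 0xa0 (1010 0000b):
+ *   CE0 = 1 -> partial completion
+ *   CE1 = 0 -> CE0 indicates completion, not termination
+ *   CE2 = 1 -> processed_byte_count refers to target-buffer bytes
+ */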
+
+/* The NX unit accepts data only on 4K page boundaries */
+#define NX842_HW_PAGE_SIZE     (4096)
+#define NX842_HW_PAGE_MASK     (~(NX842_HW_PAGE_SIZE-1))
+
+struct ibm_nx842_counters {
+       atomic64_t comp_complete;
+       atomic64_t comp_failed;
+       atomic64_t decomp_complete;
+       atomic64_t decomp_failed;
+       atomic64_t swdecomp;
+       atomic64_t comp_times[32];
+       atomic64_t decomp_times[32];
+};
+
+static struct nx842_devdata {
+       struct vio_dev *vdev;
+       struct device *dev;
+       struct ibm_nx842_counters *counters;
+       unsigned int max_sg_len;
+       unsigned int max_sync_size;
+       unsigned int max_sync_sg;
+} __rcu *devdata;
+static DEFINE_SPINLOCK(devdata_mutex);
+
+#define NX842_COUNTER_INC(_x) \
+static inline void nx842_inc_##_x( \
+       const struct nx842_devdata *dev) { \
+       if (dev) \
+               atomic64_inc(&dev->counters->_x); \
+}
+NX842_COUNTER_INC(comp_complete);
+NX842_COUNTER_INC(comp_failed);
+NX842_COUNTER_INC(decomp_complete);
+NX842_COUNTER_INC(decomp_failed);
+NX842_COUNTER_INC(swdecomp);
+
+#define NX842_HIST_SLOTS 16
+
+static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
+{
+       int bucket = fls(time);
+
+       if (bucket)
+               bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
+
+       atomic64_inc(&times[bucket]);
+}
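+
+/*
+ * Bucketing sketch (editorial illustration): fls() returns the 1-based
+ * index of the highest set bit, so elapsed times land in power-of-two
+ * buckets, e.g.:
+ *   time = 0 or 1   -> bucket 0 (0-1 us)
+ *   time = 5        -> fls = 3  -> bucket 2 (4-7 us)
+ *   time = 70000    -> fls = 17 -> clamped to bucket NX842_HIST_SLOTS - 1
+ * matching the ranges printed by nx842_timehist_show() below.
+ */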
+
+/* NX unit operation flags */
+#define NX842_OP_COMPRESS      0x0
+#define NX842_OP_CRC           0x1
+#define NX842_OP_DECOMPRESS    0x2
+#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
+#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
+#define NX842_OP_ASYNC         (1<<23)
+#define NX842_OP_NOTIFY                (1<<22)
+#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8)
+
+static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
+{
+       /* No use of DMA mappings within the driver. */
+       return 0;
+}
+
+struct nx842_slentry {
+       __be64 ptr; /* Real address (use __pa()) */
+       __be64 len;
+};
+
+/* pHyp scatterlist entry */
+struct nx842_scatterlist {
+       int entry_nr; /* number of slentries */
+       struct nx842_slentry *entries; /* ptr to array of slentries */
+};
+
+/* Does not include sizeof(entry_nr) in the size */
+static inline unsigned long nx842_get_scatterlist_size(
+                               struct nx842_scatterlist *sl)
+{
+       return sl->entry_nr * sizeof(struct nx842_slentry);
+}
+
+static int nx842_build_scatterlist(unsigned long buf, int len,
+                       struct nx842_scatterlist *sl)
+{
+       unsigned long entrylen;
+       struct nx842_slentry *entry;
+
+       sl->entry_nr = 0;
+
+       entry = sl->entries;
+       while (len) {
+               entry->ptr = cpu_to_be64(nx842_get_pa((void *)buf));
+               entrylen = min_t(int, len,
+                                LEN_ON_SIZE(buf, NX842_HW_PAGE_SIZE));
+               entry->len = cpu_to_be64(entrylen);
+
+               len -= entrylen;
+               buf += entrylen;
+
+               sl->entry_nr++;
+               entry++;
+       }
+
+       return 0;
+}
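+
+/*
+ * Sketch with made-up values, assuming LEN_ON_SIZE() yields the bytes
+ * left to the end of the containing 4K page: a 10000-byte buffer
+ * starting 0x100 bytes into a page is split on NX842_HW_PAGE_SIZE
+ * boundaries into three slentries of 3840, 4096 and 2064 bytes.  The
+ * caller then passes the negated scatterlist size (see op.inlen below)
+ * to mark the DDE as indirect.
+ */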
+
+static int nx842_validate_result(struct device *dev,
+       struct cop_status_block *csb)
+{
+       /* The csb must be valid after returning from vio_h_cop_sync */
+       if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
+               dev_err(dev, "%s: cspcbp not valid upon completion.\n",
+                               __func__);
+               dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
+                               csb->valid,
+                               csb->crb_seq_number,
+                               csb->completion_code,
+                               csb->completion_extension);
+               dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
+                               be32_to_cpu(csb->processed_byte_count),
+                               (unsigned long)be64_to_cpu(csb->address));
+               return -EIO;
+       }
+
+       /* Check return values from the hardware in the CSB */
+       switch (csb->completion_code) {
+       case 0: /* Completed without error */
+               break;
+       case 64: /* Compression ok, but output larger than input */
+               dev_dbg(dev, "%s: output size larger than input size\n",
+                                       __func__);
+               break;
+       case 13: /* Output buffer too small */
+               dev_dbg(dev, "%s: Out of space in output buffer\n",
+                                       __func__);
+               return -ENOSPC;
+       case 65: /* Calculated CRC doesn't match the passed value */
+               dev_dbg(dev, "%s: CRC mismatch for decompression\n",
+                                       __func__);
+               return -EINVAL;
+       case 66: /* Input data contains an illegal template field */
+       case 67: /* Template indicates data past the end of the input stream */
+               dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
+                                       __func__, csb->completion_code);
+               return -EINVAL;
+       default:
+               dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
+                                       __func__, csb->completion_code);
+               return -EIO;
+       }
+
+       /* Hardware sanity check */
+       if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
+               dev_err(dev, "%s: No error returned by hardware, but "
+                               "data returned is unusable, contact support.\n"
+                               "(Additional info: csbcbp->processed bytes "
+                               "does not specify processed bytes for the "
+                               "target buffer.)\n", __func__);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/**
+ * nx842_pseries_compress - Compress data using the 842 algorithm
+ *
+ * Compression provided by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is compressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * compressed data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer
+ * @inlen: Length of input buffer
+ * @out: Pointer to output buffer
+ * @outlen: Length of output buffer
+ * @wmem: ptr to buffer for working memory, size determined by
+ *        nx842_pseries_driver.workmem_size
+ *
+ * Returns:
+ *   0         Success, output of length @outlen stored in the buffer at @out
+ *   -ENOMEM   Unable to allocate internal buffers
+ *   -ENOSPC   Output buffer is too small
+ *   -EIO      Internal error
+ *   -ENODEV   Hardware unavailable
+ */
+static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
+                                 unsigned char *out, unsigned int *outlen,
+                                 void *wmem)
+{
+       struct nx842_devdata *local_devdata;
+       struct device *dev = NULL;
+       struct nx842_workmem *workmem;
+       struct nx842_scatterlist slin, slout;
+       struct nx_csbcpb *csbcpb;
+       int ret = 0;
+       unsigned long inbuf, outbuf;
+       struct vio_pfo_op op = {
+               .done = NULL,
+               .handle = 0,
+               .timeout = 0,
+       };
+       unsigned long start = get_tb();
+
+       inbuf = (unsigned long)in;
+       if (check_constraints(inbuf, &inlen, true))
+               return -EINVAL;
+
+       outbuf = (unsigned long)out;
+       if (check_constraints(outbuf, outlen, false))
+               return -EINVAL;
+
+       rcu_read_lock();
+       local_devdata = rcu_dereference(devdata);
+       if (!local_devdata || !local_devdata->dev) {
+               rcu_read_unlock();
+               return -ENODEV;
+       }
+       dev = local_devdata->dev;
+
+       /* Init scatterlist */
+       workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
+       slin.entries = (struct nx842_slentry *)workmem->slin;
+       slout.entries = (struct nx842_slentry *)workmem->slout;
+
+       /* Init operation */
+       op.flags = NX842_OP_COMPRESS_CRC;
+       csbcpb = &workmem->csbcpb;
+       memset(csbcpb, 0, sizeof(*csbcpb));
+       op.csbcpb = nx842_get_pa(csbcpb);
+
+       if ((inbuf & NX842_HW_PAGE_MASK) ==
+           ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
+               /* Create direct DDE */
+               op.in = nx842_get_pa((void *)inbuf);
+               op.inlen = inlen;
+       } else {
+               /* Create indirect DDE (scatterlist) */
+               nx842_build_scatterlist(inbuf, inlen, &slin);
+               op.in = nx842_get_pa(slin.entries);
+               op.inlen = -nx842_get_scatterlist_size(&slin);
+       }
+
+       if ((outbuf & NX842_HW_PAGE_MASK) ==
+           ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
+               /* Create direct DDE */
+               op.out = nx842_get_pa((void *)outbuf);
+               op.outlen = *outlen;
+       } else {
+               /* Create indirect DDE (scatterlist) */
+               nx842_build_scatterlist(outbuf, *outlen, &slout);
+               op.out = nx842_get_pa(slout.entries);
+               op.outlen = -nx842_get_scatterlist_size(&slout);
+       }
+
+       dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n",
+               __func__, (unsigned long)op.in, (long)op.inlen,
+               (unsigned long)op.out, (long)op.outlen);
+
+       /* Send request to pHyp */
+       ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+       /* Check for pHyp error */
+       if (ret) {
+               dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+                       __func__, ret, op.hcall_err);
+               ret = -EIO;
+               goto unlock;
+       }
+
+       /* Check for hardware error */
+       ret = nx842_validate_result(dev, &csbcpb->csb);
+       if (ret)
+               goto unlock;
+
+       *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count);
+       dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen);
+
+unlock:
+       if (ret)
+               nx842_inc_comp_failed(local_devdata);
+       else {
+               nx842_inc_comp_complete(local_devdata);
+               ibm_nx842_incr_hist(local_devdata->counters->comp_times,
+                       (get_tb() - start) / tb_ticks_per_usec);
+       }
+       rcu_read_unlock();
+       return ret;
+}
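+
+/*
+ * Hypothetical caller sketch (not part of the driver), assuming the
+ * buffers already satisfy nx842_pseries_constraints:
+ *
+ *	static int example_compress(const unsigned char *src,
+ *				    unsigned int slen, unsigned char *dst,
+ *				    unsigned int dlen)
+ *	{
+ *		unsigned int outlen = dlen;
+ *		void *wmem;
+ *		int ret;
+ *
+ *		wmem = kmalloc(nx842_pseries_driver.workmem_size,
+ *			       GFP_KERNEL);
+ *		if (!wmem)
+ *			return -ENOMEM;
+ *		ret = nx842_pseries_compress(src, slen, dst, &outlen, wmem);
+ *		kfree(wmem);
+ *		return ret ? ret : outlen;
+ *	}
+ */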
+
+/**
+ * nx842_pseries_decompress - Decompress data using the 842 algorithm
+ *
+ * Decompression provided by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is decompressed and the result is stored in the
+ * provided output buffer.  The size allocated to the output buffer is
+ * provided by the caller of this function in @outlen.  Upon return from
+ * this function @outlen contains the length of the decompressed data.
+ * If there is an error then @outlen will be 0 and an error will be
+ * specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer
+ * @inlen: Length of input buffer
+ * @out: Pointer to output buffer
+ * @outlen: Length of output buffer
+ * @wmem: ptr to buffer for working memory, size determined by
+ *        nx842_pseries_driver.workmem_size
+ *
+ * Returns:
+ *   0         Success, output of length @outlen stored in the buffer at @out
+ *   -ENODEV   Hardware decompression device is unavailable
+ *   -ENOMEM   Unable to allocate internal buffers
+ *   -ENOSPC   Output buffer is too small
+ *   -EINVAL   Bad input data encountered when attempting decompress
+ *   -EIO      Internal error
+ */
+static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
+                                   unsigned char *out, unsigned int *outlen,
+                                   void *wmem)
+{
+       struct nx842_devdata *local_devdata;
+       struct device *dev = NULL;
+       struct nx842_workmem *workmem;
+       struct nx842_scatterlist slin, slout;
+       struct nx_csbcpb *csbcpb;
+       int ret = 0;
+       unsigned long inbuf, outbuf;
+       struct vio_pfo_op op = {
+               .done = NULL,
+               .handle = 0,
+               .timeout = 0,
+       };
+       unsigned long start = get_tb();
+
+       /* Ensure page alignment and size */
+       inbuf = (unsigned long)in;
+       if (check_constraints(inbuf, &inlen, true))
+               return -EINVAL;
+
+       outbuf = (unsigned long)out;
+       if (check_constraints(outbuf, outlen, false))
+               return -EINVAL;
+
+       rcu_read_lock();
+       local_devdata = rcu_dereference(devdata);
+       if (!local_devdata || !local_devdata->dev) {
+               rcu_read_unlock();
+               return -ENODEV;
+       }
+       dev = local_devdata->dev;
+
+       workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
+
+       /* Init scatterlist */
+       slin.entries = (struct nx842_slentry *)workmem->slin;
+       slout.entries = (struct nx842_slentry *)workmem->slout;
+
+       /* Init operation */
+       op.flags = NX842_OP_DECOMPRESS_CRC;
+       csbcpb = &workmem->csbcpb;
+       memset(csbcpb, 0, sizeof(*csbcpb));
+       op.csbcpb = nx842_get_pa(csbcpb);
+
+       if ((inbuf & NX842_HW_PAGE_MASK) ==
+           ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
+               /* Create direct DDE */
+               op.in = nx842_get_pa((void *)inbuf);
+               op.inlen = inlen;
+       } else {
+               /* Create indirect DDE (scatterlist) */
+               nx842_build_scatterlist(inbuf, inlen, &slin);
+               op.in = nx842_get_pa(slin.entries);
+               op.inlen = -nx842_get_scatterlist_size(&slin);
+       }
+
+       if ((outbuf & NX842_HW_PAGE_MASK) ==
+           ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
+               /* Create direct DDE */
+               op.out = nx842_get_pa((void *)outbuf);
+               op.outlen = *outlen;
+       } else {
+               /* Create indirect DDE (scatterlist) */
+               nx842_build_scatterlist(outbuf, *outlen, &slout);
+               op.out = nx842_get_pa(slout.entries);
+               op.outlen = -nx842_get_scatterlist_size(&slout);
+       }
+
+       dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n",
+               __func__, (unsigned long)op.in, (long)op.inlen,
+               (unsigned long)op.out, (long)op.outlen);
+
+       /* Send request to pHyp */
+       ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+       /* Check for pHyp error */
+       if (ret) {
+               dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+                       __func__, ret, op.hcall_err);
+               goto unlock;
+       }
+
+       /* Check for hardware error */
+       ret = nx842_validate_result(dev, &csbcpb->csb);
+       if (ret)
+               goto unlock;
+
+       *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count);
+
+unlock:
+       if (ret)
+               /* decompress fail */
+               nx842_inc_decomp_failed(local_devdata);
+       else {
+               nx842_inc_decomp_complete(local_devdata);
+               ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
+                       (get_tb() - start) / tb_ticks_per_usec);
+       }
+
+       rcu_read_unlock();
+       return ret;
+}
+
+/**
+ * nx842_OF_set_defaults -- Set default (disabled) values for devdata
+ *
+ * @devdata: struct nx842_devdata to update
+ *
+ * Returns:
+ *  0 on success
+ *  -ENOENT if @devdata ptr is NULL
+ */
+static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
+{
+       if (devdata) {
+               devdata->max_sync_size = 0;
+               devdata->max_sync_sg = 0;
+               devdata->max_sg_len = 0;
+               return 0;
+       } else
+               return -ENOENT;
+}
+
+/**
+ * nx842_OF_upd_status -- Check the device info from OF status prop
+ *
+ * The status property indicates whether the accelerator is enabled.
+ * Presence of the device node in the OF tree indicates that the
+ * hardware is present; the device is considered enabled when the
+ * status reads 'okay', otherwise the device driver will be disabled.
+ *
+ * @devdata: struct nx842_devdata to use for dev_info
+ * @prop: struct property pointer containing the status for the update
+ *
+ * Returns:
+ *  0 - Device is available
+ *  -ENODEV - Device is not available
+ */
+static int nx842_OF_upd_status(struct nx842_devdata *devdata,
+                              struct property *prop)
+{
+       const char *status = (const char *)prop->value;
+
+       if (!strncmp(status, "okay", (size_t)prop->length))
+               return 0;
+       if (!strncmp(status, "disabled", (size_t)prop->length))
+               return -ENODEV;
+       dev_info(devdata->dev, "%s: unknown status '%s'\n", __func__, status);
+
+       return -EINVAL;
+}
+
+/**
+ * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
+ *
+ * Definition of the 'ibm,max-sg-len' OF property:
+ *  This field indicates the maximum byte length of a scatter list
+ *  for the platform facility. It is a single cell encoded as with encode-int.
+ *
+ * Example:
+ *  # od -x ibm,max-sg-len
+ *  0000000 0000 0ff0
+ *
+ *  In this example, the maximum byte length of a scatter list is
+ *  0x0ff0 (4,080).
+ *
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property pointer containing the maxsglen for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
+                                       struct property *prop) {
+       int ret = 0;
+       const unsigned int maxsglen = of_read_number(prop->value, 1);
+
+       if (prop->length != sizeof(maxsglen)) {
+               dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
+               dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
+                               prop->length, sizeof(maxsglen));
+               ret = -EINVAL;
+       } else {
+               devdata->max_sg_len = min_t(unsigned int,
+                                           maxsglen, NX842_HW_PAGE_SIZE);
+       }
+
+       return ret;
+}
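+
+/*
+ * Worked example (from the od dump above): prop->value holds a single
+ * be32 cell, so of_read_number(prop->value, 1) yields 0x0ff0 (4,080),
+ * which min_t() then leaves untouched since it is below
+ * NX842_HW_PAGE_SIZE (4,096).
+ */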
+
+/**
+ * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
+ *
+ * Definition of the 'ibm,max-sync-cop' OF property:
+ *  Two series of cells.  The first series of cells represents the maximums
+ *  that can be synchronously compressed. The second series of cells
+ *  represents the maximums that can be synchronously decompressed.
+ *  1. The first cell in each series contains the count of the
+ *     (data length, scatter list elements) pairs that follow – each
+ *     being of the form
+ *    a. One cell data byte length
+ *    b. One cell total number of scatter list elements
+ *
+ * Example:
+ *  # od -x ibm,max-sync-cop
+ *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
+ *  0000020 0000 1000 0000 01fe
+ *
+ *  In this example, compression supports 0x1000 (4,096) data byte length
+ *  and 0x1fe (510) total scatter list elements.  Decompression supports
+ *  0x1000 (4,096) data byte length and 0x1fe (510) total scatter list
+ *  elements.
+ *
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property pointer containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
+                                       struct property *prop) {
+       int ret = 0;
+       unsigned int comp_data_limit, decomp_data_limit;
+       unsigned int comp_sg_limit, decomp_sg_limit;
+       const struct maxsynccop_t {
+               __be32 comp_elements;
+               __be32 comp_data_limit;
+               __be32 comp_sg_limit;
+               __be32 decomp_elements;
+               __be32 decomp_data_limit;
+               __be32 decomp_sg_limit;
+       } *maxsynccop;
+
+       if (prop->length != sizeof(*maxsynccop)) {
+               dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
+               dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
+                               sizeof(*maxsynccop));
+               ret = -EINVAL;
+               goto out;
+       }
+
+       maxsynccop = (const struct maxsynccop_t *)prop->value;
+       comp_data_limit = be32_to_cpu(maxsynccop->comp_data_limit);
+       comp_sg_limit = be32_to_cpu(maxsynccop->comp_sg_limit);
+       decomp_data_limit = be32_to_cpu(maxsynccop->decomp_data_limit);
+       decomp_sg_limit = be32_to_cpu(maxsynccop->decomp_sg_limit);
+
+       /* Use one limit rather than separate limits for compression and
+        * decompression. Set a maximum for this so as not to exceed the
+        * size that the header can support and round the value down to
+        * the hardware page size (4K) */
+       devdata->max_sync_size = min(comp_data_limit, decomp_data_limit);
+
+       devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
+                                       65536);
+
+       if (devdata->max_sync_size < 4096) {
+               dev_err(devdata->dev, "%s: hardware max data size (%u) is "
+                               "less than the driver minimum, unable to use "
+                               "the hardware device\n",
+                               __func__, devdata->max_sync_size);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       nx842_pseries_constraints.maximum = devdata->max_sync_size;
+
+       devdata->max_sync_sg = min(comp_sg_limit, decomp_sg_limit);
+       if (devdata->max_sync_sg < 1) {
+               dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
+                               "less than the driver minimum, unable to use "
+                               "the hardware device\n",
+                               __func__, devdata->max_sync_sg);
+               ret = -EINVAL;
+               goto out;
+       }
+
+out:
+       return ret;
+}
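+
+/*
+ * Decoding sketch for the od dump above, read as struct maxsynccop_t:
+ *   comp_data_limit = 0x1000   decomp_data_limit = 0x1000
+ *   comp_sg_limit   = 0x01fe   decomp_sg_limit   = 0x01fe
+ * giving max_sync_size = min(0x1000, 0x1000) = 4096 (exactly the
+ * driver minimum) and max_sync_sg = min(0x1fe, 0x1fe) = 510.
+ */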
+
+/**
+ * nx842_OF_upd -- Handle OF properties updates for the device.
+ *
+ * Set all properties from the OF tree.  Optionally, a new property
+ * can be provided by the @new_prop pointer to overwrite an existing value.
+ * The device will remain disabled until all values are valid; this
+ * function will return an error for updates unless all values are valid.
+ *
+ * @new_prop: If not NULL, this property is being updated.  If NULL, update
+ *  all properties from the current values in the OF tree.
+ *
+ * Returns:
+ *  0 - Success
+ *  -ENOMEM - Could not allocate memory for new devdata structure
+ *  -EINVAL - property value not found, new_prop is not a recognized
+ *     property for the device or property value is not valid.
+ *  -ENODEV - Device is not available
+ */
+static int nx842_OF_upd(struct property *new_prop)
+{
+       struct nx842_devdata *old_devdata = NULL;
+       struct nx842_devdata *new_devdata = NULL;
+       struct device_node *of_node = NULL;
+       struct property *status = NULL;
+       struct property *maxsglen = NULL;
+       struct property *maxsyncop = NULL;
+       int ret = 0;
+       unsigned long flags;
+
+       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+       if (!new_devdata)
+               return -ENOMEM;
+
+       spin_lock_irqsave(&devdata_mutex, flags);
+       old_devdata = rcu_dereference_check(devdata,
+                       lockdep_is_held(&devdata_mutex));
+       if (old_devdata)
+               of_node = old_devdata->dev->of_node;
+
+       if (!old_devdata || !of_node) {
+               pr_err("%s: device is not available\n", __func__);
+               spin_unlock_irqrestore(&devdata_mutex, flags);
+               kfree(new_devdata);
+               return -ENODEV;
+       }
+
+       memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
+       new_devdata->counters = old_devdata->counters;
+
+       /* Set ptrs for existing properties */
+       status = of_find_property(of_node, "status", NULL);
+       maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
+       maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
+       if (!status || !maxsglen || !maxsyncop) {
+               dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
+               ret = -EINVAL;
+               goto error_out;
+       }
+
+       /*
+        * If this is a property update, there are only certain properties that
+        * we care about. Bail if it isn't in the below list
+        */
+       if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
+                        strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
+                        strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+               goto out;
+
+       /* Perform property updates */
+       ret = nx842_OF_upd_status(new_devdata, status);
+       if (ret)
+               goto error_out;
+
+       ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
+       if (ret)
+               goto error_out;
+
+       ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
+       if (ret)
+               goto error_out;
+
+out:
+       dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
+                       __func__, new_devdata->max_sync_size,
+                       old_devdata->max_sync_size);
+       dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
+                       __func__, new_devdata->max_sync_sg,
+                       old_devdata->max_sync_sg);
+       dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
+                       __func__, new_devdata->max_sg_len,
+                       old_devdata->max_sg_len);
+
+       rcu_assign_pointer(devdata, new_devdata);
+       spin_unlock_irqrestore(&devdata_mutex, flags);
+       synchronize_rcu();
+       dev_set_drvdata(new_devdata->dev, new_devdata);
+       kfree(old_devdata);
+       return 0;
+
+error_out:
+       if (new_devdata) {
+               dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
+               nx842_OF_set_defaults(new_devdata);
+               rcu_assign_pointer(devdata, new_devdata);
+               spin_unlock_irqrestore(&devdata_mutex, flags);
+               synchronize_rcu();
+               dev_set_drvdata(new_devdata->dev, new_devdata);
+               kfree(old_devdata);
+       } else {
+               dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
+               spin_unlock_irqrestore(&devdata_mutex, flags);
+       }
+
+       if (!ret)
+               ret = -EINVAL;
+       return ret;
+}
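+
+/*
+ * Update pattern used above, summarised (editorial note): a classic
+ * RCU copy-update:
+ *   1. allocate new_devdata and copy *old_devdata under devdata_mutex
+ *   2. re-validate every OF property into the copy
+ *   3. rcu_assign_pointer(devdata, new_devdata) and unlock
+ *   4. synchronize_rcu(), then kfree(old_devdata)
+ * so readers under rcu_read_lock() only ever observe a devdata whose
+ * fields are consistent with each other.
+ */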
+
+/**
+ * nx842_OF_notifier - Process updates to OF properties for the device
+ *
+ * @np: notifier block
+ * @action: notifier action
+ * @data: struct of_reconfig_data pointer
+ *
+ * Returns:
+ *     NOTIFY_OK on success
+ *     NOTIFY_BAD encoded with error number on failure, use
+ *             notifier_to_errno() to decode this value
+ */
+static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
+                            void *data)
+{
+       struct of_reconfig_data *upd = data;
+       struct nx842_devdata *local_devdata;
+       struct device_node *node = NULL;
+
+       rcu_read_lock();
+       local_devdata = rcu_dereference(devdata);
+       if (local_devdata)
+               node = local_devdata->dev->of_node;
+
+       if (local_devdata &&
+                       action == OF_RECONFIG_UPDATE_PROPERTY &&
+                       !strcmp(upd->dn->name, node->name)) {
+               rcu_read_unlock();
+               nx842_OF_upd(upd->prop);
+       } else
+               rcu_read_unlock();
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block nx842_of_nb = {
+       .notifier_call = nx842_OF_notifier,
+};
+
+#define nx842_counter_read(_name)                                      \
+static ssize_t nx842_##_name##_show(struct device *dev,                \
+               struct device_attribute *attr,                          \
+               char *buf) {                                            \
+       struct nx842_devdata *local_devdata;                    \
+       int p = 0;                                                      \
+       rcu_read_lock();                                                \
+       local_devdata = rcu_dereference(devdata);                       \
+       if (local_devdata)                                              \
+               p = snprintf(buf, PAGE_SIZE, "%lld\n",                  \
+                      atomic64_read(&local_devdata->counters->_name)); \
+       rcu_read_unlock();                                              \
+       return p;                                                       \
+}
+
+#define NX842DEV_COUNTER_ATTR_RO(_name)                                        \
+       nx842_counter_read(_name);                                      \
+       static struct device_attribute dev_attr_##_name = __ATTR(_name, \
+                                               0444,                   \
+                                               nx842_##_name##_show,\
+                                               NULL);
+
+NX842DEV_COUNTER_ATTR_RO(comp_complete);
+NX842DEV_COUNTER_ATTR_RO(comp_failed);
+NX842DEV_COUNTER_ATTR_RO(decomp_complete);
+NX842DEV_COUNTER_ATTR_RO(decomp_failed);
+NX842DEV_COUNTER_ATTR_RO(swdecomp);
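+
+/*
+ * For reference, NX842DEV_COUNTER_ATTR_RO(comp_complete) expands to
+ * approximately:
+ *
+ *	static ssize_t nx842_comp_complete_show(struct device *dev,
+ *			struct device_attribute *attr, char *buf)
+ *	{
+ *		struct nx842_devdata *local_devdata;
+ *		int p = 0;
+ *		rcu_read_lock();
+ *		local_devdata = rcu_dereference(devdata);
+ *		if (local_devdata)
+ *			p = snprintf(buf, PAGE_SIZE, "%lld\n",
+ *			       atomic64_read(&local_devdata->counters->comp_complete));
+ *		rcu_read_unlock();
+ *		return p;
+ *	}
+ *	static struct device_attribute dev_attr_comp_complete =
+ *		__ATTR(comp_complete, 0444, nx842_comp_complete_show, NULL);
+ */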
+
+static ssize_t nx842_timehist_show(struct device *,
+               struct device_attribute *, char *);
+
+static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
+               nx842_timehist_show, NULL);
+static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
+               0444, nx842_timehist_show, NULL);
+
+static ssize_t nx842_timehist_show(struct device *dev,
+               struct device_attribute *attr, char *buf) {
+       char *p = buf;
+       struct nx842_devdata *local_devdata;
+       atomic64_t *times;
+       int bytes_remain = PAGE_SIZE;
+       int bytes;
+       int i;
+
+       rcu_read_lock();
+       local_devdata = rcu_dereference(devdata);
+       if (!local_devdata) {
+               rcu_read_unlock();
+               return 0;
+       }
+
+       if (attr == &dev_attr_comp_times)
+               times = local_devdata->counters->comp_times;
+       else if (attr == &dev_attr_decomp_times)
+               times = local_devdata->counters->decomp_times;
+       else {
+               rcu_read_unlock();
+               return 0;
+       }
+
+       for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
+               bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n",
+                              i ? (2<<(i-1)) : 0, (2<<i)-1,
+                              atomic64_read(&times[i]));
+               bytes_remain -= bytes;
+               p += bytes;
+       }
+       /* The last bucket holds everything over
+        * 2<<(NX842_HIST_SLOTS - 2) us */
+       bytes = snprintf(p, bytes_remain, "%uus - :\t%lld\n",
+                       2<<(NX842_HIST_SLOTS - 2),
+                       atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
+       p += bytes;
+
+       rcu_read_unlock();
+       return p - buf;
+}
+
+static struct attribute *nx842_sysfs_entries[] = {
+       &dev_attr_comp_complete.attr,
+       &dev_attr_comp_failed.attr,
+       &dev_attr_decomp_complete.attr,
+       &dev_attr_decomp_failed.attr,
+       &dev_attr_swdecomp.attr,
+       &dev_attr_comp_times.attr,
+       &dev_attr_decomp_times.attr,
+       NULL,
+};
+
+static struct attribute_group nx842_attribute_group = {
+       .name = NULL,           /* put in device directory */
+       .attrs = nx842_sysfs_entries,
+};
+
+#define        nxcop_caps_read(_name)                                          \
+static ssize_t nxcop_##_name##_show(struct device *dev,                        \
+                       struct device_attribute *attr, char *buf)       \
+{                                                                      \
+       return sprintf(buf, "%lld\n", nx_cop_caps._name);               \
+}
+
+#define NXCT_ATTR_RO(_name)                                            \
+       nxcop_caps_read(_name);                                         \
+       static struct device_attribute dev_attr_##_name = __ATTR(_name, \
+                                               0444,                   \
+                                               nxcop_##_name##_show,   \
+                                               NULL);
+
+NXCT_ATTR_RO(req_max_processed_len);
+NXCT_ATTR_RO(min_compress_len);
+NXCT_ATTR_RO(min_decompress_len);
+
+static struct attribute *nxcop_caps_sysfs_entries[] = {
+       &dev_attr_req_max_processed_len.attr,
+       &dev_attr_min_compress_len.attr,
+       &dev_attr_min_decompress_len.attr,
+       NULL,
+};
+
+static struct attribute_group nxcop_caps_attr_group = {
+       .name   =       "nx_gzip_caps",
+       .attrs  =       nxcop_caps_sysfs_entries,
+};
+
+static struct nx842_driver nx842_pseries_driver = {
+       .name =         KBUILD_MODNAME,
+       .owner =        THIS_MODULE,
+       .workmem_size = sizeof(struct nx842_workmem),
+       .constraints =  &nx842_pseries_constraints,
+       .compress =     nx842_pseries_compress,
+       .decompress =   nx842_pseries_decompress,
+};
+
+static int nx842_pseries_crypto_init(struct crypto_tfm *tfm)
+{
+       return nx842_crypto_init(tfm, &nx842_pseries_driver);
+}
+
+static struct crypto_alg nx842_pseries_alg = {
+       .cra_name               = "842",
+       .cra_driver_name        = "842-nx",
+       .cra_priority           = 300,
+       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
+       .cra_ctxsize            = sizeof(struct nx842_crypto_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_init               = nx842_pseries_crypto_init,
+       .cra_exit               = nx842_crypto_exit,
+       .cra_u                  = { .compress = {
+       .coa_compress           = nx842_crypto_compress,
+       .coa_decompress         = nx842_crypto_decompress } }
+};
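+
+/*
+ * Usage sketch (hypothetical, in-kernel): once registered, the
+ * compressor is reachable through the crypto compression API:
+ *
+ *	struct crypto_comp *tfm = crypto_alloc_comp("842", 0, 0);
+ *
+ *	if (!IS_ERR(tfm)) {
+ *		ret = crypto_comp_compress(tfm, src, slen, dst, &dlen);
+ *		crypto_free_comp(tfm);
+ *	}
+ */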
+
+static int nx842_probe(struct vio_dev *viodev,
+                      const struct vio_device_id *id)
+{
+       struct nx842_devdata *old_devdata, *new_devdata = NULL;
+       unsigned long flags;
+       int ret = 0;
+
+       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+       if (!new_devdata)
+               return -ENOMEM;
+
+       new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
+                       GFP_NOFS);
+       if (!new_devdata->counters) {
+               kfree(new_devdata);
+               return -ENOMEM;
+       }
+
+       spin_lock_irqsave(&devdata_mutex, flags);
+       old_devdata = rcu_dereference_check(devdata,
+                       lockdep_is_held(&devdata_mutex));
+
+       if (old_devdata && old_devdata->vdev != NULL) {
+               dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
+               ret = -1;
+               goto error_unlock;
+       }
+
+       dev_set_drvdata(&viodev->dev, NULL);
+
+       new_devdata->vdev = viodev;
+       new_devdata->dev = &viodev->dev;
+       nx842_OF_set_defaults(new_devdata);
+
+       rcu_assign_pointer(devdata, new_devdata);
+       spin_unlock_irqrestore(&devdata_mutex, flags);
+       synchronize_rcu();
+       kfree(old_devdata);
+
+       of_reconfig_notifier_register(&nx842_of_nb);
+
+       ret = nx842_OF_upd(NULL);
+       if (ret)
+               goto error;
+
+       ret = crypto_register_alg(&nx842_pseries_alg);
+       if (ret) {
+               dev_err(&viodev->dev, "could not register comp alg: %d\n", ret);
+               goto error;
+       }
+
+       rcu_read_lock();
+       dev_set_drvdata(&viodev->dev, rcu_dereference(devdata));
+       rcu_read_unlock();
+
+       if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
+               dev_err(&viodev->dev, "could not create sysfs device attributes\n");
+               ret = -1;
+               goto error;
+       }
+
+       if (caps_feat) {
+               if (sysfs_create_group(&viodev->dev.kobj,
+                                       &nxcop_caps_attr_group)) {
+                       dev_err(&viodev->dev,
+                               "Could not create sysfs NX capability entries\n");
+                       ret = -1;
+                       goto error;
+               }
+       }
+
+       return 0;
+
+error_unlock:
+       spin_unlock_irqrestore(&devdata_mutex, flags);
+       if (new_devdata)
+               kfree(new_devdata->counters);
+       kfree(new_devdata);
+error:
+       return ret;
+}
+
+static void nx842_remove(struct vio_dev *viodev)
+{
+       struct nx842_devdata *old_devdata;
+       unsigned long flags;
+
+       pr_info("Removing IBM Power 842 compression device\n");
+       sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
+
+       if (caps_feat)
+               sysfs_remove_group(&viodev->dev.kobj, &nxcop_caps_attr_group);
+
+       crypto_unregister_alg(&nx842_pseries_alg);
+
+       spin_lock_irqsave(&devdata_mutex, flags);
+       old_devdata = rcu_dereference_check(devdata,
+                       lockdep_is_held(&devdata_mutex));
+       of_reconfig_notifier_unregister(&nx842_of_nb);
+       RCU_INIT_POINTER(devdata, NULL);
+       spin_unlock_irqrestore(&devdata_mutex, flags);
+       synchronize_rcu();
+       dev_set_drvdata(&viodev->dev, NULL);
+       if (old_devdata)
+               kfree(old_devdata->counters);
+       kfree(old_devdata);
+}
+
+/*
+ * Get NX capabilities from the hypervisor.
+ * Only NX-GZIP capabilities are provided by the hypervisor right
+ * now, and these values are exposed to user space through sysfs.
+ */
+static void __init nxcop_get_capabilities(void)
+{
+       struct hv_vas_all_caps *hv_caps;
+       struct hv_nx_cop_caps *hv_nxc;
+       int rc;
+
+       hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+       if (!hv_caps)
+               return;
+       /*
+        * Get NX overall capabilities with feature type=0
+        */
+       rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, 0,
+                                         (u64)virt_to_phys(hv_caps));
+       if (rc)
+               goto out;
+
+       caps_feat = be64_to_cpu(hv_caps->feat_type);
+       /*
+        * NX-GZIP feature available
+        */
+       if (caps_feat & VAS_NX_GZIP_FEAT_BIT) {
+               hv_nxc = kmalloc(sizeof(*hv_nxc), GFP_KERNEL);
+               if (!hv_nxc)
+                       goto out;
+               /*
+                * Get capabilities for NX-GZIP feature
+                */
+               rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES,
+                                                 VAS_NX_GZIP_FEAT,
+                                                 (u64)virt_to_phys(hv_nxc));
+       } else {
+               pr_err("NX-GZIP feature is not available\n");
+               rc = -EINVAL;
+       }
+
+       if (!rc) {
+               nx_cop_caps.descriptor = be64_to_cpu(hv_nxc->descriptor);
+               nx_cop_caps.req_max_processed_len =
+                               be64_to_cpu(hv_nxc->req_max_processed_len);
+               nx_cop_caps.min_compress_len =
+                               be64_to_cpu(hv_nxc->min_compress_len);
+               nx_cop_caps.min_decompress_len =
+                               be64_to_cpu(hv_nxc->min_decompress_len);
+       } else {
+               caps_feat = 0;
+       }
+
+       kfree(hv_nxc);
+out:
+       kfree(hv_caps);
+}
+
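The capabilities read here are what the driver exports through the
nx_gzip_caps sysfs group. As a hedged userspace sketch (the sysfs path is
taken from the selftest later in this series; error handling is
deliberately minimal), the exported maximum request length can be read
like this:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            char buf[32];
            FILE *f = fopen("/sys/devices/vio/ibm,compression-v1/"
                            "nx_gzip_caps/req_max_processed_len", "r");

            if (!f)
                    return 1;
            if (!fgets(buf, sizeof(buf), f)) {
                    fclose(f);
                    return 1;
            }
            fclose(f);
            printf("max request length: %lu bytes\n",
                   strtoul(buf, NULL, 10));
            return 0;
    }
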
+static const struct vio_device_id nx842_vio_driver_ids[] = {
+       {"ibm,compression-v1", "ibm,compression"},
+       {"", ""},
+};
+MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids);
+
+static struct vio_driver nx842_vio_driver = {
+       .name = KBUILD_MODNAME,
+       .probe = nx842_probe,
+       .remove = nx842_remove,
+       .get_desired_dma = nx842_get_desired_dma,
+       .id_table = nx842_vio_driver_ids,
+};
+
+static int __init nx842_pseries_init(void)
+{
+       struct nx842_devdata *new_devdata;
+       struct device_node *np;
+       int ret;
+
+       /* Presence check only; drop the node reference straight away. */
+       np = of_find_compatible_node(NULL, NULL, "ibm,compression");
+       if (!np)
+               return -ENODEV;
+       of_node_put(np);
+
+       RCU_INIT_POINTER(devdata, NULL);
+       new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
+       if (!new_devdata)
+               return -ENOMEM;
+
+       RCU_INIT_POINTER(devdata, new_devdata);
+       /*
+        * Get NX capabilities from the hypervisor.
+        */
+       nxcop_get_capabilities();
+
+       ret = vio_register_driver(&nx842_vio_driver);
+       if (ret) {
+               pr_err("Could not register VIO driver: %d\n", ret);
+
+               kfree(new_devdata);
+               return ret;
+       }
+
+       ret = vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP,
+                                      "nx-gzip");
+
+       if (ret)
+               pr_err("NX-GZIP is not supported: %d\n", ret);
+
+       return 0;
+}
+
+module_init(nx842_pseries_init);
+
+static void __exit nx842_pseries_exit(void)
+{
+       struct nx842_devdata *old_devdata;
+       unsigned long flags;
+
+       vas_unregister_api_pseries();
+
+       crypto_unregister_alg(&nx842_pseries_alg);
+
+       spin_lock_irqsave(&devdata_mutex, flags);
+       old_devdata = rcu_dereference_check(devdata,
+                       lockdep_is_held(&devdata_mutex));
+       RCU_INIT_POINTER(devdata, NULL);
+       spin_unlock_irqrestore(&devdata_mutex, flags);
+       synchronize_rcu();
+       if (old_devdata && old_devdata->dev)
+               dev_set_drvdata(old_devdata->dev, NULL);
+       kfree(old_devdata);
+       vio_unregister_driver(&nx842_vio_driver);
+}
+
+module_exit(nx842_pseries_exit);
+
index e34ae6a442c7b1c83cbab4d730895bffb38c2762..6328abd51ffad46cd5112acc47dba8708aa0c085 100644 (file)
@@ -358,7 +358,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev,
                ps3_mm_phys_to_lpar(__pa(buf)), bytes, bytes_written);
 
        if (result) {
-               dev_dbg(&dev->core, "%s:%d: lv1_write_virtual_uart failed: "
+               dev_warn(&dev->core, "%s:%d: lv1_write_virtual_uart failed: "
                        "%s\n", __func__, __LINE__, ps3_result(result));
                return result;
        }
index 9d66257e1da5c0b37607a34d49711b30efec2879..516e6d14d32e88d5b090a09df3fd5e10ace41bd2 100644 (file)
@@ -217,9 +217,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf,
        /* send pkt */
        res = ps3av_vuart_write(ps3av->dev, send_buf, write_len);
        if (res < 0) {
-               dev_dbg(&ps3av->dev->core,
-                       "%s: ps3av_vuart_write() failed (result=%d)\n",
-                       __func__, res);
+               dev_warn(&ps3av->dev->core,
+                       "%s:%d: ps3av_vuart_write() failed: %s\n", __func__,
+                       __LINE__, ps3_result(res));
                return res;
        }
 
@@ -230,9 +230,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf,
                res = ps3av_vuart_read(ps3av->dev, recv_buf, PS3AV_HDR_SIZE,
                                       timeout);
                if (res != PS3AV_HDR_SIZE) {
-                       dev_dbg(&ps3av->dev->core,
-                               "%s: ps3av_vuart_read() failed (result=%d)\n",
-                               __func__, res);
+                       dev_warn(&ps3av->dev->core,
+                               "%s:%d: ps3av_vuart_read() failed: %s\n", __func__,
+                               __LINE__, ps3_result(res));
                        return res;
                }
 
@@ -240,9 +240,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf,
                res = ps3av_vuart_read(ps3av->dev, &recv_buf->cid,
                                       recv_buf->size, timeout);
                if (res < 0) {
-                       dev_dbg(&ps3av->dev->core,
-                               "%s: ps3av_vuart_read() failed (result=%d)\n",
-                               __func__, res);
+                       dev_warn(&ps3av->dev->core,
+                               "%s:%d: ps3av_vuart_read() failed: %s\n", __func__,
+                               __LINE__, ps3_result(res));
                        return res;
                }
                res += PS3AV_HDR_SIZE;  /* total len */
@@ -251,8 +251,8 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf,
        } while (event);
 
        if ((cmd | PS3AV_REPLY_BIT) != recv_buf->cid) {
-               dev_dbg(&ps3av->dev->core, "%s: reply err (result=%x)\n",
-                       __func__, recv_buf->cid);
+               dev_warn(&ps3av->dev->core, "%s:%d: reply err: %x\n", __func__,
+                       __LINE__, recv_buf->cid);
                return -EINVAL;
        }
 
index 798f27f40cc2d97302d68dda833ad502530a7f9f..72b11aa7e0a68e25f80908bde82e926c24d5ac2b 100644 (file)
@@ -249,7 +249,7 @@ static void udbg_hvc_putc(char c)
                        count = hvterm_hvsi_put_chars(0, &c, 1);
                        break;
                }
-       } while(count == 0);
+       } while (count == 0 || count == -EAGAIN);
 }
 
 static int udbg_hvc_getc_poll(void)
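The change above makes the hvc debug console keep retrying when the
hypervisor returns -EAGAIN instead of dropping the character. Purely as an
illustration (put_one() is a hypothetical stand-in for the real put_chars
backends, not a kernel API), a bounded variant of the same loop would be:

    /* Retry while the backend makes no progress (0) or asks us to try
     * again (-EAGAIN), but give up eventually rather than spinning
     * forever on a wedged console. */
    static void putc_retry(char c)
    {
            unsigned int tries = 1000000;
            int count;

            do {
                    count = put_one(&c, 1);    /* hypothetical backend */
            } while ((count == 0 || count == -EAGAIN) && --tries);
    }
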
index 4d0c28c2ba124cdcf2aa36cfd0faf7cd105891e1..e4f3bfe0875709ac3db32db6509e6aa20dcc7170 100644 (file)
@@ -400,6 +400,9 @@ void dump_kprobe(struct kprobe *kp);
 
 void *alloc_insn_page(void);
 
+void *alloc_optinsn_page(void);
+void free_optinsn_page(void *page);
+
 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
                       char *sym);
 
index 297dc8bbe33398e21d13b5094a3cdb9015e071e6..471b1d18a92fc17e5df6a63122dbeb6a260d8fce 100644 (file)
@@ -321,11 +321,21 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
 }
 
 #ifdef CONFIG_OPTPROBES
+void __weak *alloc_optinsn_page(void)
+{
+       return alloc_insn_page();
+}
+
+void __weak free_optinsn_page(void *page)
+{
+       free_insn_page(page);
+}
+
 /* For optimized_kprobe buffer */
 struct kprobe_insn_cache kprobe_optinsn_slots = {
        .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
-       .alloc = alloc_insn_page,
-       .free = free_insn_page,
+       .alloc = alloc_optinsn_page,
+       .free = free_optinsn_page,
        .sym = KPROBE_OPTINSN_PAGE_SYM,
        .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
        /* .insn_size is initialized later */
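The __weak definitions above give the optprobe slot cache a default
allocator that an architecture overrides simply by providing non-weak
symbols of the same name; that is what lets powerpc supply its own optinsn
buffer in this series. A sketch of an arch-side override (the
module_alloc()-based body is illustrative, not powerpc's actual
implementation):

    #include <linux/moduleloader.h>

    /* A non-weak definition overrides the __weak generic fallback at
     * link time; no registration call is needed. */
    void *alloc_optinsn_page(void)
    {
            return module_alloc(PAGE_SIZE);
    }

    void free_optinsn_page(void *page)
    {
            module_memfree(page);
    }
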
index 579f0215c6e72325799030a6d392e2401f581edb..9836838a529f68681b3090ba6f9ddddcc853d15f 100644 (file)
@@ -14,6 +14,7 @@
 #include <time.h>
 #include <sys/types.h>
 #include <sys/time.h>
+#include <sys/syscall.h>
 #include <signal.h>
 
 static volatile int soak_done;
@@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr)
        unsigned long i;
 
        for (i = 0; i < nr; i++)
-               getppid();
+               syscall(__NR_gettid);
 }
 
 #define TIME(A, STR) \
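Replacing getppid() with a raw syscall(__NR_gettid) presumably guarantees a
kernel entry on every iteration, independent of any libc-level shortcuts or
interception; gettid also lacks a libc wrapper on older glibc, hence
syscall(2). A self-contained sketch of the same measurement idea:

    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
            const unsigned long n = 1000000;
            struct timespec a, b;
            unsigned long i;

            clock_gettime(CLOCK_MONOTONIC, &a);
            for (i = 0; i < n; i++)
                    syscall(__NR_gettid);   /* guaranteed kernel entry */
            clock_gettime(CLOCK_MONOTONIC, &b);

            printf("%.1f ns per null syscall\n",
                   ((b.tv_sec - a.tv_sec) * 1e9 +
                    (b.tv_nsec - a.tv_nsec)) / n);
            return 0;
    }
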
index 640fad6cc2c79c43f6706e81cc584685c4903982..0785c2e99d407e91d0d411ba9779912fc1d15ce2 100644 (file)
@@ -1,8 +1,8 @@
-CFLAGS = -O3 -m64 -I./include
+CFLAGS = -O3 -m64 -I./include -I../include
 
 TEST_GEN_FILES := gzfht_test gunz_test
 TEST_PROGS := nx-gzip-test.sh
 
 include ../../lib.mk
 
-$(TEST_GEN_FILES): gzip_vas.c
+$(TEST_GEN_FILES): gzip_vas.c ../utils.c
index b099753b50e4af4cf6fea2aa2ef4c915013eb7d3..095195a25687e7837f98bf55d4bf3a4901e04870 100644 (file)
@@ -60,6 +60,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <signal.h>
+#include "utils.h"
 #include "nxu.h"
 #include "nx.h"
 
@@ -70,6 +71,8 @@ FILE *nx_gzip_log;
 #define FNAME_MAX 1024
 #define FEXT ".nx.gz"
 
+#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
+
 /*
  * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
  */
@@ -244,6 +247,7 @@ int compress_file(int argc, char **argv, void *handle)
        struct nx_gzip_crb_cpb_t *cmdp;
        uint32_t pagelen = 65536;
        int fault_tries = NX_MAX_FAULTS;
+       char buf[32];
 
        cmdp = (void *)(uintptr_t)
                aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
@@ -263,8 +267,17 @@ int compress_file(int argc, char **argv, void *handle)
        assert(NULL != (outbuf = (char *)malloc(outlen)));
        nxu_touch_pages(outbuf, outlen, pagelen, 1);
 
-       /* Compress piecemeal in smallish chunks */
-       chunk = 1<<22;
+       /*
+        * On PowerVM, the hypervisor defines the maximum request buffer
+        * size, and this value is available via sysfs.
+        */
+       if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
+               chunk = atoi(buf);
+       } else {
+               /* sysfs entry is not available on PowerNV */
+               /* Compress piecemeal in smallish chunks */
+               chunk = 1<<22;
+       }
 
        /* Write the gzip header to the stream */
        num_hdr_bytes = gzip_header_blank(outbuf);
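read_sysfs_file() comes from the shared powerpc selftest utils.c that the
Makefile change above links in; judging by the relative path passed to it,
it resolves its argument against /sys. A plausible shape for such a helper
(an assumption about the real helper, sketched here for clarity):

    #include <fcntl.h>
    #include <limits.h>
    #include <string.h>
    #include <unistd.h>

    /* Assumed helper: read a sysfs file given a path relative to /sys,
     * returning 0 on success with a NUL-terminated copy in result. */
    int read_sysfs_file(char *fpath, char *result, size_t result_size)
    {
            char path[PATH_MAX] = "/sys/";
            ssize_t rc;
            int fd;

            strncat(path, fpath, sizeof(path) - strlen(path) - 1);
            fd = open(path, O_RDONLY);
            if (fd < 0)
                    return -1;
            rc = read(fd, result, result_size - 1);
            close(fd);
            if (rc < 0)
                    return -1;
            result[rc] = '\0';
            return 0;
    }
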
index c5ecb46340948f05bd22932f98ea7ce5d9c20149..0101606902272c480ab285af0bf91014d1c74d47 100644 (file)
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test   \
         fork_cleanup_test ebb_on_child_test                    \
         ebb_on_willing_child_test back_to_back_ebbs_test       \
         lost_exception_test no_handler_test                    \
-        cycles_with_mmcr2_test
+        cycles_with_mmcr2_test regs_access_pmccext_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
index b5bc2b616075d0407dc24399777917b028d27a66..2c803b5b48d63b580d56527ba3bef3c2c0605d65 100644 (file)
@@ -55,8 +55,6 @@ void ebb_global_disable(void);
 bool ebb_is_supported(void);
 void ebb_freeze_pmcs(void);
 void ebb_unfreeze_pmcs(void);
-void event_ebb_init(struct event *e);
-void event_leader_ebb_init(struct event *e);
 int count_pmc(int pmc, uint32_t sample_period);
 void dump_ebb_state(void);
 void dump_summary_ebb_state(void);
index fc5bf4870d8e65f4cf582fbcd69cf42eae3a5571..01e827c31169dbea8f892aa807df882862008389 100644 (file)
@@ -50,8 +50,6 @@ static int no_handler_test(void)
 
        event_close(&event);
 
-       dump_ebb_state();
-
        /* The real test is that we never took an EBB at 0x0 */
 
        return 0;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644 (file)
index 0000000..1eda8e9
--- /dev/null
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+       struct event event;
+
+       SKIP_IF(!ebb_is_supported());
+
+       event_init_named(&event, 0x1001e, "cycles");
+       event_leader_ebb_init(&event);
+
+       FAIL_IF(event_open(&event));
+
+       ebb_enable_pmc_counting(1);
+       setup_ebb_handler(standard_ebb_callee);
+       ebb_global_enable();
+       FAIL_IF(ebb_event_enable(&event));
+
+       mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+       while (ebb_state.stats.ebb_count < 1)
+               FAIL_IF(core_busy_loop());
+
+       ebb_global_disable();
+       event_close(&event);
+
+       FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+       /*
+        * On ISA v3.1, reading the group B PMU regs after the event is
+        * closed should raise SIGILL: closing the event clears MMCR0_PMCC
+        * and sets MMCR0_PMCCEXT, which restricts access to those regs.
+        */
+       if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+               FAIL_IF(catch_sigill(dump_ebb_state));
+       else
+               dump_ebb_state();
+
+       return 0;
+}
+
+int main(void)
+{
+       return test_harness(regs_access_pmccext, "regs_access_pmccext");
+}
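catch_sigill() here is part of the selftest harness: the test passes it
dump_ebb_state() and expects the group B register reads inside to fault on
ISA v3.1. A hedged sketch of how such a helper can be built with sigsetjmp
(illustrative, not the harness's actual implementation):

    #include <setjmp.h>
    #include <signal.h>

    static sigjmp_buf sigill_env;

    static void sigill_handler(int sig)
    {
            siglongjmp(sigill_env, 1);
    }

    /* Run fn() and return 0 only if it raises SIGILL. */
    static int catch_sigill_sketch(void (*fn)(void))
    {
            struct sigaction sa = { .sa_handler = sigill_handler };

            sigaction(SIGILL, &sa, NULL);
            if (sigsetjmp(sigill_env, 1))
                    return 0;   /* SIGILL delivered, as expected */
            fn();
            return 1;           /* no SIGILL: access unexpectedly worked */
    }
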
index 844d18cd5f930360cf193a30e773773f67a7d94d..7488315fd8474434d8bcca38b00f28255f4a96df 100644 (file)
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0+
 
 TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
+TEST_PROGS := mitigation-patching.sh
+
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
new file mode 100755 (executable)
index 0000000..00197ac
--- /dev/null
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+TIMEOUT=10
+
+function do_one
+{
+    local mitigation="$1"
+    local orig
+    local start
+    local now
+
+    orig=$(cat "$mitigation")
+
+    start=$EPOCHSECONDS
+    now=$start
+
+    while [[ $((now-start)) -lt "$TIMEOUT" ]]
+    do
+        echo 0 > "$mitigation"
+        echo 1 > "$mitigation"
+
+        now=$EPOCHSECONDS
+    done
+
+    echo "$orig" > "$mitigation"
+}
+
+if ! cd /sys/kernel/debug/powerpc; then
+    echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2
+    exit 1
+fi
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+    echo "Error: kernel already tainted!" >&2
+    exit 1
+fi
+
+mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush"
+
+for m in $mitigations
+do
+    do_one "$m" &
+done
+
+echo "Spawned threads enabling/disabling mitigations ..."
+
+if command -v stress-ng > /dev/null 2>&1; then
+    stress="stress-ng"
+elif command -v stress > /dev/null 2>&1; then
+    stress="stress"
+else
+    stress=""
+fi
+
+if [[ -n "$stress" ]]; then
+    "$stress" -m "$(nproc)" -t "$TIMEOUT" &
+    echo "Spawned VM stressors ..."
+fi
+
+echo "Waiting for timeout ..."
+wait
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+    echo "Error: kernel became tainted!" >&2
+    exit 1
+fi
+
+echo "OK"
+exit 0
index e2a0c07e836253086afd1924116593dec5fc45ea..9ef37a9836ac65a8bad245be00b4be6cf3f7bb4b 100644 (file)
@@ -17,7 +17,6 @@
 #include <pthread.h>
 #include <sys/mman.h>
 #include <unistd.h>
-#include <pthread.h>
 
 #include "tm.h"
 #include "utils.h"