Merge tag 'powerpc-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Feb 2018 18:01:04 +0000 (10:01 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Feb 2018 18:01:04 +0000 (10:01 -0800)
Pull powerpc updates from Michael Ellerman:
 "Highlights:

   - Enable support for memory protection keys aka "pkeys" on Power7/8/9
     when using the hash table MMU.

   - Extend our interrupt soft masking to support masking PMU interrupts
     as well as "normal" interrupts, and then use that to implement
     local_t for a ~4x speedup vs the current atomics-based
     implementation.

   - A new driver "ocxl" for "Open Coherent Accelerator Processor
     Interface (OpenCAPI)" devices.

   - Support for new device tree properties on PowerVM to describe
     hotpluggable memory and devices.

   - Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE to the 64-bit
     VDSO.

   - Freescale updates from Scott: fixes for CPM GPIO and an FSL PCI
     erratum workaround, plus a minor cleanup patch.

  As well as quite a lot of other changes all over the place, and small
  fixes and cleanups as always.

  Thanks to: Alan Modra, Alastair D'Silva, Alexey Kardashevskiy,
  Alistair Popple, Andreas Schwab, Andrew Donnellan, Aneesh Kumar K.V,
  Anju T Sudhakar, Anshuman Khandual, Anton Blanchard, Arnd Bergmann,
  Balbir Singh, Benjamin Herrenschmidt, Bhaktipriya Shridhar, Bryant G.
  Ly, Cédric Le Goater, Christophe Leroy, Christophe Lombard, Cyril Bur,
  David Gibson, Desnes A. Nunes do Rosario, Dmitry Torokhov, Frederic
  Barrat, Geert Uytterhoeven, Guilherme G. Piccoli, Gustavo A. R. Silva,
  Gustavo Romero, Ivan Mikhaylov, Joakim Tjernlund, Joe Perches, Josh
  Poimboeuf, Juan J. Alvarez, Julia Cartwright, Kamalesh Babulal,
  Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu Malaterre, Michael
  Bringmann, Michael Hanselmann, Michael Neuling, Nathan Fontenot,
  Naveen N. Rao, Nicholas Piggin, Paul Mackerras, Philippe Bergheaud,
  Ram Pai, Russell Currey, Santosh Sivaraj, Scott Wood, Seth Forshee,
  Simon Guo, Stewart Smith, Sukadev Bhattiprolu, Thiago Jung Bauermann,
  Vaibhav Jain, Vasyl Gomonovych"

* tag 'powerpc-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (199 commits)
  powerpc/mm/radix: Fix build error when RADIX_MMU=n
  macintosh/ams-input: Use true and false for boolean values
  macintosh: change some data types from int to bool
  powerpc/watchdog: Print the NIP in soft_nmi_interrupt()
  powerpc/watchdog: regs can't be null in soft_nmi_interrupt()
  powerpc/watchdog: Tweak watchdog printks
  powerpc/cell: Remove axonram driver
  rtc-opal: Fix handling of firmware error codes, prevent busy loops
  powerpc/mpc52xx_gpt: make use of raw_spinlock variants
  macintosh/adb: Properly mark continued kernel messages
  powerpc/pseries: Fix cpu hotplug crash with memoryless nodes
  powerpc/numa: Ensure nodes initialized for hotplug
  powerpc/numa: Use ibm,max-associativity-domains to discover possible nodes
  powerpc/kernel: Block interrupts when updating TIDR
  powerpc/powernv/idoa: Remove unnecessary pcidev from pci_dn
  powerpc/mm/nohash: do not flush the entire mm when range is a single page
  powerpc/pseries: Add Initialization of VF Bars
  powerpc/pseries/pci: Associate PEs to VFs in configure SR-IOV
  powerpc/eeh: Add EEH notify resume sysfs
  powerpc/eeh: Add EEH operations to notify resume
  ...

320 files changed:
Documentation/ABI/testing/sysfs-class-ocxl [new file with mode: 0644]
Documentation/accelerators/ocxl.rst [new file with mode: 0644]
Documentation/devicetree/booting-without-of.txt
Documentation/filesystems/dax.txt
Documentation/ioctl/ioctl-number.txt
MAINTAINERS
arch/powerpc/Kconfig
arch/powerpc/Kconfig.debug
arch/powerpc/Makefile
arch/powerpc/boot/Makefile
arch/powerpc/boot/dts/a3m071.dts
arch/powerpc/boot/dts/akebono.dts
arch/powerpc/boot/dts/c2k.dts
arch/powerpc/boot/dts/currituck.dts
arch/powerpc/boot/dts/fsl/mpc8568mds.dts
arch/powerpc/boot/dts/fsl/mpc8569mds.dts
arch/powerpc/boot/dts/fsl/p1021mds.dts
arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
arch/powerpc/boot/dts/fsl/p1025twr.dtsi
arch/powerpc/boot/dts/fsl/t1040rdb.dts
arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
arch/powerpc/boot/dts/fsl/t1042rdb.dts
arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
arch/powerpc/boot/dts/fsp2.dts
arch/powerpc/boot/dts/gamecube.dts
arch/powerpc/boot/dts/haleakala.dts
arch/powerpc/boot/dts/kilauea.dts
arch/powerpc/boot/dts/kmeter1.dts
arch/powerpc/boot/dts/makalu.dts
arch/powerpc/boot/dts/mpc832x_mds.dts
arch/powerpc/boot/dts/mpc832x_rdb.dts
arch/powerpc/boot/dts/mpc836x_mds.dts
arch/powerpc/boot/dts/sbc8548-altflash.dts
arch/powerpc/boot/dts/sbc8548.dts
arch/powerpc/boot/dts/wii.dts
arch/powerpc/boot/serial.c
arch/powerpc/configs/mpc866_ads_defconfig
arch/powerpc/configs/powernv_defconfig
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/mmu-hash.h
arch/powerpc/include/asm/book3s/64/mmu.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
arch/powerpc/include/asm/book3s/64/tlbflush.h
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/code-patching.h
arch/powerpc/include/asm/cpm.h
arch/powerpc/include/asm/cpm1.h
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/drmem.h [new file with mode: 0644]
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/firmware.h
arch/powerpc/include/asm/hardirq.h
arch/powerpc/include/asm/head-64.h
arch/powerpc/include/asm/hmi.h
arch/powerpc/include/asm/hugetlb.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/imc-pmu.h
arch/powerpc/include/asm/irqflags.h
arch/powerpc/include/asm/kexec.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/local.h
arch/powerpc/include/asm/machdep.h
arch/powerpc/include/asm/mman.h
arch/powerpc/include/asm/mmu-8xx.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/mpic_timer.h
arch/powerpc/include/asm/nmi.h
arch/powerpc/include/asm/nohash/32/pgalloc.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/nohash/32/pte-8xx.h
arch/powerpc/include/asm/nohash/pgtable.h
arch/powerpc/include/asm/nohash/pte-book3e.h
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/pkeys.h [new file with mode: 0644]
arch/powerpc/include/asm/pnv-ocxl.h [new file with mode: 0644]
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/prom.h
arch/powerpc/include/asm/pte-common.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg_8xx.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/asm/xive-regs.h
arch/powerpc/include/asm/xive.h
arch/powerpc/include/uapi/asm/elf.h
arch/powerpc/include/uapi/asm/mman.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/cpu_setup_power.S
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/crash.c
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_sysfs.c
arch/powerpc/kernel/entry_32.S
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/idle_book3e.S
arch/powerpc/kernel/idle_power4.S
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/mce_power.c
arch/powerpc/kernel/module.lds [new file with mode: 0644]
arch/powerpc/kernel/module_64.c
arch/powerpc/kernel/optprobes_head.S
arch/powerpc/kernel/paca.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci_dn.c
arch/powerpc/kernel/pci_of_scan.c
arch/powerpc/kernel/proc_powerpc.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/rtas-proc.c
arch/powerpc/kernel/rtas_flash.c
arch/powerpc/kernel/rtasd.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup.h
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vdso64/gettimeofday.S
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kernel/watchdog.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_ras.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/lib/code-patching.c
arch/powerpc/lib/feature-fixups.c
arch/powerpc/mm/8xx_mmu.c
arch/powerpc/mm/Makefile
arch/powerpc/mm/drmem.c [new file with mode: 0644]
arch/powerpc/mm/dump_linuxpagetables.c
arch/powerpc/mm/fault.c
arch/powerpc/mm/hash64_4k.c
arch/powerpc/mm/hash64_64k.c
arch/powerpc/mm/hash_native_64.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage-hash64.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_context_book3s64.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable.c
arch/powerpc/mm/pgtable_32.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/pkeys.c [new file with mode: 0644]
arch/powerpc/mm/subpage-prot.c
arch/powerpc/mm/tlb-radix.c
arch/powerpc/mm/tlb_nohash.c
arch/powerpc/perf/8xx-pmu.c
arch/powerpc/perf/Makefile
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/powerpc/platforms/44x/fsp2.c
arch/powerpc/platforms/44x/fsp2.h [new file with mode: 0644]
arch/powerpc/platforms/512x/mpc512x_shared.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
arch/powerpc/platforms/83xx/mpc832x_mds.c
arch/powerpc/platforms/83xx/mpc832x_rdb.c
arch/powerpc/platforms/83xx/mpc836x_mds.c
arch/powerpc/platforms/85xx/socrates_fpga_pic.c
arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
arch/powerpc/platforms/8xx/Kconfig
arch/powerpc/platforms/Kconfig
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/interrupt.c
arch/powerpc/platforms/cell/setup.c
arch/powerpc/platforms/cell/spider-pic.c
arch/powerpc/platforms/cell/spu_manage.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/pasemi/dma_lib.c
arch/powerpc/platforms/powermac/backlight.c
arch/powerpc/platforms/powermac/feature.c
arch/powerpc/platforms/powermac/pic.c
arch/powerpc/platforms/powermac/smp.c
arch/powerpc/platforms/powernv/Makefile
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/npu-dma.c
arch/powerpc/platforms/powernv/ocxl.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/opal-dump.c
arch/powerpc/platforms/powernv/opal-elog.c
arch/powerpc/platforms/powernv/opal-imc.c
arch/powerpc/platforms/powernv/opal-sysparam.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/pci.h
arch/powerpc/platforms/powernv/smp.c
arch/powerpc/platforms/ps3/device-init.c
arch/powerpc/platforms/ps3/mm.c
arch/powerpc/platforms/ps3/os-area.c
arch/powerpc/platforms/ps3/setup.c
arch/powerpc/platforms/pseries/cmm.c
arch/powerpc/platforms/pseries/eeh_pseries.c
arch/powerpc/platforms/pseries/firmware.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/platforms/pseries/hvCall_inst.c
arch/powerpc/platforms/pseries/ibmebus.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/lparcfg.c
arch/powerpc/platforms/pseries/mobility.c
arch/powerpc/platforms/pseries/of_helpers.c
arch/powerpc/platforms/pseries/pci.c
arch/powerpc/platforms/pseries/pseries_energy.c
arch/powerpc/platforms/pseries/reconfig.c
arch/powerpc/platforms/pseries/scanlog.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/suspend.c
arch/powerpc/sysdev/Makefile
arch/powerpc/sysdev/axonram.c [deleted file]
arch/powerpc/sysdev/cpm1.c
arch/powerpc/sysdev/cpm2.c
arch/powerpc/sysdev/cpm_common.c
arch/powerpc/sysdev/cpm_gpio.c [new file with mode: 0644]
arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
arch/powerpc/sysdev/fsl_pci.c
arch/powerpc/sysdev/mpic.c
arch/powerpc/sysdev/mpic_timer.c
arch/powerpc/sysdev/mv64x60_pci.c
arch/powerpc/sysdev/xics/icp-native.c
arch/powerpc/sysdev/xics/ics-opal.c
arch/powerpc/sysdev/xics/ics-rtas.c
arch/powerpc/sysdev/xics/xics-common.c
arch/powerpc/sysdev/xive/common.c
arch/powerpc/xmon/ppc-dis.c
arch/powerpc/xmon/xmon.c
drivers/cpuidle/cpuidle-powernv.c
drivers/cpuidle/cpuidle-pseries.c
drivers/macintosh/adb.c
drivers/macintosh/adbhid.c
drivers/macintosh/ams/ams-input.c
drivers/macintosh/therm_adt746x.c
drivers/macintosh/via-pmu-backlight.c
drivers/macintosh/windfarm_pm112.c
drivers/macintosh/windfarm_pm121.c
drivers/macintosh/windfarm_pm72.c
drivers/macintosh/windfarm_pm81.c
drivers/macintosh/windfarm_pm91.c
drivers/macintosh/windfarm_rm31.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/cxllib.c
drivers/misc/cxl/file.c
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/misc/ocxl/Kconfig [new file with mode: 0644]
drivers/misc/ocxl/Makefile [new file with mode: 0644]
drivers/misc/ocxl/afu_irq.c [new file with mode: 0644]
drivers/misc/ocxl/config.c [new file with mode: 0644]
drivers/misc/ocxl/context.c [new file with mode: 0644]
drivers/misc/ocxl/file.c [new file with mode: 0644]
drivers/misc/ocxl/link.c [new file with mode: 0644]
drivers/misc/ocxl/main.c [new file with mode: 0644]
drivers/misc/ocxl/ocxl_internal.h [new file with mode: 0644]
drivers/misc/ocxl/pasid.c [new file with mode: 0644]
drivers/misc/ocxl/pci.c [new file with mode: 0644]
drivers/misc/ocxl/sysfs.c [new file with mode: 0644]
drivers/misc/ocxl/trace.c [new file with mode: 0644]
drivers/misc/ocxl/trace.h [new file with mode: 0644]
drivers/pci/hotplug/rpadlpar_core.c
drivers/pci/hotplug/rpadlpar_sysfs.c
drivers/pci/hotplug/rpaphp.h
drivers/pci/hotplug/rpaphp_core.c
drivers/pci/iov.c
drivers/pci/pcie/aer/aerdrv_core.c
drivers/ps3/ps3av.c
drivers/rtc/rtc-opal.c
include/linux/pci.h
include/misc/ocxl-config.h [new file with mode: 0644]
include/misc/ocxl.h [new file with mode: 0644]
include/uapi/linux/elf.h
include/uapi/misc/cxl.h
include/uapi/misc/ocxl.h [new file with mode: 0644]
tools/testing/selftests/powerpc/alignment/Makefile
tools/testing/selftests/powerpc/alignment/alignment_handler.c [new file with mode: 0644]
tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
tools/testing/selftests/powerpc/mm/.gitignore
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/mm/segv_errors.c [new file with mode: 0644]
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
tools/testing/selftests/powerpc/tm/.gitignore
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/tm/tm-trap.c [new file with mode: 0644]
tools/testing/selftests/powerpc/tm/tm-unavailable.c

diff --git a/Documentation/ABI/testing/sysfs-class-ocxl b/Documentation/ABI/testing/sysfs-class-ocxl
new file mode 100644 (file)
index 0000000..b5b1fa1
--- /dev/null
@@ -0,0 +1,35 @@
+What:          /sys/class/ocxl/<afu name>/afu_version
+Date:          January 2018
+Contact:       linuxppc-dev@lists.ozlabs.org
+Description:   read only
+               Version of the AFU, in the format <major>:<minor>
+               Reflects what is read in the configuration space of the AFU
+
+What:          /sys/class/ocxl/<afu name>/contexts
+Date:          January 2018
+Contact:       linuxppc-dev@lists.ozlabs.org
+Description:   read only
+               Number of contexts for the AFU, in the format <n>/<max>
+               where:
+                       n:      number of currently active contexts, for debug
+                       max:    maximum number of contexts supported by the AFU
+
+What:          /sys/class/ocxl/<afu name>/pp_mmio_size
+Date:          January 2018
+Contact:       linuxppc-dev@lists.ozlabs.org
+Description:   read only
+               Size of the per-process mmio area, as defined in the
+               configuration space of the AFU
+
+What:          /sys/class/ocxl/<afu name>/global_mmio_size
+Date:          January 2018
+Contact:       linuxppc-dev@lists.ozlabs.org
+Description:   read only
+               Size of the global mmio area, as defined in the
+               configuration space of the AFU
+
+What:          /sys/class/ocxl/<afu name>/global_mmio_area
+Date:          January 2018
+Contact:       linuxppc-dev@lists.ozlabs.org
+Description:   read/write
+               Give access to the global mmio area for the AFU
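For illustration, a minimal user-space sketch (C) that reads one of these attributes; the AFU device name in the path is hypothetical, and the value format follows the ABI described above::

  #include <stdio.h>

  int main(void)
  {
      char buf[32];
      /* "IBM,AFU.0004:00:00.1.0" is a made-up AFU device name */
      FILE *f = fopen("/sys/class/ocxl/IBM,AFU.0004:00:00.1.0/afu_version", "r");

      if (!f) {
          perror("fopen");
          return 1;
      }
      if (fgets(buf, sizeof(buf), f))
          printf("AFU version: %s", buf); /* e.g. "1:0" */
      fclose(f);
      return 0;
  }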
diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
new file mode 100644 (file)
index 0000000..4f7af84
--- /dev/null
@@ -0,0 +1,160 @@
+========================================================
+OpenCAPI (Open Coherent Accelerator Processor Interface)
+========================================================
+
+OpenCAPI is an interface between processors and accelerators. It aims
+to be low-latency and high-bandwidth. The specification is
+developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
+
+It allows an accelerator (which could be an FPGA, an ASIC, ...) to
+access host memory coherently, using virtual addresses. An OpenCAPI
+device can also host its own memory, which can be accessed from the
+host.
+
+OpenCAPI is known in Linux as 'ocxl', as the open, processor-agnostic
+evolution of 'cxl' (the driver for the IBM CAPI interface for
+powerpc), which was named that way to avoid confusion with the ISDN
+CAPI subsystem.
+
+
+High-level view
+===============
+
+OpenCAPI defines a Data Link Layer (DL) and Transaction Layer (TL), to
+be implemented on top of a physical link. Any processor or device
+implementing the DL and TL can start sharing memory.
+
+::
+
+  +-----------+                         +-------------+
+  |           |                         |             |
+  |           |                         | Accelerated |
+  | Processor |                         |  Function   |
+  |           |  +--------+             |    Unit     |  +--------+
+  |           |--| Memory |             |    (AFU)    |--| Memory |
+  |           |  +--------+             |             |  +--------+
+  +-----------+                         +-------------+
+       |                                       |
+  +-----------+                         +-------------+
+  |    TL     |                         |    TLX      |
+  +-----------+                         +-------------+
+       |                                       |
+  +-----------+                         +-------------+
+  |    DL     |                         |    DLX      |
+  +-----------+                         +-------------+
+       |                                       |
+       |                   PHY                 |
+       +---------------------------------------+
+
+
+
+Device discovery
+================
+
+OpenCAPI relies on a PCI-like configuration space implemented on the
+device, so the host can discover AFUs by querying the config space.
+
+OpenCAPI devices in Linux are treated like PCI devices (with a few
+caveats). The firmware is expected to abstract the hardware as if it
+were a PCI link. A lot of the existing PCI infrastructure is reused:
+devices are scanned and BARs are assigned during the standard PCI
+enumeration. Commands like 'lspci' can therefore be used to see what
+devices are available.
+
+The configuration space describes the AFU(s) found on the physical
+adapter: the AFU name, how many memory contexts it can work with,
+the size of its MMIO areas, ...
+
+
+
+MMIO
+====
+
+OpenCAPI defines two MMIO areas for each AFU:
+
+* the global MMIO area, with registers pertinent to the whole AFU.
+* a per-process MMIO area, which has a fixed size for each context.
+
+
+
+AFU interrupts
+==============
+
+OpenCAPI includes the possibility for an AFU to send an interrupt to a
+host process. It is done through an 'intrp_req' defined in the
+Transaction Layer, specifying a 64-bit object handle which identifies
+the interrupt.
+
+The driver allows a process to allocate an interrupt and obtain its
+64-bit object handle, which can be passed to the AFU.
+
+
+
+char devices
+============
+
+The driver creates one char device per AFU found on the physical
+device. A physical device may have multiple functions and each
+function can have multiple AFUs. At the time of this writing though,
+it has only been tested with devices exporting a single AFU.
+
+Char devices can be found in /dev/ocxl/ and are named as:
+/dev/ocxl/<AFU name>.<location>.<index>
+
+where <AFU name> is a name of up to 20 characters, as found in the
+config space of the AFU.
+<location> is added by the driver and can help distinguish devices
+when a system has more than one instance of the same OpenCAPI device.
+<index> is also to help distinguish AFUs in the unlikely case where a
+device carries multiple copies of the same AFU.
+
+
+
+Sysfs class
+===========
+
+An ocxl class is added for the devices representing the AFUs. See
+/sys/class/ocxl. The layout is described in
+Documentation/ABI/testing/sysfs-class-ocxl
+
+
+
+User API
+========
+
+open
+----
+
+Based on the AFU definition found in the config space, an AFU may
+support working with more than one memory context, in which case the
+associated char device may be opened multiple times by different
+processes.
+
+
+ioctl
+-----
+
+OCXL_IOCTL_ATTACH:
+
+  Attach the memory context of the calling process to the AFU so that
+  the AFU can access its memory.
+
+OCXL_IOCTL_IRQ_ALLOC:
+
+  Allocate an AFU interrupt and return an identifier.
+
+OCXL_IOCTL_IRQ_FREE:
+
+  Free a previously allocated AFU interrupt.
+
+OCXL_IOCTL_IRQ_SET_FD:
+
+  Associate an eventfd with an AFU interrupt so that the user process
+  can be notified when the AFU sends an interrupt.
+
+
+mmap
+----
+
+A process can mmap the per-process MMIO area for interactions with the
+AFU.
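Putting the above together, here is a minimal sketch of the user API flow: open, OCXL_IOCTL_ATTACH, AFU interrupt allocation wired to an eventfd, and mmap of the per-process MMIO area. It assumes the uapi definitions from include/uapi/misc/ocxl.h added in this merge; the device path and mapping length are placeholders, and the per-process MMIO area is assumed to start at mmap offset 0::

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/eventfd.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <linux/types.h>
  #include <misc/ocxl.h>

  int main(int argc, char **argv)
  {
      /* /dev/ocxl/<AFU name>.<location>.<index>; placeholder name */
      const char *dev = argc > 1 ? argv[1] : "/dev/ocxl/AFU.0004:00:00.1.0";
      struct ocxl_ioctl_attach attach;
      struct ocxl_ioctl_irq_fd irq_fd;
      __u64 irq_handle;
      void *pp_mmio;
      int fd, evfd;

      fd = open(dev, O_RDWR);
      if (fd < 0) { perror("open"); return 1; }

      /* Attach the calling process's memory context to the AFU */
      memset(&attach, 0, sizeof(attach));
      if (ioctl(fd, OCXL_IOCTL_ATTACH, &attach)) { perror("attach"); return 1; }

      /* Allocate an AFU interrupt; the returned 64-bit handle is what
       * the process hands to the AFU so it can raise the interrupt */
      if (ioctl(fd, OCXL_IOCTL_IRQ_ALLOC, &irq_handle)) { perror("irq alloc"); return 1; }

      /* Ask to be notified through an eventfd when the AFU interrupts */
      evfd = eventfd(0, 0);
      memset(&irq_fd, 0, sizeof(irq_fd));
      irq_fd.irq_offset = irq_handle;
      irq_fd.eventfd = evfd;
      if (ioctl(fd, OCXL_IOCTL_IRQ_SET_FD, &irq_fd)) { perror("irq set fd"); return 1; }

      /* Map the per-process MMIO area; the real size is advertised in
       * /sys/class/ocxl/<AFU name>/pp_mmio_size */
      pp_mmio = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if (pp_mmio == MAP_FAILED) { perror("mmap"); return 1; }

      /* ... program the AFU through pp_mmio, read(evfd) to wait for IRQs ... */
      return 0;
  }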
index 417f9111001042bcf5348decba7dc933016257b2..e86bd2f6411780aed6b45841ba0dad569b4bc98f 100644 (file)
@@ -1309,7 +1309,7 @@ number and level/sense information. All interrupt children in an
 OpenPIC interrupt domain use 2 cells per interrupt in their interrupts
 property.
 
-The PCI bus binding specifies a #interrupt-cell value of 1 to encode
+The PCI bus binding specifies a #interrupt-cells value of 1 to encode
 which interrupt pin (INTA,INTB,INTC,INTD) is used.
 
 2) interrupt-parent property
index 3be3b266be41e8e537b2c2726eb7ad26a97b4dbe..70cb68bed2e88ef6cd102ef5590ad2b66f5cbf5c 100644 (file)
@@ -46,7 +46,6 @@ stall the CPU for an extended period, you should also not attempt to
 implement direct_access.
 
 These block devices may be used for inspiration:
-- axonram: Axon DDR2 device driver
 - brd: RAM backed block device driver
 - dcssblk: s390 dcss block device driver
 - pmem: NVDIMM persistent memory driver
index 3e3fdae5f3ed8ae19adbb669530919c07c140ad0..6501389d55b9ded0dd299a404f2a57f55fb1dd39 100644 (file)
@@ -326,6 +326,7 @@ Code  Seq#(hex)     Include File            Comments
 0xB5   00-0F   uapi/linux/rpmsg.h      <mailto:linux-remoteproc@vger.kernel.org>
 0xC0   00-0F   linux/usb/iowarrior.h
 0xCA   00-0F   uapi/misc/cxl.h
+0xCA   10-2F   uapi/misc/ocxl.h
 0xCA   80-BF   uapi/scsi/cxlflash_ioctl.h
 0xCB   00-1F   CBM serial IEC bus      in development:
                                        <mailto:michael.klein@puffin.lb.shuttle.de>
index f981c5678eebaaab3eda0c95fa13566c93cb0f6a..1dc846d0add689ba6c94d14c14c238da5e5549c2 100644 (file)
@@ -9909,6 +9909,18 @@ M:       Josh Poimboeuf <jpoimboe@redhat.com>
 S:     Supported
 F:     tools/objtool/
 
+OCXL (Open Coherent Accelerator Processor Interface, OpenCAPI) DRIVER
+M:     Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+M:     Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+L:     linuxppc-dev@lists.ozlabs.org
+S:     Supported
+F:     arch/powerpc/platforms/powernv/ocxl.c
+F:     arch/powerpc/include/asm/pnv-ocxl.h
+F:     drivers/misc/ocxl/
+F:     include/misc/ocxl*
+F:     include/uapi/misc/ocxl.h
+F:     Documentation/accelerators/ocxl.rst
+
 OMAP AUDIO SUPPORT
 M:     Peter Ujfalusi <peter.ujfalusi@ti.com>
 M:     Jarkko Nikula <jarkko.nikula@bitmer.com>
index 73fcf592ee9143703484b27f7cc65ef702217b4a..9d3329811cc17f7a1d9a4ca8941b9462cf531047 100644 (file)
@@ -143,6 +143,7 @@ config PPC
        select ARCH_HAS_PMEM_API                if PPC64
        select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
        select ARCH_HAS_SG_CHAIN
+       select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
        select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UACCESS_FLUSHCACHE      if PPC64
        select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -150,6 +151,7 @@ config PPC
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_MIGHT_HAVE_PC_SERIO
+       select ARCH_OPTIONAL_KERNEL_RWX         if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF         if PPC64
@@ -180,8 +182,6 @@ config PPC
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
-       select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
-       select ARCH_OPTIONAL_KERNEL_RWX         if ARCH_HAS_STRICT_KERNEL_RWX
        select HAVE_CBPF_JIT                    if !PPC64
        select HAVE_CONTEXT_TRACKING            if PPC64
        select HAVE_DEBUG_KMEMLEAK
@@ -868,6 +868,21 @@ config SECCOMP
 
          If unsure, say Y. Only embedded should say N here.
 
+config PPC_MEM_KEYS
+       prompt "PowerPC Memory Protection Keys"
+       def_bool y
+       depends on PPC_BOOK3S_64
+       select ARCH_USES_HIGH_VMA_FLAGS
+       select ARCH_HAS_PKEYS
+       help
+         Memory Protection Keys provides a mechanism for enforcing
+         page-based protections, but without requiring modification of the
+         page tables when an application changes protection domains.
+
+         For details, see Documentation/vm/protection-keys.txt
+
+         If unsure, say y.
+
 endmenu
 
 config ISA_DMA_API
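As context for the PPC_MEM_KEYS option above: user space drives memory protection keys through the pkey_alloc/pkey_mprotect/pkey_free system calls described in Documentation/vm/protection-keys.txt. A minimal sketch, assuming libc headers that expose the SYS_pkey_* syscall numbers (the powerpc syscall wiring is part of this merge)::

  #define _GNU_SOURCE
  #include <stdio.h>
  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  #ifndef PKEY_DISABLE_WRITE
  #define PKEY_DISABLE_WRITE 0x2
  #endif

  int main(void)
  {
      char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      long pkey;

      if (page == MAP_FAILED) { perror("mmap"); return 1; }

      /* Allocate a key whose write access starts out disabled */
      pkey = syscall(SYS_pkey_alloc, 0, PKEY_DISABLE_WRITE);
      if (pkey < 0) { perror("pkey_alloc"); return 1; }

      /* Tag the page with the key; later changes to the key's access
       * rights don't require touching the page tables */
      if (syscall(SYS_pkey_mprotect, page, 4096,
                  PROT_READ | PROT_WRITE, pkey)) {
          perror("pkey_mprotect");
          return 1;
      }

      /* Reads still work; a write would now fault with SEGV_PKUERR */
      printf("first byte: %d\n", page[0]);

      syscall(SYS_pkey_free, pkey);
      return 0;
  }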
index 657c33cd4eee050fd22aabc15e22499d6deac896..c45424c64e1963c3017981a601fb558be6686269 100644 (file)
@@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST
        depends on DEBUG_KERNEL
        default n
 
+config PPC_IRQ_SOFT_MASK_DEBUG
+       bool "Include extra checks for powerpc irq soft masking"
+       default n
+
 config XMON
        bool "Include xmon kernel debugger"
        depends on DEBUG_KERNEL
@@ -368,7 +372,7 @@ config PPC_PTDUMP
 
 config PPC_HTDUMP
        def_bool y
-       depends on PPC_PTDUMP && PPC_BOOK3S
+       depends on PPC_PTDUMP && PPC_BOOK3S_64
 
 config PPC_FAST_ENDIAN_SWITCH
        bool "Deprecated fast endian-switch syscall"
index 1381693a4a51bee5b1f37290b604f4df1588b845..ccd2556bdb530db23d6a1ba9f1ffff907d62fb59 100644 (file)
@@ -63,6 +63,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
 ifdef CONFIG_PPC32
 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
 else
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
 ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
 # Have the linker provide sfpr if possible.
 # There is a corresponding test in arch/powerpc/lib/Makefile
index 08782f55b89ff491383adaa1bbdb6eee8490020b..ef6549e5715717003bf3fe9fc3a3c869e1fd5b2f 100644 (file)
@@ -108,10 +108,10 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
                $(libfdt) libfdt-wrapper.c \
                ns16550.c serial.c simple_alloc.c div64.S util.S \
                elf_util.c $(zlib-y) devtree.c stdlib.c \
-               oflib.c ofconsole.c cuboot.c cpm-serial.c \
-               uartlite.c opal.c
+               oflib.c ofconsole.c cuboot.c
+
 src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
-src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) +=  opal-calls.S
+src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
 ifndef CONFIG_PPC64_BOOT_WRAPPER
 src-wlib-y += crtsavres.S
 endif
@@ -120,6 +120,8 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
 src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
 src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
+src-wlib-$(CONFIG_CPM) += cpm-serial.c
 
 src-plat-y := of.c epapr.c
 src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
index bf81b8f9704c9c8a094e78ab8c296758a0eab366..187ce458d03a802751da4a5dc41f594660295164 100644 (file)
                        reg = <0 0x0 0x02000000>;
                        compatible = "cfi-flash";
                        bank-width = <2>;
-                       partition@0x0 {
+                       partition@0 {
                                label = "u-boot";
                                reg = <0x00000000 0x00040000>;
                                read-only;
                        };
-                       partition@0x00040000 {
+                       partition@40000 {
                                label = "env";
                                reg = <0x00040000 0x00020000>;
                        };
-                       partition@0x00060000 {
+                       partition@60000 {
                                label = "dtb";
                                reg = <0x00060000 0x00020000>;
                        };
-                       partition@0x00080000 {
+                       partition@80000 {
                                label = "kernel";
                                reg = <0x00080000 0x00500000>;
                        };
-                       partition@0x00580000 {
+                       partition@580000 {
                                label = "root";
                                reg = <0x00580000 0x00A80000>;
                        };
index e61d5dc598c115b5aa5f8678d4e0b90be5bf7b81..746779202a122b3b504a8c751eff8ba7269f18ce 100644 (file)
                                interrupts = <39 2>;
                        };
 
-                       IIC0: i2c@00000000 {
+                       IIC0: i2c@0 {
                                compatible = "ibm,iic-476gtr", "ibm,iic";
                                reg = <0x0 0x00000020>;
                                interrupt-parent = <&MPIC>;
                                };
                        };
 
-                       IIC1: i2c@00000100 {
+                       IIC1: i2c@100 {
                                compatible = "ibm,iic-476gtr", "ibm,iic";
                                reg = <0x100 0x00000020>;
                                interrupt-parent = <&MPIC>;
index 1e32903cb0a891391b846dd6ecc51c2b48dd69be..27f169e3ade947a769e74bb34c0d8e07acd5cec4 100644 (file)
                        >;
                };
 
-               cpu-error@0070 {
+               cpu-error@70 {
                        compatible = "marvell,mv64360-cpu-error";
                        reg = <0x0070 0x10 0x0128 0x28>;
                        interrupts = <3>;
                        interrupt-parent = <&PIC>;
                };
 
-               sram-ctrl@0380 {
+               sram-ctrl@380 {
                        compatible = "marvell,mv64360-sram-ctrl";
                        reg = <0x0380 0x80>;
                        interrupts = <13>;
                        interrupt-parent = <&PIC>;
                };
                /* Devices attached to the device controller */
-               devicebus@045c {
+               devicebus@45c {
                        #address-cells = <2>;
                        #size-cells = <1>;
                        compatible = "marvell,mv64306-devctrl";
index 4191e1850ea1719929eb80e97e81f90ab5400057..f2ad5815f08d7a241e89dc1c03bb6960448bd486 100644 (file)
                                reg = <0x50000000 0x4>;
                        };
 
-                       IIC0: i2c@00000000 {
+                       IIC0: i2c@0 {
                                compatible = "ibm,iic-currituck", "ibm,iic";
                                reg = <0x0 0x00000014>;
                                interrupt-parent = <&MPIC>;
index 01706a3396031e3a3f2398b1cb2be464736d5297..bc3e8039bdc7a2b0d492d24fcabf08ab463f17e9 100644 (file)
                par_io@e0100 {
                        num-ports = <7>;
 
-                       pio1: ucc_pin@01 {
+                       pio1: ucc_pin@1 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x4  0xa  0x1  0x0  0x2  0x0    /* TxD0 */
                                        0x1  0x1f  0x2  0x0  0x3  0x0>; /* GTX125 */
                        };
 
-                       pio2: ucc_pin@02 {
+                       pio2: ucc_pin@2 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x5  0xa 0x1  0x0  0x2  0x0   /* TxD0 */
 
                        /* These are the same PHYs as on
                         * gianfar's MDIO bus */
-                       qe_phy0: ethernet-phy@07 {
+                       qe_phy0: ethernet-phy@7 {
                                interrupt-parent = <&mpic>;
                                interrupts = <1 1 0 0>;
                                reg = <0x7>;
                        };
-                       qe_phy1: ethernet-phy@01 {
+                       qe_phy1: ethernet-phy@1 {
                                interrupt-parent = <&mpic>;
                                interrupts = <2 1 0 0>;
                                reg = <0x1>;
                        };
-                       qe_phy2: ethernet-phy@02 {
+                       qe_phy2: ethernet-phy@2 {
                                interrupt-parent = <&mpic>;
                                interrupts = <1 1 0 0>;
                                reg = <0x2>;
                        };
-                       qe_phy3: ethernet-phy@03 {
+                       qe_phy3: ethernet-phy@3 {
                                interrupt-parent = <&mpic>;
                                interrupts = <2 1 0 0>;
                                reg = <0x3>;
index 76b2bd6f77422e16e4089e267cdb9e6c5c651efe..d8367ceddea6634d4bcbff613a21786cd7fe9f59 100644 (file)
                                gpio-controller;
                        };
 
-                       pio1: ucc_pin@01 {
+                       pio1: ucc_pin@1 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x2  0x1f 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                        0x2  0x14 0x1  0x0  0x2  0x0>;  /* ENET1_GTXCLK */
                        };
 
-                       pio2: ucc_pin@02 {
+                       pio2: ucc_pin@2 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x2  0x1f 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                        0x2  0x2 0x1  0x0  0x2  0x0>;   /* ENET2_GTXCLK */
                        };
 
-                       pio3: ucc_pin@03 {
+                       pio3: ucc_pin@3 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x2  0x1f 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                        0x2  0x19 0x1  0x0  0x2  0x0>;  /* ENET3_GTXCLK */
                        };
 
-                       pio4: ucc_pin@04 {
+                       pio4: ucc_pin@4 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x2  0x1f 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                        reg = <0x2120 0x18>;
                        compatible = "fsl,ucc-mdio";
 
-                       qe_phy0: ethernet-phy@07 {
+                       qe_phy0: ethernet-phy@7 {
                                interrupt-parent = <&mpic>;
                                interrupts = <1 1 0 0>;
                                reg = <0x7>;
                        };
-                       qe_phy1: ethernet-phy@01 {
+                       qe_phy1: ethernet-phy@1 {
                                interrupt-parent = <&mpic>;
                                interrupts = <2 1 0 0>;
                                reg = <0x1>;
                        };
-                       qe_phy2: ethernet-phy@02 {
+                       qe_phy2: ethernet-phy@2 {
                                interrupt-parent = <&mpic>;
                                interrupts = <3 1 0 0>;
                                reg = <0x2>;
                        };
-                       qe_phy3: ethernet-phy@03 {
+                       qe_phy3: ethernet-phy@3 {
                                interrupt-parent = <&mpic>;
                                interrupts = <4 1 0 0>;
                                reg = <0x3>;
                        };
-                       qe_phy5: ethernet-phy@04 {
+                       qe_phy5: ethernet-phy@4 {
                                reg = <0x04>;
                        };
-                       qe_phy7: ethernet-phy@06 {
+                       qe_phy7: ethernet-phy@6 {
                                reg = <0x6>;
                        };
                        tbi1: tbi-phy@11 {
index 291454c75ddad117d1d4345d992116bff7cd3cfb..1047802f4d2acc6e98693102a373bf365b8b3436 100644 (file)
                        ranges = <0x0 0xe0100 0x60>;
                        device_type = "par_io";
                        num-ports = <3>;
-                       pio1: ucc_pin@01 {
+                       pio1: ucc_pin@1 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                        0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
                        };
 
-                       pio2: ucc_pin@02 {
+                       pio2: ucc_pin@2 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                interrupts = <4 1 0 0>;
                                reg = <0x0>;
                        };
-                       qe_phy1: ethernet-phy@03 {
+                       qe_phy1: ethernet-phy@3 {
                                interrupt-parent = <&mpic>;
                                interrupts = <5 1 0 0>;
                                reg = <0x3>;
index d44bb12debb01052e6d75ceaa60182c849f2101c..0a5434a631c326f02435d90a88c7bd33da8da7e0 100644 (file)
                ranges = <0x0 0xe0100 0x60>;
                device_type = "par_io";
                num-ports = <3>;
-               pio1: ucc_pin@01 {
+               pio1: ucc_pin@1 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
                };
 
-               pio2: ucc_pin@02 {
+               pio2: ucc_pin@2 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                0x1  0x8  0x2  0x0  0x2  0x0>;    /* ENET5_RX_ER_SER5_CD_B */
                };
 
-               pio3: ucc_pin@03 {
+               pio3: ucc_pin@3 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x0  0x16 0x2  0x0  0x2  0x0    /* SER7_CD_B*/
                                0x0  0x15 0x1  0x0  0x2  0x0>;    /* SER7_TXD0*/
                };
 
-               pio4: ucc_pin@04 {
+               pio4: ucc_pin@4 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x0  0x2  0x0  0x2  0x0    /* SER3_CD_B*/
index b15acbaea34b5296816efa6ee5a6c02836c06233..ea33b57f8774304e7adecb842ab4793881cf1d18 100644 (file)
                                interrupts = <4 1 0 0>;
                                reg = <0x6>;
                        };
-                       qe_phy1: ethernet-phy@03 {
+                       qe_phy1: ethernet-phy@3 {
                                interrupt-parent = <&mpic>;
                                interrupts = <5 1 0 0>;
                                reg = <0x3>;
index 08816fb474f5d552dec09b381839bcf7278e221b..ab75b8f29ae23360deff65e1e2de5e35c2f04069 100644 (file)
                ranges = <0x0 0xe0100 0x60>;
                device_type = "par_io";
                num-ports = <3>;
-               pio1: ucc_pin@01 {
+               pio1: ucc_pin@1 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
                };
 
-               pio2: ucc_pin@02 {
+               pio2: ucc_pin@2 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
                                0x1  0x8  0x2  0x0  0x2  0x0>;    /* ENET5_RX_ER_SER5_CD_B */
                };
 
-               pio3: ucc_pin@03 {
+               pio3: ucc_pin@3 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x0  0x16 0x2  0x0  0x2  0x0    /* SER7_CD_B*/
                                0x0  0x15 0x1  0x0  0x2  0x0>;    /* SER7_TXD0*/
                };
 
-               pio4: ucc_pin@04 {
+               pio4: ucc_pin@4 {
                        pio-map = <
                /* port  pin  dir  open_drain  assignment  has_irq */
                                0x1  0x0  0x2  0x0  0x2  0x0    /* SER3_CD_B*/
index 621f2c6ee6ad51829f764f103e9f177079c46ed4..65ff34c4902569650fa2957f1fd62e2514b84eef 100644 (file)
@@ -61,7 +61,7 @@
                        };
 
                        mdio@fc000 {
-                               phy_sgmii_2: ethernet-phy@03 {
+                               phy_sgmii_2: ethernet-phy@3 {
                                        reg = <0x03>;
                                };
                        };
index fcd2aeb5b8ac8279efdc81f30c6ecb785d6962b6..4fa15f48a4c39ed79b4960942a74f1d97bff8388 100644 (file)
                        };
 
                        mdio0: mdio@fc000 {
-                               phy_sgmii_0: ethernet-phy@02 {
+                               phy_sgmii_0: ethernet-phy@2 {
                                        reg = <0x02>;
                                };
 
-                               phy_sgmii_1: ethernet-phy@03 {
+                               phy_sgmii_1: ethernet-phy@3 {
                                        reg = <0x03>;
                                };
 
-                               phy_sgmii_2: ethernet-phy@01 {
+                               phy_sgmii_2: ethernet-phy@1 {
                                        reg = <0x01>;
                                };
 
-                               phy_rgmii_0: ethernet-phy@04 {
+                               phy_rgmii_0: ethernet-phy@4 {
                                        reg = <0x04>;
                                };
 
-                               phy_rgmii_1: ethernet-phy@05 {
+                               phy_rgmii_1: ethernet-phy@5 {
                                        reg = <0x05>;
                                };
                        };
index 2c138627b1b4344a7292e18144252363848fcb1e..3ebb712224cbe02e8990c4548a515aea6894fa72 100644 (file)
@@ -59,7 +59,7 @@
                        };
 
                        mdio@fc000 {
-                               phy_sgmii_2: ethernet-phy@03 {
+                               phy_sgmii_2: ethernet-phy@3 {
                                        reg = <0x03>;
                                };
                        };
index 5fdddbd2a62b29df17994e8b45e70e82c0e4d4f1..099a598c74c00dda112ace336ab970d6c4e709a9 100644 (file)
                        };
 
                        mdio0: mdio@fc000 {
-                               phy_sgmii_2: ethernet-phy@03 {
+                               phy_sgmii_2: ethernet-phy@3 {
                                        reg = <0x03>;
                                };
 
-                               phy_rgmii_0: ethernet-phy@01 {
+                               phy_rgmii_0: ethernet-phy@1 {
                                        reg = <0x01>;
                                };
 
-                               phy_rgmii_1: ethernet-phy@02 {
+                               phy_rgmii_1: ethernet-phy@2 {
                                        reg = <0x02>;
                                };
                        };
index f10a64aeb83bd64046b0147f33e9a71baf174f18..6560283c5aecd55b48671bffd1e1567e261cc619 100644 (file)
                        };
                };
 
-               OHCI1: ohci@02040000 {
+               OHCI1: ohci@2040000 {
                        compatible = "ohci-le";
                        reg = <0x02040000 0xa0>;
                        interrupt-parent = <&UIC1_3>;
                        interrupts = <28 0x8 29 0x8>;
                };
 
-               OHCI2: ohci@02080000 {
+               OHCI2: ohci@2080000 {
                        compatible = "ohci-le";
                        reg = <0x02080000 0xa0>;
                        interrupt-parent = <&UIC1_3>;
                        interrupts = <30 0x8 31 0x8>;
                };
 
-               EHCI: ehci@02000000 {
+               EHCI: ehci@2000000 {
                        compatible = "usb-ehci";
                        reg = <0x02000000 0xa4>;
                        interrupt-parent = <&UIC1_3>;
index ef3be0e58b028270a23f074f4afd8a12424fff19..58d06c9ee08b806793e96ce5005ea38b12fe902d 100644 (file)
                ranges = <0x0c000000 0x0c000000 0x00010000>;
                interrupt-parent = <&PIC>;
 
-               video@0c002000 {
+               video@c002000 {
                        compatible = "nintendo,flipper-vi";
                        reg = <0x0c002000 0x100>;
                        interrupts = <8>;
                };
 
-               processor-interface@0c003000 {
+               processor-interface@c003000 {
                        compatible = "nintendo,flipper-pi";
                        reg = <0x0c003000 0x100>;
 
@@ -71,7 +71,7 @@
                        };
                };
 
-               dsp@0c005000 {
+               dsp@c005000 {
                        #address-cells = <1>;
                        #size-cells = <1>;
                        compatible = "nintendo,flipper-dsp";
                        };
                };
 
-               disk@0c006000 {
+               disk@c006000 {
                        compatible = "nintendo,flipper-di";
                        reg = <0x0c006000 0x40>;
                        interrupts = <2>;
                };
 
-               audio@0c006c00 {
+               audio@c006c00 {
                        compatible = "nintendo,flipper-ai";
                        reg = <0x0c006c00 0x20>;
                        interrupts = <6>;
                };
 
-               gamepad-controller@0c006400 {
+               gamepad-controller@c006400 {
                        compatible = "nintendo,flipper-si";
                        reg = <0x0c006400 0x100>;
                        interrupts = <3>;
                };
 
                /* External Interface bus */
-               exi@0c006800 {
+               exi@c006800 {
                        compatible = "nintendo,flipper-exi";
                        reg = <0x0c006800 0x40>;
                        virtual-reg = <0x0c006800>;
index 2b256694eca6b1c48d63dd31f3ca532eb8b741c8..cb16dad43c9223a925b9f3c912b10ad636f547bb 100644 (file)
                        };
                };
 
-               PCIE0: pciex@0a0000000 {
+               PCIE0: pciex@a0000000 {
                        device_type = "pci";
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
index 5ba7f01e2a297878251001d427bf498bdd94397c..2a3413221cc1f35150a36637449cb7b8f6ad763a 100644 (file)
                        };
                };
 
-               PCIE0: pciex@0a0000000 {
+               PCIE0: pciex@a0000000 {
                        device_type = "pci";
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
                                0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
                };
 
-               PCIE1: pciex@0c0000000 {
+               PCIE1: pciex@c0000000 {
                        device_type = "pci";
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
index 983aee185793282529c7fcd050808d6c731104f0..9fa33d9ba966d22bc4c2e2dd956f3e12c10f1bcf 100644 (file)
                                compatible = "fsl,ucc-mdio";
 
                                /* Piggy2 (UCC4, MDIO 0x00, RMII) */
-                               phy_piggy2: ethernet-phy@00 {
+                               phy_piggy2: ethernet-phy@0 {
                                        reg = <0x0>;
                                };
 
                                /* Eth-1 (UCC5, MDIO 0x08, RMII) */
-                               phy_eth1: ethernet-phy@08 {
+                               phy_eth1: ethernet-phy@8 {
                                        reg = <0x08>;
                                };
 
                                /* Eth-2 (UCC6, MDIO 0x09, RMII) */
-                               phy_eth2: ethernet-phy@09 {
+                               phy_eth2: ethernet-phy@9 {
                                        reg = <0x09>;
                                };
 
                                /* Eth-3 (UCC7, MDIO 0x0a, RMII) */
-                               phy_eth3: ethernet-phy@0a {
+                               phy_eth3: ethernet-phy@a {
                                        reg = <0x0a>;
                                };
 
                                /* Eth-4 (UCC8, MDIO 0x0b, RMII) */
-                               phy_eth4: ethernet-phy@0b {
+                               phy_eth4: ethernet-phy@b {
                                        reg = <0x0b>;
                                };
 
index 63d48b632c84c8f3a2c9382eaddf578b0791338e..bf8fe16293924a15c5047c86cd924cb13cce8109 100644 (file)
                        };
                };
 
-               PCIE0: pciex@0a0000000 {
+               PCIE0: pciex@a0000000 {
                        device_type = "pci";
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
                                0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
                };
 
-               PCIE1: pciex@0c0000000 {
+               PCIE1: pciex@c0000000 {
                        device_type = "pci";
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
index 0793cdf0d46e49a8bef08ffd0d29e537a947c9d4..49c7d657118aac0fb57fa37f819f3c786f77f17f 100644 (file)
                        device_type = "par_io";
                        num-ports = <7>;
 
-                       pio3: ucc_pin@03 {
+                       pio3: ucc_pin@3 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        3  4  3  0  2  0  /* MDIO */
                                        1 12  1  0  1  0        /* TX_EN */
                                        1 13  2  0  1  0>;      /* CRS */
                        };
-                       pio4: ucc_pin@04 {
+                       pio4: ucc_pin@4 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        3 31  2  0  1  0        /* RX_CLK (CLK7) */
                                        1 30  1  0  1  0        /* TX_EN */
                                        1 31  2  0  1  0>;      /* CRS */
                        };
-                       pio5: ucc_pin@05 {
+                       pio5: ucc_pin@5 {
                                pio-map = <
                                /*
                                 *                    open       has
                        reg = <0x2320 0x18>;
                        compatible = "fsl,ucc-mdio";
 
-                       phy3: ethernet-phy@03 {
+                       phy3: ethernet-phy@3 {
                                interrupt-parent = <&ipic>;
                                interrupts = <17 0x8>;
                                reg = <0x3>;
                        };
-                       phy4: ethernet-phy@04 {
+                       phy4: ethernet-phy@4 {
                                interrupt-parent = <&ipic>;
                                interrupts = <18 0x8>;
                                reg = <0x4>;
index 91df1eb166673c8e9be80934c3fb2be1a8e54381..647cae14c16dfb8464320af7c621c4b4b4b020ad 100644 (file)
                                gpio-controller;
                        };
 
-                       ucc2pio:ucc_pin@02 {
+                       ucc2pio:ucc_pin@2 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        3  4  3  0  2  0        /* MDIO */
                                        0 30  1  0  1  0        /* TX_EN */
                                        0 31  2  0  1  0>;      /* CRS */
                        };
-                       ucc3pio:ucc_pin@03 {
+                       ucc3pio:ucc_pin@3 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0 13  2  0  1  0        /* RX_CLK (CLK9) */
                        reg = <0x3120 0x18>;
                        compatible = "fsl,ucc-mdio";
 
-                       phy00:ethernet-phy@00 {
+                       phy00:ethernet-phy@0 {
                                interrupt-parent = <&ipic>;
                                interrupts = <0>;
                                reg = <0x0>;
                        };
-                       phy04:ethernet-phy@04 {
+                       phy04:ethernet-phy@4 {
                                interrupt-parent = <&ipic>;
                                interrupts = <0>;
                                reg = <0x4>;
index ecb6ccd3a6aad7e2dc03daf6a577b7a6fb76b4e4..539fd9f72eda804ef2d524912bb7f7cbac05d1bf 100644 (file)
                                gpio-controller;
                        };
 
-                       pio1: ucc_pin@01 {
+                       pio1: ucc_pin@1 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0  3  1  0  1  0        /* TxD0 */
                                        2  9  1  0  3  0        /* GTX_CLK - CLK10 */
                                        2  8  2  0  1  0>;      /* GTX125 - CLK9 */
                        };
-                       pio2: ucc_pin@02 {
+                       pio2: ucc_pin@2 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                                        0  17 1  0  1  0   /* TxD0 */
                        reg = <0x2120 0x18>;
                        compatible = "fsl,ucc-mdio";
 
-                       phy0: ethernet-phy@00 {
+                       phy0: ethernet-phy@0 {
                                interrupt-parent = <&ipic>;
                                interrupts = <17 0x8>;
                                reg = <0x0>;
                        };
-                       phy1: ethernet-phy@01 {
+                       phy1: ethernet-phy@1 {
                                interrupt-parent = <&ipic>;
                                interrupts = <18 0x8>;
                                reg = <0x1>;
index 0b38a0defd2ce4ef607f7cae4390db36f6d8591b..8967a56adad4c5248b741c3d60361ab80fff69a3 100644 (file)
                        compatible = "intel,JS28F128", "cfi-flash";
                        bank-width = <4>;
                        device-width = <1>;
-                       partition@0x0 {
+                       partition@0 {
                                label = "space";
                                /* FC000000 -> FFEFFFFF */
                                reg = <0x00000000 0x03f00000>;
                        };
-                       partition@0x03f00000 {
+                       partition@3f00000 {
                                label = "bootloader";
                                /* FFF00000 -> FFFFFFFF */
                                reg = <0x03f00000 0x00100000>;
                        reg = <0x6 0x0 0x800000>;
                        bank-width = <1>;
                        device-width = <1>;
-                       partition@0x0 {
+                       partition@0 {
                                label = "space";
                                /* EF800000 -> EFF9FFFF */
                                reg = <0x00000000 0x007a0000>;
                        };
-                       partition@0x7a0000 {
+                       partition@7a0000 {
                                label = "bootloader";
                                /* EFFA0000 -> EFFFFFFF */
                                reg = <0x007a0000 0x00060000>;
index 1df2a0955668d280195cb79700bedc16f097ffdc..9bdb828a504e6f65ce593e993b28f5d636cec134 100644 (file)
                        reg = <0x0 0x0 0x800000>;
                        bank-width = <1>;
                        device-width = <1>;
-                       partition@0x0 {
+                       partition@0 {
                                label = "space";
                                /* FF800000 -> FFF9FFFF */
                                reg = <0x00000000 0x007a0000>;
                        };
-                       partition@0x7a0000 {
+                       partition@7a0000 {
                                label = "bootloader";
                                /* FFFA0000 -> FFFFFFFF */
                                reg = <0x007a0000 0x00060000>;
                        compatible = "intel,JS28F128", "cfi-flash";
                        bank-width = <4>;
                        device-width = <1>;
-                       partition@0x0 {
+                       partition@0 {
                                label = "space";
                                /* EC000000 -> EFEFFFFF */
                                reg = <0x00000000 0x03f00000>;
                        };
-                       partition@0x03f00000 {
+                       partition@3f00000 {
                                label = "bootloader";
                                /* EFF00000 -> EFFFFFFF */
                                reg = <0x03f00000 0x00100000>;
index 77528c9a8dbd4efa2ad5506304b814d05ab2935f..17a5babb098d426b54dc5eb01933c738b748b2c6 100644 (file)
                          0x0d800000 0x0d800000 0x00800000>;
                interrupt-parent = <&PIC0>;
 
-               video@0c002000 {
+               video@c002000 {
                        compatible = "nintendo,hollywood-vi",
                                        "nintendo,flipper-vi";
                        reg = <0x0c002000 0x100>;
                        interrupts = <8>;
                };
 
-               processor-interface@0c003000 {
+               processor-interface@c003000 {
                        compatible = "nintendo,hollywood-pi",
                                        "nintendo,flipper-pi";
                        reg = <0x0c003000 0x100>;
@@ -84,7 +84,7 @@
                        };
                };
 
-               dsp@0c005000 {
+               dsp@c005000 {
                        #address-cells = <1>;
                        #size-cells = <1>;
                        compatible = "nintendo,hollywood-dsp",
                        interrupts = <6>;
                };
 
-               gamepad-controller@0d006400 {
+               gamepad-controller@d006400 {
                        compatible = "nintendo,hollywood-si",
                                        "nintendo,flipper-si";
                        reg = <0x0d006400 0x100>;
                        interrupts = <3>;
                };
 
-               audio@0c006c00 {
+               audio@c006c00 {
                        compatible = "nintendo,hollywood-ai",
                                        "nintendo,flipper-ai";
                        reg = <0x0d006c00 0x20>;
                };
 
                /* External Interface bus */
-               exi@0d006800 {
+               exi@d006800 {
                        compatible = "nintendo,hollywood-exi",
                                        "nintendo,flipper-exi";
                        reg = <0x0d006800 0x40>;
                        interrupts = <4>;
                };
 
-               usb@0d040000 {
+               usb@d040000 {
                        compatible = "nintendo,hollywood-usb-ehci",
                                        "usb-ehci";
                        reg = <0x0d040000 0x100>;
                        interrupt-parent = <&PIC1>;
                };
 
-               usb@0d050000 {
+               usb@d050000 {
                        compatible = "nintendo,hollywood-usb-ohci",
                                        "usb-ohci";
                        reg = <0x0d050000 0x100>;
                        interrupt-parent = <&PIC1>;
                };
 
-               usb@0d060000 {
+               usb@d060000 {
                        compatible = "nintendo,hollywood-usb-ohci",
                                        "usb-ohci";
                        reg = <0x0d060000 0x100>;
                        interrupt-parent = <&PIC1>;
                };
 
-               sd@0d070000 {
+               sd@d070000 {
                        compatible = "nintendo,hollywood-sdhci",
                                        "sdhci";
                        reg = <0x0d070000 0x200>;
                        interrupt-parent = <&PIC1>;
                };
 
-               sdio@0d080000 {
+               sdio@d080000 {
                        compatible = "nintendo,hollywood-sdhci",
                                        "sdhci";
                        reg = <0x0d080000 0x200>;
                        interrupt-parent = <&PIC1>;
                };
 
-               ipc@0d000000 {
+               ipc@d000000 {
                        compatible = "nintendo,hollywood-ipc";
                        reg = <0x0d000000 0x10>;
                        interrupts = <30>;
                        interrupt-parent = <&PIC1>;
                };
 
-               PIC1: pic1@0d800030 {
+               PIC1: pic1@d800030 {
                        #interrupt-cells = <1>;
                        compatible = "nintendo,hollywood-pic";
                        reg = <0x0d800030 0x10>;
                        interrupts = <14>;
                };
 
-               GPIO: gpio@0d8000c0 {
+               GPIO: gpio@d8000c0 {
                        #gpio-cells = <2>;
                        compatible = "nintendo,hollywood-gpio";
                        reg = <0x0d8000c0 0x40>;
                        */
                };
 
-               control@0d800100 {
+               control@d800100 {
                        compatible = "nintendo,hollywood-control";
                        reg = <0x0d800100 0x300>;
                };
 
-               disk@0d806000 {
+               disk@d806000 {
                        compatible = "nintendo,hollywood-di";
                        reg = <0x0d806000 0x40>;
                        interrupts = <2>;
index 7b5c02b1afd00987ad1bdf3336480de0f198d1e1..88955095ec07d0cc46d4074ef6b9b6c61e7c4863 100644 (file)
@@ -124,20 +124,26 @@ int serial_console_init(void)
        else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
                rc = mpsc_console_init(devp, &serial_cd);
 #endif
+#ifdef CONFIG_CPM
        else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
                 dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
                 dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
                 dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
                rc = cpm_console_init(devp, &serial_cd);
+#endif
 #ifdef CONFIG_PPC_MPC52XX
        else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
                rc = mpc5200_psc_console_init(devp, &serial_cd);
 #endif
+#ifdef CONFIG_XILINX_VIRTEX
        else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
                 dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
                rc = uartlite_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
        else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
                rc = opal_console_init(devp, &serial_cd);
+#endif
 
        /* Add other serial console driver calls here */
 
index f1f176c29fa3e932c1626453fc48824bd489fc41..5320735395e7fcadfc7775d23cd1a73b22f32f19 100644 (file)
@@ -13,7 +13,6 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MPC86XADS=y
 CONFIG_8xx_COPYBACK=y
-CONFIG_8xx_CPU6=y
 CONFIG_GEN_RTC=y
 CONFIG_HZ_1000=y
 CONFIG_MATH_EMULATION=y
index 73dab7a37386a2180aaec24c101ebda4034bf4a3..9e92aa6a52bab9008695311761ea70eb53b942d3 100644 (file)
@@ -96,6 +96,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
 CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=y
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_GENERIC=y
@@ -112,6 +113,7 @@ CONFIG_SCSI_CXGB3_ISCSI=m
 CONFIG_SCSI_CXGB4_ISCSI=m
 CONFIG_SCSI_BNX2_ISCSI=m
 CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=y
 CONFIG_SCSI_MPT2SAS=m
 CONFIG_SCSI_SYM53C8XX_2=m
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
index 016579ef16d3d411a4e4caa36bccbd5f7ad8e5c5..30a155c0a6b07e31ca69d5a7418fb19a1f6e7872 100644 (file)
@@ -311,6 +311,29 @@ static inline int pte_present(pte_t pte)
        return pte_val(pte) & _PAGE_PRESENT;
 }
 
+/*
+ * We only find page table entries at the last level,
+ * hence there is no need for the other accessors.
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+       unsigned long pteval = pte_val(pte);
+       /*
+        * A read-only access is controlled by the _PAGE_USER bit.
+        * We have _PAGE_READ set for WRITE and EXECUTE.
+        */
+       unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+       if (write)
+               need_pte_bits |= _PAGE_WRITE;
+
+       if ((pteval & need_pte_bits) != need_pte_bits)
+               return false;
+
+       return true;
+}
+
 /* Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  *
index 2d9df40446f62dfabdaa68aab4b285b0010f684e..949d691094a46d674880dd1e54da971a4161f815 100644 (file)
 #define H_PUD_TABLE_SIZE       (sizeof(pud_t) << H_PUD_INDEX_SIZE)
 #define H_PGD_TABLE_SIZE       (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
 
+#define H_PAGE_F_GIX_SHIFT     53
+#define H_PAGE_F_SECOND        _RPAGE_RPN44    /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX   (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY    _RPAGE_RSV1     /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RSV2     /* software: PTE & hash are busy */
+
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
                         H_PAGE_F_SECOND | H_PAGE_F_GIX)
@@ -49,6 +55,20 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
 }
 #endif
 
+/*
+ * The 4K PTE format is different from the 64K PTE format. Saving the hash_slot
+ * is just a matter of returning the PTE bits that need to be modified. With
+ * 64K PTEs, things are a little more involved and hence need many more
+ * parameters to accomplish the same. However, we want to abstract this out
+ * from the caller by keeping the prototype consistent across the two formats.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+                       unsigned int subpg_index, unsigned long hidx)
+{
+       return (hidx << H_PAGE_F_GIX_SHIFT) &
+               (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+}
+
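For context, here is a minimal caller-side sketch (an editor's illustration, not part of the patch; the function name is invented) showing how the consistent prototype lets the hash fault path treat both PTE formats uniformly:

/*
 * Hedged sketch only: a caller updating a PTE after inserting an HPTE.
 * On 4K PTEs pte_set_hidx() returns the F_SECOND/F_GIX bits to fold in;
 * on 64K PTEs it stores the hidx out of line and returns 0.
 */
static inline unsigned long example_commit_hidx(pte_t *ptep, real_pte_t rpte,
						unsigned int subpg_index,
						unsigned long slot,
						unsigned long new_pte)
{
	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
	return new_pte;
}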
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 static inline char *get_hpte_slot_array(pmd_t *pmdp)
index cb46d1034f335e16039d4ed47a4b8cd2d48dc43f..338b7da468cef309fa2b787c852e96ab05f014e5 100644 (file)
  */
 #define H_PAGE_COMBO   _RPAGE_RPN0 /* this is a combo 4k page */
 #define H_PAGE_4K_PFN  _RPAGE_RPN1 /* PFN is for a single 4k page */
+#define H_PAGE_BUSY    _RPAGE_RPN44     /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RPN43    /* PTE has associated HPTE */
+
 /*
  * We need to differentiate between explicit huge page and THP huge
  * page, since THP huge page also need to track real subpage details
  */
 #define H_PAGE_THP_HUGE  H_PAGE_4K_PFN
 
-/*
- * Used to track subpage group valid if H_PAGE_COMBO is set
- * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND
- */
-#define H_PAGE_COMBO_VALID     (H_PAGE_F_GIX | H_PAGE_F_SECOND)
-
 /* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \
-                        H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
@@ -55,24 +51,57 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
        unsigned long *hidxp;
 
        rpte.pte = pte;
-       rpte.hidx = 0;
-       if (pte_val(pte) & H_PAGE_COMBO) {
-               /*
-                * Make sure we order the hidx load against the H_PAGE_COMBO
-                * check. The store side ordering is done in __hash_page_4K
-                */
-               smp_rmb();
-               hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
-               rpte.hidx = *hidxp;
-       }
+
+       /*
+        * Ensure that we do not read the hidx before we read the PTE,
+        * because the writer side is expected to finish writing the hidx
+        * first, followed by the PTE, using smp_wmb(). pte_set_hidx()
+        * ensures that.
+        */
+       smp_rmb();
+
+       hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       rpte.hidx = *hidxp;
        return rpte;
 }
 
+/*
+ * Shift the hidx representation by one modulo 0xf; i.e. hidx 0 is represented
+ * as 1, 1 as 2, ..., and 0xf as 0.  This convention lets us represent the
+ * invalid hidx 0xf with a 0x0 bit value. PTEs are zeroed when allocated
+ * anyway, so we don't have to zero them again and thus save on the
+ * initialization.
+ */
+#define HIDX_UNSHIFT_BY_ONE(x) ((x + 0xfUL) & 0xfUL) /* shift backward by one */
+#define HIDX_SHIFT_BY_ONE(x) ((x + 0x1UL) & 0xfUL)   /* shift forward by one */
+#define HIDX_BITS(x, index)  (x << (index << 2))
+#define BITS_TO_HIDX(x, index)  ((x >> (index << 2)) & 0xfUL)
+#define INVALID_RPTE_HIDX  0x0UL
+
 static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 {
-       if ((pte_val(rpte.pte) & H_PAGE_COMBO))
-               return (rpte.hidx >> (index<<2)) & 0xf;
-       return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf;
+       return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index));
+}
+
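As a quick illustration of the convention (editor's example, not from the patch), encoding and decoding round-trip as follows:

/* Illustrative only: round-trip of the shift-by-one hidx encoding. */
static inline void hidx_encoding_example(void)
{
	/* The invalid hidx 0xf encodes as 0x0, matching zeroed PTEs. */
	unsigned long enc = HIDX_SHIFT_BY_ONE(0xfUL);	/* enc == 0x0 */
	unsigned long dec = HIDX_UNSHIFT_BY_ONE(enc);	/* dec == 0xf */

	(void)dec;
}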
+/*
+ * Commit the hidx and return the PTE bits that need to be modified. The caller
+ * is expected to modify the PTE bits accordingly and commit the PTE to memory.
+ * expected to modify the PTE bits accordingly and commit the PTE to memory.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+               unsigned int subpg_index, unsigned long hidx)
+{
+       unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+       rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
+       *hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
+
+       /*
+        * Anyone reading PTE must ensure hidx bits are read after reading the
+        * PTE by using the read-side barrier smp_rmb(). __real_pte() can be
+        * used for that.
+        */
+       smp_wmb();
+
+       /* No PTE bits to be modified, return 0x0UL */
+       return 0x0UL;
 }
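The ordering contract between the writer above and the reader in __real_pte() pairs smp_wmb() with smp_rmb(); sketched schematically (editor's illustration, not code from the patch):

/*
 * Writer (pte_set_hidx)           Reader (__real_pte)
 *   *hidxp = new hidx                pte = *ptep
 *   smp_wmb()                        smp_rmb()
 *   caller commits the PTE           hidx = *(ptep + PTRS_PER_PTE)
 *
 * A reader that observes the committed PTE is thus guaranteed to also
 * observe the hidx that was written before it.
 */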
 
 #define __rpte_to_pte(r)       ((r).pte)
index ecb1239d74f4bb4ee7c04f51a362b2be76588dfe..0920eff731b385221edeb46a04ed5f280ad76ff8 100644 (file)
@@ -9,11 +9,6 @@
  *
  */
 #define H_PTE_NONE_MASK                _PAGE_HPTEFLAGS
-#define H_PAGE_F_GIX_SHIFT     56
-#define H_PAGE_BUSY            _RPAGE_RSV1 /* software: PTE & hash are busy */
-#define H_PAGE_F_SECOND                _RPAGE_RSV2     /* HPTE is in 2ndary HPTEG */
-#define H_PAGE_F_GIX           (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44)
-#define H_PAGE_HASHPTE         _RPAGE_RPN43    /* PTE has associated HPTE */
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
@@ -167,6 +162,9 @@ static inline int hash__pte_none(pte_t pte)
        return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
 }
 
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+               int ssize, real_pte_t rpte, unsigned int subpg_index);
+
 /* This low-level function performs the actual PTE insertion.
  * Setting the PTE depends on the MMU type and other factors. It's
  * a horrible mess that I'm not going to try to clean up now but
index e91e115a816f1685479e1173a3156676b9402d69..50ed64fba4ae0f17bda5f420809ed3faa40dc783 100644 (file)
@@ -90,6 +90,8 @@
 #define HPTE_R_PP0             ASM_CONST(0x8000000000000000)
 #define HPTE_R_TS              ASM_CONST(0x4000000000000000)
 #define HPTE_R_KEY_HI          ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT0                ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT1                ASM_CONST(0x1000000000000000)
 #define HPTE_R_RPN_SHIFT       12
 #define HPTE_R_RPN             ASM_CONST(0x0ffffffffffff000)
 #define HPTE_R_RPN_3_0         ASM_CONST(0x01fffffffffff000)
 #define HPTE_R_C               ASM_CONST(0x0000000000000080)
 #define HPTE_R_R               ASM_CONST(0x0000000000000100)
 #define HPTE_R_KEY_LO          ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2                ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT3                ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT4                ASM_CONST(0x0000000000000200)
 #define HPTE_R_KEY             (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
 
 #define HPTE_V_1TB_SEG         ASM_CONST(0x4000000000000000)
index c9448e19847a3ef5eba44ffe1d813d37e3ba5e39..0abeb0e2d616d5376646361836f63db0c8a32819 100644 (file)
@@ -108,6 +108,16 @@ typedef struct {
 #ifdef CONFIG_SPAPR_TCE_IOMMU
        struct list_head iommu_group_mem_list;
 #endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+       /*
+        * Each bit represents one protection key.
+        * bit set   -> key allocated
+        * bit unset -> key available for allocation
+        */
+       u32 pkey_allocation_map;
+       s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
 } mm_context_t;
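As a hedged sketch of the bitmap semantics documented above (one bit per key; set means allocated), an allocator could scan for a clear bit as below. The helper name is hypothetical; the real pkey allocator lives elsewhere in this series:

/* Illustrative only: claim the lowest free protection key. */
static inline int example_claim_pkey(mm_context_t *ctx)
{
	int pkey;

	for (pkey = 0; pkey < 32; pkey++) {
		if (!(ctx->pkey_allocation_map & (1u << pkey))) {
			ctx->pkey_allocation_map |= 1u << pkey;
			return pkey;	/* bit set -> key allocated */
		}
	}
	return -1;	/* no key available for allocation */
}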
 
 /*
index 6ca1208cedcb7c9f65a138bf18b61975342aae98..51017726d49539fda8cba5b346460aeba104c3f9 100644 (file)
@@ -14,8 +14,9 @@
  */
 #define _PAGE_BIT_SWAP_TYPE    0
 
+#define _PAGE_NA               0
 #define _PAGE_RO               0
-#define _PAGE_SHARED           0
+#define _PAGE_USER             0
 
 #define _PAGE_EXEC             0x00001 /* execute permission */
 #define _PAGE_WRITE            0x00002 /* write access allowed */
@@ -39,6 +40,7 @@
 #define _RPAGE_RSV2            0x0800000000000000UL
 #define _RPAGE_RSV3            0x0400000000000000UL
 #define _RPAGE_RSV4            0x0200000000000000UL
+#define _RPAGE_RSV5            0x00040UL
 
 #define _PAGE_PTE              0x4000000000000000UL    /* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT          0x8000000000000000UL    /* pte contains a translation */
 /* Max physical address bit as per radix table */
 #define _RPAGE_PA_MAX          57
 
+#ifdef CONFIG_PPC_MEM_KEYS
+#ifdef CONFIG_PPC_64K_PAGES
+#define H_PTE_PKEY_BIT0        _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1        _RPAGE_RSV2
+#else /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT0        0 /* _RPAGE_RSV1 is not available */
+#define H_PTE_PKEY_BIT1        0 /* _RPAGE_RSV2 is not available */
+#endif /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT2        _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3        _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4        _RPAGE_RSV5
+#else /*  CONFIG_PPC_MEM_KEYS */
+#define H_PTE_PKEY_BIT0        0
+#define H_PTE_PKEY_BIT1        0
+#define H_PTE_PKEY_BIT2        0
+#define H_PTE_PKEY_BIT3        0
+#define H_PTE_PKEY_BIT4        0
+#endif /*  CONFIG_PPC_MEM_KEYS */
+
 /*
  * Max physical address bit we will use for now.
  *
 #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
                         _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |   \
                         _PAGE_SOFT_DIRTY)
+
+#define H_PTE_PKEY  (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
+                    H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
 /*
  * Mask of bits returned by pte_pgprot()
  */
 #define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
                         H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
                         _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
-                        _PAGE_SOFT_DIRTY)
+                        _PAGE_SOFT_DIRTY | H_PTE_PKEY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
  * cacheable kernel and user pages) and one for non cacheable
@@ -546,6 +570,40 @@ static inline int pte_present(pte_t pte)
 {
        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
 }
+
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+       return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+       unsigned long pteval = pte_val(pte);
+       /* Also check for pte_user */
+       unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
+       /*
+        * _PAGE_READ is needed for any access and will be
+        * cleared for PROT_NONE
+        */
+       unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
+
+       if (write)
+               need_pte_bits |= _PAGE_WRITE;
+
+       if ((pteval & need_pte_bits) != need_pte_bits)
+               return false;
+
+       if ((pteval & clear_pte_bits) == clear_pte_bits)
+               return false;
+
+       return arch_pte_access_permitted(pte_val(pte), write, 0);
+}
+
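To make the refusal cases concrete, a minimal sketch (editor's example, not part of the patch) of the three ways the check above denies a user access:

/*
 * Illustrative only: how pte_access_permitted() refuses access.
 *  1. PROT_NONE mappings have _PAGE_READ clear, failing need_pte_bits.
 *  2. Kernel-only mappings have _PAGE_PRIVILEGED set, failing the
 *     clear_pte_bits test.
 *  3. With CONFIG_PPC_MEM_KEYS, arch_pte_access_permitted() consults
 *     the protection key bits and can deny the access.
 */
static inline bool example_user_can_read(pte_t pte)
{
	return pte_access_permitted(pte, false);
}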
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -850,6 +908,11 @@ static inline int pud_bad(pud_t pud)
        return hash__pud_bad(pud);
 }
 
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+       return pte_access_permitted(pud_pte(pud), write);
+}
 
 #define pgd_write(pgd)         pte_write(pgd_pte(pgd))
 static inline void pgd_set(pgd_t *pgdp, unsigned long val)
@@ -889,6 +952,12 @@ static inline int pgd_bad(pgd_t pgd)
        return hash__pgd_bad(pgd);
 }
 
+#define pgd_access_permitted pgd_access_permitted
+static inline bool pgd_access_permitted(pgd_t pgd, bool write)
+{
+       return pte_access_permitted(pgd_pte(pgd), write);
+}
+
 extern struct page *pgd_page(pgd_t pgd);
 
 /* Pointers in the page table tree are physical addresses */
@@ -1009,6 +1078,12 @@ static inline int pmd_protnone(pmd_t pmd)
 #define __pmd_write(pmd)       __pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)    pte_savedwrite(pmd_pte(pmd))
 
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+       return pte_access_permitted(pmd_pte(pmd), write);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
index 849ecaae9e79d0508165bdf8f07a12fd66e6b3c9..64d02a704bcb596f146e95ebdff1343d459e4c20 100644 (file)
@@ -51,6 +51,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 
 #define arch_flush_lazy_mmu_mode()      do {} while (0)
 
+extern void hash__tlbiel_all(unsigned int action);
 
 extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
                            int ssize, unsigned long flags);
index 6a9e68003387db977d8b63a81487d6ccee99559f..8eea90f80e45a5ab00d189801d97b4f0e74eeb80 100644 (file)
@@ -11,6 +11,12 @@ static inline int mmu_get_ap(int psize)
        return mmu_psize_defs[psize].ap;
 }
 
+#ifdef CONFIG_PPC_RADIX_MMU
+extern void radix__tlbiel_all(unsigned int action);
+#else
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
+#endif
+
 extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
                                           unsigned long start, unsigned long end);
 extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@ -47,4 +53,5 @@ extern void radix__flush_tlb_lpid(unsigned long lpid);
 extern void radix__flush_tlb_all(void);
 extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
                                        unsigned long address);
+
 #endif
index 58b576f654b373a9c5a293d52f02c988d4b1b4c4..0cac17253513d4c8aa9dc2e7e9a0157f88ecb15c 100644 (file)
@@ -8,6 +8,44 @@
 #include <asm/book3s/64/tlbflush-hash.h>
 #include <asm/book3s/64/tlbflush-radix.h>
 
+/* TLB flush actions. Used as argument to tlbiel_all() */
+enum {
+       TLB_INVAL_SCOPE_GLOBAL = 0,     /* invalidate all TLBs */
+       TLB_INVAL_SCOPE_LPID = 1,       /* invalidate TLBs for current LPID */
+};
+
+#ifdef CONFIG_PPC_NATIVE
+static inline void tlbiel_all(void)
+{
+       /*
+        * This is used for host machine check and bootup.
+        *
+        * This uses early_radix_enabled() (and implementations use
+        * early_cpu_has_feature() etc.) because these work early in
+        * boot, and this is the machine check path, which is not
+        * performance critical.
+        */
+       if (early_radix_enabled())
+               radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+       else
+               hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
+#else
+static inline void tlbiel_all(void) { BUG(); };
+#endif
+
+static inline void tlbiel_all_lpid(bool radix)
+{
+       /*
+        * This is used for guest machine check.
+        */
+       if (radix)
+               radix__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+       else
+               hash__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+}
+
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
                                       unsigned long start, unsigned long end)
index 3c04249bcf39b76a50711c6bc3a5b789e496f1ad..fd06dbe7d7d3218c3608385c6690ddaece7780da 100644 (file)
@@ -133,9 +133,11 @@ struct pt_regs;
 extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
 extern void bad_page_fault(struct pt_regs *, unsigned long, int);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
 extern bool die_will_crash(void);
-
+extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_end(void);
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
index abef812de7f8cb4f7ae5a178c67775f81d4a22f7..812535f40124efddf601c6f53e3b74a52199715d 100644 (file)
@@ -31,8 +31,10 @@ unsigned int create_cond_branch(const unsigned int *addr,
                                unsigned long target, int flags);
 int patch_branch(unsigned int *addr, unsigned long target, int flags);
 int patch_instruction(unsigned int *addr, unsigned int instr);
+int raw_patch_instruction(unsigned int *addr, unsigned int instr);
 
 int instr_is_relative_branch(unsigned int instr);
+int instr_is_relative_link_branch(unsigned int instr);
 int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
 unsigned long branch_target(const unsigned int *instr);
 unsigned int translate_branch(const unsigned int *dest,
index b925df1b87d0c357e8a769a50b070cce81275540..4c24ea8209bbabe16dc68336f9ab206c7932e7a0 100644 (file)
@@ -166,6 +166,6 @@ static inline int cpm_command(u32 command, u8 opcode)
 }
 #endif /* CONFIG_CPM */
 
-int cpm2_gpiochip_add32(struct device_node *np);
+int cpm2_gpiochip_add32(struct device *dev);
 
 #endif
index 3db821876d485a964040cb815843b0122282f7bb..a116fe9317892e93426c7904a3b70d3b098e3708 100644 (file)
@@ -605,5 +605,7 @@ enum cpm_clk {
 };
 
 int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int cpm1_gpiochip_add16(struct device *dev);
+int cpm1_gpiochip_add32(struct device *dev);
 
 #endif /* __CPM1__ */
index 0546663a98db3f726600c5cee68cf192f424aeb6..a2c5c95882cf08d3fda4b4c87cc13d73ba66a652 100644 (file)
@@ -107,12 +107,6 @@ struct cpu_spec {
         * called in real mode to handle SLB and TLB errors.
         */
        long            (*machine_check_early)(struct pt_regs *regs);
-
-       /*
-        * Processor specific routine to flush tlbs.
-        */
-       void            (*flush_tlb)(unsigned int action);
-
 };
 
 extern struct cpu_spec         *cur_cpu_spec;
@@ -133,12 +127,6 @@ extern void cpu_feature_keys_init(void);
 static inline void cpu_feature_keys_init(void) { }
 #endif
 
-/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
-enum {
-       TLB_INVAL_SCOPE_GLOBAL = 0,     /* invalidate all TLBs */
-       TLB_INVAL_SCOPE_LPID = 1,       /* invalidate TLBs for current LPID */
-};
-
 #endif /* __ASSEMBLY__ */
 
 /* CPU kernel features */
@@ -207,7 +195,7 @@ enum {
 #define CPU_FTR_STCX_CHECKS_ADDRESS    LONG_ASM_CONST(0x0004000000000000)
 #define CPU_FTR_POPCNTB                        LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_POPCNTD                        LONG_ASM_CONST(0x0010000000000000)
-/* Free                                        LONG_ASM_CONST(0x0020000000000000) */
+#define CPU_FTR_PKEY                   LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_VMX_COPY               LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_TM                     LONG_ASM_CONST(0x0080000000000000)
 #define CPU_FTR_CFAR                   LONG_ASM_CONST(0x0100000000000000)
@@ -454,7 +442,7 @@ enum {
            CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_CFAR | CPU_FTR_HVMODE | \
-           CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
+           CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
 #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -464,7 +452,7 @@ enum {
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
            CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-           CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+           CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
 #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
@@ -476,7 +464,8 @@ enum {
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
            CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-           CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
+           CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
+           CPU_FTR_PKEY)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
                             (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644 (file)
index 0000000..ce242b9
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+struct drmem_lmb {
+       u64     base_addr;
+       u32     drc_index;
+       u32     aa_index;
+       u32     flags;
+};
+
+struct drmem_lmb_info {
+       struct drmem_lmb        *lmbs;
+       int                     n_lmbs;
+       u32                     lmb_size;
+};
+
+extern struct drmem_lmb_info *drmem_info;
+
+#define for_each_drmem_lmb_in_range(lmb, start, end)           \
+       for ((lmb) = (start); (lmb) <= (end); (lmb)++)
+
+#define for_each_drmem_lmb(lmb)                                        \
+       for_each_drmem_lmb_in_range((lmb),                      \
+               &drmem_info->lmbs[0],                           \
+               &drmem_info->lmbs[drmem_info->n_lmbs - 1])
+
+/*
+ * The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+       __be64  base_addr;
+       __be32  drc_index;
+       __be32  reserved;
+       __be32  aa_index;
+       __be32  flags;
+};
+
+/*
+ * Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+       u32     seq_lmbs;
+       u64     base_addr;
+       u32     drc_index;
+       u32     aa_index;
+       u32     flags;
+} __packed;
+
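A hedged sketch of how one v2 cell expands into individual LMBs (editor's illustration; the in-kernel parser also converts the cells from big-endian device tree data, which is elided here):

/* Illustrative only: expand one LMB set into per-LMB entries. */
static void example_expand_v2_cell(const struct of_drconf_cell_v2 *cell,
				   struct drmem_lmb *lmbs, u32 lmb_size)
{
	u32 i;

	for (i = 0; i < cell->seq_lmbs; i++) {
		lmbs[i].base_addr = cell->base_addr + (u64)i * lmb_size;
		lmbs[i].drc_index = cell->drc_index + i;
		lmbs[i].aa_index  = cell->aa_index;
		lmbs[i].flags     = cell->flags;
	}
}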
+#define DRCONF_MEM_ASSIGNED    0x00000008
+#define DRCONF_MEM_AI_INVALID  0x00000040
+#define DRCONF_MEM_RESERVED    0x00000080
+
+static inline u32 drmem_lmb_size(void)
+{
+       return drmem_info->lmb_size;
+}
+
+#define DRMEM_LMB_RESERVED     0x80000000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+       lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+       lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+       return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
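A usage sketch for the iterator and reservation helpers above (editor's example; want_to_reserve() is a hypothetical predicate):

/* Illustrative only: walk all LMBs, reserving the ones a caller picks. */
static void example_reserve_lmbs(bool (*want_to_reserve)(struct drmem_lmb *))
{
	struct drmem_lmb *lmb;

	for_each_drmem_lmb(lmb) {
		if (drmem_lmb_reserved(lmb))
			continue;	/* already reserved */
		if (want_to_reserve(lmb))
			drmem_mark_lmb_reserved(lmb);
	}
}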
+u64 drmem_lmb_memory_max(void);
+void __init walk_drmem_lmbs(struct device_node *dn,
+                       void (*func)(struct drmem_lmb *, const __be32 **));
+int drmem_update_dt(void);
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+                       void (*func)(struct drmem_lmb *, const __be32 **));
+#endif
+
+#endif /* _ASM_POWERPC_LMB_H */
index 5161c37dd039bd29f034fdd1375c174d3d198707..fd37cc101f4f2f6fcba5fe1b5aecd7885d1da074 100644 (file)
@@ -214,6 +214,7 @@ struct eeh_ops {
        int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
        int (*next_error)(struct eeh_pe **pe);
        int (*restore_config)(struct pci_dn *pdn);
+       int (*notify_resume)(struct pci_dn *pdn);
 };
 
 extern int eeh_subsystem_flags;
@@ -297,6 +298,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
                      unsigned long addr, unsigned long mask);
+int eeh_restore_vf_config(struct pci_dn *pdn);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
index 7197b179c1b150ba819f13a1f7feb6c9a45da1f9..176dfb73d42c073df181013c6497cb42f9c8f85d 100644 (file)
@@ -251,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        std     r10,area+EX_R10(r13);   /* save r10 - r12 */            \
        OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
-#define __EXCEPTION_PROLOG_1(area, extra, vec)                         \
+#define __EXCEPTION_PROLOG_1_PRE(area)                                 \
        OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);         \
        OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);          \
        SAVE_CTR(r10, area);                                            \
-       mfcr    r9;                                                     \
-       extra(vec);                                                     \
+       mfcr    r9;
+
+#define __EXCEPTION_PROLOG_1_POST(area)                                        \
        std     r11,area+EX_R11(r13);                                   \
        std     r12,area+EX_R12(r13);                                   \
        GET_SCRATCH0(r10);                                              \
        std     r10,area+EX_R13(r13)
+
+/*
+ * This version of EXCEPTION_PROLOG_1 carries an
+ * additional parameter called "bitmask" to support
+ * checking of the interrupt mask level in SOFTEN_TEST.
+ * Intended to be used in the MASKABLE_EXCEPTION_* macros.
+ */
+#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask)                 \
+       __EXCEPTION_PROLOG_1_PRE(area);                                 \
+       extra(vec, bitmask);                                            \
+       __EXCEPTION_PROLOG_1_POST(area);
+
+/*
+ * This version of EXCEPTION_PROLOG_1 is intended
+ * to be used in the STD_EXCEPTION* macros.
+ */
+#define _EXCEPTION_PROLOG_1(area, extra, vec)                          \
+       __EXCEPTION_PROLOG_1_PRE(area);                                 \
+       extra(vec);                                                     \
+       __EXCEPTION_PROLOG_1_POST(area);
+
 #define EXCEPTION_PROLOG_1(area, extra, vec)                           \
-       __EXCEPTION_PROLOG_1(area, extra, vec)
+       _EXCEPTION_PROLOG_1(area, extra, vec)
 
 #define __EXCEPTION_PROLOG_PSERIES_1(label, h)                         \
        ld      r10,PACAKMSR(r13);      /* get MSR value for kernel */  \
@@ -485,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mflr    r9;                     /* Get LR, later save to stack  */ \
        ld      r2,PACATOC(r13);        /* get kernel TOC into r2       */ \
        std     r9,_LINK(r1);                                              \
-       lbz     r10,PACASOFTIRQEN(r13);                            \
+       lbz     r10,PACAIRQSOFTMASK(r13);                                  \
        mfspr   r11,SPRN_XER;           /* save XER in stackframe       */ \
        std     r10,SOFTE(r1);                                             \
        std     r11,_XER(r1);                                              \
@@ -549,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define SOFTEN_VALUE_0xe80     PACA_IRQ_DBELL
 #define SOFTEN_VALUE_0xe60     PACA_IRQ_HMI
 #define SOFTEN_VALUE_0xea0     PACA_IRQ_EE
+#define SOFTEN_VALUE_0xf00     PACA_IRQ_PMI
 
-#define __SOFTEN_TEST(h, vec)                                          \
-       lbz     r10,PACASOFTIRQEN(r13);                                 \
-       cmpwi   r10,0;                                                  \
+#define __SOFTEN_TEST(h, vec, bitmask)                                 \
+       lbz     r10,PACAIRQSOFTMASK(r13);                               \
+       andi.   r10,r10,bitmask;                                        \
        li      r10,SOFTEN_VALUE_##vec;                                 \
-       beq     masked_##h##interrupt
+       bne     masked_##h##interrupt
 
-#define _SOFTEN_TEST(h, vec)   __SOFTEN_TEST(h, vec)
+#define _SOFTEN_TEST(h, vec, bitmask)  __SOFTEN_TEST(h, vec, bitmask)
 
-#define SOFTEN_TEST_PR(vec)                                            \
+#define SOFTEN_TEST_PR(vec, bitmask)                                   \
        KVMTEST(EXC_STD, vec);                                          \
-       _SOFTEN_TEST(EXC_STD, vec)
+       _SOFTEN_TEST(EXC_STD, vec, bitmask)
 
-#define SOFTEN_TEST_HV(vec)                                            \
+#define SOFTEN_TEST_HV(vec, bitmask)                                   \
        KVMTEST(EXC_HV, vec);                                           \
-       _SOFTEN_TEST(EXC_HV, vec)
+       _SOFTEN_TEST(EXC_HV, vec, bitmask)
 
 #define KVMTEST_PR(vec)                                                        \
        KVMTEST(EXC_STD, vec)
@@ -572,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define KVMTEST_HV(vec)                                                        \
        KVMTEST(EXC_HV, vec)
 
-#define SOFTEN_NOTEST_PR(vec)          _SOFTEN_TEST(EXC_STD, vec)
-#define SOFTEN_NOTEST_HV(vec)          _SOFTEN_TEST(EXC_HV, vec)
+#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask)
+#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask)
 
-#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)             \
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)    \
        SET_SCRATCH0(r13);    /* save r13 */                            \
        EXCEPTION_PROLOG_0(PACA_EXGEN);                                 \
-       __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec);                   \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask);   \
        EXCEPTION_PROLOG_PSERIES_1(label, h);
 
-#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)              \
-       __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)     \
+       __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
 
-#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label)                    \
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask)           \
        _MASKABLE_EXCEPTION_PSERIES(vec, label,                         \
-                                   EXC_STD, SOFTEN_TEST_PR)
+                                   EXC_STD, SOFTEN_TEST_PR, bitmask)
 
-#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label)                     \
-       EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec);            \
+#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask)            \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
        EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
 
-#define MASKABLE_EXCEPTION_HV(loc, vec, label                        \
+#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask)                        \
        _MASKABLE_EXCEPTION_PSERIES(vec, label,                         \
-                                   EXC_HV, SOFTEN_TEST_HV)
+                                   EXC_HV, SOFTEN_TEST_HV, bitmask)
 
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label)                          \
-       EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec);            \
+#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask)                 \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
        EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
 
-#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)       \
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
        SET_SCRATCH0(r13);    /* save r13 */                            \
        EXCEPTION_PROLOG_0(PACA_EXGEN);                                 \
-       __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec);                   \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask);   \
        EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
 
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)                \
-       __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\
+       __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
 
-#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label)              \
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask)     \
        _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,                   \
-                                         EXC_STD, SOFTEN_NOTEST_PR)
+                                         EXC_STD, SOFTEN_NOTEST_PR, bitmask)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask)      \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\
+       EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD);
 
-#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label)                   \
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask)          \
        _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,                   \
-                                         EXC_HV, SOFTEN_TEST_HV)
+                                         EXC_HV, SOFTEN_TEST_HV, bitmask)
 
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label)                    \
-       EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec);            \
+#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)           \
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
        EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
index 8645897472b10bfcb5bd6a4ac3699d3e375ce439..511acfd7ab0d3e20066dcd60e22bd3048ad34119 100644 (file)
@@ -51,6 +51,8 @@
 #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
 #define FW_FEATURE_PRRN                ASM_CONST(0x0000000200000000)
+#define FW_FEATURE_DRMEM_V2    ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO    ASM_CONST(0x0000000800000000)
 
 #ifndef __ASSEMBLY__
 
@@ -67,7 +69,8 @@ enum {
                FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
                FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
                FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
-               FW_FEATURE_HPT_RESIZE,
+               FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+               FW_FEATURE_DRC_INFO,
        FW_FEATURE_PSERIES_ALWAYS = 0,
        FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
        FW_FEATURE_POWERNV_ALWAYS = 0,
index 456f9e7b8d83ad343bed2a222df35023e148e7f6..5986d473722b5eef58530689f488d129c1669589 100644 (file)
@@ -29,6 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 #define local_softirq_pending()        __this_cpu_read(irq_stat.__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
 #define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
 #define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
index fdcff76e9a25a6095d44317a9074c5f420961fcf..7e0e93f24cb79d8d62d16c72ed6365168a990425 100644 (file)
@@ -178,7 +178,7 @@ name:
  * TRAMP_REAL_*   - real, unrelocated helpers (virt can call these)
 * TRAMP_VIRT_*   - virt, unrelocated helpers (in practice, real can use)
  * TRAMP_KVM      - KVM handlers that get put into real, unrelocated
- * EXC_COMMON_*   - virt, relocated common handlers
+ * EXC_COMMON     - virt, relocated common handlers
  *
  * The EXC handlers are given a name, and branch to name_common, or the
 * appropriate KVM or masking function. Vector handler varieties are as
@@ -211,7 +211,6 @@ name:
  * EXC_COMMON_BEGIN/END - used to open-code the handler
  * EXC_COMMON
  * EXC_COMMON_ASYNC
- * EXC_COMMON_HV
  *
  * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
  * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
@@ -269,14 +268,14 @@ name:
        STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common);     \
        EXC_VIRT_END(name, start, size);
 
-#define EXC_REAL_MASKABLE(name, start, size)                           \
+#define EXC_REAL_MASKABLE(name, start, size, bitmask)                  \
        EXC_REAL_BEGIN(name, start, size);                              \
-       MASKABLE_EXCEPTION_PSERIES(start, start, name##_common);        \
+       MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\
        EXC_REAL_END(name, start, size);
 
-#define EXC_VIRT_MASKABLE(name, start, size, realvec)                  \
+#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask)         \
        EXC_VIRT_BEGIN(name, start, size);                              \
-       MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+       MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\
        EXC_VIRT_END(name, start, size);
 
 #define EXC_REAL_HV(name, start, size)                                 \
@@ -305,13 +304,13 @@ name:
 #define __EXC_REAL_OOL_MASKABLE(name, start, size)                     \
        __EXC_REAL_OOL(name, start, size);
 
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec)                           \
+#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask)                  \
        TRAMP_REAL_BEGIN(tramp_real_##name);                            \
-       MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common);             \
+       MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask);    \
 
-#define EXC_REAL_OOL_MASKABLE(name, start, size)                       \
+#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask)              \
        __EXC_REAL_OOL_MASKABLE(name, start, size);                     \
-       __TRAMP_REAL_OOL_MASKABLE(name, start);
+       __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask);
 
 #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler)           \
        EXC_REAL_BEGIN(name, start, size);                              \
@@ -332,13 +331,13 @@ name:
 #define __EXC_REAL_OOL_MASKABLE_HV(name, start, size)                  \
        __EXC_REAL_OOL(name, start, size);
 
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec)                                \
+#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask)               \
        TRAMP_REAL_BEGIN(tramp_real_##name);                            \
-       MASKABLE_EXCEPTION_HV_OOL(vec, name##_common);                  \
+       MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask);         \
 
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size)                    \
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask)           \
        __EXC_REAL_OOL_MASKABLE_HV(name, start, size);                  \
-       __TRAMP_REAL_OOL_MASKABLE_HV(name, start);
+       __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask);
 
 #define __EXC_VIRT_OOL(name, start, size)                              \
        EXC_VIRT_BEGIN(name, start, size);                              \
@@ -356,13 +355,13 @@ name:
 #define __EXC_VIRT_OOL_MASKABLE(name, start, size)                     \
        __EXC_VIRT_OOL(name, start, size);
 
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec)                       \
+#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask)              \
        TRAMP_VIRT_BEGIN(tramp_virt_##name);                            \
-       MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common);   \
+       MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\
 
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec)              \
+#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask)     \
        __EXC_VIRT_OOL_MASKABLE(name, start, size);                     \
-       __TRAMP_VIRT_OOL_MASKABLE(name, realvec);
+       __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask);
 
 #define __EXC_VIRT_OOL_HV(name, start, size)                           \
        __EXC_VIRT_OOL(name, start, size);
@@ -378,13 +377,13 @@ name:
 #define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size)                  \
        __EXC_VIRT_OOL(name, start, size);
 
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec)                    \
+#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask)           \
        TRAMP_VIRT_BEGIN(tramp_virt_##name);                            \
-       MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common);        \
+       MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\
 
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec)           \
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask)  \
        __EXC_VIRT_OOL_MASKABLE_HV(name, start, size);                  \
-       __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
+       __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask);
 
 #define TRAMP_KVM(area, n)                                             \
        TRAMP_KVM_BEGIN(do_kvm_##n);                                    \
@@ -413,10 +412,6 @@ name:
        EXC_COMMON_BEGIN(name);                                         \
        STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr);                \
 
-#define EXC_COMMON_HV(name, realvec, hdlr)                             \
-       EXC_COMMON_BEGIN(name);                                         \
-       STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr);                \
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_HEAD_64_H */
index 85b7a1a21e228571df158782f36a79e20728cff9..9c14f7b5c46cd9bcb46a287ec229928c80c5c4ff 100644 (file)
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
 static inline void wait_for_subcore_guest_exit(void) { }
 static inline void wait_for_tb_resync(void) { }
 #endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
 #endif /* __ASM_PPC64_HMI_H__ */
index 14c9d44f355b0387de2cfd216496f3842ddf5e56..1a4847f67ea8d94955177ac2147aff5a2cf18344 100644 (file)
@@ -47,8 +47,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
 {
        BUG_ON(!hugepd_ok(hpd));
 #ifdef CONFIG_PPC_8xx
-       return (pte_t *)__va(hpd_val(hpd) &
-                            ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK));
+       return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
 #else
        return (pte_t *)((hpd_val(hpd) &
                          ~HUGEPD_SHIFT_MASK) | PD_HUGE);
index 3818fa0164f0b678144ed56354130f657f20161e..88e5e8f17e9896e5a051845235bfa0c822684552 100644 (file)
 #define PACA_IRQ_DEC           0x08 /* Or FIT */
 #define PACA_IRQ_EE_EDGE       0x10 /* BookE only */
 #define PACA_IRQ_HMI           0x20
+#define PACA_IRQ_PMI           0x40
+
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED           0
+#define IRQS_DISABLED          1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED      2
+#define IRQS_ALL_DISABLED      (IRQS_DISABLED | IRQS_PMI_DISABLED)
 
 #endif /* CONFIG_PPC64 */
 
@@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs);
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long irq_soft_mask_return(void)
 {
        unsigned long flags;
 
        asm volatile(
                "lbz %0,%1(13)"
                : "=r" (flags)
-               : "i" (offsetof(struct paca_struct, soft_enabled)));
+               : "i" (offsetof(struct paca_struct, irq_soft_mask)));
+
+       return flags;
+}
+
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we change paca->irq_soft_mask.
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       /*
+        * The irq mask must always include the STD bit if any are set.
+        *
+        * and interrupts don't get replayed until the standard
+        * interrupt (local_irq_disable()) is unmasked.
+        *
+        * Other masks must only provide additional masking beyond
+        * the standard, and they are also not replayed until the
+        * standard interrupt becomes unmasked.
+        *
+        * This could be changed, but it will require partial
+        * unmasks to be replayed, among other things. For now, take
+        * the simple approach.
+        */
+       WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+       asm volatile(
+               "stb %0,%1(13)"
+               :
+               : "r" (mask),
+                 "i" (offsetof(struct paca_struct, irq_soft_mask))
+               : "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+       unsigned long flags;
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+       asm volatile(
+               "lbz %0,%1(13); stb %2,%1(13)"
+               : "=&r" (flags)
+               : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+                 "r" (mask)
+               : "memory");
 
        return flags;
 }
 
-static inline unsigned long arch_local_irq_disable(void)
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
 {
-       unsigned long flags, zero;
+       unsigned long flags, tmp;
 
        asm volatile(
-               "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
-               : "=r" (flags), "=&r" (zero)
-               : "i" (offsetof(struct paca_struct, soft_enabled))
+               "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
+               : "=&r" (flags), "=r" (tmp)
+               : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+                 "r" (mask)
                : "memory");
 
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
+#endif
+
        return flags;
 }
 
+static inline unsigned long arch_local_save_flags(void)
+{
+       return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+       irq_soft_mask_set(IRQS_DISABLED);
+}
+
 extern void arch_local_irq_restore(unsigned long);
 
 static inline void arch_local_irq_enable(void)
 {
-       arch_local_irq_restore(1);
+       arch_local_irq_restore(IRQS_ENABLED);
 }
 
 static inline unsigned long arch_local_irq_save(void)
 {
-       return arch_local_irq_disable();
+       return irq_soft_mask_set_return(IRQS_DISABLED);
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
 {
-       return flags == 0;
+       return flags & IRQS_DISABLED;
 }
 
 static inline bool arch_irqs_disabled(void)
@@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void)
        return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irqs together with PMIs, a new
+ * pair of macros, powerpc_local_irq_pmu_save() and
+ * powerpc_local_irq_pmu_restore(), is added. They are implemented using
+ * the generic Linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags)                                  \
+       do {                                                            \
+               typecheck(unsigned long, flags);                        \
+               flags = irq_soft_mask_or_return(IRQS_DISABLED | \
+                               IRQS_PMI_DISABLED);                     \
+       } while(0)
+
+#define raw_local_irq_pmu_restore(flags)                               \
+       do {                                                            \
+               typecheck(unsigned long, flags);                        \
+               arch_local_irq_restore(flags);                          \
+       } while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags)                      \
+        do {                                                   \
+               raw_local_irq_pmu_save(flags);                  \
+               trace_hardirqs_off();                           \
+       } while(0)
+#define powerpc_local_irq_pmu_restore(flags)                   \
+       do {                                                    \
+               if (raw_irqs_disabled_flags(flags)) {           \
+                       raw_local_irq_pmu_restore(flags);       \
+                       trace_hardirqs_off();                   \
+               } else {                                        \
+                       trace_hardirqs_on();                    \
+                       raw_local_irq_pmu_restore(flags);       \
+               }                                               \
+       } while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags)                      \
+       do {                                                    \
+               raw_local_irq_pmu_save(flags);                  \
+       } while(0)
+#define powerpc_local_irq_pmu_restore(flags)                   \
+       do {                                                    \
+               raw_local_irq_pmu_restore(flags);               \
+       } while (0)
+#endif  /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
 #ifdef CONFIG_PPC_BOOK3E
 #define __hard_irq_enable()    asm volatile("wrteei 1" : : : "memory")
 #define __hard_irq_disable()   asm volatile("wrteei 0" : : : "memory")
@@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void)
 #define __hard_irq_disable()   __mtmsrd(local_paca->kernel_msr, 1)
 #endif
 
-#define hard_irq_disable()     do {                    \
-       u8 _was_enabled;                                \
-       __hard_irq_disable();                           \
-       _was_enabled = local_paca->soft_enabled;        \
-       local_paca->soft_enabled = 0;                   \
-       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;  \
-       if (_was_enabled)                               \
-               trace_hardirqs_off();                   \
+#define hard_irq_disable()     do {                                    \
+       unsigned long flags;                                            \
+       __hard_irq_disable();                                           \
+       flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED);            \
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;                  \
+       if (!arch_irqs_disabled_flags(flags))                           \
+               trace_hardirqs_off();                                   \
 } while(0)
 
 static inline bool lazy_irq_pending(void)
@@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void)
 
 static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
 {
-       return !regs->softe;
+       return (regs->softe & IRQS_DISABLED);
 }
 
 extern bool prep_irq_for_idle(void);
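
The hunks above replace the old boolean soft_enabled with a two-bit irq_soft_mask, so PMU interrupts can be soft-masked independently of normal interrupts. A minimal sketch of how Book3S kernel code is expected to bracket a PMI-sensitive critical section with the macros added above; the counter and function names are illustrative, not part of this merge:

    #include <asm/hw_irq.h>

    static long example_counter;    /* illustrative datum shared with the perf handler */

    static void example_update(long delta)
    {
            unsigned long flags;

            /* Soft-masks both normal interrupts and PMIs on this CPU */
            powerpc_local_irq_pmu_save(flags);
            example_counter += delta;
            powerpc_local_irq_pmu_restore(flags);
    }
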
index fad0e6ff460f22398b8487083cb34cb7abcb1de9..d76cb11be3e3f054b3b49beea5945471db920331 100644 (file)
 #define THREAD_IMC_LDBAR_MASK           0x0003ffffffffe000ULL
 #define THREAD_IMC_ENABLE               0x8000000000000000ULL
 
+/*
+ * Offsets for the debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET            0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET                8
+#define IMC_CNTL_BLK_MODE_OFFSET       32
+
 /*
  * Structure to hold memory address information for imc units.
  */
@@ -71,7 +78,7 @@ struct imc_events {
 struct imc_pmu {
        struct pmu pmu;
        struct imc_mem_info *mem_info;
-       struct imc_events **events;
+       struct imc_events *events;
        /*
         * Attribute groups for the PMU. Slot 0 used for
         * format attribute, slot 1 used for cpusmask attribute,
index 1aeb5f13b8c439f4c788b0711f1f5d2009b79d7d..1a6c1ce17735a5f3a80b6f3254e9f082d72f14c4 100644 (file)
  * be clobbered.
  */
 #define RECONCILE_IRQ_STATE(__rA, __rB)                \
-       lbz     __rA,PACASOFTIRQEN(r13);        \
+       lbz     __rA,PACAIRQSOFTMASK(r13);      \
        lbz     __rB,PACAIRQHAPPENED(r13);      \
-       cmpwi   cr0,__rA,0;                     \
-       li      __rA,0;                         \
+       andi.   __rA,__rA,IRQS_DISABLED;        \
+       li      __rA,IRQS_DISABLED;             \
        ori     __rB,__rB,PACA_IRQ_HARD_DIS;    \
        stb     __rB,PACAIRQHAPPENED(r13);      \
-       beq     44f;                            \
-       stb     __rA,PACASOFTIRQEN(r13);        \
+       bne     44f;                            \
+       stb     __rA,PACAIRQSOFTMASK(r13);      \
        TRACE_DISABLE_INTS;                     \
 44:
 
@@ -64,9 +64,9 @@
 
 #define RECONCILE_IRQ_STATE(__rA, __rB)                \
        lbz     __rA,PACAIRQHAPPENED(r13);      \
-       li      __rB,0;                         \
+       li      __rB,IRQS_DISABLED;             \
        ori     __rA,__rA,PACA_IRQ_HARD_DIS;    \
-       stb     __rB,PACASOFTIRQEN(r13);        \
+       stb     __rB,PACAIRQSOFTMASK(r13);      \
        stb     __rA,PACAIRQHAPPENED(r13)
 #endif
 #endif
index 4419d435639a8bb0219eb2ad34ae4a1487488d55..9dcbfa6bbb91e740e483fa6c5c56b8422b8440c8 100644 (file)
@@ -73,6 +73,8 @@ extern void kexec_smp_wait(void);     /* get and clear naca physid, wait for
                                          master to copy new code to 0 */
 extern int crashing_cpu;
 extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *);
+extern int crash_wake_offline;
 
 struct kimage;
 struct pt_regs;
index 941c2a3f231b90686481b6e711dbded50f72eaf6..9db18287b5f447503eb3a8453e32fe652b5e467d 100644 (file)
@@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void)
 
        /* Only need to enable IRQs by hard enabling them after this */
        local_paca->irq_happened = 0;
-       local_paca->soft_enabled = 1;
+       irq_soft_mask_set(IRQS_ENABLED);
 #endif
 }
 
index 600a68bd77f5a89c2da0a39d47d0a99b25fbcbd5..fdd00939270bf08113b537a090d6a6e34a048361 100644 (file)
@@ -2,76 +2,64 @@
 #ifndef _ARCH_POWERPC_LOCAL_H
 #define _ARCH_POWERPC_LOCAL_H
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
 #include <linux/percpu.h>
 #include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
 
 typedef struct
 {
-       atomic_long_t a;
+       long v;
 } local_t;
 
-#define LOCAL_INIT(i)  { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l)  atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+#define LOCAL_INIT(i)  { (i) }
 
-#define local_add(i,l) atomic_long_add((i),(&(l)->a))
-#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
-#define local_inc(l)   atomic_long_inc(&(l)->a)
-#define local_dec(l)   atomic_long_dec(&(l)->a)
-
-static __inline__ long local_add_return(long a, local_t *l)
+static __inline__ long local_read(local_t *l)
 {
-       long t;
-
-       __asm__ __volatile__(
-"1:"   PPC_LLARX(%0,0,%2,0) "                  # local_add_return\n\
-       add     %0,%1,%0\n"
-       PPC405_ERR77(0,%2)
-       PPC_STLCX       "%0,0,%2 \n\
-       bne-    1b"
-       : "=&r" (t)
-       : "r" (a), "r" (&(l->a.counter))
-       : "cc", "memory");
-
-       return t;
+       return READ_ONCE(l->v);
 }
 
-#define local_add_negative(a, l)       (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
+static __inline__ void local_set(local_t *l, long i)
 {
-       long t;
+       WRITE_ONCE(l->v, i);
+}
 
-       __asm__ __volatile__(
-"1:"   PPC_LLARX(%0,0,%2,0) "                  # local_sub_return\n\
-       subf    %0,%1,%0\n"
-       PPC405_ERR77(0,%2)
-       PPC_STLCX       "%0,0,%2 \n\
-       bne-    1b"
-       : "=&r" (t)
-       : "r" (a), "r" (&(l->a.counter))
-       : "cc", "memory");
+#define LOCAL_OP(op, c_op)                                             \
+static __inline__ void local_##op(long i, local_t *l)                  \
+{                                                                      \
+       unsigned long flags;                                            \
+                                                                       \
+       powerpc_local_irq_pmu_save(flags);                              \
+       l->v c_op i;                                            \
+       powerpc_local_irq_pmu_restore(flags);                           \
+}
 
-       return t;
+#define LOCAL_OP_RETURN(op, c_op)                                      \
+static __inline__ long local_##op##_return(long a, local_t *l)         \
+{                                                                      \
+       long t;                                                         \
+       unsigned long flags;                                            \
+                                                                       \
+       powerpc_local_irq_pmu_save(flags);                              \
+       t = (l->v c_op a);                                              \
+       powerpc_local_irq_pmu_restore(flags);                           \
+                                                                       \
+       return t;                                                       \
 }
 
-static __inline__ long local_inc_return(local_t *l)
-{
-       long t;
+#define LOCAL_OPS(op, c_op)            \
+       LOCAL_OP(op, c_op)              \
+       LOCAL_OP_RETURN(op, c_op)
 
-       __asm__ __volatile__(
-"1:"   PPC_LLARX(%0,0,%1,0) "                  # local_inc_return\n\
-       addic   %0,%0,1\n"
-       PPC405_ERR77(0,%1)
-       PPC_STLCX       "%0,0,%1 \n\
-       bne-    1b"
-       : "=&r" (t)
-       : "r" (&(l->a.counter))
-       : "cc", "xer", "memory");
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
 
-       return t;
-}
+#define local_add_negative(a, l)       (local_add_return((a), (l)) < 0)
+#define local_inc_return(l)            local_add_return(1LL, l)
+#define local_inc(l)                   local_inc_return(l)
 
 /*
  * local_inc_and_test - increment and test
@@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
+#define local_inc_and_test(l)          (local_inc_return(l) == 0)
 
-static __inline__ long local_dec_return(local_t *l)
+#define local_dec_return(l)            local_sub_return(1LL, l)
+#define local_dec(l)                   local_dec_return(l)
+#define local_sub_and_test(a, l)       (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l)          (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
 {
        long t;
+       unsigned long flags;
 
-       __asm__ __volatile__(
-"1:"   PPC_LLARX(%0,0,%1,0) "                  # local_dec_return\n\
-       addic   %0,%0,-1\n"
-       PPC405_ERR77(0,%1)
-       PPC_STLCX       "%0,0,%1\n\
-       bne-    1b"
-       : "=&r" (t)
-       : "r" (&(l->a.counter))
-       : "cc", "xer", "memory");
+       powerpc_local_irq_pmu_save(flags);
+       t = l->v;
+       if (t == o)
+               l->v = n;
+       powerpc_local_irq_pmu_restore(flags);
 
        return t;
 }
 
-#define local_cmpxchg(l, o, n) \
-       (cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+static __inline__ long local_xchg(local_t *l, long n)
+{
+       long t;
+       unsigned long flags;
+
+       powerpc_local_irq_pmu_save(flags);
+       t = l->v;
+       l->v = n;
+       powerpc_local_irq_pmu_restore(flags);
+
+       return t;
+}
 
 /**
  * local_add_unless - add unless the number is a given value
@@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l)
  */
 static __inline__ int local_add_unless(local_t *l, long a, long u)
 {
-       long t;
-
-       __asm__ __volatile__ (
-"1:"   PPC_LLARX(%0,0,%1,0) "                  # local_add_unless\n\
-       cmpw    0,%0,%3 \n\
-       beq-    2f \n\
-       add     %0,%2,%0 \n"
-       PPC405_ERR77(0,%2)
-       PPC_STLCX       "%0,0,%1 \n\
-       bne-    1b \n"
-"      subf    %0,%2,%0 \n\
-2:"
-       : "=&r" (t)
-       : "r" (&(l->a.counter)), "r" (a), "r" (u)
-       : "cc", "memory");
-
-       return t != u;
-}
-
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#define local_sub_and_test(a, l)       (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l)          (local_dec_return((l)) == 0)
-
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
- */
-static __inline__ long local_dec_if_positive(local_t *l)
-{
-       long t;
+       unsigned long flags;
+       int ret = 0;
 
-       __asm__ __volatile__(
-"1:"   PPC_LLARX(%0,0,%1,0) "                  # local_dec_if_positive\n\
-       cmpwi   %0,1\n\
-       addi    %0,%0,-1\n\
-       blt-    2f\n"
-       PPC405_ERR77(0,%1)
-       PPC_STLCX       "%0,0,%1\n\
-       bne-    1b"
-       "\n\
-2:"    : "=&b" (t)
-       : "r" (&(l->a.counter))
-       : "cc", "memory");
+       powerpc_local_irq_pmu_save(flags);
+       if (l->v != u) {
+               l->v += a;
+               ret = 1;
+       }
+       powerpc_local_irq_pmu_restore(flags);
 
-       return t;
+       return ret;
 }
 
+#define local_inc_not_zero(l)          local_add_unless((l), 1, 0)
+
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations.  Note they take
  * a variable, not an address.
  */
 
-#define __local_inc(l)         ((l)->a.counter++)
-#define __local_dec(l)         ((l)->a.counter++)
-#define __local_add(i,l)       ((l)->a.counter+=(i))
-#define __local_sub(i,l)       ((l)->a.counter-=(i))
+#define __local_inc(l)         ((l)->v++)
+#define __local_dec(l)         ((l)->v--)
+#define __local_add(i,l)       ((l)->v+=(i))
+#define __local_sub(i,l)       ((l)->v-=(i))
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #endif /* _ARCH_POWERPC_LOCAL_H */
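
With PMU soft-masking available, the Book3S-64 local_t above no longer needs larx/stcx. atomics: each operation is a plain load/store inside a powerpc_local_irq_pmu_save/restore pair, which the merge description credits with a roughly 4x speedup over the atomics-based version. A hedged usage sketch; the per-CPU variable and function names are assumptions:

    #include <linux/percpu.h>
    #include <asm/local.h>

    static DEFINE_PER_CPU(local_t, example_events);

    static void example_count_event(void)
    {
            /*
             * Safe against the PMU interrupt on this CPU without an
             * atomic op: local_inc() expands to a plain increment
             * inside a PMI-masked section, per the definitions above.
             */
            local_inc(this_cpu_ptr(&example_events));
    }
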
index cd2fc1cc1cc7c056255b6f49effefb1e6d0cc1e6..ffe7c71e1132e028e2bdcb7f56eb1a31cbf8687d 100644 (file)
@@ -173,11 +173,19 @@ struct machdep_calls {
        /* Called after scan and before resource survey */
        void (*pcibios_fixup_phb)(struct pci_controller *hose);
 
+       /*
+        * Called after device has been added to bus and
+        * before sysfs has been created.
+        */
+       void (*pcibios_bus_add_device)(struct pci_dev *pdev);
+
        resource_size_t (*pcibios_default_alignment)(void);
 
 #ifdef CONFIG_PCI_IOV
        void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
        resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+       int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs);
+       int (*pcibios_sriov_disable)(struct pci_dev *pdev);
 #endif /* CONFIG_PCI_IOV */
 
        /* Called to shutdown machine specific hardware not already controlled
index 30922f699341bc3fdf57f51eff9921b058140697..07e3f54de9e323486f40b1601428a44d4917c288 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <asm/cputable.h>
 #include <linux/mm.h>
+#include <linux/pkeys.h>
 #include <asm/cpu_has_feature.h>
 
 /*
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
                unsigned long pkey)
 {
-       return (prot & PROT_SAO) ? VM_SAO : 0;
+#ifdef CONFIG_PPC_MEM_KEYS
+       return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+       return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
 static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 {
+#ifdef CONFIG_PPC_MEM_KEYS
+       return (vm_flags & VM_SAO) ?
+               __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+               __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
        return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
 
index 5bb3dbede41ad3aaf3f07060609b621ed25f7caf..2f806e329648dca4799e51262b95d6097c2e62cd 100644 (file)
 #define MI_Kp          0x40000000      /* Should always be set */
 
 /*
- * All pages' PP exec bits are set to 000, which means Execute for Supervisor
- * and no Execute for User.
- * Then we use the APG to say whether accesses are according to Page rules,
- * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone)
- * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER
- * 0 (00) => Not User, no exec => 11 (all accesses performed as user)
- * 1 (01) => User but no exec => 11 (all accesses performed as user)
- * 2 (10) => Not User, exec => 01 (rights according to page definition)
- * 3 (11) => User, exec => 00 (all accesses performed as supervisor)
- */
-#define MI_APG_INIT    0xf4ffffff
+ * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
+ * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
+ * respectively NA for All or X for Supervisor and no access for User.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all).
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when SWAP is enabled:
+ * when that bit is not set, access is done in accordance with "all user"
+ * rules, which means no access according to the page rules.
+ * Therefore, we define 4 APG groups; lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * (x means don't care):
+ * 0x => No access => 11 (all accesses performed as user per page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor per page definition)
+ * We define all 16 groups so that the other bits of the APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MI_APG_INIT    0xf4f4f4f4
+#else
+#define MI_APG_INIT    0x44444444
+#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
 #define MD_Kp          0x40000000      /* Should always be set */
 
 /*
- * All pages' PP data bits are set to either 000 or 011, which means
+ * All pages' PP data bits are set to 000, 011 or 001, which means
  * respectively RW for Supervisor and no access for User, or RO for
- * Supervisor and no access for user.
+ * Supervisor and no access for User, or NA for all.
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
- * Therefore, we define 2 APG groups. lsb is _PAGE_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
- * 1 => User => 00 (all accesses performed as supervisor
- *                                 according to page definition)
- */
-#define MD_APG_INIT    0x4fffffff
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when SWAP is enabled:
+ * when that bit is not set, access is done in accordance with "all user"
+ * rules, which means no access according to the page rules.
+ * Therefore, we define 4 APG groups; lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * (x means don't care):
+ * 0x => No access => 11 (all accesses performed as user per page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor per page definition)
+ * We define all 16 groups so that the other bits of the APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MD_APG_INIT    0xf4f4f4f4
+#else
+#define MD_APG_INIT    0x44444444
+#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MD_RPN is written, bits in
  */
 #define SPRN_M_TW      799
 
+/* APGs */
+#define M_APG0         0x00000000
+#define M_APG1         0x00000020
+#define M_APG2         0x00000040
+#define M_APG3         0x00000060
+
 #ifndef __ASSEMBLY__
 typedef struct {
        unsigned int id;
index 6364f5c2cc3e6bbee1244a8a6dce09f968987b70..bb38312cff28ceba0ece038a4523dd22ce0910fa 100644 (file)
@@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void)
 }
 #endif
 
+#ifdef CONFIG_PPC_MEM_KEYS
+extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
+#else
+static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+       return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
 #endif /* !__ASSEMBLY__ */
 
 /* The kernel use the constants below to index in the page sizes array.
index e2a2b8400490049143edee40316313a906ca6db7..051b3d63afe34b89b5da18bb000541a0870b0b38 100644 (file)
@@ -187,11 +187,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 {
 }
 
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+                              bool execute, bool foreign);
+#else /* CONFIG_PPC_MEM_KEYS */
 static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
                bool write, bool execute, bool foreign)
 {
        /* by default, allow everything */
        return true;
 }
+
+#define pkey_mm_init(mm)
+#define thread_pkey_regs_save(thread)
+#define thread_pkey_regs_restore(new_thread, old_thread)
+#define thread_pkey_regs_init(thread)
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+       return 0;
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+       return 0x0UL;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
index 0e23cd4ac8aabc4bbc02aca3f674e7651c1cbf51..13e6702ec458af4b9d103cb9bbefa0d04de56493 100644 (file)
@@ -29,17 +29,17 @@ struct mpic_timer {
 
 #ifdef CONFIG_MPIC_TIMER
 struct mpic_timer *mpic_request_timer(irq_handler_t fn,  void *dev,
-               const struct timeval *time);
+               time64_t time);
 void mpic_start_timer(struct mpic_timer *handle);
 void mpic_stop_timer(struct mpic_timer *handle);
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
 void mpic_free_timer(struct mpic_timer *handle);
 #else
 struct mpic_timer *mpic_request_timer(irq_handler_t fn,  void *dev,
-               const struct timeval *time) { return NULL; }
+               time64_t time) { return NULL; }
 void mpic_start_timer(struct mpic_timer *handle) { }
 void mpic_stop_timer(struct mpic_timer *handle) { }
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
 void mpic_free_timer(struct mpic_timer *handle) { }
 #endif
 
index e97f58689ca717d756217ac43d6458a4971eced4..9c80939b4d14773e1a57949c86988620e3006aa3 100644 (file)
@@ -4,10 +4,6 @@
 
 #ifdef CONFIG_PPC_WATCHDOG
 extern void arch_touch_nmi_watchdog(void);
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
-                                          bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-
 #else
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
index d072139ff2e53fddfaf5b41ad2f753770a965bf3..29d37bd1f3b37f645c92c6ea585b682a3a44787f 100644 (file)
@@ -61,7 +61,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
                                pgtable_t pte_page)
 {
-       *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+       *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER |
+                     _PMD_PRESENT);
 }
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
index cc2bfec3aa3b9b540e92c1f5008229b7351e6195..504a3c36ce5c9b311a9c8864d112792ae60fab9f 100644 (file)
@@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 {
        unsigned long set = pte_val(entry) &
                (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-       unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+       unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
 
        pte_update(ptep, clr, set);
 }
index 6dc0180fd5c7d227b9165fa96216e12d48c66a2d..f04cb46ae8a1ce580294a5e6ce52df5022f488b1 100644 (file)
 /* Definitions for 8xx embedded chips. */
 #define _PAGE_PRESENT  0x0001  /* Page is valid */
 #define _PAGE_NO_CACHE 0x0002  /* I: cache inhibit */
-#define _PAGE_SHARED   0x0004  /* No ASID (context) compare */
-#define _PAGE_SPECIAL  0x0008  /* SW entry, forced to 0 by the TLB miss */
+#define _PAGE_PRIVILEGED       0x0004  /* No ASID (context) compare */
+#define _PAGE_HUGE     0x0008  /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
 #define _PAGE_DIRTY    0x0100  /* C: page changed */
 
 /* These 4 software bits must be masked out when the L2 entry is loaded
  * into the TLB.
  */
 #define _PAGE_GUARDED  0x0010  /* Copied to L1 G entry in DTLB */
-#define _PAGE_USER     0x0020  /* Copied to L1 APG lsb */
-#define _PAGE_EXEC     0x0040  /* Copied to L1 APG */
-#define _PAGE_WRITETHRU        0x0080  /* software: caching is write through */
-#define _PAGE_ACCESSED 0x0800  /* software: page referenced */
+#define _PAGE_SPECIAL  0x0020  /* SW entry */
+#define _PAGE_EXEC     0x0040  /* Copied to PP (bit 21) in ITLB */
+#define _PAGE_ACCESSED 0x0080  /* software: page referenced */
 
+#define _PAGE_NA       0x0200  /* Supervisor NA, User no access */
 #define _PAGE_RO       0x0600  /* Supervisor RO, User no access */
 
 #define _PMD_PRESENT   0x0001
-#define _PMD_BAD       0x0ff0
+#define _PMD_BAD       0x0fd0
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
 #define _PMD_PAGE_512K 0x0004
+#define _PMD_USER      0x0020  /* APG 1 */
 
 /* Until my rework is finished, 8xx still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES     1
 
-/* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO                (_PAGE_SHARED | _PAGE_RO)
-#define _PAGE_KERNEL_ROX       (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
-#define _PAGE_KERNEL_RW                (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
-                                _PAGE_HWWRITE)
-#define _PAGE_KERNEL_RWX       (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
-                                _PAGE_HWWRITE | _PAGE_EXEC)
+#ifdef CONFIG_PPC_16K_PAGES
+#define _PAGE_PSIZE    _PAGE_HUGE
+#endif
 
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
index 5c68f4a59f75802d3ff98b43b951480b525daee7..c56de1e8026f676a1698a20fd03b5386b4785eb5 100644 (file)
@@ -45,6 +45,29 @@ static inline int pte_present(pte_t pte)
        return pte_val(pte) & _PAGE_PRESENT;
 }
 
+/*
+ * We only find the page table entry at the last level,
+ * hence there is no need for other accessors.
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+       unsigned long pteval = pte_val(pte);
+       /*
+        * A read-only access is controlled by _PAGE_USER bit.
+        * We have _PAGE_READ set for WRITE and EXECUTE
+        */
+       unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+       if (write)
+               need_pte_bits |= _PAGE_WRITE;
+
+       if ((pteval & need_pte_bits) != need_pte_bits)
+               return false;
+
+       return true;
+}
+
 /* Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  *
@@ -103,7 +126,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-       return pte;
+       return __pte(pte_val(pte) | _PAGE_HUGE);
 }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -212,8 +235,10 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
 #define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
                                            _PAGE_COHERENT))
 
+#if _PAGE_WRITETHRU != 0
 #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
                                            _PAGE_COHERENT | _PAGE_WRITETHRU))
+#endif
 
 #define pgprot_cached_noncoherent(prot) \
                (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
index 2da4532ca3775ce95e4bef313d9b28d17040dd09..ccee8eb509bb75060382e017a8ea7b02e62dd71a 100644 (file)
@@ -55,6 +55,7 @@
 #define _PAGE_KERNEL_RWX       (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
 #define _PAGE_KERNEL_ROX       (_PAGE_BAP_SR | _PAGE_BAP_SX)
 #define _PAGE_USER             (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+#define _PAGE_PRIVILEGED       (_PAGE_BAP_SR)
 
 #define _PAGE_HASHPTE  0
 #define _PAGE_BUSY     0
index 233c7504b1f20bf036b1131eb0c9db6dcecdafaa..24c73f5575ee36a24681e6dbf37c74cf18e842b0 100644 (file)
 #define OPAL_SET_POWER_SHIFT_RATIO             155
 #define OPAL_SENSOR_GROUP_CLEAR                        156
 #define OPAL_PCI_SET_P2P                       157
-#define OPAL_LAST                              157
+#define OPAL_NPU_SPA_SETUP                     159
+#define OPAL_NPU_SPA_CLEAR_CACHE               160
+#define OPAL_NPU_TL_SET                                161
+#define OPAL_LAST                              161
 
 /* Device tree flags */
 
index 0c545f7fc77b5e697e68d9b464c968d2166b454b..12e70fb58700b04bc28c13d0455a139a030f25ad 100644 (file)
@@ -34,6 +34,12 @@ int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
                        uint64_t bdf);
 int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
                        uint64_t lpcr);
+int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
+                       uint64_t addr, uint64_t PE_mask);
+int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
+                               uint64_t PE_handle);
+int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
+                       uint64_t rate_phys, uint32_t size);
 int64_t opal_console_write(int64_t term_number, __be64 *length,
                           const uint8_t *buffer);
 int64_t opal_console_read(int64_t term_number, __be64 *length,
index 23ac7fc0af23b6cae8f4706665102dd7e2050667..b62c31037cadefe742326c441b49ba5a0204f480 100644 (file)
@@ -159,7 +159,7 @@ struct paca_struct {
        u64 saved_r1;                   /* r1 save for RTAS calls or PM */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u16 trap_save;                  /* Used when bad stack is encountered */
-       u8 soft_enabled;                /* irq soft-enable flag */
+       u8 irq_soft_mask;               /* mask for irq soft masking */
        u8 irq_happened;                /* irq happened while soft-disabled */
        u8 io_sync;                     /* writel() needs spin_unlock sync */
        u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
@@ -239,8 +239,7 @@ struct paca_struct {
         */
        u64 exrfi[EX_SIZE] __aligned(0x80);
        void *rfi_flush_fallback_area;
-       u64 l1d_flush_congruence;
-       u64 l1d_flush_sets;
+       u64 l1d_flush_size;
 #endif
 };
 
index 62ed83db04ae96def6417e404fd3f099e1ab81ec..94d449031b181c89a00b34360fa49203fdc69e54 100644 (file)
@@ -197,25 +197,22 @@ struct pci_dn {
        struct  iommu_table_group *table_group; /* for phb's or bridges */
 
        int     pci_ext_config_space;   /* for pci devices */
-
-       struct  pci_dev *pcidev;        /* back-pointer to the pci device */
 #ifdef CONFIG_EEH
        struct eeh_dev *edev;           /* eeh device */
 #endif
 #define IODA_INVALID_PE                0xFFFFFFFF
-#ifdef CONFIG_PPC_POWERNV
        unsigned int pe_number;
-       int     vf_index;               /* VF index in the PF */
 #ifdef CONFIG_PCI_IOV
+       int     vf_index;               /* VF index in the PF */
        u16     vfs_expanded;           /* number of VFs IOV BAR expanded */
       u16     num_vfs;                /* number of VFs enabled */
        unsigned int *pe_num_map;       /* PE# for the first VF PE or array */
        bool    m64_single_mode;        /* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64        (-1)
-       int     (*m64_map)[PCI_SRIOV_NUM_BARS];
+       int     (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */
+       int     last_allow_rc;                  /* Only used on pseries */
 #endif /* CONFIG_PCI_IOV */
        int     mps;                    /* Maximum Payload Size */
-#endif
        struct list_head child_list;
        struct list_head list;
        struct resource holes[PCI_SRIOV_NUM_BARS];
index 8dc32eacc97c8acbebefeef081ccc5367c545cae..d82802ff508889365fc921d4924a7675c872d616 100644 (file)
@@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb);
 extern struct pci_dev *of_create_pci_dev(struct device_node *node,
                                        struct pci_bus *bus, int devfn);
 
+extern unsigned int pci_parse_of_flags(u32 addr0, int bridge);
+
 extern void of_scan_pci_bridge(struct pci_dev *dev);
 
 extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644 (file)
index 0000000..0409c80
--- /dev/null
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+DECLARE_STATIC_KEY_TRUE(pkey_disabled);
+extern int pkeys_total; /* total pkeys as per device tree */
+extern u32 initial_allocation_mask; /* bits set for reserved keys */
+
+/*
+ * Define these here temporarily so we're not dependent on patching linux/mm.h.
+ * Once it's updated we can drop these.
+ */
+#ifndef VM_PKEY_BIT0
+# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
+# define VM_PKEY_BIT0  VM_HIGH_ARCH_0
+# define VM_PKEY_BIT1  VM_HIGH_ARCH_1
+# define VM_PKEY_BIT2  VM_HIGH_ARCH_2
+# define VM_PKEY_BIT3  VM_HIGH_ARCH_3
+# define VM_PKEY_BIT4  VM_HIGH_ARCH_4
+#endif
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+                           VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE   0x4
+#define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS | \
+                               PKEY_DISABLE_WRITE  | \
+                               PKEY_DISABLE_EXECUTE)
+
+static inline u64 pkey_to_vmflag_bits(u16 pkey)
+{
+       return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
+}
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return 0x0UL;
+
+       return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) |
+               ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+               ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+               ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+               ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL));
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return 0;
+       return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+#define arch_max_pkey() pkeys_total
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+       return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+       return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) |
+               ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL));
+}
+
+#define pkey_alloc_mask(pkey) (0x1 << (pkey))
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) {        \
+       mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) {     \
+       mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey);   \
+}
+
+#define __mm_pkey_is_allocated(mm, pkey)       \
+       (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \
+                                      pkey_alloc_mask(pkey))
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+       /* A reserved key is never considered as 'explicitly allocated' */
+       return ((pkey < arch_max_pkey()) &&
+               !__mm_pkey_is_reserved(pkey) &&
+               __mm_pkey_is_allocated(mm, pkey));
+}
+
+extern void __arch_activate_pkey(int pkey);
+extern void __arch_deactivate_pkey(int pkey);
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+       /*
+        * Note: this is the one and only place we make sure that the pkey is
+        * valid as far as the hardware is concerned. The rest of the kernel
+        * trusts that only good, valid pkeys come out of here.
+        */
+       u32 all_pkeys_mask = (u32)(~(0x0));
+       int ret;
+
+       if (static_branch_likely(&pkey_disabled))
+               return -1;
+
+       /*
+        * Are we out of pkeys? We must handle this specially because ffz()
+        * behavior is undefined if there are no zeros.
+        */
+       if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+               return -1;
+
+       ret = ffz((u32)mm_pkey_allocation_map(mm));
+       __mm_pkey_allocated(mm, ret);
+
+       /*
+        * Enable the key in the hardware
+        */
+       if (ret > 0)
+               __arch_activate_pkey(ret);
+       return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return -1;
+
+       if (!mm_pkey_is_allocated(mm, pkey))
+               return -EINVAL;
+
+       /*
+        * Disable the key in the hardware
+        */
+       __arch_deactivate_pkey(pkey);
+       __mm_pkey_free(mm, pkey);
+
+       return 0;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int __execute_only_pkey(struct mm_struct *mm);
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return -1;
+
+       return __execute_only_pkey(mm);
+}
+
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+                                        int prot, int pkey);
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+                                             int prot, int pkey)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return 0;
+
+       /*
+        * Is this an mprotect_pkey() call? If so, never override the value that
+        * came from the user.
+        */
+       if (pkey != -1)
+               return pkey;
+
+       return __arch_override_mprotect_pkey(vma, prot, pkey);
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+                                      unsigned long init_val);
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+                                           unsigned long init_val)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return -EINVAL;
+       return __arch_set_user_pkey_access(tsk, pkey, init_val);
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+       return !static_branch_likely(&pkey_disabled);
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+extern bool arch_supports_pkeys(int cap);
+extern unsigned int arch_usable_pkeys(void);
+extern void thread_pkey_regs_save(struct thread_struct *thread);
+extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
+                                    struct thread_struct *old_thread);
+extern void thread_pkey_regs_init(struct thread_struct *thread);
+#endif /* _ASM_POWERPC_KEYS_H */
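
A minimal sketch of the in-kernel allocation flow this new header provides; per the comment on mm_pkey_alloc(), the caller is assumed to hold mmap_sem, and the surrounding function and error mapping are assumptions:

    #include <linux/pkeys.h>

    /* down_write(&mm->mmap_sem) is assumed to be held here */
    static int example_grab_pkey(struct mm_struct *mm)
    {
            int pkey = mm_pkey_alloc(mm);   /* -1 if pkeys disabled or exhausted */

            if (pkey < 0)
                    return -ENOSPC;
            /* ... attach the key to a VMA, e.g. via the mprotect path ... */
            return pkey;    /* released later with mm_pkey_free(mm, pkey) */
    }
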
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
new file mode 100644 (file)
index 0000000..f6945d3
--- /dev/null
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _ASM_PNV_OCXL_H
+#define _ASM_PNV_OCXL_H
+
+#include <linux/pci.h>
+
+#define PNV_OCXL_TL_MAX_TEMPLATE        63
+#define PNV_OCXL_TL_BITS_PER_RATE       4
+#define PNV_OCXL_TL_RATE_BUF_SIZE       ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
+
+extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+                       u16 *supported);
+extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+
+extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+                       char *rate_buf, int rate_buf_size);
+extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+                       uint64_t rate_buf_phys, int rate_buf_size);
+
+extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+                               void __iomem *tfc, void __iomem *pe_handle);
+extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+                               void __iomem **dar, void __iomem **tfc,
+                               void __iomem **pe_handle);
+
+extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+                       void **platform_data);
+extern void pnv_ocxl_spa_release(void *platform_data);
+extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle);
+
+extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
+extern void pnv_ocxl_free_xive_irq(u32 irq);
+
+#endif /* _ASM_PNV_OCXL_H */
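
A hedged sketch of how a caller such as the new ocxl driver might drive the SPA (Shared Process Area) half of this platform API; the spa_mem buffer, PE_mask value, and error handling are assumptions not spelled out in this header:

    #include <asm/pnv-ocxl.h>

    static int example_ocxl_attach(struct pci_dev *dev, void *spa_mem,
                                   int PE_mask, void **platform_data)
    {
            int rc;

            rc = pnv_ocxl_spa_setup(dev, spa_mem, PE_mask, platform_data);
            if (rc)
                    return rc;

            /* ... add/remove process elements; on a context removal: */
            /* pnv_ocxl_spa_remove_pe(*platform_data, pe_handle); */

            pnv_ocxl_spa_release(*platform_data);
            return 0;
    }
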
index ce0930d68857c64b8f7c55e85df9028293f71ebd..ab5c1588b487acc877e03ed918e73c41ad231b41 100644 (file)
 #define PPC_INST_RFCI                  0x4c000066
 #define PPC_INST_RFDI                  0x4c00004e
 #define PPC_INST_RFMCI                 0x4c00004c
+#define PPC_INST_MFSPR                 0x7c0002a6
 #define PPC_INST_MFSPR_DSCR            0x7c1102a6
 #define PPC_INST_MFSPR_DSCR_MASK       0xfc1ffffe
 #define PPC_INST_MTSPR_DSCR            0x7c1103a6
 #define __PPC_ME64(s)  __PPC_MB64(s)
 #define __PPC_BI(s)    (((s) & 0x1f) << 16)
 #define __PPC_CT(t)    (((t) & 0x0f) << 21)
+#define __PPC_SPR(r)   ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
index bdab3b74eb98e0eb145d904d406335daf7f0e3e6..01299cdc980676a405ab0303b469b318576037a9 100644 (file)
@@ -309,6 +309,11 @@ struct thread_struct {
        struct thread_vr_state ckvr_state; /* Checkpointed VR state */
        unsigned long   ckvrsave; /* Checkpointed VRSAVE */
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_MEM_KEYS
+       unsigned long   amr;
+       unsigned long   iamr;
+       unsigned long   uamor;
+#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        void*           kvm_shadow_vcpu; /* KVM internal data */
 #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
index 825bd5998701c7ff8123c9b5f1dd1d0abaaed9dd..b04c5ce8191b563b12d5c44aa90e159b589494f0 100644 (file)
@@ -80,21 +80,20 @@ extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);
 
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
-       u64     base_addr;
-       u32     drc_index;
-       u32     reserved;
-       u32     aa_index;
-       u32     flags;
+struct of_drc_info {
+       char *drc_type;
+       char *drc_name_prefix;
+       u32 drc_index_start;
+       u32 drc_name_suffix_start;
+       u32 num_sequential_elems;
+       u32 sequential_inc;
+       u32 drc_power_domain;
+       u32 last_drc_index;
 };
 
-#define DRCONF_MEM_ASSIGNED    0x00000008
-#define DRCONF_MEM_AI_INVALID  0x00000040
-#define DRCONF_MEM_RESERVED    0x00000080
+extern int of_read_drc_info_cell(struct property **prop,
+                       const __be32 **curval, struct of_drc_info *data);
+
 
 /*
  * There are two methods for telling firmware what our capabilities are.
@@ -159,6 +158,7 @@ struct of_drconf_cell {
 #define OV5_PFO_HW_842         0x1140  /* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR                0x1120  /* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS     0x1501  /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2           0x1680  /* ibm,dynamic-reconfiguration-v2 */
 #define OV5_XIVE_SUPPORT       0x17C0  /* XIVE Exploitation Support Mask */
 #define OV5_XIVE_LEGACY                0x1700  /* XIVE legacy mode Only */
 #define OV5_XIVE_EXPLOIT       0x1740  /* XIVE exploitation mode Only */
@@ -175,6 +175,7 @@ struct of_drconf_cell {
 #define OV5_HASH_GTSE          0x1940  /* Guest Translation Shoot Down Avail */
 /* Radix Table Extensions */
 #define OV5_RADIX_GTSE         0x1A40  /* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO           0x1640  /* Redef Prop Structures: drc-info   */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX              0x02    /* Linux is our OS */
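
of_read_drc_info_cell() above parses one record of the new ibm,drc-info property (negotiated via OV5_DRC_INFO). A hedged iteration sketch, assuming the property begins with a u32 entry count as in the PAPR description this merge implements; the function name is illustrative:

    #include <linux/of.h>
    #include <asm/prom.h>

    static void example_walk_drc_info(struct device_node *dn)
    {
            struct property *info;
            struct of_drc_info drc;
            const __be32 *value;
            u32 entries, i;

            info = of_find_property(dn, "ibm,drc-info", NULL);
            if (!info)
                    return;
            value = of_prop_next_u32(info, NULL, &entries);
            if (!value)
                    return;
            value++;        /* step past the entry count */
            for (i = 0; i < entries; i++) {
                    if (of_read_drc_info_cell(&info, &value, &drc))
                            break;
                    pr_info("drc-info: type %s, %u elements\n",
                            drc.drc_type, drc.num_sequential_elems);
            }
    }
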
index ce142ef99ba77d18392a78b2c6ebe49d867df741..c4a72c7a8c831749faa89b7be71cad1573b218c9 100644 (file)
@@ -8,9 +8,6 @@
 #ifndef _PAGE_HASHPTE
 #define _PAGE_HASHPTE  0
 #endif
-#ifndef _PAGE_SHARED
-#define _PAGE_SHARED   0
-#endif
 #ifndef _PAGE_HWWRITE
 #define _PAGE_HWWRITE  0
 #endif
 #ifndef _PAGE_PTE
 #define _PAGE_PTE 0
 #endif
+/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
+#ifndef _PAGE_PRIVILEGED
+#define _PAGE_PRIVILEGED 0
+#else
+#ifndef _PAGE_USER
+#define _PAGE_USER 0
+#endif
+#endif
+#ifndef _PAGE_NA
+#define _PAGE_NA 0
+#endif
+#ifndef _PAGE_HUGE
+#define _PAGE_HUGE 0
+#endif
 
 #ifndef _PMD_PRESENT_MASK
 #define _PMD_PRESENT_MASK      _PMD_PRESENT
 #define _PMD_SIZE      0
 #define PMD_PAGE_SIZE(pmd)     bad_call_to_PMD_PAGE_SIZE()
 #endif
+#ifndef _PMD_USER
+#define _PMD_USER      0
+#endif
 #ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO                (_PAGE_RO)
+#define _PAGE_KERNEL_RO                (_PAGE_PRIVILEGED | _PAGE_RO)
 #endif
 #ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX       (_PAGE_EXEC | _PAGE_RO)
+#define _PAGE_KERNEL_ROX       (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
 #endif
 #ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW                (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RW                (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+                                _PAGE_HWWRITE)
 #endif
 #ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX       (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX       (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+                                _PAGE_HWWRITE | _PAGE_EXEC)
 #endif
 #ifndef _PAGE_HPTEFLAGS
 #define _PAGE_HPTEFLAGS _PAGE_HASHPTE
@@ -85,7 +101,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
  */
 static inline bool pte_user(pte_t pte)
 {
-       return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+       return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
 }
 #endif /* __ASSEMBLY__ */
 
@@ -115,7 +131,8 @@ static inline bool pte_user(pte_t pte)
 /* Mask of bits returned by pte_pgprot() */
 #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
                         _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
-                        _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \
+                        _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
+                        _PAGE_PRIVILEGED | \
                         _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
 
 /*
@@ -142,7 +159,7 @@ static inline bool pte_user(pte_t pte)
  *
  * Note due to the way vm flags are laid out, the bits are XWR
  */
-#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_NONE      __pgprot(_PAGE_BASE | _PAGE_NA)
 #define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
 #define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
                                 _PAGE_EXEC)
index b779f3ccd4126d2de2ab8303654bef44a6a2af58..e6c7eadf6bceb7092e2615fc62b9d83195624ddf 100644 (file)
                                 DSISR_BAD_EXT_CTRL)
 #define          DSISR_BAD_FAULT_64S   (DSISR_BAD_FAULT_32S    | \
                                 DSISR_ATTR_CONFLICT    | \
-                                DSISR_KEYFAULT         | \
                                 DSISR_UNSUPP_MMU       | \
                                 DSISR_PRTABLE_FAULT    | \
                                 DSISR_ICSWX_NO_CT      | \
 #define SPRN_LPID      0x13F   /* Logical Partition Identifier */
 #endif
 #define   LPID_RSVD    0x3ff           /* Reserved LPID for partn switching */
-#define        SPRN_HMER       0x150   /* Hardware m? error recovery */
-#define        SPRN_HMEER      0x151   /* Hardware m? enable error recovery */
+#define        SPRN_HMER       0x150   /* Hypervisor maintenance exception reg */
+#define   HMER_DEBUG_TRIG      (1ul << (63 - 17)) /* Debug trigger */
+#define        SPRN_HMEER      0x151   /* Hyp maintenance exception enable reg */
 #define SPRN_PCR       0x152   /* Processor compatibility register */
 #define   PCR_VEC_DIS  (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
 #define   PCR_VSX_DIS  (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
index 53a7e2955d3e9a82939a53c05211e52719f6112e..7192eece6c3e1f9a10f59b83cc90e39d7a33148a 100644 (file)
 #define DC_DFWT                0x40000000      /* Data cache is forced write through */
 #define DC_LES         0x20000000      /* Caches are little endian mode */
 
-#ifdef CONFIG_8xx_CPU6
-#define do_mtspr_cpu6(rn, rn_addr, v)  \
-       do {                                                            \
-               int _reg_cpu6 = rn_addr, _tmp_cpu6;             \
-               asm volatile("stw %0, %1;"                              \
-                            "lwz %0, %1;"                              \
-                            "mtspr " __stringify(rn) ",%2" :           \
-                            : "r" (_reg_cpu6), "m"(_tmp_cpu6),         \
-                              "r" ((unsigned long)(v))                 \
-                            : "memory");                               \
-       } while (0)
-
-#define do_mtspr(rn, v)        asm volatile("mtspr " __stringify(rn) ",%0" :   \
-                                    : "r" ((unsigned long)(v))         \
-                                    : "memory")
-#define mtspr(rn, v) \
-       do {                                                            \
-               if (rn == SPRN_IMMR)                                    \
-                       do_mtspr_cpu6(rn, 0x3d30, v);                   \
-               else if (rn == SPRN_IC_CST)                             \
-                       do_mtspr_cpu6(rn, 0x2110, v);                   \
-               else if (rn == SPRN_IC_ADR)                             \
-                       do_mtspr_cpu6(rn, 0x2310, v);                   \
-               else if (rn == SPRN_IC_DAT)                             \
-                       do_mtspr_cpu6(rn, 0x2510, v);                   \
-               else if (rn == SPRN_DC_CST)                             \
-                       do_mtspr_cpu6(rn, 0x3110, v);                   \
-               else if (rn == SPRN_DC_ADR)                             \
-                       do_mtspr_cpu6(rn, 0x3310, v);                   \
-               else if (rn == SPRN_DC_DAT)                             \
-                       do_mtspr_cpu6(rn, 0x3510, v);                   \
-               else if (rn == SPRN_MI_CTR)                             \
-                       do_mtspr_cpu6(rn, 0x2180, v);                   \
-               else if (rn == SPRN_MI_AP)                              \
-                       do_mtspr_cpu6(rn, 0x2580, v);                   \
-               else if (rn == SPRN_MI_EPN)                             \
-                       do_mtspr_cpu6(rn, 0x2780, v);                   \
-               else if (rn == SPRN_MI_TWC)                             \
-                       do_mtspr_cpu6(rn, 0x2b80, v);                   \
-               else if (rn == SPRN_MI_RPN)                             \
-                       do_mtspr_cpu6(rn, 0x2d80, v);                   \
-               else if (rn == SPRN_MI_CAM)                             \
-                       do_mtspr_cpu6(rn, 0x2190, v);                   \
-               else if (rn == SPRN_MI_RAM0)                            \
-                       do_mtspr_cpu6(rn, 0x2390, v);                   \
-               else if (rn == SPRN_MI_RAM1)                            \
-                       do_mtspr_cpu6(rn, 0x2590, v);                   \
-               else if (rn == SPRN_MD_CTR)                             \
-                       do_mtspr_cpu6(rn, 0x3180, v);                   \
-               else if (rn == SPRN_M_CASID)                            \
-                       do_mtspr_cpu6(rn, 0x3380, v);                   \
-               else if (rn == SPRN_MD_AP)                              \
-                       do_mtspr_cpu6(rn, 0x3580, v);                   \
-               else if (rn == SPRN_MD_EPN)                             \
-                       do_mtspr_cpu6(rn, 0x3780, v);                   \
-               else if (rn == SPRN_M_TWB)                              \
-                       do_mtspr_cpu6(rn, 0x3980, v);                   \
-               else if (rn == SPRN_MD_TWC)                             \
-                       do_mtspr_cpu6(rn, 0x3b80, v);                   \
-               else if (rn == SPRN_MD_RPN)                             \
-                       do_mtspr_cpu6(rn, 0x3d80, v);                   \
-               else if (rn == SPRN_M_TW)                               \
-                       do_mtspr_cpu6(rn, 0x3f80, v);                   \
-               else if (rn == SPRN_MD_CAM)                             \
-                       do_mtspr_cpu6(rn, 0x3190, v);                   \
-               else if (rn == SPRN_MD_RAM0)                            \
-                       do_mtspr_cpu6(rn, 0x3390, v);                   \
-               else if (rn == SPRN_MD_RAM1)                            \
-                       do_mtspr_cpu6(rn, 0x3590, v);                   \
-               else if (rn == SPRN_DEC)                                \
-                       do_mtspr_cpu6(rn, 0x2c00, v);                   \
-               else if (rn == SPRN_TBWL)                               \
-                       do_mtspr_cpu6(rn, 0x3880, v);                   \
-               else if (rn == SPRN_TBWU)                               \
-                       do_mtspr_cpu6(rn, 0x3a80, v);                   \
-               else if (rn == SPRN_DPDR)                               \
-                       do_mtspr_cpu6(rn, 0x2d30, v);                   \
-               else                                                    \
-                       do_mtspr(rn, v);                                \
-       } while (0)
-#endif
-
 #endif /* _ASM_POWERPC_REG_8xx_H */
index 449912f057f6372bf77e228e7cda16cf22b034f9..d61f9c96d91634c2a1cda3412fe6802671a22d2c 100644 (file)
@@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2)
 COMPAT_SYS_SPU(pwritev2)
 SYSCALL(kexec_file_load)
 SYSCALL(statx)
+SYSCALL(pkey_alloc)
+SYSCALL(pkey_free)
+SYSCALL(pkey_mprotect)
index 9ba11dbcaca98f88c53ee46c3bd009b22f13df01..daf1ba97a00cdaf920e3806b955c3d4b129fe0ce 100644 (file)
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            384
+#define NR_syscalls            387
 
 #define __NR__exit __NR_exit
 
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
-
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
index 1d3f2be5ae39e9044a6dd56457ee146337fd2e3b..fa4288822b681db4337ecc7461ea118d95e36f3f 100644 (file)
@@ -9,6 +9,41 @@
 #ifndef _ASM_POWERPC_XIVE_REGS_H
 #define _ASM_POWERPC_XIVE_REGS_H
 
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. The two bits are P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow reading or
+ * manipulating the PQ bits. They must be used with an 8-byte
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI     0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI      0x000 /* Load */
+#define XIVE_ESB_GET           0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00     0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01     0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10     0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11     0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P         0x2
+#define XIVE_ESB_VAL_Q         0x1
+
 /*
  * Thread Management (aka "TM") registers
  */
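
The ESB comment above describes a small MMIO protocol. As a rough sketch of how a consumer might use these offsets, assuming esb_mmio is the ioremapped ESB page for one interrupt source (the helper names are illustrative, not part of this series):

    /* Sketch only: assumes <asm/io.h> and the xive-regs.h offsets above. */
    static u8 esb_peek_pq(void __iomem *esb_mmio)
    {
            /* An 8-byte load at XIVE_ESB_GET returns the current PQ bits. */
            return in_be64(esb_mmio + XIVE_ESB_GET) &
                   (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q);
    }

    static bool esb_eoi_needs_retrigger(void __iomem *esb_mmio)
    {
            /* A load-form EOI returns the previous state; if Q was set, the
             * interrupt fired again while pending and must be re-triggered.
             */
            return in_be64(esb_mmio + XIVE_ESB_LOAD_EOI) & XIVE_ESB_VAL_Q;
    }
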
index 371fbebf1ec9e4c43739e0d44439145b34e791d6..7624e22f5045d2305af2220265aadaea18e36deb 100644 (file)
@@ -58,6 +58,9 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_EOI_FW   0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB        0x20
 
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_NO_EOI                0x80
+
 #define XIVE_INVALID_CHIP_ID   -1
 
 /* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
        atomic_t                pending_count;
 };
 
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI     0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI      0x000 /* Load */
-#define XIVE_ESB_GET           0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00     0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01     0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10     0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11     0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P         0x2
-#define XIVE_ESB_VAL_Q         0x1
-
 /* Global enable flags for the XIVE support */
 extern bool __xive_enabled;
 
@@ -154,7 +122,7 @@ static inline bool xive_enabled(void) { return false; }
 static inline bool xive_spapr_init(void) { return false; }
 static inline bool xive_native_init(void) { return false; }
 static inline void xive_smp_probe(void) { }
-extern inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
 static inline void xive_smp_setup_cpu(void) { }
 static inline void xive_smp_disable_cpu(void) { }
 static inline void xive_kexec_teardown_cpu(int secondary) { }
index 5f201d40bcca64f2e1eb48b337d84714b53fe53e..860c59291bfcd62108e1ffbe30d4a109d856645d 100644 (file)
@@ -97,6 +97,7 @@
 #define ELF_NTMSPRREG  3       /* include tfhar, tfiar, texasr */
 #define ELF_NEBB       3       /* includes ebbrr, ebbhr, bescr */
 #define ELF_NPMU       5       /* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY      3       /* includes amr, iamr, uamor */
 
 typedef unsigned long elf_greg_t64;
 typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
index e63bc37e33af915e9cf970f71c2e8a865f26d0aa..65065ce3281496f82668a52cbc42a6faf781caad 100644 (file)
 #define MAP_STACK      0x20000         /* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB    0x40000         /* create a huge page mapping */
 
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE   0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS |\
+                               PKEY_DISABLE_WRITE  |\
+                               PKEY_DISABLE_EXECUTE)
 #endif /* _UAPI_ASM_POWERPC_MMAN_H */
index df8684f31919a7a7bda0629f93a9fdb3bca926df..389c36fd82990f3f6b390342f56375ac0067054a 100644 (file)
 #define __NR_pwritev2          381
 #define __NR_kexec_file_load   382
 #define __NR_statx             383
+#define __NR_pkey_alloc                384
+#define __NR_pkey_free         385
+#define __NR_pkey_mprotect     386
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
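
Together with PKEY_DISABLE_EXECUTE from the mman.h hunk above, the three syscall numbers added here (384-386) let userspace deny execution through a protection key. A minimal sketch using raw syscall numbers, since libc wrappers may not exist yet (the function name is illustrative):

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <unistd.h>

    #ifndef PKEY_DISABLE_EXECUTE
    #define PKEY_DISABLE_EXECUTE 0x4        /* from the uapi mman.h hunk above */
    #endif

    /* Sketch: tag an existing mapping with an execute-deny pkey. */
    int deny_exec(void *addr, size_t len)
    {
            long pkey = syscall(384 /* __NR_pkey_alloc */, 0, PKEY_DISABLE_EXECUTE);
            if (pkey < 0)
                    return -1;
            return syscall(386 /* __NR_pkey_mprotect */, addr, len,
                           PROT_READ | PROT_WRITE, (int)pkey);
    }
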
index f390d57cf2e1a711335bbd66cf7819e9dcd8442f..88b84ac76b5325791b9df17e3d9a8bf9357aba1c 100644 (file)
@@ -178,7 +178,7 @@ int main(void)
        OFFSET(PACATOC, paca_struct, kernel_toc);
        OFFSET(PACAKBASE, paca_struct, kernelbase);
        OFFSET(PACAKMSR, paca_struct, kernel_msr);
-       OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled);
+       OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
        OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
 #ifdef CONFIG_PPC_BOOK3S
        OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
@@ -239,8 +239,7 @@ int main(void)
        OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
        OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
        OFFSET(PACA_EXRFI, paca_struct, exrfi);
-       OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
-       OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+       OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
 
 #endif
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
@@ -401,6 +400,8 @@ int main(void)
        /* Other bits used by the vdso */
        DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
        DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+       DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+       DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
        DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
        DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 
index 679bbe714e8561b8c259f37bbfc9db8264555554..3f30c994e9316a1476086334de4fa84edba9de0e 100644 (file)
@@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7)
        mfspr   r3,SPRN_LPCR
        li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
        bl      __init_LPCR_ISA206
-       bl      __init_tlb_power7
        mtlr    r11
        blr
 
@@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7)
        mfspr   r3,SPRN_LPCR
        li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
        bl      __init_LPCR_ISA206
-       bl      __init_tlb_power7
        mtlr    r11
        blr
 
@@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8)
        li      r4,0 /* LPES = 0 */
        bl      __init_LPCR_ISA206
        bl      __init_HFSCR
-       bl      __init_tlb_power8
        bl      __init_PMU_HV
        bl      __init_PMU_HV_ISA207
        mtlr    r11
@@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8)
        li      r4,0 /* LPES = 0 */
        bl      __init_LPCR_ISA206
        bl      __init_HFSCR
-       bl      __init_tlb_power8
        bl      __init_PMU_HV
        bl      __init_PMU_HV_ISA207
        mtlr    r11
@@ -111,7 +107,6 @@ _GLOBAL(__setup_cpu_power9)
        li      r4,0 /* LPES = 0 */
        bl      __init_LPCR_ISA300
        bl      __init_HFSCR
-       bl      __init_tlb_power9
        bl      __init_PMU_HV
        mtlr    r11
        blr
@@ -136,7 +131,6 @@ _GLOBAL(__restore_cpu_power9)
        li      r4,0 /* LPES = 0 */
        bl      __init_LPCR_ISA300
        bl      __init_HFSCR
-       bl      __init_tlb_power9
        bl      __init_PMU_HV
        mtlr    r11
        blr
@@ -194,50 +188,6 @@ __init_HFSCR:
        mtspr   SPRN_HFSCR,r3
        blr
 
-/*
- * Clear the TLB using the specified IS form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- */
-__init_tlb_power7:
-       li      r6,POWER7_TLB_SETS
-       mtctr   r6
-       li      r7,0xc00        /* IS field = 0b11 */
-       ptesync
-2:     tlbiel  r7
-       addi    r7,r7,0x1000
-       bdnz    2b
-       ptesync
-1:     blr
-
-__init_tlb_power8:
-       li      r6,POWER8_TLB_SETS
-       mtctr   r6
-       li      r7,0xc00        /* IS field = 0b11 */
-       ptesync
-2:     tlbiel  r7
-       addi    r7,r7,0x1000
-       bdnz    2b
-       ptesync
-1:     blr
-
-/*
- * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
- * and one for partition scope to clear process and partition table entries.
- */
-__init_tlb_power9:
-       li      r6,POWER9_TLB_SETS_HASH - 1
-       mtctr   r6
-       li      r7,0xc00        /* IS field = 0b11 */
-       li      r8,0
-       ptesync
-       PPC_TLBIEL(7, 8, 2, 1, 0)
-       PPC_TLBIEL(7, 8, 2, 0, 0)
-2:     addi    r7,r7,0x1000
-       PPC_TLBIEL(7, 8, 0, 0, 0)
-       bdnz    2b
-       ptesync
-1:     blr
-
 __init_PMU_HV:
        li      r5,0
        mtspr   SPRN_MMCRC,r5
index 1350f49d81a847a318a619968682d92daec01d43..c40a9fc1e5d1270e4bbe75507819e0aa6ed74abe 100644 (file)
@@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power8(void);
 extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power9(void);
-extern void __flush_tlb_power7(unsigned int action);
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_cpu_type      = "ppc64/ibm-compat-v1",
                .cpu_setup              = __setup_cpu_power7,
                .cpu_restore            = __restore_cpu_power7,
-               .flush_tlb              = __flush_tlb_power7,
                .machine_check_early    = __machine_check_early_realmode_p7,
                .platform               = "power7",
        },
@@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_cpu_type      = "ppc64/ibm-compat-v1",
                .cpu_setup              = __setup_cpu_power8,
                .cpu_restore            = __restore_cpu_power8,
-               .flush_tlb              = __flush_tlb_power8,
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
@@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_cpu_type      = "ppc64/ibm-compat-v1",
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
-               .flush_tlb              = __flush_tlb_power9,
                .platform               = "power9",
        },
        {       /* Power7 */
@@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_POWER4,
                .cpu_setup              = __setup_cpu_power7,
                .cpu_restore            = __restore_cpu_power7,
-               .flush_tlb              = __flush_tlb_power7,
                .machine_check_early    = __machine_check_early_realmode_p7,
                .platform               = "power7",
        },
@@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_POWER4,
                .cpu_setup              = __setup_cpu_power7,
                .cpu_restore            = __restore_cpu_power7,
-               .flush_tlb              = __flush_tlb_power7,
                .machine_check_early    = __machine_check_early_realmode_p7,
                .platform               = "power7+",
        },
@@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power8,
                .cpu_restore            = __restore_cpu_power8,
-               .flush_tlb              = __flush_tlb_power8,
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
@@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power8,
                .cpu_restore            = __restore_cpu_power8,
-               .flush_tlb              = __flush_tlb_power8,
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
@@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power8,
                .cpu_restore            = __restore_cpu_power8,
-               .flush_tlb              = __flush_tlb_power8,
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
@@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power8,
                .cpu_restore            = __restore_cpu_power8,
-               .flush_tlb              = __flush_tlb_power8,
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
@@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
-               .flush_tlb              = __flush_tlb_power9,
                .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
@@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
-               .flush_tlb              = __flush_tlb_power9,
                .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
@@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .oprofile_type          = PPC_OPROFILE_INVALID,
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
-               .flush_tlb              = __flush_tlb_power9,
                .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
index cbabb5adccd958d66442f4af17d701225bc81ef8..00b215125d3ea38beda6ac8f23d95e17f23ba87a 100644 (file)
 #define REAL_MODE_TIMEOUT      10000
 
 static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid unexpected behaviour when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
 
 #define CRASH_HANDLER_MAX 3
 /* List of shutdown handles */
@@ -63,15 +71,12 @@ static int handle_fault(struct pt_regs *regs)
 #ifdef CONFIG_SMP
 
 static atomic_t cpus_in_crash;
-static void crash_ipi_callback(struct pt_regs *regs)
+void crash_ipi_callback(struct pt_regs *regs)
 {
        static cpumask_t cpus_state_saved = CPU_MASK_NONE;
 
        int cpu = smp_processor_id();
 
-       if (!cpu_online(cpu))
-               return;
-
        hard_irq_disable();
        if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
                crash_save_cpu(regs, cpu);
@@ -109,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
 
        printk(KERN_EMERG "Sending IPI to other CPUs\n");
 
+       if (crash_wake_offline)
+               ncpus = num_present_cpus() - 1;
+
        crash_send_ipi(crash_ipi_callback);
        smp_wmb();
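
A platform opts in simply by setting the flag during boot; crash_kexec_prepare_cpus() then waits for all present CPUs rather than just the online ones, as shown above. A hedged sketch of the opt-in (the function name is hypothetical; powernv does the equivalent in this series):

    extern int crash_wake_offline;  /* defined in crash.c above */

    /* Sketch: platform setup code opting in to waking offline CPUs. */
    static void __init my_platform_setup_arch(void)
    {
            crash_wake_offline = 1;
    }
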
 
index 8bdc2f96c5d6a7a29b0fd299f2e121dc6a993911..945e2c29ad2daa55211ab747a1638f591cf5c19e 100644 (file)
@@ -77,8 +77,6 @@ struct dt_cpu_feature {
  * Set up the base CPU
  */
 
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
 
@@ -92,27 +90,6 @@ static struct {
 
 static void (*init_pmu_registers)(void);
 
-static void cpufeatures_flush_tlb(void)
-{
-       /*
-        * This is a temporary measure to keep equivalent TLB flush as the
-        * cputable based setup code.
-        */
-       switch (PVR_VER(mfspr(SPRN_PVR))) {
-       case PVR_POWER8:
-       case PVR_POWER8E:
-       case PVR_POWER8NVL:
-               __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
-               break;
-       case PVR_POWER9:
-               __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
-               break;
-       default:
-               pr_err("unknown CPU version for boot TLB flush\n");
-               break;
-       }
-}
-
 static void __restore_cpu_cpufeatures(void)
 {
        /*
@@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void)
 
        if (init_pmu_registers)
                init_pmu_registers();
-
-       cpufeatures_flush_tlb();
 }
 
 static char dt_cpu_name[64];
@@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
        .oprofile_type          = PPC_OPROFILE_INVALID,
        .cpu_setup              = NULL,
        .cpu_restore            = __restore_cpu_cpufeatures,
-       .flush_tlb              = NULL,
        .machine_check_early    = NULL,
        .platform               = NULL,
 };
@@ -412,7 +386,6 @@ static void init_pmu_power8(void)
 static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
 {
        cur_cpu_spec->platform = "power8";
-       cur_cpu_spec->flush_tlb = __flush_tlb_power8;
        cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
 
        return 1;
@@ -451,7 +424,6 @@ static void init_pmu_power9(void)
 static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
 {
        cur_cpu_spec->platform = "power9";
-       cur_cpu_spec->flush_tlb = __flush_tlb_power9;
        cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
 
        return 1;
@@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void)
        system_registers.hfscr = mfspr(SPRN_HFSCR);
        system_registers.fscr = mfspr(SPRN_FSCR);
 
-       cpufeatures_flush_tlb();
-
        pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
                cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
 }
index cbca0a6676829fe5ac4368365ef48d7deff66e32..cc649809885e6ed2d1a2058f9cb2654c6c99d5e0 100644 (file)
@@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
        return NULL;
 }
 
+int eeh_restore_vf_config(struct pci_dn *pdn)
+{
+       struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+       u32 devctl, cmd, cap2, aer_capctl;
+       int old_mps;
+
+       if (edev->pcie_cap) {
+               /* Restore MPS */
+               old_mps = (ffs(pdn->mps) - 8) << 5;
+               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+                                    2, &devctl);
+               devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+               devctl |= old_mps;
+               eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+                                     2, devctl);
+
+               /* Disable Completion Timeout */
+               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+                                    4, &cap2);
+               if (cap2 & 0x10) {
+                       eeh_ops->read_config(pdn,
+                                            edev->pcie_cap + PCI_EXP_DEVCTL2,
+                                            4, &cap2);
+                       cap2 |= 0x10;
+                       eeh_ops->write_config(pdn,
+                                             edev->pcie_cap + PCI_EXP_DEVCTL2,
+                                             4, cap2);
+               }
+       }
+
+       /* Enable SERR and parity checking */
+       eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+       cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+       eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+       /* Enable reporting of various errors */
+       if (edev->pcie_cap) {
+               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+                                    2, &devctl);
+               devctl &= ~PCI_EXP_DEVCTL_CERE;
+               devctl |= (PCI_EXP_DEVCTL_NFERE |
+                          PCI_EXP_DEVCTL_FERE |
+                          PCI_EXP_DEVCTL_URRE);
+               eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+                                     2, devctl);
+       }
+
+       /* Enable ECRC generation and check */
+       if (edev->pcie_cap && edev->aer_cap) {
+               eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+                                    4, &aer_capctl);
+               aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+               eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+                                     4, aer_capctl);
+       }
+
+       return 0;
+}
+
 /**
  * pcibios_set_pcie_reset_state - Set PCI-E reset state
  * @dev: pci device struct
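
A note on the MPS restore in eeh_restore_vf_config() above: PCI_EXP_DEVCTL_PAYLOAD occupies bits 7:5 of the Device Control register and encodes log2(bytes/128), which is exactly what (ffs(pdn->mps) - 8) << 5 computes. A worked sketch:

    #include <linux/bitops.h>   /* ffs() */
    #include <linux/types.h>

    /* Sketch of the MPS encoding used above. */
    static u16 mps_bytes_to_devctl(unsigned int mps)
    {
            /* e.g. mps = 256: ffs(256) = 9, (9 - 8) << 5 = 0x20 (encoding 1) */
            return (ffs(mps) - 8) << 5;
    }
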
index 4f71e4c9beb7c57ca613f9ceee535ead03655b75..beea2182d754bcd2d1f79184bcef1d390525cbb3 100644 (file)
@@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata)
 
        edev->in_error = true;
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
        return NULL;
 }
 
@@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata)
        driver->err_handler->resume(dev);
 
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+       eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+#endif
        return NULL;
 }
 
@@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata)
        driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
        return NULL;
 }
 
@@ -440,7 +446,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
                        return NULL;
        }
 
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
        pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
 #endif
        return NULL;
@@ -496,7 +502,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
                (*removed)++;
 
        if (edev->physfn) {
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
                struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
                pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
index 797549289798b33bf2e867c23d3047cf342e89f1..deed906dd8f1200e55e3d3ef65d48b37ec9dfb20 100644 (file)
@@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev,      \
                                                              \
        return sprintf(buf, _format "\n", edev->_memb);       \
 }                                                        \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
 
 EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
 EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
@@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev,
 
 static DEVICE_ATTR_RW(eeh_pe_state);
 
+#ifdef CONFIG_PCI_IOV
+static ssize_t eeh_notify_resume_show(struct device *dev,
+                                     struct device_attribute *attr, char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+       struct pci_dn *pdn = pci_get_pdn(pdev);
+
+       if (!edev || !edev->pe)
+               return -ENODEV;
+
+       return sprintf(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+                                      struct device_attribute *attr,
+                                      const char *buf, size_t count)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+       if (!edev || !edev->pe || !eeh_ops->notify_resume)
+               return -ENODEV;
+
+       if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+               return -EIO;
+
+       return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+       struct device_node *np;
+       int rc = 0;
+
+       np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+       if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+               rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+       return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+       struct device_node *np;
+
+       np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+       if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+               device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV */
+
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
        struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+       rc += eeh_notify_resume_add(pdev);
 
        if (rc)
                pr_warn("EEH: Unable to create sysfs entries\n");
@@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
+       eeh_notify_resume_remove(pdev);
+
        if (edev)
                edev->mode &= ~EEH_DEV_SYSFS;
 }
index e780e1fbf6c2123d5170de951a696e53c29fdf66..eb8d01bae8c613902d53d9233028c98cd36b087b 100644 (file)
@@ -211,7 +211,7 @@ transfer_to_handler_cont:
        mflr    r9
        lwz     r11,0(r9)               /* virtual address of handler */
        lwz     r9,4(r9)                /* where to go when done */
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
        mtspr   SPRN_NRI, r0
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -301,7 +301,7 @@ stack_ovf:
        lis     r9,StackOverflow@ha
        addi    r9,r9,StackOverflow@l
        LOAD_MSR_KERNEL(r10,MSR_KERNEL)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
        mtspr   SPRN_NRI, r0
 #endif
        mtspr   SPRN_SRR0,r9
@@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
        lwz     r7,_NIP(r1)
        lwz     r2,GPR2(r1)
        lwz     r1,GPR1(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
        mtspr   SPRN_NRI, r0
 #endif
        mtspr   SPRN_SRR0,r7
@@ -727,7 +727,7 @@ fast_exception_return:
        lwz     r10,_LINK(r11)
        mtlr    r10
        REST_GPR(10, r11)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
        mtspr   SPRN_NRI, r0
 #endif
        mtspr   SPRN_SRR1,r9
@@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
        .globl exc_exit_restart
 exc_exit_restart:
        lwz     r12,_NIP(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
        mtspr   SPRN_NRI, r0
 #endif
        mtspr   SPRN_SRR0,r12
index 2748584b767da3f5d788c0be27b27662053853e4..2cb5109a7ea3d3d4959fa0d9360248218b56d688 100644 (file)
@@ -133,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
         * of irq tracing is used, we additionally check that condition
         * is correct
         */
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
-       lbz     r10,PACASOFTIRQEN(r13)
-       xori    r10,r10,1
-1:     tdnei   r10,0
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+       lbz     r10,PACAIRQSOFTMASK(r13)
+1:     tdnei   r10,IRQS_ENABLED
        EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
 #endif
 
@@ -152,7 +151,7 @@ system_call:                        /* label this so stack traces look sane */
        /* We do need to set SOFTE in the stack frame or the return
         * from interrupt will be painful
         */
-       li      r10,1
+       li      r10,IRQS_ENABLED
        std     r10,SOFTE(r1)
 
        CURRENT_THREAD_INFO(r11, r1)
@@ -755,10 +754,10 @@ resume_kernel:
        beq+    restore
        /* Check that preempt_count() == 0 and interrupts are enabled */
        lwz     r8,TI_PREEMPT(r9)
-       cmpwi   cr1,r8,0
+       cmpwi   cr0,r8,0
+       bne     restore
        ld      r0,SOFTE(r1)
-       cmpdi   r0,0
-       crandc  eq,cr1*4+eq,eq
+       andi.   r0,r0,IRQS_DISABLED
        bne     restore
 
        /*
@@ -796,12 +795,12 @@ restore:
         * are about to re-enable interrupts
         */
        ld      r5,SOFTE(r1)
-       lbz     r6,PACASOFTIRQEN(r13)
-       cmpwi   cr0,r5,0
-       beq     .Lrestore_irq_off
+       lbz     r6,PACAIRQSOFTMASK(r13)
+       andi.   r5,r5,IRQS_DISABLED
+       bne     .Lrestore_irq_off
 
        /* We are enabling, were we already enabled ? Yes, just return */
-       cmpwi   cr0,r6,1
+       andi.   r6,r6,IRQS_DISABLED
        beq     cr0,.Ldo_restore
 
        /*
@@ -820,8 +819,8 @@ restore:
         */
 .Lrestore_no_replay:
        TRACE_ENABLE_INTS
-       li      r0,1
-       stb     r0,PACASOFTIRQEN(r13);
+       li      r0,IRQS_ENABLED
+       stb     r0,PACAIRQSOFTMASK(r13);
 
        /*
         * Final return path. BookE is handled in a different file
@@ -939,9 +938,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        beq     1f
        rlwinm  r7,r7,0,~PACA_IRQ_HARD_DIS
        stb     r7,PACAIRQHAPPENED(r13)
-1:     li      r0,0
-       stb     r0,PACASOFTIRQEN(r13);
-       TRACE_DISABLE_INTS
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+       /* Interrupts should not be soft-enabled at this point. */
+       lbz     r7,PACAIRQSOFTMASK(r13)
+1:     tdeqi   r7,IRQS_ENABLED
+       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
        b       .Ldo_restore
 
        /*
@@ -979,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        addi    r3,r1,STACK_FRAME_OVERHEAD;
        bl      do_IRQ
        b       ret_from_except
+1:     cmpwi   cr0,r3,0xf00
+       bne     1f
+       addi    r3,r1,STACK_FRAME_OVERHEAD;
+       bl      performance_monitor_exception
+       b       ret_from_except
 1:     cmpwi   cr0,r3,0xe60
        bne     1f
        addi    r3,r1,STACK_FRAME_OVERHEAD;
@@ -1055,15 +1063,15 @@ _GLOBAL(enter_rtas)
        li      r0,0
        mtcr    r0
 
-#ifdef CONFIG_BUG      
+#ifdef CONFIG_BUG
        /* There is no way it is acceptable to get here with interrupts enabled,
         * check it with the asm equivalent of WARN_ON
         */
-       lbz     r0,PACASOFTIRQEN(r13)
-1:     tdnei   r0,0
+       lbz     r0,PACAIRQSOFTMASK(r13)
+1:     tdeqi   r0,IRQS_ENABLED
        EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
 #endif
-       
+
        /* Hard-disable interrupts */
        mfmsr   r6
        rldicl  r7,r6,48,1
@@ -1107,6 +1115,17 @@ __enter_rtas:
 rtas_return_loc:
        FIXUP_ENDIAN
 
+       /*
+        * Clear RI and set SF before anything.
+        */
+       mfmsr   r6
+       li      r0,MSR_RI
+       andc    r6,r6,r0
+       sldi    r0,r0,(MSR_SF_LG - MSR_RI_LG)
+       or      r6,r6,r0
+       sync
+       mtmsrd  r6
+
        /* relocation is off at this point */
        GET_PACA(r4)
        clrldi  r4,r4,2                 /* convert to realmode address */
@@ -1115,12 +1134,6 @@ rtas_return_loc:
 0:     mflr    r3
        ld      r3,(1f-0b)(r3)          /* get &rtas_restore_regs */
 
-       mfmsr   r6
-       li      r0,MSR_RI
-       andc    r6,r6,r0
-       sync    
-       mtmsrd  r6
-        
         ld     r1,PACAR1(r4)           /* Restore our SP */
         ld     r4,PACASAVEDMSR(r4)     /* Restore our MSR */
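
These entry_64.S changes are mechanical fallout of the soft-mask rework: the old per-CPU byte was a boolean (1 = enabled), while the new one is a bitmask, which is why cmpwi against 0 or 1 becomes andi. against IRQS_DISABLED. For orientation, the values this series defines in asm/hw_irq.h (shown here as a sketch, not quoted from this diff):

    #define IRQS_ENABLED            0
    #define IRQS_DISABLED           1       /* set by local_irq_disable() */
    #define IRQS_PMI_DISABLED       2       /* PMIs masked independently */
    #define IRQS_ALL_DISABLED       (IRQS_DISABLED | IRQS_PMI_DISABLED)

    /* What the andi. tests above check, expressed in C: */
    static inline bool irqs_soft_disabled(unsigned char mask)
    {
            return mask & IRQS_DISABLED;
    }
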
 
index acd8ca76233e871658bdb2b948d2652d273e0357..ee832d344a5a265018d8fa98f0c38c0a94549db5 100644 (file)
@@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
        mfspr   r10,SPRN_ESR
        SPECIAL_EXC_STORE(r10,ESR)
 
-       lbz     r10,PACASOFTIRQEN(r13)
+       lbz     r10,PACAIRQSOFTMASK(r13)
        SPECIAL_EXC_STORE(r10,SOFTE)
        ld      r10,_NIP(r1)
        SPECIAL_EXC_STORE(r10,CSRR0)
@@ -206,17 +206,17 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_MAS8,r10
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 
-       lbz     r6,PACASOFTIRQEN(r13)
+       lbz     r6,PACAIRQSOFTMASK(r13)
        ld      r5,SOFTE(r1)
 
        /* Interrupts had better not already be enabled... */
-       twnei   r6,0
+       tweqi   r6,IRQS_ENABLED
 
-       cmpwi   cr0,r5,0
-       beq     1f
+       andi.   r6,r5,IRQS_DISABLED
+       bne     1f
 
        TRACE_ENABLE_INTS
-       stb     r5,PACASOFTIRQEN(r13)
+       stb     r5,PACAIRQSOFTMASK(r13)
 1:
        /*
         * Restore PACAIRQHAPPENED rather than setting it based on
@@ -351,9 +351,9 @@ ret_from_mc_except:
 #define PROLOG_ADDITION_NONE_MC(n)
 
 #define PROLOG_ADDITION_MASKABLE_GEN(n)                                            \
-       lbz     r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */      \
-       cmpwi   cr0,r10,0;              /* yes -> go out of line */         \
-       beq     masked_interrupt_book3e_##n
+       lbz     r10,PACAIRQSOFTMASK(r13);       /* are irqs soft-masked? */ \
+       andi.   r10,r10,IRQS_DISABLED;  /* yes -> go out of line */ \
+       bne     masked_interrupt_book3e_##n
 
 #define PROLOG_ADDITION_2REGS_GEN(n)                                       \
        std     r14,PACA_EXGEN+EX_R14(r13);                                 \
@@ -397,7 +397,7 @@ exc_##n##_common:                                                       \
        mfspr   r8,SPRN_XER;            /* save XER in stackframe */        \
        ld      r9,excf+EX_R1(r13);     /* load orig r1 back from PACA */   \
        lwz     r10,excf+EX_CR(r13);    /* load orig CR back from PACA  */  \
-       lbz     r11,PACASOFTIRQEN(r13); /* get current IRQ softe */         \
+       lbz     r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */       \
        ld      r12,exception_marker@toc(r2);                               \
        li      r0,0;                                                       \
        std     r3,GPR10(r1);           /* save r10 to stackframe */        \
index 2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d..243d072a225aac1f7c7eaa69b6e5ef8cd21ce2c6 100644 (file)
@@ -718,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
 hardware_interrupt_hv:
        BEGIN_FTR_SECTION
                _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
-                                           EXC_HV, SOFTEN_TEST_HV)
+                                           EXC_HV, SOFTEN_TEST_HV,
+                                           IRQS_DISABLED)
        FTR_SECTION_ELSE
                _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
-                                           EXC_STD, SOFTEN_TEST_PR)
+                                           EXC_STD, SOFTEN_TEST_PR,
+                                           IRQS_DISABLED)
        ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
 
@@ -729,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
        .globl hardware_interrupt_relon_hv;
 hardware_interrupt_relon_hv:
        BEGIN_FTR_SECTION
-               _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+               _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+                                                 EXC_HV, SOFTEN_TEST_HV,
+                                                 IRQS_DISABLED)
        FTR_SECTION_ELSE
-               _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+               _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+                                                 EXC_STD, SOFTEN_TEST_PR,
+                                                 IRQS_DISABLED)
        ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 
@@ -827,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
 TRAMP_KVM(PACA_EXGEN, 0x900)
 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
 
@@ -839,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980)
 EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
 
 
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
 TRAMP_KVM(PACA_EXGEN, 0xa00)
 #ifdef CONFIG_PPC_DOORBELL
 EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -1052,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
  * mode.
  */
 __EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
 EXC_VIRT_NONE(0x4e60, 0x20)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
 TRAMP_REAL_BEGIN(hmi_exception_early)
@@ -1110,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common)
 EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
         ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
 
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
 #ifdef CONFIG_PPC_DOORBELL
 EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1120,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
 #endif
 
 
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
 TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
 EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
 
@@ -1132,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20)
 EXC_VIRT_NONE(0x4ee0, 0x20)
 
 
-EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
+EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
+EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
 TRAMP_KVM(PACA_EXGEN, 0xf00)
 EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
 
@@ -1345,7 +1351,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        b       .
 #endif
 
-EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON(denorm_common, 0x1500, unknown_exception)
 
 
 #ifdef CONFIG_CBE_RAS
@@ -1455,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
        std     r9,PACA_EXRFI+EX_R9(r13)
        std     r10,PACA_EXRFI+EX_R10(r13)
        std     r11,PACA_EXRFI+EX_R11(r13)
-       std     r12,PACA_EXRFI+EX_R12(r13)
-       std     r8,PACA_EXRFI+EX_R13(r13)
        mfctr   r9
        ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-       ld      r11,PACA_L1D_FLUSH_SETS(r13)
-       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-       /*
-        * The load adresses are at staggered offsets within cachelines,
-        * which suits some pipelines better (on others it should not
-        * hurt).
-        */
-       addi    r12,r12,8
+       ld      r11,PACA_L1D_FLUSH_SIZE(r13)
+       srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
        mtctr   r11
        DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
        /* order ld/st prior to dcbt stop all streams with flushing */
        sync
-1:     li      r8,0
-       .rept   8 /* 8-way set associative */
-       ldx     r11,r10,r8
-       add     r8,r8,r12
-       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
-       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
-       .endr
-       addi    r10,r10,128 /* 128 byte cache line */
+
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+1:
+       ld      r11,(0x80 + 8)*0(r10)
+       ld      r11,(0x80 + 8)*1(r10)
+       ld      r11,(0x80 + 8)*2(r10)
+       ld      r11,(0x80 + 8)*3(r10)
+       ld      r11,(0x80 + 8)*4(r10)
+       ld      r11,(0x80 + 8)*5(r10)
+       ld      r11,(0x80 + 8)*6(r10)
+       ld      r11,(0x80 + 8)*7(r10)
+       addi    r10,r10,0x80*8
        bdnz    1b
 
        mtctr   r9
        ld      r9,PACA_EXRFI+EX_R9(r13)
        ld      r10,PACA_EXRFI+EX_R10(r13)
        ld      r11,PACA_EXRFI+EX_R11(r13)
-       ld      r12,PACA_EXRFI+EX_R12(r13)
-       ld      r8,PACA_EXRFI+EX_R13(r13)
        GET_SCRATCH0(r13);
        rfid
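
The reworked fallback above no longer iterates sets and ways: it loads one word from every 128-byte line of the fallback area, eight staggered lines per loop iteration, with the trip count derived from the flush size (size >> (7 + 3)). A C model of the loop, for reference only:

    /* C model of the displacement-form flush loop above. */
    static void l1d_flush_model(const char *area, unsigned long size)
    {
            unsigned long iters = size >> (7 + 3); /* 128-byte lines, 8x unroll */
            const volatile unsigned long *p;
            int j;

            while (iters--) {
                    for (j = 0; j < 8; j++) {
                            /* staggered offsets: (0x80 + 8) * j */
                            p = (const volatile unsigned long *)
                                    (area + (0x80 + 8) * j);
                            (void)*p;       /* the load displaces one L1D line */
                    }
                    area += 0x80 * 8;
            }
    }
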
 
@@ -1497,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
        std     r9,PACA_EXRFI+EX_R9(r13)
        std     r10,PACA_EXRFI+EX_R10(r13)
        std     r11,PACA_EXRFI+EX_R11(r13)
-       std     r12,PACA_EXRFI+EX_R12(r13)
-       std     r8,PACA_EXRFI+EX_R13(r13)
        mfctr   r9
        ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-       ld      r11,PACA_L1D_FLUSH_SETS(r13)
-       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-       /*
-        * The load adresses are at staggered offsets within cachelines,
-        * which suits some pipelines better (on others it should not
-        * hurt).
-        */
-       addi    r12,r12,8
+       ld      r11,PACA_L1D_FLUSH_SIZE(r13)
+       srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
        mtctr   r11
        DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
        /* order ld/st prior to dcbt stop all streams with flushing */
        sync
-1:     li      r8,0
-       .rept   8 /* 8-way set associative */
-       ldx     r11,r10,r8
-       add     r8,r8,r12
-       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
-       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
-       .endr
-       addi    r10,r10,128 /* 128 byte cache line */
+
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+1:
+       ld      r11,(0x80 + 8)*0(r10)
+       ld      r11,(0x80 + 8)*1(r10)
+       ld      r11,(0x80 + 8)*2(r10)
+       ld      r11,(0x80 + 8)*3(r10)
+       ld      r11,(0x80 + 8)*4(r10)
+       ld      r11,(0x80 + 8)*5(r10)
+       ld      r11,(0x80 + 8)*6(r10)
+       ld      r11,(0x80 + 8)*7(r10)
+       addi    r10,r10,0x80*8
        bdnz    1b
 
        mtctr   r9
        ld      r9,PACA_EXRFI+EX_R9(r13)
        ld      r10,PACA_EXRFI+EX_R10(r13)
        ld      r11,PACA_EXRFI+EX_R11(r13)
-       ld      r12,PACA_EXRFI+EX_R12(r13)
-       ld      r8,PACA_EXRFI+EX_R13(r13)
        GET_SCRATCH0(r13);
        hrfid
 
@@ -1632,7 +1634,7 @@ USE_TEXT_SECTION()
        .balign IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_BOOK3S_64
-       lis     r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+       lis     r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
        ori     r0,r0,DSISR_BAD_FAULT_64S@l
        and.    r0,r4,r0                /* weird error? */
        bne-    handle_page_fault       /* if not, try to insert a HPTE */
@@ -1828,6 +1830,8 @@ BEGIN_FTR_SECTION
 FTR_SECTION_ELSE
        beq     hardware_interrupt_common
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
+       cmpwi   r3,0xf00
+       beq     performance_monitor_common
 BEGIN_FTR_SECTION
        cmpwi   r3,0xa00
        beq     h_doorbell_common_msgclr
index aa71a90f5222e6842686d4be40016d3927d737c6..a61151a6ea5e82f8edc5828d923d80b07863182a 100644 (file)
@@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start)
        /* Mark interrupts soft and hard disabled (they might be enabled
         * in the PACA when doing hotplug)
         */
-       li      r0,0
-       stb     r0,PACASOFTIRQEN(r13)
+       li      r0,IRQS_DISABLED
+       stb     r0,PACAIRQSOFTMASK(r13)
        li      r0,PACA_IRQ_HARD_DIS
        stb     r0,PACAIRQHAPPENED(r13)
 
@@ -822,7 +822,8 @@ __secondary_start:
        /* Mark interrupts soft and hard disabled (they might be enabled
         * in the PACA when doing hotplug)
         */
-       stb     r7,PACASOFTIRQEN(r13)
+       li      r7,IRQS_DISABLED
+       stb     r7,PACAIRQSOFTMASK(r13)
        li      r0,PACA_IRQ_HARD_DIS
        stb     r0,PACAIRQHAPPENED(r13)
 
@@ -988,8 +989,8 @@ start_here_common:
        /* Mark interrupts soft and hard disabled (they might be enabled
         * in the PACA when doing hotplug)
         */
-       li      r0,0
-       stb     r0,PACASOFTIRQEN(r13)
+       li      r0,IRQS_DISABLED
+       stb     r0,PACAIRQSOFTMASK(r13)
        li      r0,PACA_IRQ_HARD_DIS
        stb     r0,PACAIRQHAPPENED(r13)
 
index 4fee00d414e87c78d8a09ef3e9662b7bb7234c66..d8670a37d70ccbce26914eeb87dbf6659914f317 100644 (file)
 #include <asm/fixmap.h>
 #include <asm/export.h>
 
-/* Macro to make the code more readable. */
-#ifdef CONFIG_8xx_CPU6
-#define SPRN_MI_TWC_ADDR       0x2b80
-#define SPRN_MI_RPN_ADDR       0x2d80
-#define SPRN_MD_TWC_ADDR       0x3b80
-#define SPRN_MD_RPN_ADDR       0x3d80
-
-#define MTSPR_CPU6(spr, reg, treg)     \
-       li      treg, spr##_ADDR;       \
-       stw     treg, 12(r0);           \
-       lwz     treg, 12(r0);           \
-       mtspr   spr, reg
-#else
-#define MTSPR_CPU6(spr, reg, treg)     \
-       mtspr   spr, reg
-#endif
-
 #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
 /* By simply checking Address >= 0x80000000, we know if its a kernel address */
 #define SIMPLE_KERNEL_ADDRESS          1
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
  */
-#ifdef CONFIG_PPC_16K_PAGES
-#define RPN_PATTERN    (0x00f0 | MD_SPS16K)
-#else
 #define RPN_PATTERN    0x00f0
-#endif
 
 #define PAGE_SHIFT_512K                19
 #define PAGE_SHIFT_8M          23
@@ -134,15 +113,12 @@ turn_on_mmu:
  * task's thread_struct.
  */
 #define EXCEPTION_PROLOG       \
-       EXCEPTION_PROLOG_0;     \
+       mtspr   SPRN_SPRG_SCRATCH0, r10;        \
+       mtspr   SPRN_SPRG_SCRATCH1, r11;        \
        mfcr    r10;            \
        EXCEPTION_PROLOG_1;     \
        EXCEPTION_PROLOG_2
 
-#define EXCEPTION_PROLOG_0     \
-       mtspr   SPRN_SPRG_SCRATCH0,r10; \
-       mtspr   SPRN_SPRG_SCRATCH1,r11
-
 #define EXCEPTION_PROLOG_1     \
        mfspr   r11,SPRN_SRR1;          /* check whether user or kernel */ \
        andi.   r11,r11,MSR_PR; \
@@ -176,13 +152,6 @@ turn_on_mmu:
        SAVE_4GPRS(3, r11);     \
        SAVE_2GPRS(7, r11)
 
-/*
- * Exception exit code.
- */
-#define EXCEPTION_EPILOG_0     \
-       mfspr   r10,SPRN_SPRG_SCRATCH0; \
-       mfspr   r11,SPRN_SPRG_SCRATCH1
-
 /*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
@@ -326,15 +295,10 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
-       mtspr   SPRN_SPRG_SCRATCH2, r3
-#endif
-       EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
-       lis     r10, (itlb_miss_counter - PAGE_OFFSET)@ha
-       lwz     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
-       addi    r11, r11, 1
-       stw     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+       mtspr   SPRN_SPRG_SCRATCH0, r10
+       mtspr   SPRN_SPRG_SCRATCH1, r11
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+       mtspr   SPRN_SPRG_SCRATCH2, r12
 #endif
 
        /* If we are faulting a kernel address, we have to use the
@@ -345,7 +309,7 @@ InstructionTLBMiss:
        /* Only modules will cause ITLB Misses as we always
         * pin the first 8MB of kernel memory */
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
-       mfcr    r3
+       mfcr    r12
 #endif
 #ifdef ITLB_MISS_KERNEL
 #if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
@@ -388,40 +352,46 @@ _ENTRY(ITLBMiss_cmp)
        lwz     r10, 0(r10)     /* Get the pte */
 4:
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
-       mtcr    r3
+       mtcr    r12
 #endif
-       /* Insert the APG into the TWC from the Linux PTE. */
-       rlwimi  r11, r10, 0, 25, 26
-       /* Load the MI_TWC with the attributes for this "segment." */
-       MTSPR_CPU6(SPRN_MI_TWC, r11, r3)        /* Set segment attributes */
 
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
-       rlwimi  r10, r11, 1, MI_SPS16K
-#endif
 #ifdef CONFIG_SWAP
-       rlwinm  r11, r10, 32-5, _PAGE_PRESENT
-       and     r11, r11, r10
-       rlwimi  r10, r11, 0, _PAGE_PRESENT
+       rlwinm  r11, r10, 31, _PAGE_ACCESSED >> 1
 #endif
-       li      r11, RPN_PATTERN
+       /* Load the MI_TWC with the attributes for this "segment." */
+       mtspr   SPRN_MI_TWC, r11        /* Set segment attributes */
+
+       li      r11, RPN_PATTERN | 0x200
        /* The Linux PTE won't go exactly into the MMU TLB.
-        * Software indicator bits 20-23 and 28 must be clear.
-        * Software indicator bits 24, 25, 26, and 27 must be
+        * Software indicator bits 20 and 23 must be clear.
+        * Software indicator bits 22, 24, 25, 26, and 27 must be
         * set.  All other Linux PTE bits control the behavior
         * of the MMU.
         */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
-       rlwimi  r10, r11, 0, 0x0ff0     /* Set 24-27, clear 20-23 */
-#else
-       rlwimi  r10, r11, 0, 0x0ff8     /* Set 24-27, clear 20-23,28 */
-#endif
-       MTSPR_CPU6(SPRN_MI_RPN, r10, r3)        /* Update TLB entry */
+       rlwimi  r11, r10, 4, 0x0400     /* Copy _PAGE_EXEC into bit 21 */
+       rlwimi  r10, r11, 0, 0x0ff0     /* Set 22, 24-27, clear 20,23 */
+       mtspr   SPRN_MI_RPN, r10        /* Update TLB entry */
 
        /* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
-       mfspr   r3, SPRN_SPRG_SCRATCH2
+_ENTRY(itlb_miss_exit_1)
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+       mfspr   r12, SPRN_SPRG_SCRATCH2
+#endif
+       rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(itlb_miss_perf)
+       lis     r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+       lwz     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+       addi    r11, r11, 1
+       stw     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+       mfspr   r12, SPRN_SPRG_SCRATCH2
 #endif
-       EXCEPTION_EPILOG_0
        rfi
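
For readers following the bit gymnastics above: rlwimi is a rotate-then-masked-insert, which is how a single instruction can copy _PAGE_EXEC into position and force the software indicator bits in one pass. A minimal C model of the instruction (a sketch, not part of the patch):

/* rlwimi rA,rS,sh,mask: rotate rS left by sh, then replace exactly
 * the mask bits of rA with the rotated value; other rA bits survive. */
static inline unsigned int rotl32(unsigned int x, int sh)
{
	return sh ? (x << sh) | (x >> (32 - sh)) : x;
}

static inline unsigned int rlwimi(unsigned int ra, unsigned int rs,
				  int sh, unsigned int mask)
{
	return (rotl32(rs, sh) & mask) | (ra & ~mask);
}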
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -436,7 +406,6 @@ _ENTRY(ITLBMiss_cmp)
        rlwinm  r10, r11, 0, ~HUGEPD_SHIFT_MASK
 #endif
        lwz     r10, 0(r10)     /* Get the pte */
-       rlwinm  r11, r11, 0, 0xf
        b       4b
 
 20:    /* 512k pages */
@@ -445,21 +414,15 @@ _ENTRY(ITLBMiss_cmp)
        /* Add level 2 base */
        rlwimi  r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
        lwz     r10, 0(r10)     /* Get the pte */
-       rlwinm  r11, r11, 0, 0xf
        b       4b
 #endif
 
        . = 0x1200
 DataStoreTLBMiss:
-       mtspr   SPRN_SPRG_SCRATCH2, r3
-       EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
-       lis     r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
-       lwz     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-       addi    r11, r11, 1
-       stw     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
-       mfcr    r3
+       mtspr   SPRN_SPRG_SCRATCH0, r10
+       mtspr   SPRN_SPRG_SCRATCH1, r11
+       mtspr   SPRN_SPRG_SCRATCH2, r12
+       mfcr    r12
 
        /* If we are faulting a kernel address, we have to use the
         * kernel page tables.
@@ -499,59 +462,49 @@ _ENTRY(DTLBMiss_jmp)
        rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1     /* Add level 2 base */
        lwz     r10, 0(r10)     /* Get the pte */
 4:
-       mtcr    r3
+       mtcr    r12
 
-       /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
-        * It is bit 26-27 of both the Linux PTE and the TWC (at least
+       /* Insert the Guarded flag into the TWC from the Linux PTE.
+        * It is bit 27 of both the Linux PTE and the TWC (at least
         * I got that right :-).  It will be better when we can put
         * this into the Linux pgd/pmd and load it in the operation
         * above.
         */
-       rlwimi  r11, r10, 0, 26, 27
-       /* Insert the WriteThru flag into the TWC from the Linux PTE.
-        * It is bit 25 in the Linux PTE and bit 30 in the TWC
-        */
-       rlwimi  r11, r10, 32-5, 30, 30
-       MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-
-       /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29)
-        * In 16k pages mode, SPS is always 1 */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
-       rlwimi  r10, r11, 1, MD_SPS16K
-#endif
-       /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
-        * We also need to know if the insn is a load/store, so:
-        * Clear _PAGE_PRESENT and load that which will
-        * trap into DTLB Error with store bit set accordinly.
-        */
-       /* PRESENT=0x1, ACCESSED=0x20
-        * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
-        * r10 = (r10 & ~PRESENT) | r11;
-        */
+       rlwimi  r11, r10, 0, _PAGE_GUARDED
 #ifdef CONFIG_SWAP
-       rlwinm  r11, r10, 32-5, _PAGE_PRESENT
-       and     r11, r11, r10
-       rlwimi  r10, r11, 0, _PAGE_PRESENT
+       /* _PAGE_ACCESSED has to be set. We use the second APG bit for that;
+        * a 0 in that bit represents a no-access group.
+        */
+       rlwinm  r11, r10, 31, _PAGE_ACCESSED >> 1
 #endif
+       mtspr   SPRN_MD_TWC, r11
+
        /* The Linux PTE won't go exactly into the MMU TLB.
-        * Software indicator bits 22 and 28 must be clear.
         * Software indicator bits 24, 25, 26, and 27 must be
         * set.  All other Linux PTE bits control the behavior
         * of the MMU.
         */
        li      r11, RPN_PATTERN
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
        rlwimi  r10, r11, 0, 24, 27     /* Set 24-27 */
-#else
-       rlwimi  r10, r11, 0, 24, 28     /* Set 24-27, clear 28 */
-#endif
-       rlwimi  r10, r11, 0, 20, 20     /* clear 20 */
-       MTSPR_CPU6(SPRN_MD_RPN, r10, r3)        /* Update TLB entry */
+       mtspr   SPRN_MD_RPN, r10        /* Update TLB entry */
 
        /* Restore registers */
-       mfspr   r3, SPRN_SPRG_SCRATCH2
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-       EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_1)
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+       mfspr   r12, SPRN_SPRG_SCRATCH2
+       rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(dtlb_miss_perf)
+       lis     r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+       lwz     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+       addi    r11, r11, 1
+       stw     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+       mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -566,7 +519,6 @@ _ENTRY(DTLBMiss_jmp)
        rlwinm  r10, r11, 0, ~HUGEPD_SHIFT_MASK
 #endif
        lwz     r10, 0(r10)     /* Get the pte */
-       rlwinm  r11, r11, 0, 0xf
        b       4b
 
 20:    /* 512k pages */
@@ -575,7 +527,6 @@ _ENTRY(DTLBMiss_jmp)
        /* Add level 2 base */
        rlwimi  r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
        lwz     r10, 0(r10)     /* Get the pte */
-       rlwinm  r11, r11, 0, 0xf
        b       4b
 #endif
 
@@ -601,7 +552,8 @@ itlbie:
  */
        . = 0x1400
 DataTLBError:
-       EXCEPTION_PROLOG_0
+       mtspr   SPRN_SPRG_SCRATCH0, r10
+       mtspr   SPRN_SPRG_SCRATCH1, r11
        mfcr    r10
 
        mfspr   r11, SPRN_DAR
@@ -636,7 +588,8 @@ dtlbie:
  */
        . = 0x1c00
 DataBreakpoint:
-       EXCEPTION_PROLOG_0
+       mtspr   SPRN_SPRG_SCRATCH0, r10
+       mtspr   SPRN_SPRG_SCRATCH1, r11
        mfcr    r10
        mfspr   r11, SPRN_SRR0
        cmplwi  cr0, r11, (dtlbie - PAGE_OFFSET)@l
@@ -652,13 +605,15 @@ DataBreakpoint:
        EXC_XFER_EE(0x1c00, do_break)
 11:
        mtcr    r10
-       EXCEPTION_EPILOG_0
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
        rfi
 
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
        . = 0x1d00
 InstructionBreakpoint:
-       EXCEPTION_PROLOG_0
+       mtspr   SPRN_SPRG_SCRATCH0, r10
+       mtspr   SPRN_SPRG_SCRATCH1, r11
        lis     r10, (instruction_counter - PAGE_OFFSET)@ha
        lwz     r11, (instruction_counter - PAGE_OFFSET)@l(r10)
        addi    r11, r11, -1
@@ -666,7 +621,8 @@ InstructionBreakpoint:
        lis     r10, 0xffff
        ori     r10, r10, 0x01
        mtspr   SPRN_COUNTA, r10
-       EXCEPTION_EPILOG_0
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
        rfi
 #else
        EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
@@ -681,51 +637,57 @@ InstructionBreakpoint:
  * not enough space in the DataStoreTLBMiss area.
  */
 DTLBMissIMMR:
-       mtcr    r3
-       /* Set 512k byte guarded page and mark it valid */
-       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID
-       MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+       mtcr    r12
+       /* Set 512k byte guarded page and mark it valid and accessed */
+       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+       mtspr   SPRN_MD_TWC, r10
        mfspr   r10, SPRN_IMMR                  /* Get current IMMR */
        rlwinm  r10, r10, 0, 0xfff80000         /* Get 512 kbytes boundary */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
                          _PAGE_PRESENT | _PAGE_NO_CACHE
-       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
+       mtspr   SPRN_MD_RPN, r10        /* Update TLB entry */
 
        li      r11, RPN_PATTERN
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-       mfspr   r3, SPRN_SPRG_SCRATCH2
-       EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_2)
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+       mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
 
 DTLBMissLinear:
-       mtcr    r3
-       /* Set 8M byte page and mark it valid */
-       li      r11, MD_PS8MEG | MD_SVALID
-       MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+       mtcr    r12
+       /* Set 8M byte page and mark it valid and accessed */
+       li      r11, MD_PS8MEG | MD_SVALID | M_APG2
+       mtspr   SPRN_MD_TWC, r11
        rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
                          _PAGE_PRESENT
-       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
+       mtspr   SPRN_MD_RPN, r10        /* Update TLB entry */
 
        li      r11, RPN_PATTERN
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-       mfspr   r3, SPRN_SPRG_SCRATCH2
-       EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_3)
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+       mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
 
 #ifndef CONFIG_PIN_TLB_TEXT
 ITLBMissLinear:
-       mtcr    r3
-       /* Set 8M byte page and mark it valid */
-       li      r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
-       MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+       mtcr    r12
+       /* Set 8M byte page and mark it valid and accessed */
+       li      r11, MI_PS8MEG | MI_SVALID | M_APG2
+       mtspr   SPRN_MI_TWC, r11
        rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
-       ori     r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
                          _PAGE_PRESENT
-       MTSPR_CPU6(SPRN_MI_RPN, r10, r11)       /* Update TLB entry */
+       mtspr   SPRN_MI_RPN, r10        /* Update TLB entry */
 
-       mfspr   r3, SPRN_SPRG_SCRATCH2
-       EXCEPTION_EPILOG_0
+_ENTRY(itlb_miss_exit_2)
+       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH1
+       mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
 #endif
 
@@ -933,13 +895,6 @@ start_here:
         */
        lis     r6, swapper_pg_dir@ha
        tophys(r6,r6)
-#ifdef CONFIG_8xx_CPU6
-       lis     r4, cpu6_errata_word@h
-       ori     r4, r4, cpu6_errata_word@l
-       li      r3, 0x3f80
-       stw     r3, 12(r4)
-       lwz     r3, 12(r4)
-#endif
        mtspr   SPRN_M_TW, r6
        lis     r4,2f@h
        ori     r4,r4,2f@l
@@ -1004,8 +959,8 @@ initial_mmu:
        lis     r8, KERNELBASE@h        /* Create vaddr for TLB */
        ori     r8, r8, MI_EVALID       /* Mark it valid */
        mtspr   SPRN_MI_EPN, r8
-       li      r8, MI_PS8MEG | (2 << 5)        /* Set 8M byte page, APG 2 */
-       ori     r8, r8, MI_SVALID       /* Make it valid */
+       li      r8, MI_PS8MEG /* Set 8M byte page */
+       ori     r8, r8, MI_SVALID | M_APG2      /* Make it valid, APG 2 */
        mtspr   SPRN_MI_TWC, r8
        li      r8, MI_BOOTINIT         /* Create RPN for address 0 */
        mtspr   SPRN_MI_RPN, r8         /* Store TLB entry */
@@ -1032,7 +987,7 @@ initial_mmu:
        ori     r8, r8, MD_EVALID       /* Mark it valid */
        mtspr   SPRN_MD_EPN, r8
        li      r8, MD_PS512K | MD_GUARDED      /* Set 512k byte page */
-       ori     r8, r8, MD_SVALID       /* Make it valid */
+       ori     r8, r8, MD_SVALID | M_APG2      /* Make it valid and accessed */
        mtspr   SPRN_MD_TWC, r8
        mr      r8, r9                  /* Create paddr for TLB */
        ori     r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
@@ -1061,7 +1016,7 @@ initial_mmu:
 #endif
        /* Disable debug mode entry on breakpoints */
        mfspr   r8, SPRN_DER
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
        rlwinm  r8, r8, 0, ~0xc
 #else
        rlwinm  r8, r8, 0, ~0x8
@@ -1094,13 +1049,7 @@ swapper_pg_dir:
 abatron_pteptrs:
        .space  8
 
-#ifdef CONFIG_8xx_CPU6
-       .globl  cpu6_errata_word
-cpu6_errata_word:
-       .space  16
-#endif
-
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
        .globl  itlb_miss_counter
 itlb_miss_counter:
        .space  4
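
Note the counters are addressed as symbol - PAGE_OFFSET because the TLB miss handlers above run with translation disabled, so they need the physical location. The @ha/@l pair then rebuilds a 32-bit address around the signed 16-bit displacement of lwz/stw; a C model of the two relocation operators (a sketch, not from the patch):

/* @l is a *signed* 16-bit displacement, so @ha rounds to the nearest
 * 64K instead of truncating: (ha(a) << 16) + lo(a) == a for any a,
 * with the arithmetic done in 32-bit unsigned wrap-around. */
static inline unsigned int ha(unsigned int a)
{
	return (a + 0x8000) >> 16;	/* immediate for lis */
}

static inline int lo(unsigned int a)
{
	return (short)(a & 0xffff);	/* sign-extended low half */
}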
index 48c21acef915883aa08205e2d9388cef67f91b99..2b269315d37701a3cf3c11e654da681c5ec3c0a5 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
 
 /* 64-bit version only for now */
 #ifdef CONFIG_PPC64
@@ -46,8 +47,8 @@ _GLOBAL(\name)
        bl      trace_hardirqs_on
        addi    r1,r1,128
 #endif
-       li      r0,1
-       stb     r0,PACASOFTIRQEN(r13)
+       li      r0,IRQS_ENABLED
+       stb     r0,PACAIRQSOFTMASK(r13)
        
        /* Interrupts will make use return to LR, so get something we want
         * in there
index f57a19348bddb4ec7f7bbf0f2bb2ff50aa292c97..08faa93755f9f912bd9673be5bfa9ea363880aa4 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/irqflags.h>
+#include <asm/hw_irq.h>
 
 #undef DEBUG
 
@@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
        mfmsr   r7
 #endif /* CONFIG_TRACE_IRQFLAGS */
 
-       li      r0,1
-       stb     r0,PACASOFTIRQEN(r13)   /* we'll hard-enable shortly */
+       li      r0,IRQS_ENABLED
+       stb     r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
 BEGIN_FTR_SECTION
        DSSALL
        sync
index b7a84522e652cc3d151d19f13d9526f139a7c8ed..f880388477908d60bf314318cc090e12157729c8 100644 (file)
@@ -67,6 +67,7 @@
 #include <asm/smp.h>
 #include <asm/livepatch.h>
 #include <asm/asm-prototypes.h>
+#include <asm/hw_irq.h>
 
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
@@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void)
        return happened;
 }
 
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
-       __asm__ __volatile__("stb %0,%1(13)"
-       : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
 static inline notrace int decrementer_check_overflow(void)
 {
        u64 now = get_tb_or_rtc();
@@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void)
                return 0x900;
        }
 
+       if (happened & PACA_IRQ_PMI) {
+               local_paca->irq_happened &= ~PACA_IRQ_PMI;
+               return 0xf00;
+       }
+
        if (happened & PACA_IRQ_EE) {
                local_paca->irq_happened &= ~PACA_IRQ_EE;
                return 0x500;
@@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void)
        return 0;
 }
 
-notrace void arch_local_irq_restore(unsigned long en)
+notrace void arch_local_irq_restore(unsigned long mask)
 {
        unsigned char irq_happened;
        unsigned int replay;
 
        /* Write the new soft-enabled value */
-       set_soft_enabled(en);
-       if (!en)
+       irq_soft_mask_set(mask);
+       if (mask)
                return;
+
        /*
         * From this point onward, we can take interrupts, preempt,
         * etc... unless we got hard-disabled. We check if an event
@@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en)
         */
        if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
                __hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
        else {
                /*
                 * We should already be hard disabled here. We had bugs
@@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en)
                if (WARN_ON(mfmsr() & MSR_EE))
                        __hard_irq_disable();
        }
-#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif
 
-       set_soft_enabled(0);
+       irq_soft_mask_set(IRQS_ALL_DISABLED);
        trace_hardirqs_off();
 
        /*
@@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en)
 
        /* We can soft-enable now */
        trace_hardirqs_on();
-       set_soft_enabled(1);
+       irq_soft_mask_set(IRQS_ENABLED);
 
        /*
         * And replay if we have to. This will return with interrupts
@@ -363,7 +364,7 @@ bool prep_irq_for_idle(void)
         * of entering the low power state.
         */
        local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-       local_paca->soft_enabled = 1;
+       irq_soft_mask_set(IRQS_ENABLED);
 
        /* Tell the caller to enter the low power state */
        return true;
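
The en -> mask rename is not cosmetic: the old per-CPU byte was a boolean where 1 meant enabled, while the new soft mask is zero when fully enabled and carries one bit per maskable interrupt class, which is why "if (!en)" above becomes "if (mask)". Schematically (a sketch of the convention; the authoritative values live in asm/hw_irq.h):

/* Zero means fully enabled; set bits soft-mask interrupt classes. */
#define IRQS_ENABLED		0
#define IRQS_DISABLED		1	/* "normal" external interrupts */
#define IRQS_PMI_DISABLED	2	/* performance monitor interrupts */
#define IRQS_ALL_DISABLED	(IRQS_DISABLED | IRQS_PMI_DISABLED)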
index 71e8a1b8c86ed851800098a6258d8a55fd5b3e8c..efdd16a79075f699ecf866f4dfc22abd794699fa 100644 (file)
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
        return handled;
 }
 
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+       DTRIG_UNKNOWN,
+       DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
+       DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
 {
-       __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-       /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
-       if (pvr_version_is(PVR_POWER9)) {
-               unsigned long hmer = mfspr(SPRN_HMER);
-
-               /* Do we have the debug bit set */
-               if (hmer & PPC_BIT(17)) {
-                       hmer &= ~PPC_BIT(17);
-                       mtspr(SPRN_HMER, hmer);
-
-                       /*
-                        * Now to avoid problems with soft-disable we
-                        * only do the emulation if we are coming from
-                        * user space
-                        */
-                       if (user_mode(regs))
-                               local_paca->hmi_p9_special_emu = 1;
-
-                       /*
-                        * Don't bother going to OPAL if that's the
-                        * only relevant bit.
-                        */
-                       if (!(hmer & mfspr(SPRN_HMEER)))
-                               return local_paca->hmi_p9_special_emu;
+       int pvr;
+       struct device_node *cpun;
+       struct property *prop = NULL;
+       const char *str;
+
+       /* First look in the device tree */
+       preempt_disable();
+       cpun = of_get_cpu_node(smp_processor_id(), NULL);
+       if (cpun) {
+               of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+                                           prop, str) {
+                       if (strcmp(str, "bit17-vector-ci-load") == 0)
+                               hmer_debug_trig_function = DTRIG_VECTOR_CI;
+                       else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+                               hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
+               of_node_put(cpun);
+       }
+       preempt_enable();
+
+       /* If we found the property, don't look at PVR */
+       if (prop)
+               goto out;
+
+       pvr = mfspr(SPRN_PVR);
+       /* Check for POWER9 Nimbus (scale-out) */
+       if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+               /* DD2.2 and later */
+               if ((pvr & 0xfff) >= 0x202)
+                       hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+               /* DD2.0 and DD2.1 - used for vector CI load emulation */
+               else if ((pvr & 0xfff) >= 0x200)
+                       hmer_debug_trig_function = DTRIG_VECTOR_CI;
+       }
+
+ out:
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               pr_debug("HMI debug trigger used for vector CI load\n");
+               break;
+       case DTRIG_SUSPEND_ESCAPE:
+               pr_debug("HMI debug trigger used for TM suspend escape\n");
+               break;
+       default:
+               break;
        }
-#endif /* CONFIG_PPC_BOOK3S_64 */
+       return 0;
+}
+__initcall(init_debug_trig_function);
+
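
The PVR test above relies on the POWER9 layout: PVR_VER() selects the family, the (pvr & 0xe000) == 0 check selects the Nimbus scale-out variant, and the low bits carry the chip revision ("DD level"). A helper making the decode explicit (illustrative sketch only):

/* Sketch: the DD level sits in the low bits of the PVR, so 0x200,
 * 0x201 and 0x202 correspond to DD2.0, DD2.1 and DD2.2. */
static inline unsigned int p9_dd_level(unsigned int pvr)
{
	return pvr & 0xfff;
}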
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+       unsigned long hmer = mfspr(SPRN_HMER);
+       long ret = 0;
+
+       /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+       if (!((hmer & HMER_DEBUG_TRIG)
+             && hmer_debug_trig_function != DTRIG_UNKNOWN))
+               return -1;
+
+       hmer &= ~HMER_DEBUG_TRIG;
+       /* HMER is a write-AND register */
+       mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               /*
+                * Now to avoid problems with soft-disable we
+                * only do the emulation if we are coming from
+                * host user space
+                */
+               if (regs && user_mode(regs))
+                       ret = local_paca->hmi_p9_special_emu = 1;
+
+               break;
+
+       default:
+               break;
+       }
+
+       /*
+        * See if any other HMI causes remain to be handled
+        */
+       if (hmer & mfspr(SPRN_HMEER))
+               return -1;
+
+       return ret;
+}
+
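
The bare mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG) above works precisely because HMER is a write-AND register: the written value is ANDed into the current contents, so 0 bits clear and 1 bits leave the hardware state untouched. As a sketch:

/* Clear a single HMI cause without disturbing the others (sketch). */
static inline void hmer_clear(unsigned long bit)
{
	mtspr(SPRN_HMER, ~bit);		/* write-AND: only 'bit' is cleared */
}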
+/*
+ * Return values:
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+       int ret;
+
+       __this_cpu_inc(irq_stat.hmi_exceptions);
+
+       ret = hmi_handle_debugtrig(regs);
+       if (ret >= 0)
+               return ret;
 
        wait_for_subcore_guest_exit();
 
index 644f7040b91c9c2f080ebe6236a9a9892e04ea16..fe6fc63251fec70e7cf2dee7f6deab47fe8ab256 100644 (file)
@@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
        return pte_pfn(*ptep);
 }
 
-static void flush_tlb_206(unsigned int num_sets, unsigned int action)
-{
-       unsigned long rb;
-       unsigned int i;
-
-       switch (action) {
-       case TLB_INVAL_SCOPE_GLOBAL:
-               rb = TLBIEL_INVAL_SET;
-               break;
-       case TLB_INVAL_SCOPE_LPID:
-               rb = TLBIEL_INVAL_SET_LPID;
-               break;
-       default:
-               BUG();
-               break;
-       }
-
-       asm volatile("ptesync" : : : "memory");
-       for (i = 0; i < num_sets; i++) {
-               asm volatile("tlbiel %0" : : "r" (rb));
-               rb += 1 << TLBIEL_INVAL_SET_SHIFT;
-       }
-       asm volatile("ptesync" : : : "memory");
-}
-
-static void flush_tlb_300(unsigned int num_sets, unsigned int action)
-{
-       unsigned long rb;
-       unsigned int i;
-       unsigned int r;
-
-       switch (action) {
-       case TLB_INVAL_SCOPE_GLOBAL:
-               rb = TLBIEL_INVAL_SET;
-               break;
-       case TLB_INVAL_SCOPE_LPID:
-               rb = TLBIEL_INVAL_SET_LPID;
-               break;
-       default:
-               BUG();
-               break;
-       }
-
-       asm volatile("ptesync" : : : "memory");
-
-       if (early_radix_enabled())
-               r = 1;
-       else
-               r = 0;
-
-       /*
-        * First flush table/PWC caches with set 0, then flush the
-        * rest of the sets, partition scope. Radix must then do it
-        * all again with process scope. Hash just has to flush
-        * process table.
-        */
-       asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
-                       "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
-       for (i = 1; i < num_sets; i++) {
-               unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
-               asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
-                               "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
-       }
-
-       asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
-                       "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
-       if (early_radix_enabled()) {
-               for (i = 1; i < num_sets; i++) {
-                       unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
-                       asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
-                               "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
-               }
-       }
-
-       asm volatile("ptesync" : : : "memory");
-}
-
-/*
- * Generic routines to flush TLB on POWER processors. These routines
- * are used as flush_tlb hook in the cpu_spec.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
- *          TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
-void __flush_tlb_power7(unsigned int action)
-{
-       flush_tlb_206(POWER7_TLB_SETS, action);
-}
-
-void __flush_tlb_power8(unsigned int action)
-{
-       flush_tlb_206(POWER8_TLB_SETS, action);
-}
-
-void __flush_tlb_power9(unsigned int action)
-{
-       unsigned int num_sets;
-
-       if (early_radix_enabled())
-               num_sets = POWER9_TLB_SETS_RADIX;
-       else
-               num_sets = POWER9_TLB_SETS_HASH;
-
-       flush_tlb_300(num_sets, action);
-}
-
-
 /* flush SLBs and reload */
 #ifdef CONFIG_PPC_BOOK3S_64
 static void flush_and_reload_slb(void)
@@ -226,10 +117,8 @@ static int mce_flush(int what)
                return 1;
        }
        if (what == MCE_FLUSH_TLB) {
-               if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
-                       cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
-                       return 1;
-               }
+               tlbiel_all();
+               return 1;
        }
 
        return 0;
diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds
new file mode 100644 (file)
index 0000000..cea5dc1
--- /dev/null
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section.  */
+SECTIONS
+{
+       .toc 0 : ALIGN(256)
+       {
+               *(.got .toc)
+       }
+}
index 218971ac7e04c72499aa3dbb020fa490a091463d..a2636c250b7be63e08a9e8ff57140d1e83467c85 100644 (file)
@@ -348,8 +348,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
                char *p;
                if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
                        me->arch.stubs_section = i;
-               else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+               else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
                        me->arch.toc_section = i;
+                       if (sechdrs[i].sh_addralign < 8)
+                               sechdrs[i].sh_addralign = 8;
+               }
                else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
                        dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
                                          sechdrs[i].sh_size);
@@ -387,12 +390,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
        return 0;
 }
 
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
-   gives the value maximum span in an instruction which uses a signed
-   offset) */
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ */
 static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
 {
-       return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+       return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
 }
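
The 256-byte round-down is safe because the new module.lds above forces .toc to at least that alignment, so for a module that does have a .toc section the mask is a no-op and r2 keeps its conventional bias. A quick reachability check (plain arithmetic expressed as C, not from the patch):

/* With .toc 256-byte aligned, (toc & ~0xfful) == toc, and any TOC
 * entry at toc + off (0 <= off <= 0xffff) is a signed 16-bit
 * displacement from r2. */
static int toc_entry_reachable(unsigned long toc, unsigned long off)
{
	unsigned long r2 = (toc & ~0xfful) + 0x8000;
	long delta = (toc + off) - r2;	/* off - 0x8000 when aligned */

	return delta >= -0x8000 && delta <= 0x7fff;	/* fits a D-field */
}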
 
 /* Both low and high 16 bits are added as SIGNED additions, so if low
@@ -501,12 +507,22 @@ static bool is_early_mcount_callsite(u32 *instruction)
    restore r2. */
 static int restore_r2(u32 *instruction, struct module *me)
 {
-       if (is_early_mcount_callsite(instruction - 1))
+       u32 *prev_insn = instruction - 1;
+
+       if (is_early_mcount_callsite(prev_insn))
+               return 1;
+
+       /*
+        * Make sure the branch isn't a sibling call.  Sibling calls aren't
+        * "link" branches and they don't return, so they don't need the r2
+        * restore afterwards.
+        */
+       if (!instr_is_relative_link_branch(*prev_insn))
                return 1;
 
        if (*instruction != PPC_INST_NOP) {
-               pr_err("%s: Expect noop after relocate, got %08x\n",
-                      me->name, *instruction);
+               pr_err("%s: Expected nop after call, got %08x at %pS\n",
+                       me->name, *instruction, instruction);
                return 0;
        }
        /* ld r2,R2_STACK_OFFSET(r1) */
@@ -628,7 +644,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 
                case R_PPC_REL24:
                        /* FIXME: Handle weak symbols here --RR */
-                       if (sym->st_shndx == SHN_UNDEF) {
+                       if (sym->st_shndx == SHN_UNDEF ||
+                           sym->st_shndx == SHN_LIVEPATCH) {
                                /* External: go via stub */
                                value = stub_for_addr(sechdrs, value, me);
                                if (!value)
index 52fc864cdec4e9f3950fc5da1a48de74a34380d7..98a3aeeb3c8cddf705c5054142b98b7ef4d6090c 100644 (file)
@@ -58,7 +58,7 @@ optprobe_template_entry:
        std     r5,_XER(r1)
        mfcr    r5
        std     r5,_CCR(r1)
-       lbz     r5,PACASOFTIRQEN(r13)
+       lbz     r5,PACAIRQSOFTMASK(r13)
        std     r5,SOFTE(r1)
 
        /*
index d6597038931dc8a6bd65c55101339f0a63c39da5..95ffedf148856dd8444330a805a16c287e6399c9 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/pgtable.h>
 #include <asm/kexec.h>
 
+#include "setup.h"
+
 #ifdef CONFIG_PPC_BOOK3S
 
 /*
@@ -208,15 +210,14 @@ void __init allocate_pacas(void)
        u64 limit;
        int cpu;
 
-       limit = ppc64_rma_size;
-
 #ifdef CONFIG_PPC_BOOK3S_64
        /*
-        * We can't take SLB misses on the paca, and we want to access them
-        * in real mode, so allocate them within the RMA and also within
-        * the first segment.
+        * We access pacas in real mode, and cannot take SLB faults
+        * on them when in virtual mode, so allocate them accordingly.
         */
-       limit = min(0x10000000ULL, limit);
+       limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+       limit = ppc64_rma_size;
 #endif
 
        paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
index 590f4d0a6cb168945c4b1da53e507283ef82afdf..208e623b2557cf280a6e5b34c67d366d2225fc90 100644 (file)
@@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
 
        return pci_iov_resource_size(pdev, resno);
 }
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+       if (ppc_md.pcibios_sriov_enable)
+               return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+       return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+       if (ppc_md.pcibios_sriov_disable)
+               return ppc_md.pcibios_sriov_disable(pdev);
+
+       return 0;
+}
+
 #endif /* CONFIG_PCI_IOV */
 
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+       if (ppc_md.pcibios_bus_add_device)
+               ppc_md.pcibios_bus_add_device(pdev);
+}
+
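
These wrappers only indirect through machdep hooks, keeping the generic PCI code platform-agnostic; a platform opts in by filling the matching ppc_md members, roughly as below (hook body and names are hypothetical):

/* Hypothetical platform wiring for the new hooks (names invented): */
static int my_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	/* set up PEs/BARs for num_vfs virtual functions here */
	return 0;
}

static void __init my_platform_setup_arch(void)
{
	ppc_md.pcibios_sriov_enable = my_pcibios_sriov_enable;
}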
 static resource_size_t pcibios_io_size(const struct pci_controller *hose)
 {
 #ifdef CONFIG_PPC64
@@ -1276,8 +1299,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
                                                i + PCI_BRIDGE_RESOURCES) == 0)
                                continue;
                }
-               pr_warning("PCI: Cannot allocate resource region "
-                          "%d of PCI bridge %d, will remap\n", i, bus->number);
+               pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+                       i, bus->number);
        clear_resource:
                /* The resource might be figured out when doing
                 * reassignment based on the resources required
index 0e395afbf0f498a89f2fd66a8f8b6ed74f381f24..ab147a1909c8b98e22e254fbaf62bd488b3c9d63 100644 (file)
@@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
        pdn->parent = parent;
        pdn->busno = busno;
        pdn->devfn = devfn;
-#ifdef CONFIG_PPC_POWERNV
        pdn->vf_index = vf_index;
        pdn->pe_number = IODA_INVALID_PE;
-#endif
        INIT_LIST_HEAD(&pdn->child_list);
        INIT_LIST_HEAD(&pdn->list);
        list_add_tail(&pdn->list, &parent->child_list);
@@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
         */
        if (pdev->is_virtfn) {
                pdn = pci_get_pdn(pdev);
-#ifdef CONFIG_PPC_POWERNV
                pdn->pe_number = IODA_INVALID_PE;
-#endif
                return;
        }
 
@@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
                return NULL;
        dn->data = pdn;
        pdn->phb = hose;
-#ifdef CONFIG_PPC_POWERNV
        pdn->pe_number = IODA_INVALID_PE;
-#endif
        regs = of_get_property(dn, "reg", NULL);
        if (regs) {
                u32 addr = of_read_number(regs, 1);
index 0d790f8432d27b6293c647176984d45f792c72c6..20ceec4a5f5ee31a68b6212d13d7aec57f72f401 100644 (file)
@@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
  * @addr0: value of 1st cell of a device tree PCI address.
  * @bridge: Set this flag if the address is from a bridge 'ranges' property
  */
-static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
 {
        unsigned int flags = 0;
 
index 56548bf6231f8ef34e774411692b4857826a6940..9bfbd800d32f6e71a0caa2152a22d244e35d439a 100644 (file)
@@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void)
 {
        struct proc_dir_entry *pde;
 
-       pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+       pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
                               &page_map_fops, vdso_data);
        if (!pde)
                return 1;
index 4208cbe2fb7fd225861ccd62008395435662107b..1738c4127b3207506e434dea167f755eea66ff7f 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/uaccess.h>
 #include <linux/elf-randomize.h>
+#include <linux/pkeys.h>
 
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -57,6 +58,7 @@
 #include <asm/debug.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
+#include <asm/hw_irq.h>
 #endif
 #include <asm/code-patching.h>
 #include <asm/exec.h>
@@ -1097,6 +1099,8 @@ static inline void save_sprs(struct thread_struct *t)
                t->tar = mfspr(SPRN_TAR);
        }
 #endif
+
+       thread_pkey_regs_save(t);
 }
 
 static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1136,6 +1140,8 @@ static inline void restore_sprs(struct thread_struct *old_thread,
            old_thread->tidr != new_thread->tidr)
                mtspr(SPRN_TIDR, new_thread->tidr);
 #endif
+
+       thread_pkey_regs_restore(new_thread, old_thread);
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1404,7 +1410,7 @@ void show_regs(struct pt_regs * regs)
        print_msr_bits(regs->msr);
        pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
        trap = TRAP(regs);
-       if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+       if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
                pr_cont("CFAR: "REG" ", regs->orig_gpr3);
        if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
@@ -1504,14 +1510,15 @@ static int assign_thread_tidr(void)
 {
        int index;
        int err;
+       unsigned long flags;
 
 again:
        if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
                return -ENOMEM;
 
-       spin_lock(&vas_thread_id_lock);
+       spin_lock_irqsave(&vas_thread_id_lock, flags);
        err = ida_get_new_above(&vas_thread_ida, 1, &index);
-       spin_unlock(&vas_thread_id_lock);
+       spin_unlock_irqrestore(&vas_thread_id_lock, flags);
 
        if (err == -EAGAIN)
                goto again;
@@ -1519,9 +1526,9 @@ again:
                return err;
 
        if (index > MAX_THREAD_CONTEXT) {
-               spin_lock(&vas_thread_id_lock);
+               spin_lock_irqsave(&vas_thread_id_lock, flags);
                ida_remove(&vas_thread_ida, index);
-               spin_unlock(&vas_thread_id_lock);
+               spin_unlock_irqrestore(&vas_thread_id_lock, flags);
                return -ENOMEM;
        }
 
@@ -1530,9 +1537,11 @@ again:
 
 static void free_thread_tidr(int id)
 {
-       spin_lock(&vas_thread_id_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&vas_thread_id_lock, flags);
        ida_remove(&vas_thread_ida, id);
-       spin_unlock(&vas_thread_id_lock);
+       spin_unlock_irqrestore(&vas_thread_id_lock, flags);
 }
 
 /*
@@ -1584,6 +1593,7 @@ int set_thread_tidr(struct task_struct *t)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(set_thread_tidr);
 
 #endif /* CONFIG_PPC64 */
 
@@ -1669,7 +1679,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                        childregs->gpr[14] = ppc_function_entry((void *)usp);
 #ifdef CONFIG_PPC64
                clear_tsk_thread_flag(p, TIF_32BIT);
-               childregs->softe = 1;
+               childregs->softe = IRQS_ENABLED;
 #endif
                childregs->gpr[15] = kthread_arg;
                p->thread.regs = NULL;  /* no user register state */
@@ -1860,6 +1870,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
        current->thread.tm_tfiar = 0;
        current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+       thread_pkey_regs_init(&current->thread);
 }
 EXPORT_SYMBOL(start_thread);
 
index b15bae265c90362792b74193244fa5a8b1949c42..4dffef947b8ab57e47c8019c3e8f6f7b4ad04299 100644 (file)
@@ -59,6 +59,7 @@
 #include <asm/epapr_hcalls.h>
 #include <asm/firmware.h>
 #include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
 
 #include <mm/mmu_decl.h>
 
@@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
 
 #ifdef CONFIG_PPC_PSERIES
 /*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+                                       const __be32 **usm)
 {
-       const __be32 *dm, *ls, *usm;
-       int l;
-       unsigned long n, flags;
-       u64 base, size, memblock_size;
-       unsigned int is_kexec_kdump = 0, rngs;
-
-       ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
-       if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
-               return 0;
-       memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+       u64 base, size;
+       int is_kexec_kdump = 0, rngs;
 
-       dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
-       if (dm == NULL || l < sizeof(__be32))
-               return 0;
+       base = lmb->base_addr;
+       size = drmem_lmb_size();
+       rngs = 1;
 
-       n = of_read_number(dm++, 1);    /* number of entries */
-       if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
-               return 0;
+       /*
+        * Skip this block if the reserved bit is set in flags
+        * or if the block is not assigned to this partition.
+        */
+       if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+           !(lmb->flags & DRCONF_MEM_ASSIGNED))
+               return;
 
-       /* check if this is a kexec/kdump kernel. */
-       usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
-                                                &l);
-       if (usm != NULL)
+       if (*usm)
                is_kexec_kdump = 1;
 
-       for (; n != 0; --n) {
-               base = dt_mem_next_cell(dt_root_addr_cells, &dm);
-               flags = of_read_number(&dm[3], 1);
-               /* skip DRC index, pad, assoc. list index, flags */
-               dm += 4;
-               /* skip this block if the reserved bit is set in flags
-                  or if the block is not assigned to this partition */
-               if ((flags & DRCONF_MEM_RESERVED) ||
-                               !(flags & DRCONF_MEM_ASSIGNED))
-                       continue;
-               size = memblock_size;
-               rngs = 1;
+       if (is_kexec_kdump) {
+               /*
+                * For each memblock in ibm,dynamic-memory, a
+                * corresponding entry in linux,drconf-usable-memory
+                * property contains a counter 'p' followed by 'p'
+                * (base, size) pairs. Now read the counter from the
+                * linux,drconf-usable-memory property.
+                */
+               rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+               if (!rngs) /* there are no (base, size) pairs */
+                       return;
+       }
+
+       do {
                if (is_kexec_kdump) {
-                       /*
-                        * For each memblock in ibm,dynamic-memory, a corresponding
-                        * entry in linux,drconf-usable-memory property contains
-                        * a counter 'p' followed by 'p' (base, size) duple.
-                        * Now read the counter from
-                        * linux,drconf-usable-memory property
-                        */
-                       rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
-                       if (!rngs) /* there are no (base, size) duple */
+                       base = dt_mem_next_cell(dt_root_addr_cells, usm);
+                       size = dt_mem_next_cell(dt_root_size_cells, usm);
+               }
+
+               if (iommu_is_off) {
+                       if (base >= 0x80000000ul)
                                continue;
+                       if ((base + size) > 0x80000000ul)
+                               size = 0x80000000ul - base;
                }
-               do {
-                       if (is_kexec_kdump) {
-                               base = dt_mem_next_cell(dt_root_addr_cells,
-                                                        &usm);
-                               size = dt_mem_next_cell(dt_root_size_cells,
-                                                        &usm);
-                       }
-                       if (iommu_is_off) {
-                               if (base >= 0x80000000ul)
-                                       continue;
-                               if ((base + size) > 0x80000000ul)
-                                       size = 0x80000000ul - base;
-                       }
-                       memblock_add(base, size);
-               } while (--rngs);
-       }
-       memblock_dump_all();
-       return 0;
+
+               DBG("Adding: %llx -> %llx\n", base, size);
+               memblock_add(base, size);
+       } while (--rngs);
 }
-#else
-#define early_init_dt_scan_drconf_memory(node) 0
 #endif /* CONFIG_PPC_PSERIES */
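
The kexec/kdump branch above is the whole grammar of linux,drconf-usable-memory: per LMB, a count followed by that many (base, size) pairs. Isolated from the surrounding LMB walk, the parse is just (a sketch restating the loop above):

/* Per-LMB usable-memory walk: a count, then count (base, size) pairs. */
static void __init add_usable_ranges(const __be32 **usm)
{
	int n = dt_mem_next_cell(dt_root_size_cells, usm);

	while (n--) {
		u64 base = dt_mem_next_cell(dt_root_addr_cells, usm);
		u64 size = dt_mem_next_cell(dt_root_size_cells, usm);

		memblock_add(base, size);
	}
}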
 
 static int __init early_init_dt_scan_memory_ppc(unsigned long node,
                                                const char *uname,
                                                int depth, void *data)
 {
+#ifdef CONFIG_PPC_PSERIES
        if (depth == 1 &&
-           strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
-               return early_init_dt_scan_drconf_memory(node);
+           strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+               walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+               return 0;
+       }
+#endif
        
        return early_init_dt_scan_memory(node, uname, depth, data);
 }
index 02190e90c7aef4a1a71d513542ebcef556295753..adf044daafd763a544685b92f60dbe434cc64f90 100644 (file)
@@ -869,10 +869,12 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
                .reserved2 = 0,
                .reserved3 = 0,
                .subprocessors = 1,
+               .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
                .intarch = 0,
                .mmu = 0,
                .hash_ext = 0,
                .radix_ext = 0,
        },
 
        /* option vector 6: IBM PAPR hints */
index f52ad5bb710960906b8ae61400688845e2811dd5..ca72d7391d404f9acb4dee60b800a2b1edd2f03c 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/context_tracking.h>
 
 #include <linux/uaccess.h>
+#include <linux/pkeys.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
@@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
        if (regno == PT_DSCR)
                return get_user_dscr(task, data);
 
+#ifdef CONFIG_PPC64
+       /*
+        * softe copies the paca->irq_soft_mask state. Since irq_soft_mask is
+        * no longer used as a simple flag, force userspace to always see the
+        * softe value as 1, which means interrupts are not soft disabled.
+        */
+       if (regno == PT_SOFTE) {
+               *data = 1;
+               return 0;
+       }
+#endif
+
        if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
                *data = ((unsigned long *)task->thread.regs)[regno];
                return 0;
@@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target,
        return ret;
 }
 #endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target,
+                      const struct user_regset *regset)
+{
+       if (!arch_pkeys_enabled())
+               return -ENODEV;
+
+       return regset->n;
+}
+
+static int pkey_get(struct task_struct *target,
+                   const struct user_regset *regset,
+                   unsigned int pos, unsigned int count,
+                   void *kbuf, void __user *ubuf)
+{
+       BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+       BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+       if (!arch_pkeys_enabled())
+               return -ENODEV;
+
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                  &target->thread.amr, 0,
+                                  ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target,
+                     const struct user_regset *regset,
+                     unsigned int pos, unsigned int count,
+                     const void *kbuf, const void __user *ubuf)
+{
+       u64 new_amr;
+       int ret;
+
+       if (!arch_pkeys_enabled())
+               return -ENODEV;
+
+       /* Only the AMR can be set from userspace */
+       if (pos != 0 || count != sizeof(new_amr))
+               return -EINVAL;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &new_amr, 0, sizeof(new_amr));
+       if (ret)
+               return ret;
+
+       /* UAMOR determines which bits of the AMR can be set from userspace. */
+       target->thread.amr = (new_amr & target->thread.uamor) |
+               (target->thread.amr & ~target->thread.uamor);
+
+       return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
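
From userspace the new regset is fetched with PTRACE_GETREGSET and note type NT_PPC_PKEY; the BUILD_BUG_ONs above pin the payload layout to AMR, IAMR, UAMOR in that order. A minimal sketch (assumes a stopped tracee and uapi headers that define NT_PPC_PKEY):

#include <elf.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

struct ppc_pkey_regs {		/* matches the copyout order above */
	unsigned long amr, iamr, uamor;
};

static void dump_pkey_regs(pid_t pid)
{
	struct ppc_pkey_regs r;
	struct iovec iov = { .iov_base = &r, .iov_len = sizeof(r) };

	if (ptrace(PTRACE_GETREGSET, pid, NT_PPC_PKEY, &iov) == 0)
		printf("AMR %lx IAMR %lx UAMOR %lx\n", r.amr, r.iamr, r.uamor);
}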
 /*
  * These are our native regset flavors.
  */
@@ -1809,6 +1877,9 @@ enum powerpc_regset {
        REGSET_EBB,             /* EBB registers */
        REGSET_PMR,             /* Performance Monitor Registers */
 #endif
+#ifdef CONFIG_PPC_MEM_KEYS
+       REGSET_PKEY,            /* AMR register */
+#endif
 };
 
 static const struct user_regset native_regsets[] = {
@@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = {
                .active = pmu_active, .get = pmu_get, .set = pmu_set
        },
 #endif
+#ifdef CONFIG_PPC_MEM_KEYS
+       [REGSET_PKEY] = {
+               .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+               .size = sizeof(u64), .align = sizeof(u64),
+               .active = pkey_active, .get = pkey_get, .set = pkey_set
+       },
+#endif
 };
 
 static const struct user_regset_view user_ppc_native_view = {
index c8c5f3a550c26c8f0cf44b3c1a2b32b5d6f9baf2..fb070d8cad07d4d3d096c3d98be253b82b5ea617 100644 (file)
@@ -261,19 +261,19 @@ static int __init proc_rtas_init(void)
        if (rtas_node == NULL)
                return -ENODEV;
 
-       proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+       proc_create("powerpc/rtas/progress", 0644, NULL,
                    &ppc_rtas_progress_operations);
-       proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+       proc_create("powerpc/rtas/clock", 0644, NULL,
                    &ppc_rtas_clock_operations);
-       proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+       proc_create("powerpc/rtas/poweron", 0644, NULL,
                    &ppc_rtas_poweron_operations);
-       proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
+       proc_create("powerpc/rtas/sensors", 0444, NULL,
                    &ppc_rtas_sensors_operations);
-       proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+       proc_create("powerpc/rtas/frequency", 0644, NULL,
                    &ppc_rtas_tone_freq_operations);
-       proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+       proc_create("powerpc/rtas/volume", 0644, NULL,
                    &ppc_rtas_tone_volume_operations);
-       proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
+       proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
                    &ppc_rtas_rmo_buf_ops);
        return 0;
 }
index f6f6a8a5103ab954773f252bd11e804604df82ad..10fabae2574d5910b3cbf5ada83d912879d71351 100644 (file)
@@ -727,7 +727,7 @@ static int __init rtas_flash_init(void)
                const struct rtas_flash_file *f = &rtas_flash_files[i];
                int token;
 
-               if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops))
+               if (!proc_create(f->filename, 0600, NULL, &f->fops))
                        goto enomem;
 
                /*
index 1da8b7d8c6ca5cda96343f8ba573abbe1abc2f83..fc600a8b1e77bfff7b5215593ce72729ca483d7d 100644 (file)
@@ -581,7 +581,7 @@ static int __init rtas_init(void)
        if (!rtas_log_buf)
                return -ENODEV;
 
-       entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
+       entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
                            &proc_rtas_log_operations);
        if (!entry)
                printk(KERN_ERR "Failed to create error_log proc entry\n");
index 3f33869c6486eb1b8bd4e9ea86d7f7dd918a8258..d73ec518ef8057e202c013b3dd4b98894ec7ea0b 100644 (file)
@@ -346,10 +346,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   loops_per_jiffy / (500000/HZ),
                   (loops_per_jiffy / (5000/HZ)) % 100);
 #endif
-
-#ifdef CONFIG_SMP
        seq_printf(m, "\n");
-#endif
+
        /* If this is the last cpu, print the summary */
        if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
                show_cpuinfo_summary(m);
@@ -379,10 +377,10 @@ static void c_stop(struct seq_file *m, void *v)
 }
 
 const struct seq_operations cpuinfo_op = {
-       .start =c_start,
-       .next = c_next,
-       .stop = c_stop,
-       .show = show_cpuinfo,
+       .start  = c_start,
+       .next   = c_next,
+       .stop   = c_stop,
+       .show   = show_cpuinfo,
 };
 
 void __init check_for_initrd(void)
@@ -459,13 +457,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
  */
 void __init smp_setup_cpu_maps(void)
 {
-       struct device_node *dn = NULL;
+       struct device_node *dn;
        int cpu = 0;
        int nthreads = 1;
 
        DBG("smp_setup_cpu_maps()\n");
 
-       while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+       for_each_node_by_type(dn, "cpu") {
                const __be32 *intserv;
                __be32 cpu_be;
                int j, len;
@@ -505,6 +503,11 @@ void __init smp_setup_cpu_maps(void)
                        set_cpu_possible(cpu, true);
                        cpu++;
                }
+
+               if (cpu >= nr_cpu_ids) {
+                       of_node_put(dn);
+                       break;
+               }
        }
 
        /* If no SMT supported, nthreads is forced to 1 */
index 21c18071d9d574b186b57881638fa5626cd1c105..3fc11e30308fc2700658ba0bebf6142140a8705b 100644 (file)
@@ -51,6 +51,10 @@ void record_spr_defaults(void);
 static inline void record_spr_defaults(void) { };
 #endif
 
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+#endif
+
 /*
  * Having this in kvm_ppc.h makes include dependencies too
  * tricky to solve for setup-common.c so have it here.
index e67413f4a8f0c1fc852c1231f09176c152ed9059..c388cc3357fa0e9f236277ac6f18a782a62c6bcd 100644 (file)
@@ -10,8 +10,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#define DEBUG
-
 #include <linux/export.h>
 #include <linux/string.h>
 #include <linux/sched.h>
@@ -69,6 +67,7 @@
 #include <asm/livepatch.h>
 #include <asm/opal.h>
 #include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
 
 #include "setup.h"
 
@@ -190,6 +189,8 @@ static void __init fixup_boot_paca(void)
        get_paca()->cpu_start = 1;
        /* Allow percpu accesses to work until we setup percpu data */
        get_paca()->data_offset = 0;
+       /* Mark interrupts disabled in PACA */
+       irq_soft_mask_set(IRQS_DISABLED);
 }
 
 static void __init configure_exceptions(void)
@@ -352,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr)
 void early_setup_secondary(void)
 {
        /* Mark interrupts disabled in PACA */
-       get_paca()->soft_enabled = 0;
+       irq_soft_mask_set(IRQS_DISABLED);
 
        /* Initialize the hash table or TLB handling */
        early_init_mmu_secondary();
@@ -568,25 +569,31 @@ void __init initialize_cache_info(void)
        DBG(" <- initialize_cache_info()\n");
 }
 
-/* This returns the limit below which memory accesses to the linear
- * mapping are guarnateed not to cause a TLB or SLB miss. This is
- * used to allocate interrupt or emergency stacks for which our
- * exception entry path doesn't deal with being interrupted.
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guaranteed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
  */
-static __init u64 safe_stack_limit(void)
+__init u64 ppc64_bolted_size(void)
 {
 #ifdef CONFIG_PPC_BOOK3E
        /* Freescale BookE bolts the entire linear mapping */
-       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+       /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
+       if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
                return linear_map_top;
        /* Other BookE, we assume the first GB is bolted */
        return 1ul << 30;
 #else
+       /* BookS radix, does not take faults on linear mapping */
        if (early_radix_enabled())
                return ULONG_MAX;
 
-       /* BookS, the first segment is bolted */
-       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+       /* BookS hash, the first segment is bolted */
+       if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
                return 1UL << SID_SHIFT_1T;
        return 1UL << SID_SHIFT;
 #endif
@@ -594,7 +601,7 @@ static __init u64 safe_stack_limit(void)
 
 void __init irqstack_early_init(void)
 {
-       u64 limit = safe_stack_limit();
+       u64 limit = ppc64_bolted_size();
        unsigned int i;
 
        /*
@@ -679,7 +686,7 @@ void __init emergency_stack_init(void)
         * initialized in kernel/irq.c. These are initialized here in order
         * to have emergency stacks available as early as possible.
         */
-       limit = min(safe_stack_limit(), ppc64_rma_size);
+       limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
        for_each_possible_cpu(i) {
                struct thread_info *ti;
@@ -857,7 +864,7 @@ static void init_fallback_flush(void)
        int cpu;
 
        l1d_size = ppc64_caches.l1d.size;
-       limit = min(safe_stack_limit(), ppc64_rma_size);
+       limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
        /*
         * Align to L1d size, and size it at 2x L1d size, to catch possible
@@ -868,19 +875,8 @@ static void init_fallback_flush(void)
        memset(l1d_flush_fallback_area, 0, l1d_size * 2);
 
        for_each_possible_cpu(cpu) {
-               /*
-                * The fallback flush is currently coded for 8-way
-                * associativity. Different associativity is possible, but it
-                * will be treated as 8-way and may not evict the lines as
-                * effectively.
-                *
-                * 128 byte lines are mandatory.
-                */
-               u64 c = l1d_size / 8;
-
                paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
-               paca[cpu].l1d_flush_congruence = c;
-               paca[cpu].l1d_flush_sets = c / 128;
+               paca[cpu].l1d_flush_size = l1d_size;
        }
 }
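
For context, the renamed ppc64_bolted_size() is used as an upper bound for early allocations that must never take an SLB/TLB fault. A rough sketch of a caller, modelled on the stack allocations in this file (the exact allocator call is an assumption):

	/* sketch: place an early per-CPU stack below the bolted limit */
	u64 limit = min(ppc64_bolted_size(), ppc64_rma_size);
	void *stack = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));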
 
index aded811696488e0122e62795d57aecb01770af3e..a46de0035214dc94772960245861a7bb43f16b8c 100644 (file)
@@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs,
 {
        elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
        int i;
+       /* Force the user to always see softe as 1 (interrupts enabled) */
+       elf_greg_t64 softe = 0x1;
 
        WARN_ON(!FULL_REGS(regs));
 
        for (i = 0; i <= PT_RESULT; i ++) {
                if (i == 14 && !FULL_REGS(regs))
                        i = 32;
+               if (i == PT_SOFTE) {
+                       if (__put_user((unsigned int)softe, &frame->mc_gregs[i]))
+                               return -EFAULT;
+                       else
+                               continue;
+               }
                if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
                        return -EFAULT;
        }
index 4b9ca3570344c17ba8fa5fa218fe132f01a3f446..720117690822db346293cf9ebfc84e89676ee376 100644 (file)
@@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
        struct pt_regs *regs = tsk->thread.regs;
        unsigned long msr = regs->msr;
        long err = 0;
+       /* Force the user to always see softe as 1 (interrupts enabled) */
+       unsigned long softe = 0x1;
 
        BUG_ON(tsk != current);
 
@@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc,
        WARN_ON(!FULL_REGS(regs));
        err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
        err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+       err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
        err |= __put_user(signr, &sc->signal);
        err |= __put_user(handler, &sc->handler);
        if (set != NULL)
@@ -207,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
        elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
 #endif
        struct pt_regs *regs = tsk->thread.regs;
-       unsigned long msr = tsk->thread.ckpt_regs.msr;
+       unsigned long msr = tsk->thread.regs->msr;
        long err = 0;
 
        BUG_ON(tsk != current);
@@ -216,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 
        WARN_ON(tm_suspend_disabled);
 
+       /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+        * it contains the correct FP, VEC, VSX state after we treclaimed
+        * the transaction and giveup_all() was called on reclaiming.
+        */
+       msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
+
        /* Remove TM bits from thread's MSR.  The MSR in the sigcontext
         * just indicates to userland that we were doing a transaction, but we
         * don't want to return in transactional state.  This also ensures
index e0a4c1f82e25a33de0d3b0c86fbb8ea8c8f9a2ba..bbe7634b3a43538cbbcd11e03cc515aba99b0deb 100644 (file)
@@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
 #ifdef CONFIG_KEXEC_CORE
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
+       int cpu;
+
        smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
+       if (kdump_in_progress() && crash_wake_offline) {
+               for_each_present_cpu(cpu) {
+                       if (cpu_online(cpu))
+                               continue;
+                       /*
+                        * crash_ipi_callback will wait for
+                        * all CPUs, including offline CPUs.
+                        * We don't care about nmi_ipi_function.
+                        * Offline CPUs will jump straight into
+                        * crash_ipi_callback, so we can skip the
+                        * entire NMI dance and the wait for
+                        * CPUs to clear the pending mask, etc.
+                        */
+                       do_smp_send_nmi_ipi(cpu);
+               }
+       }
 }
 #endif
 
index b8d4a1dac39fc1524d77297dcda0c93ae9675edf..5a8bfee6e1877c58ae607445ea77af1ed6b2e869 100644 (file)
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
 
 /*
   Lets only enable read for phyp resources and
@@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
 
 /*
  * This is the system wide DSCR register default value. Any
@@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu)
 
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_create_file(s, &dev_attr_pir);
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206))
+               device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
@@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu)
 
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_remove_file(s, &dev_attr_pir);
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206))
+               device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
index fe6f3a28545578524045a6f8583e691b233f3738..a32823dcd9a4d24444cb5291f17ec79bc7fe23ef 100644 (file)
@@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
 void accumulate_stolen_time(void)
 {
        u64 sst, ust;
-       u8 save_soft_enabled = local_paca->soft_enabled;
+       unsigned long save_irq_soft_mask = irq_soft_mask_return();
        struct cpu_accounting_data *acct = &local_paca->accounting;
 
        /* We are called early in the exception entry, before
@@ -253,7 +253,7 @@ void accumulate_stolen_time(void)
         * needs to reflect that so various debug stuff doesn't
         * complain
         */
-       local_paca->soft_enabled = 0;
+       irq_soft_mask_set(IRQS_DISABLED);
 
        sst = scan_dispatch_log(acct->starttime_user);
        ust = scan_dispatch_log(acct->starttime);
@@ -261,7 +261,7 @@ void accumulate_stolen_time(void)
        acct->utime -= ust;
        acct->steal_time += ust + sst;
 
-       local_paca->soft_enabled = save_soft_enabled;
+       irq_soft_mask_set(save_irq_soft_mask);
 }
 
 static inline u64 calculate_stolen_time(u64 stop_tb)
index c93f1e6a9fff9359749a429aca067a59b4edc5c8..1e48d157196a6157b014d0ed6046cfde92d40777 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/sched/debug.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/pkeys.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
@@ -38,6 +39,8 @@
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 #include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -142,6 +145,28 @@ static int die_owner = -1;
 static unsigned int die_nest_count;
 static int die_counter;
 
+extern void panic_flush_kmsg_start(void)
+{
+       /*
+        * This is mostly taken from kernel/panic.c, but tries to do
+        * relatively minimal work. Don't use delay functions (TB may
+        * be broken), don't crash dump (need to set a firmware log),
+        * don't run notifiers. We do want to get some information to
+        * the Linux console.
+        */
+       console_verbose();
+       bust_spinlocks(1);
+}
+
+extern void panic_flush_kmsg_end(void)
+{
+       printk_safe_flush_on_panic();
+       kmsg_dump(KMSG_DUMP_PANIC);
+       bust_spinlocks(0);
+       debug_locks_off();
+       console_flush_on_panic();
+}
+
 static unsigned long oops_begin(struct pt_regs *regs)
 {
        int cpu;
@@ -266,7 +291,9 @@ void user_single_step_siginfo(struct task_struct *tsk,
        info->si_addr = (void __user *)regs->nip;
 }
 
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+
+void _exception_pkey(int signr, struct pt_regs *regs, int code,
+               unsigned long addr, int key)
 {
        siginfo_t info;
        const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
@@ -289,13 +316,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
                local_irq_enable();
 
        current->thread.trap_nr = code;
+
+       /*
+        * Save all the pkey registers AMR/IAMR/UAMOR. E.g. core dumps
+        * need to capture their contents if the task gets killed.
+        */
+       thread_pkey_regs_save(&current->thread);
+
        memset(&info, 0, sizeof(info));
        info.si_signo = signr;
        info.si_code = code;
        info.si_addr = (void __user *) addr;
+       info.si_pkey = key;
+
        force_sig_info(signr, &info, current);
 }
 
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+       _exception_pkey(signr, regs, code, addr, 0);
+}
+
 void system_reset_exception(struct pt_regs *regs)
 {
        /*
@@ -337,7 +378,7 @@ void system_reset_exception(struct pt_regs *regs)
         * No debugger or crash dump registered, print logs then
         * panic.
         */
-       __die("System Reset", regs, SIGABRT);
+       die("System Reset", regs, SIGABRT);
 
        mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -1564,7 +1605,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
        u8 status;
        bool hv;
 
-       hv = (regs->trap == 0xf80);
+       hv = (TRAP(regs) == 0xf80);
        if (hv)
                value = mfspr(SPRN_HFSCR);
        else
@@ -2113,13 +2154,13 @@ static int __init ppc_warn_emulated_init(void)
        if (!dir)
                return -ENOMEM;
 
-       d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
+       d = debugfs_create_u32("do_warn", 0644, dir,
                               &ppc_warn_emulated);
        if (!d)
                goto fail;
 
        for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
-               d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
+               d = debugfs_create_u32(entries[i].name, 0644, dir,
                                       (u32 *)&entries[i].val.counter);
                if (!d)
                        goto fail;
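
The panic_flush_kmsg_start()/panic_flush_kmsg_end() pair added above is meant to bracket emergency output on paths that cannot rely on timers or notifiers. A hypothetical caller sketch (the function and message below are illustrative, not code from this series):

	static void fatal_error_report_sketch(struct pt_regs *regs)
	{
		panic_flush_kmsg_start();	/* console_verbose + bust_spinlocks */
		pr_emerg("Fatal platform error\n");
		show_regs(regs);
		panic_flush_kmsg_end();		/* flush printk-safe buffers, kmsg_dump */
	}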
index 3820213248836474c3db105f0a4fe2d0fad3d0d7..c002adcc694c66966c67fceba4db34a1bf685059 100644 (file)
@@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
        cmpwi   cr0,r3,CLOCK_REALTIME
        cmpwi   cr1,r3,CLOCK_MONOTONIC
        cror    cr0*4+eq,cr0*4+eq,cr1*4+eq
+
+       cmpwi   cr5,r3,CLOCK_REALTIME_COARSE
+       cmpwi   cr6,r3,CLOCK_MONOTONIC_COARSE
+       cror    cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+       cror    cr0*4+eq,cr0*4+eq,cr5*4+eq
        bne     cr0,99f
 
        mflr    r12                     /* r12 saves lr */
@@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
        bl      V_LOCAL_FUNC(__get_datapage)    /* get data page */
        lis     r7,NSEC_PER_SEC@h       /* want nanoseconds */
        ori     r7,r7,NSEC_PER_SEC@l
+       beq     cr5,70f
 50:    bl      V_LOCAL_FUNC(__do_get_tspec)    /* get time from tb & kernel */
        bne     cr1,80f                 /* if not monotonic, all done */
 
@@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
        ld      r0,CFG_TB_UPDATE_COUNT(r3)
         cmpld   cr0,r0,r8              /* check if updated */
        bne-    50b
+       b       78f
 
-       /* Add wall->monotonic offset and check for overflow or underflow.
+       /*
+        * For coarse clocks we get data directly from the vdso data page, so
+        * we don't need to call __do_get_tspec, but we still need to do the
+        * counter trick.
         */
-       add     r4,r4,r6
-       add     r5,r5,r9
-       cmpd    cr0,r5,r7
-       cmpdi   cr1,r5,0
-       blt     1f
-       subf    r5,r7,r5
-       addi    r4,r4,1
-1:     bge     cr1,80f
-       addi    r4,r4,-1
-       add     r5,r5,r7
+70:    ld      r8,CFG_TB_UPDATE_COUNT(r3)
+       andi.   r0,r8,1                 /* pending update ? loop */
+       bne-    70b
+       add     r3,r3,r0                /* r0 is already 0 */
+
+       /*
+        * CLOCK_REALTIME_COARSE; the values below are also needed
+        * for CLOCK_MONOTONIC_COARSE.
+        */
+       ld      r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+       ld      r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+       bne     cr6,75f
+
+       /* CLOCK_MONOTONIC_COARSE */
+       lwa     r6,WTOM_CLOCK_SEC(r3)
+       lwa     r9,WTOM_CLOCK_NSEC(r3)
+
+       /* check if counter has updated */
+       or      r0,r6,r9
+75:    or      r0,r0,r4
+       or      r0,r0,r5
+       xor     r0,r0,r0
+       add     r3,r3,r0
+       ld      r0,CFG_TB_UPDATE_COUNT(r3)
+       cmpld   cr0,r0,r8               /* check if updated */
+       bne-    70b
+
+       /* Counter has not updated, so continue calculating proper values for
+        * sec and nsec if monotonic coarse, or just return with the proper
+        * values for realtime.
+        */
+       bne     cr6,80f
+
+       /* Add wall->monotonic offset and check for overflow or underflow */
+78:    add     r4,r4,r6
+       add     r5,r5,r9
+       cmpd    cr0,r5,r7
+       cmpdi   cr1,r5,0
+       blt     79f
+       subf    r5,r7,r5
+       addi    r4,r4,1
+79:    bge     cr1,80f
+       addi    r4,r4,-1
+       add     r5,r5,r7
 
 80:    std     r4,TSPC64_TV_SEC(r11)
        std     r5,TSPC64_TV_NSEC(r11)
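
The coarse-clock path above is the assembly form of the usual vDSO read loop: load the values, then re-check the update counter and retry if a writer intervened. A rough C sketch, assuming a vdso data layout matching the offsets used by the assembly (struct and field names here are illustrative):

	struct vdso_data_sketch {
		unsigned long tb_update_count;	/* odd while an update is in flight */
		long stamp_xtime_sec, stamp_xtime_nsec;
		int wtom_clock_sec, wtom_clock_nsec;
	};

	static void coarse_gettime_sketch(const struct vdso_data_sketch *v,
					  int monotonic, long *sec, long *nsec)
	{
		unsigned long seq;

		do {
			while ((seq = v->tb_update_count) & 1)
				;			/* writer mid-update, spin */
			*sec  = v->stamp_xtime_sec;
			*nsec = v->stamp_xtime_nsec;
			if (monotonic) {
				*sec  += v->wtom_clock_sec;
				*nsec += v->wtom_clock_nsec;
			}
		} while (seq != v->tb_update_count);

		if (*nsec >= NSEC_PER_SEC) {		/* normalize, as the asm does at 78: */
			*nsec -= NSEC_PER_SEC;
			*sec  += 1;
		}
	}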
index 74901a87bf7a2c5416baf5cfbb903cde7f96d1dc..c8af90ff49f0526630ffb938c9c5d48cd0279933 100644 (file)
@@ -273,6 +273,7 @@ SECTIONS
 #ifdef CONFIG_PPC32
        .data : AT(ADDR(.data) - LOAD_OFFSET) {
                DATA_DATA
+               *(.data.rel*)
                *(.sdata)
                *(.sdata2)
                *(.got.plt) *(.got)
index 87da80ccced14d9ad8cdef50b27700416ead6dd8..6256dc3b0087d2967b8453bd7cd6658d09018d7c 100644 (file)
@@ -6,6 +6,9 @@
  *
  * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
  */
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
 #include <asm/paca.h>
 
 /*
- * The watchdog has a simple timer that runs on each CPU, once per timer
- * period. This is the heartbeat.
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having an SMP watchdog where all CPUs check all other
+ * CPUs' heartbeats.
  *
- * Then there are checks to see if the heartbeat has not triggered on a CPU
- * for the panic timeout period. Currently the watchdog only supports an
- * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ * The SMP checker can detect lockups on other CPUs. A global "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears its pending bit in its heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
  *
- * This is not an NMI watchdog, but Linux uses that name for a generic
- * watchdog in some cases, so NMI gets used in some places.
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bits still set in the pending mask have had
+ * their heartbeats stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can be used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect lockups even with hardware interrupts off.
  */
 
 static cpumask_t wd_cpus_enabled __read_mostly;
@@ -47,19 +80,7 @@ static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
 static DEFINE_PER_CPU(struct timer_list, wd_timer);
 static DEFINE_PER_CPU(u64, wd_timer_tb);
 
-/*
- * These are for the SMP checker. CPUs clear their pending bit in their
- * heartbeat. If the bitmask becomes empty, the time is noted and the
- * bitmask is refilled.
- *
- * All CPUs clear their bit in the pending mask every timer period.
- * Once all have cleared, the time is noted and the bits are reset.
- * If the time since all clear was greater than the panic timeout,
- * we can panic with the list of stuck CPUs.
- *
- * This will work best with NMI IPIs for crash code so the stuck CPUs
- * can be pulled out to get their backtraces.
- */
+/* SMP checker bits */
 static unsigned long __wd_smp_lock;
 static cpumask_t wd_smp_cpus_pending;
 static cpumask_t wd_smp_cpus_stuck;
@@ -90,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags)
 
 static void wd_lockup_ipi(struct pt_regs *regs)
 {
-       pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+       pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id());
        print_modules();
        print_irqtrace_events(current);
        if (regs)
@@ -131,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb)
        if (cpumask_weight(&wd_smp_cpus_pending) == 0)
                goto out;
 
-       pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
-                       cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+       pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+                cpu, cpumask_pr_args(&wd_smp_cpus_pending));
 
        if (!sysctl_hardlockup_all_cpu_backtrace) {
                /*
@@ -175,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
                if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
                        unsigned long flags;
 
-                       pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+                       pr_emerg("CPU %d became unstuck\n", cpu);
                        wd_smp_lock(&flags);
                        cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
                        wd_smp_unlock(&flags);
@@ -233,13 +254,10 @@ void soft_nmi_interrupt(struct pt_regs *regs)
                }
                set_cpu_stuck(cpu, tb);
 
-               pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+               pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip);
                print_modules();
                print_irqtrace_events(current);
-               if (regs)
-                       show_regs(regs);
-               else
-                       dump_stack();
+               show_regs(regs);
 
                wd_smp_unlock(&flags);
 
@@ -388,30 +406,8 @@ int __init watchdog_nmi_probe(void)
                                        "powerpc/watchdog:online",
                                        start_wd_on_cpu, stop_wd_on_cpu);
        if (err < 0) {
-               pr_warn("Watchdog could not be initialized");
+               pr_warn("could not be initialized");
                return err;
        }
        return 0;
 }
-
-static void handle_backtrace_ipi(struct pt_regs *regs)
-{
-       nmi_cpu_backtrace(regs);
-}
-
-static void raise_backtrace_ipi(cpumask_t *mask)
-{
-       unsigned int cpu;
-
-       for_each_cpu(cpu, mask) {
-               if (cpu == smp_processor_id())
-                       handle_backtrace_ipi(NULL);
-               else
-                       smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
-       }
-}
-
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
-{
-       nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
-}
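
For reference, the heartbeat half of the SMP checker described in the comment above amounts to roughly the following (a sketch with locking omitted; wd_smp_last_reset_tb is an assumed name for the global timestamp):

	static void wd_heartbeat_sketch(int cpu, u64 tb)
	{
		per_cpu(wd_timer_tb, cpu) = tb;		/* per-CPU heartbeat stamp */
		cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
		if (cpumask_empty(&wd_smp_cpus_pending)) {
			/* last CPU out: note the time and re-arm everyone */
			wd_smp_last_reset_tb = tb;
			cpumask_copy(&wd_smp_cpus_pending, &wd_cpus_enabled);
		}
	}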
index 2d46037ce93664199adee27806b8972d9130368d..e4f70c33fbc7db0c1d90b75bf134323625a676cf 100644 (file)
 static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 
 static int dynamic_mt_modes = 6;
-module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+module_param(dynamic_mt_modes, int, 0644);
 MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
 static int target_smt_mode;
-module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+module_param(target_smt_mode, int, 0644);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
 static bool indep_threads_mode = true;
@@ -109,12 +109,10 @@ static struct kernel_param_ops module_param_ops = {
        .get = param_get_int,
 };
 
-module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
-                                                       S_IRUGO | S_IWUSR);
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
 MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
 
-module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
-                                                       S_IRUGO | S_IWUSR);
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
index c356f9a40b244e8715eaabd4d7c5818aba547399..b11043b23c185be659ae82f0648eb1c343342128 100644 (file)
@@ -87,8 +87,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
                                   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
                }
                if (dsisr & DSISR_MC_TLB_MULTI) {
-                       if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
-                               cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+                       tlbiel_all_lpid(vcpu->kvm->arch.radix);
                        dsisr &= ~DSISR_MC_TLB_MULTI;
                }
                /* Any other errors we don't understand? */
@@ -105,8 +104,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
                reload_slb(vcpu);
                break;
        case SRR1_MC_IFETCH_TLBMULTI:
-               if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
-                       cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+               tlbiel_all_lpid(vcpu->kvm->arch.radix);
                break;
        default:
                handled = 0;
@@ -268,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
  *   secondary threads to proceed.
  * - All secondary threads will eventually call opal hmi handler on
  *   their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
  */
 
 long kvmppc_realmode_hmi_handler(void)
 {
-       int ptid = local_paca->kvm_hstate.ptid;
        bool resync_req;
 
-       /* This is only called on primary thread. */
-       BUG_ON(ptid != 0);
        __this_cpu_inc(irq_stat.hmi_exceptions);
 
+       if (hmi_handle_debugtrig(NULL) >= 0)
+               return 1;
+
        /*
         * By now primary thread has already completed guest->host
         * partition switch but haven't signaled secondaries yet.
index 26c11f678fbf401b2b99e639190c3a33b30fe742..8888e625a9991c4ab3dea1e05f1a6aa8508b30e1 100644 (file)
@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
 }
 
 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-static int global_invalidates(struct kvm *kvm, unsigned long flags)
+static int global_invalidates(struct kvm *kvm)
 {
        int global;
        int cpu;
@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
        if (v & HPTE_V_VALID) {
                hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
                rb = compute_tlbie_rb(v, pte_r, pte_index);
-               do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+               do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
                /*
                 * The reference (R) and change (C) bits in a HPT
                 * entry can be set by hardware at any time up until
@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
        if (kvm_is_radix(kvm))
                return H_FUNCTION;
-       global = global_invalidates(kvm, 0);
+       global = global_invalidates(kvm);
        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
                n = 0;
                for (; i < 4; ++i) {
@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                        rb = compute_tlbie_rb(v, r, pte_index);
                        hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
                                              HPTE_V_ABSENT);
-                       do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
-                                 true);
+                       do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
                        /* Don't lose R/C bit updates done by hardware */
                        r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
                        hpte[1] = cpu_to_be64(r);
index 9c61f736c75b2d0761ec4f9d2e385df75b6dc882..7886b313d135fb84333d34511feb78e29528d5e5 100644 (file)
@@ -1908,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        bne     27f
        bl      kvmppc_realmode_hmi_handler
        nop
+       cmpdi   r3, 0
        li      r12, BOOK3S_INTERRUPT_HMI
        /*
-        * At this point kvmppc_realmode_hmi_handler would have resync-ed
-        * the TB. Hence it is not required to subtract guest timebase
-        * offset from timebase. So, skip it.
+        * At this point kvmppc_realmode_hmi_handler may have resync-ed
+        * the TB, and if it has, we must not subtract the guest timebase
+        * offset from the timebase. So, skip it.
         *
         * Also, do not call kvmppc_subcore_exit_guest() because it has
         * been invoked as part of kvmppc_realmode_hmi_handler().
         */
-       b       30f
+       beq     30f
 
 27:
        /* Subtract timebase offset from timebase */
@@ -3248,7 +3249,7 @@ kvmppc_bad_host_intr:
        mfctr   r4
 #endif
        mfxer   r5
-       lbz     r6, PACASOFTIRQEN(r13)
+       lbz     r6, PACAIRQSOFTMASK(r13)
        std     r3, _LINK(r1)
        std     r4, _CTR(r1)
        std     r5, _XER(r1)
index d329b2add7e2f007e7a24a24cd57b8ffa2b2ebfe..b8356cdc0c043d2407b6cbbe172deec8b56f5dbf 100644 (file)
@@ -1039,7 +1039,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
                return;
        }
 
-       xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+       xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
                                           xics, &xics_debug_fops);
 
        pr_debug("%s: created %s\n", __func__, name);
index d469224c4ada8c23b923dc077b6249fa79ae0583..e0d881ab304e97482721c3e307ec0232c7a9fbb3 100644 (file)
 #include <asm/code-patching.h>
 #include <asm/setup.h>
 
-static int __patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
+                              unsigned int *patch_addr)
 {
        int err;
 
-       __put_user_size(instr, addr, 4, err);
+       __put_user_size(instr, patch_addr, 4, err);
        if (err)
                return err;
 
-       asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+       asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+                                                           "r" (exec_addr));
 
        return 0;
 }
 
+int raw_patch_instruction(unsigned int *addr, unsigned int instr)
+{
+       return __patch_instruction(addr, instr, addr);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
 
@@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr)
 int patch_instruction(unsigned int *addr, unsigned int instr)
 {
        int err;
-       unsigned int *dest = NULL;
+       unsigned int *patch_addr = NULL;
        unsigned long flags;
        unsigned long text_poke_addr;
        unsigned long kaddr = (unsigned long)addr;
@@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
         * when text_poke_area is not ready, but we still need
         * to allow patching. We just do the plain old patching
         */
-       if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
-               return __patch_instruction(addr, instr);
+       if (!this_cpu_read(text_poke_area))
+               return raw_patch_instruction(addr, instr);
 
        local_irq_save(flags);
 
@@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
                goto out;
        }
 
-       dest = (unsigned int *)(text_poke_addr) +
+       patch_addr = (unsigned int *)(text_poke_addr) +
                        ((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
 
-       /*
-        * We use __put_user_size so that we can handle faults while
-        * writing to dest and return err to handle faults gracefully
-        */
-       __put_user_size(instr, dest, 4, err);
-       if (!err)
-               asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
-                       ::"r" (dest), "r"(addr));
+       __patch_instruction(addr, instr, patch_addr);
 
        err = unmap_patch_area(text_poke_addr);
        if (err)
@@ -184,7 +184,7 @@ out:
 
 int patch_instruction(unsigned int *addr, unsigned int instr)
 {
-       return __patch_instruction(addr, instr);
+       return raw_patch_instruction(addr, instr);
 }
 
 #endif /* CONFIG_STRICT_KERNEL_RWX */
@@ -302,6 +302,11 @@ int instr_is_relative_branch(unsigned int instr)
        return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
 }
 
+int instr_is_relative_link_branch(unsigned int instr)
+{
+       return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK);
+}
+
 static unsigned long branch_iform_target(const unsigned int *instr)
 {
        signed long imm;
index a95ea007d654d5db2b78d811a4ef21a750a95609..73697c4e34681b59ec2f1caaf22136ac07dbacf8 100644 (file)
@@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
                }
        }
 
-       patch_instruction(dest, instr);
+       raw_patch_instruction(dest, instr);
 
        return 0;
 }
@@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
        }
 
        for (; dest < end; dest++)
-               patch_instruction(dest, PPC_INST_NOP);
+               raw_patch_instruction(dest, PPC_INST_NOP);
 
        return 0;
 }
@@ -170,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 
        for (; start < end; start++) {
                dest = (void *)start + *start;
-               patch_instruction(dest, PPC_INST_LWSYNC);
+               raw_patch_instruction(dest, PPC_INST_LWSYNC);
        }
 }
 
@@ -188,7 +188,7 @@ static void do_final_fixups(void)
        length = (__end_interrupts - _stext) / sizeof(int);
 
        while (length--) {
-               patch_instruction(dest, *src);
+               raw_patch_instruction(dest, *src);
                src++;
                dest++;
        }
index f29212e40f40928e9d4b7e5c5a28bc294e8d0051..849f50cd62f289d82500ce05a8d97fb7829cee34 100644 (file)
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
        /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
 #ifdef CONFIG_PIN_TLB_DATA
        unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
-       unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+       unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
 #ifdef CONFIG_PIN_TLB_IMMR
        int i = 29;
 #else
@@ -79,7 +79,7 @@ void __init MMU_init_hw(void)
        for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
                mtspr(SPRN_MD_CTR, ctr | (i << 8));
                mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
-               mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+               mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
                mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
                addr += LARGE_PAGE_SIZE_8M;
                mem -= LARGE_PAGE_SIZE_8M;
index 76a6b057d4546083017d2c69782214d4f4784523..f06f3577d8d1e83fe23e34e8771a8041a7f76b86 100644 (file)
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
 obj-y                          := fault.o mem.o pgtable.o mmap.o \
                                   init_$(BITS).o pgtable_$(BITS).o \
-                                  init-common.o mmu_context.o
+                                  init-common.o mmu_context.o drmem.o
 obj-$(CONFIG_PPC_MMU_NOHASH)   += mmu_context_nohash.o tlb_nohash.o \
                                   tlb_nohash_low.o
 obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_$(BITS)e.o
@@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE)  += copro_fault.o
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += mmu_context_iommu.o
 obj-$(CONFIG_PPC_PTDUMP)       += dump_linuxpagetables.o
 obj-$(CONFIG_PPC_HTDUMP)       += dump_hashpagetable.o
+obj-$(CONFIG_PPC_MEM_KEYS)     += pkeys.o
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
new file mode 100644 (file)
index 0000000..1604110
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ * Dynamic reconfiguration memory support
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "drmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <asm/prom.h>
+#include <asm/drmem.h>
+
+static struct drmem_lmb_info __drmem_info;
+struct drmem_lmb_info *drmem_info = &__drmem_info;
+
+u64 drmem_lmb_memory_max(void)
+{
+       struct drmem_lmb *last_lmb;
+
+       last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+       return last_lmb->base_addr + drmem_lmb_size();
+}
+
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+       /*
+        * Return the value of the lmb flags field minus the reserved
+        * bit used internally for hotplug processing.
+        */
+       return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+       struct property *new_prop;
+
+       new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+       if (!new_prop)
+               return NULL;
+
+       new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+       new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+       if (!new_prop->name || !new_prop->value) {
+               kfree(new_prop->name);
+               kfree(new_prop->value);
+               kfree(new_prop);
+               return NULL;
+       }
+
+       new_prop->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+       of_property_set_flag(new_prop, OF_DYNAMIC);
+#endif
+       return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+                             struct property *prop)
+{
+       struct property *new_prop;
+       struct of_drconf_cell_v1 *dr_cell;
+       struct drmem_lmb *lmb;
+       u32 *p;
+
+       new_prop = clone_property(prop, prop->length);
+       if (!new_prop)
+               return -1;
+
+       p = new_prop->value;
+       *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+       dr_cell = (struct of_drconf_cell_v1 *)p;
+
+       for_each_drmem_lmb(lmb) {
+               dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+               dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+               dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+               dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+               dr_cell++;
+       }
+
+       of_update_property(memory, new_prop);
+       return 0;
+}
+
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+                               struct drmem_lmb *lmb)
+{
+       dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+       dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+       dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+       dr_cell->flags = cpu_to_be32(lmb->flags);
+}
+
+static int drmem_update_dt_v2(struct device_node *memory,
+                             struct property *prop)
+{
+       struct property *new_prop;
+       struct of_drconf_cell_v2 *dr_cell;
+       struct drmem_lmb *lmb, *prev_lmb;
+       u32 lmb_sets, prop_sz, seq_lmbs;
+       u32 *p;
+
+       /* First pass, determine how many LMB sets are needed. */
+       lmb_sets = 0;
+       prev_lmb = NULL;
+       for_each_drmem_lmb(lmb) {
+               if (!prev_lmb) {
+                       prev_lmb = lmb;
+                       lmb_sets++;
+                       continue;
+               }
+
+               if (prev_lmb->aa_index != lmb->aa_index ||
+                   prev_lmb->flags != lmb->flags)
+                       lmb_sets++;
+
+               prev_lmb = lmb;
+       }
+
+       prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+       new_prop = clone_property(prop, prop_sz);
+       if (!new_prop)
+               return -1;
+
+       p = new_prop->value;
+       *p++ = cpu_to_be32(lmb_sets);
+
+       dr_cell = (struct of_drconf_cell_v2 *)p;
+
+       /* Second pass, populate the LMB set data */
+       prev_lmb = NULL;
+       seq_lmbs = 0;
+       for_each_drmem_lmb(lmb) {
+               if (prev_lmb == NULL) {
+                       /* Start of first LMB set */
+                       prev_lmb = lmb;
+                       init_drconf_v2_cell(dr_cell, lmb);
+                       seq_lmbs++;
+                       continue;
+               }
+
+               if (prev_lmb->aa_index != lmb->aa_index ||
+                   prev_lmb->flags != lmb->flags) {
+                       /* end of one set, start of another */
+                       dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+                       dr_cell++;
+
+                       init_drconf_v2_cell(dr_cell, lmb);
+                       seq_lmbs = 1;
+               } else {
+                       seq_lmbs++;
+               }
+
+               prev_lmb = lmb;
+       }
+
+       /* close out last LMB set */
+       dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+       of_update_property(memory, new_prop);
+       return 0;
+}
+
+int drmem_update_dt(void)
+{
+       struct device_node *memory;
+       struct property *prop;
+       int rc = -1;
+
+       memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+       if (!memory)
+               return -1;
+
+       prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+       if (prop) {
+               rc = drmem_update_dt_v1(memory, prop);
+       } else {
+               prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
+               if (prop)
+                       rc = drmem_update_dt_v2(memory, prop);
+       }
+
+       of_node_put(memory);
+       return rc;
+}
+
+static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+                                      const __be32 **prop)
+{
+       const __be32 *p = *prop;
+
+       lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+       lmb->drc_index = of_read_number(p++, 1);
+
+       p++; /* skip reserved field */
+
+       lmb->aa_index = of_read_number(p++, 1);
+       lmb->flags = of_read_number(p++, 1);
+
+       *prop = p;
+}
+
+static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
+                       void (*func)(struct drmem_lmb *, const __be32 **))
+{
+       struct drmem_lmb lmb;
+       u32 i, n_lmbs;
+
+       n_lmbs = of_read_number(prop++, 1);
+
+       for (i = 0; i < n_lmbs; i++) {
+               read_drconf_v1_cell(&lmb, &prop);
+               func(&lmb, &usm);
+       }
+}
+
+static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+                                      const __be32 **prop)
+{
+       const __be32 *p = *prop;
+
+       dr_cell->seq_lmbs = of_read_number(p++, 1);
+       dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+       dr_cell->drc_index = of_read_number(p++, 1);
+       dr_cell->aa_index = of_read_number(p++, 1);
+       dr_cell->flags = of_read_number(p++, 1);
+
+       *prop = p;
+}
+
+static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
+                       void (*func)(struct drmem_lmb *, const __be32 **))
+{
+       struct of_drconf_cell_v2 dr_cell;
+       struct drmem_lmb lmb;
+       u32 i, j, lmb_sets;
+
+       lmb_sets = of_read_number(prop++, 1);
+
+       for (i = 0; i < lmb_sets; i++) {
+               read_drconf_v2_cell(&dr_cell, &prop);
+
+               for (j = 0; j < dr_cell.seq_lmbs; j++) {
+                       lmb.base_addr = dr_cell.base_addr;
+                       dr_cell.base_addr += drmem_lmb_size();
+
+                       lmb.drc_index = dr_cell.drc_index;
+                       dr_cell.drc_index++;
+
+                       lmb.aa_index = dr_cell.aa_index;
+                       lmb.flags = dr_cell.flags;
+
+                       func(&lmb, &usm);
+               }
+       }
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+                       void (*func)(struct drmem_lmb *, const __be32 **))
+{
+       const __be32 *prop, *usm;
+       int len;
+
+       prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+       if (!prop || len < dt_root_size_cells * sizeof(__be32))
+               return;
+
+       drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+       usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+
+       prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
+       if (prop) {
+               __walk_drmem_v1_lmbs(prop, usm, func);
+       } else {
+               prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
+                                          &len);
+               if (prop)
+                       __walk_drmem_v2_lmbs(prop, usm, func);
+       }
+
+       memblock_dump_all();
+}
+
+#endif
+
+static int __init init_drmem_lmb_size(struct device_node *dn)
+{
+       const __be32 *prop;
+       int len;
+
+       if (drmem_info->lmb_size)
+               return 0;
+
+       prop = of_get_property(dn, "ibm,lmb-size", &len);
+       if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+               pr_info("Could not determine LMB size\n");
+               return -1;
+       }
+
+       drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+       return 0;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+       const __be32 *prop;
+       u32 len;
+
+       prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
+       if (!prop || len < sizeof(unsigned int))
+               return NULL;
+
+       return prop;
+}
+
+void __init walk_drmem_lmbs(struct device_node *dn,
+                           void (*func)(struct drmem_lmb *, const __be32 **))
+{
+       const __be32 *prop, *usm;
+
+       if (init_drmem_lmb_size(dn))
+               return;
+
+       usm = of_get_usable_memory(dn);
+
+       prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+       if (prop) {
+               __walk_drmem_v1_lmbs(prop, usm, func);
+       } else {
+               prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+               if (prop)
+                       __walk_drmem_v2_lmbs(prop, usm, func);
+       }
+}
+
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+       struct drmem_lmb *lmb;
+
+       drmem_info->n_lmbs = of_read_number(prop++, 1);
+
+       drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+                                  GFP_KERNEL);
+       if (!drmem_info->lmbs)
+               return;
+
+       for_each_drmem_lmb(lmb)
+               read_drconf_v1_cell(lmb, &prop);
+}
+
+static void __init init_drmem_v2_lmbs(const __be32 *prop)
+{
+       struct drmem_lmb *lmb;
+       struct of_drconf_cell_v2 dr_cell;
+       const __be32 *p;
+       u32 i, j, lmb_sets;
+       int lmb_index;
+
+       lmb_sets = of_read_number(prop++, 1);
+
+       /* first pass, calculate the number of LMBs */
+       p = prop;
+       for (i = 0; i < lmb_sets; i++) {
+               read_drconf_v2_cell(&dr_cell, &p);
+               drmem_info->n_lmbs += dr_cell.seq_lmbs;
+       }
+
+       drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+                                  GFP_KERNEL);
+       if (!drmem_info->lmbs)
+               return;
+
+       /* second pass, read in the LMB information */
+       lmb_index = 0;
+       p = prop;
+
+       for (i = 0; i < lmb_sets; i++) {
+               read_drconf_v2_cell(&dr_cell, &p);
+
+               for (j = 0; j < dr_cell.seq_lmbs; j++) {
+                       lmb = &drmem_info->lmbs[lmb_index++];
+
+                       lmb->base_addr = dr_cell.base_addr;
+                       dr_cell.base_addr += drmem_info->lmb_size;
+
+                       lmb->drc_index = dr_cell.drc_index;
+                       dr_cell.drc_index++;
+
+                       lmb->aa_index = dr_cell.aa_index;
+                       lmb->flags = dr_cell.flags;
+               }
+       }
+}
+
+static int __init drmem_init(void)
+{
+       struct device_node *dn;
+       const __be32 *prop;
+
+       dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+       if (!dn) {
+               pr_info("No dynamic reconfiguration memory found\n");
+               return 0;
+       }
+
+       if (init_drmem_lmb_size(dn)) {
+               of_node_put(dn);
+               return 0;
+       }
+
+       prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+       if (prop) {
+               init_drmem_v1_lmbs(prop);
+       } else {
+               prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+               if (prop)
+                       init_drmem_v2_lmbs(prop);
+       }
+
+       of_node_put(dn);
+       return 0;
+}
+late_initcall(drmem_init);
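+
+A typical consumer of the walk_drmem_lmbs() interface added here would look something like the sketch below (the callback body and the DRMEM_LMB_ASSIGNED flag test are illustrative assumptions, not code from this series):
+
+	/* sketch: register each assigned LMB with memblock */
+	static void __init sketch_add_lmb(struct drmem_lmb *lmb, const __be32 **usm)
+	{
+		if (lmb->flags & DRMEM_LMB_ASSIGNED)
+			memblock_add(lmb->base_addr, drmem_lmb_size());
+	}
+
+	/* then, with dn pointing at the dynamic-reconfiguration node: */
+	/* walk_drmem_lmbs(dn, sketch_add_lmb); */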
index c2e7dea5949066c921c09ca096c6273ebdc829b9..876e2a3c79f201eb1a86187814efdf35d911e00b 100644 (file)
@@ -112,26 +112,25 @@ struct flag_info {
 
 static const struct flag_info flag_array[] = {
        {
-#ifdef CONFIG_PPC_BOOK3S_64
-               .mask   = _PAGE_PRIVILEGED,
-               .val    = 0,
-#else
-               .mask   = _PAGE_USER,
+               .mask   = _PAGE_USER | _PAGE_PRIVILEGED,
                .val    = _PAGE_USER,
-#endif
                .set    = "user",
                .clear  = "    ",
        }, {
-#if _PAGE_RO == 0
-               .mask   = _PAGE_RW,
+               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
                .val    = _PAGE_RW,
-#else
-               .mask   = _PAGE_RO,
-               .val    = 0,
-#endif
                .set    = "rw",
-               .clear  = "ro",
        }, {
+               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+               .val    = _PAGE_RO,
+               .set    = "ro",
+       }, {
+#if _PAGE_NA != 0
+               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+               .val    = _PAGE_NA,
+               .set    = "na",
+       }, {
+#endif
                .mask   = _PAGE_EXEC,
                .val    = _PAGE_EXEC,
                .set    = " X ",
@@ -213,7 +212,7 @@ static const struct flag_info flag_array[] = {
                .val    = H_PAGE_4K_PFN,
                .set    = "4K_pfn",
        }, {
-#endif
+#else /* CONFIG_PPC_64K_PAGES */
                .mask   = H_PAGE_F_GIX,
                .val    = H_PAGE_F_GIX,
                .set    = "f_gix",
@@ -224,14 +223,11 @@ static const struct flag_info flag_array[] = {
                .val    = H_PAGE_F_SECOND,
                .set    = "f_second",
        }, {
+#endif /* CONFIG_PPC_64K_PAGES */
 #endif
                .mask   = _PAGE_SPECIAL,
                .val    = _PAGE_SPECIAL,
                .set    = "special",
-       }, {
-               .mask   = _PAGE_SHARED,
-               .val    = _PAGE_SHARED,
-               .set    = "shared",
        }
 };
 
index 6e1e3903538065becbab2ad47febad597f5d6d49..866446cf2d9abd5ae1b0a5ebc1076e16feca3f4c 100644 (file)
@@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs)
  */
 
 static int
-__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
+               int pkey)
 {
        /*
         * If we are in kernel mode, bail out with a SEGV, this will
@@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
        if (!user_mode(regs))
                return SIGSEGV;
 
-       _exception(SIGSEGV, regs, si_code, address);
+       _exception_pkey(SIGSEGV, regs, si_code, address, pkey);
 
        return 0;
 }
 
 static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
 {
-       return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+       return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
 }
 
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+                       int pkey)
 {
        struct mm_struct *mm = current->mm;
 
@@ -137,17 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
         */
        up_read(&mm->mmap_sem);
 
-       return __bad_area_nosemaphore(regs, address, si_code);
+       return __bad_area_nosemaphore(regs, address, si_code, pkey);
 }
 
 static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 {
-       return __bad_area(regs, address, SEGV_MAPERR);
+       return __bad_area(regs, address, SEGV_MAPERR, 0);
+}
+
+static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
+                                   int pkey)
+{
+       return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
 }
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
-       return __bad_area(regs, address, SEGV_ACCERR);
+       return __bad_area(regs, address, SEGV_ACCERR, 0);
 }
 
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -432,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
+       if (error_code & DSISR_KEYFAULT)
+               return bad_key_fault_exception(regs, address,
+                                              get_mm_addr_key(mm, address));
+
        /*
         * We want to do this outside mmap_sem, because reading code around nip
         * can result in fault, which will cause a deadlock when called with
@@ -503,6 +515,31 @@ good_area:
         * the fault.
         */
        fault = handle_mm_fault(vma, address, flags);
+
+#ifdef CONFIG_PPC_MEM_KEYS
+       /*
+        * If the HPTE is not hashed, hardware will not detect
+        * a key fault. Let's check if we failed because of a
+        * software-detected key fault.
+        */
+       if (unlikely(fault & VM_FAULT_SIGSEGV) &&
+               !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+                       is_exec, 0)) {
+               /*
+                * The PGD-PDT...PMD-PTE tree may not have been fully set
+                * up, so we cannot walk it to locate the PTE and its key.
+                * Hence use vma_pkey() to get the key instead of
+                * get_mm_addr_key().
+                */
+               int pkey = vma_pkey(vma);
+
+               if (likely(pkey)) {
+                       up_read(&mm->mmap_sem);
+                       return bad_key_fault_exception(regs, address, pkey);
+               }
+       }
+#endif /* CONFIG_PPC_MEM_KEYS */
+
        major |= fault & VM_FAULT_MAJOR;
 
        /*
@@ -576,7 +613,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 
        /* kernel has accessed a bad area */
 
-       switch (regs->trap) {
+       switch (TRAP(regs)) {
        case 0x300:
        case 0x380:
                printk(KERN_ALERT "Unable to handle kernel paging request for "
index 6fa450c12d6d1b28c71429ac4aa10fdd4dabf2a0..5a69b51d08a3615f319a325536a209b6399db2a7 100644 (file)
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                   pte_t *ptep, unsigned long trap, unsigned long flags,
                   int ssize, int subpg_prot)
 {
+       real_pte_t rpte;
        unsigned long hpte_group;
        unsigned long rflags, pa;
        unsigned long old_pte, new_pte;
@@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         * need to add in 0x1 if it's a read-only user page
         */
        rflags = htab_convert_pte_flags(new_pte);
+       rpte = __real_pte(__pte(old_pte), ptep);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                /*
                 * There MIGHT be an HPTE for this pte
                 */
-               hash = hpt_hash(vpn, shift, ssize);
-               if (old_pte & H_PAGE_F_SECOND)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
+               unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
+                                                        rpte, 0);
 
-               if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+               if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
                                               MMU_PAGE_4K, ssize, flags) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }
@@ -118,8 +117,7 @@ repeat:
                        return -1;
                }
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
-                       (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
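
The pte_get_hash_gslot() helper used above is not shown in this diff, but its behaviour can be reconstructed from the open-coded sequences this series removes (the deleted lines in this hunk and in the 64K variant below); roughly:

	/* sketch, reconstructed from the removed open-coded lookup */
	static unsigned long pte_get_hash_gslot_sketch(unsigned long vpn,
						       unsigned long shift, int ssize,
						       real_pte_t rpte, unsigned int index)
	{
		unsigned long hash, hidx, gslot;

		hash = hpt_hash(vpn, shift, ssize);
		hidx = __rpte_to_hidx(rpte, index);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;
		gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		gslot += hidx & _PTEIDX_GROUP_IX;
		return gslot;
	}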
index 1a68cb19b0e33c0031a1568f9e977f1681b80cd8..2253bbc6a599d7804cb81dc49b2c6f82b81a435d 100644 (file)
 #include <linux/mm.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
+
 /*
- * index from 0 - 15
+ * Return true if the entry has a slot value which
+ * the software considers invalid.
  */
-bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+static inline bool hpte_soft_invalid(unsigned long hidx)
 {
-       unsigned long g_idx;
-       unsigned long ptev = pte_val(rpte.pte);
-
-       g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
-       index = index >> 2;
-       if (g_idx & (0x1 << index))
-               return true;
-       else
-               return false;
+       return ((hidx & 0xfUL) == 0xfUL);
 }
+
 /*
  * index from 0 - 15
  */
-static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
 {
-       unsigned long g_idx;
-
-       if (!(ptev & H_PAGE_COMBO))
-               return ptev;
-       index = index >> 2;
-       g_idx = 0x1 << index;
-
-       return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
+       return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
 }
 
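The rewrite above replaces the old per-subpage-group valid bits with a full
4-bit slot value ("hidx") per 4K subpage, kept in the second half of the 64K
PTE, with the all-ones nibble 0xf now reserved as the software-invalid marker
tested by hpte_soft_invalid(). A minimal sketch of the nibble arithmetic,
assuming sixteen nibbles packed into one 64-bit word (helper names are
hypothetical; the real accessors are __rpte_to_hidx() and pte_set_hidx()):

	/* Illustrative only: per-subpage hidx nibbles in a 64-bit word. */
	static unsigned long demo_hidx_get(unsigned long hidx_word,
					   unsigned int index)
	{
		return (hidx_word >> (index << 2)) & 0xfUL;
	}

	static unsigned long demo_hidx_set(unsigned long hidx_word,
					   unsigned int index, unsigned long slot)
	{
		hidx_word &= ~(0xfUL << (index << 2));	/* clear the old nibble */
		return hidx_word | ((slot & 0xfUL) << (index << 2));
	}
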
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
@@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                   int ssize, int subpg_prot)
 {
        real_pte_t rpte;
-       unsigned long *hidxp;
        unsigned long hpte_group;
        unsigned int subpg_index;
-       unsigned long rflags, pa, hidx;
+       unsigned long rflags, pa;
        unsigned long old_pte, new_pte, subpg_pte;
-       unsigned long vpn, hash, slot;
+       unsigned long vpn, hash, slot, gslot;
        unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
 
        /*
@@ -116,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
                 * On hash insert failure we use old pte value and we don't
                 * want slot information there if we have an insert failure.
                 */
-               old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
-               new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
+               old_pte &= ~H_PAGE_HASHPTE;
+               new_pte &= ~H_PAGE_HASHPTE;
                goto htab_insert_hpte;
        }
        /*
@@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
        if (__rpte_sub_valid(rpte, subpg_index)) {
                int ret;
 
-               hash = hpt_hash(vpn, shift, ssize);
-               hidx = __rpte_to_hidx(rpte, subpg_index);
-               if (hidx & _PTEIDX_SECONDARY)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += hidx & _PTEIDX_GROUP_IX;
-
-               ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+               gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
+                                          subpg_index);
+               ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
                                                 MMU_PAGE_4K, MMU_PAGE_4K,
                                                 ssize, flags);
+
                /*
-                *if we failed because typically the HPTE wasn't really here
+                * If we failed, it is typically because the HPTE wasn't really
                 * here, so we try an insertion.
                 */
                if (ret == -1)
@@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
        }
 
 htab_insert_hpte:
+
+       /*
+        * Initialize all hidx entries to the invalid value the first time
+        * the PTE is about to allocate a 4K HPTE.
+        */
+       if (!(old_pte & H_PAGE_COMBO))
+               rpte.hidx = INVALID_RPTE_HIDX;
+
        /*
         * handle H_PAGE_4K_PFN case
         */
@@ -172,15 +163,39 @@ repeat:
         * Primary is full, try the secondary
         */
        if (unlikely(slot == -1)) {
+               bool soft_invalid;
+
                hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
                slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
                                                rflags, HPTE_V_SECONDARY,
                                                MMU_PAGE_4K, MMU_PAGE_4K,
                                                ssize);
-               if (slot == -1) {
-                       if (mftb() & 0x1)
+
+               soft_invalid = hpte_soft_invalid(slot);
+               if (unlikely(soft_invalid)) {
+                       /*
+                        * We got a slot that is valid from a hardware point of
+                        * view, but we cannot use it: this special value, as
+                        * defined by hpte_soft_invalid(), is reserved to track
+                        * invalid slots. So invalidate it.
+                        */
+                       gslot = slot & _PTEIDX_GROUP_IX;
+                       mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
+                                                    MMU_PAGE_4K, MMU_PAGE_4K,
+                                                    ssize, 0);
+               }
+
+               if (unlikely(slot == -1 || soft_invalid)) {
+                       /*
+                        * For a soft-invalid slot, make sure we release a slot
+                        * from the primary group, in the hope that we acquire
+                        * that slot on the next attempt and thus do not get the
+                        * same soft-invalid slot again.
+                        */
+                       if (soft_invalid || (mftb() & 0x1))
                                hpte_group = ((hash & htab_hash_mask) *
                                              HPTES_PER_GROUP) & ~0x7UL;
+
                        mmu_hash_ops.hpte_remove(hpte_group);
                        /*
                         * FIXME!! Should be try the group from which we removed ?
@@ -198,21 +213,10 @@ repeat:
                                   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
                return -1;
        }
-       /*
-        * Insert slot number & secondary bit in PTE second half,
-        * clear H_PAGE_BUSY and set appropriate HPTE slot bit
-        * Since we have H_PAGE_BUSY set on ptep, we can be sure
-        * nobody is undating hidx.
-        */
-       hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
-       rpte.hidx &= ~(0xfUL << (subpg_index << 2));
-       *hidxp = rpte.hidx  | (slot << (subpg_index << 2));
-       new_pte = mark_subptegroup_valid(new_pte, subpg_index);
-       new_pte |=  H_PAGE_HASHPTE;
-       /*
-        * check __real_pte for details on matching smp_rmb()
-        */
-       smp_wmb();
+
+       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+       new_pte |= H_PAGE_HASHPTE;
+
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
 }
@@ -221,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
                    unsigned long vsid, pte_t *ptep, unsigned long trap,
                    unsigned long flags, int ssize)
 {
+       real_pte_t rpte;
        unsigned long hpte_group;
        unsigned long rflags, pa;
        unsigned long old_pte, new_pte;
@@ -257,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
        } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
+       rpte = __real_pte(__pte(old_pte), ptep);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -264,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
        vpn  = hpt_vpn(ea, vsid, ssize);
        if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+               unsigned long gslot;
+
                /*
                 * There MIGHT be an HPTE for this pte
                 */
-               hash = hpt_hash(vpn, shift, ssize);
-               if (old_pte & H_PAGE_F_SECOND)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
-               if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+               gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+               if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
                                               MMU_PAGE_64K, ssize,
                                               flags) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
@@ -322,9 +325,9 @@ repeat:
                                           MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
                        return -1;
                }
+
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
-                       (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 640cf566e98653ab43c06744b6cf9ef76622fa1c..a0675e91ad7d11318d8f630a378509e5703dfe70 100644 (file)
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+       unsigned long rb;
+
+       rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+       asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=0 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+                                       unsigned int pid,
+                                       unsigned int ric, unsigned int prs)
+{
+       unsigned long rb;
+       unsigned long rs;
+       unsigned int r = 0; /* hash format */
+
+       rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+       rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+       asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+                    : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+                    : "memory");
+}
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+       unsigned int set;
+
+       asm volatile("ptesync": : :"memory");
+
+       for (set = 0; set < num_sets; set++)
+               tlbiel_hash_set_isa206(set, is);
+
+       asm volatile("ptesync": : :"memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+       unsigned int set;
+
+       asm volatile("ptesync": : :"memory");
+
+       /*
+        * Flush the first set of the TLB, and any caching of partition table
+        * entries. Then flush the remaining sets of the TLB. Hash mode uses
+        * partition scoped TLB translations.
+        */
+       tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+       for (set = 1; set < num_sets; set++)
+               tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+       /*
+        * Now invalidate the process table cache.
+        *
+        * From ISA v3.0B p. 1078:
+        *     The following forms are invalid.
+        *      * PRS=1, R=0, and RIC!=2 (The only process-scoped
+        *        HPT caching is of the Process Table.)
+        */
+       tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+       asm volatile("ptesync": : :"memory");
+}
+
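For reference, a hedged summary of the RIC/PRS operands that
tlbiel_all_isa300() above passes to tlbiel_hash_set_isa300(); this paraphrases
ISA v3.0B and should be checked against the spec:

	/*
	 * ric = 0: invalidate TLB entries only
	 * ric = 2: invalidate TLB entries plus table/caching structures
	 * prs = 0: partition-scoped entries; prs = 1: process-scoped entries
	 *
	 * Hence the sequence above: set 0 with ric=2/prs=0 (TLB plus
	 * partition table cache), the remaining sets with ric=0/prs=0, and
	 * finally set 0 again with ric=2/prs=1 for the process table cache.
	 */
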
+void hash__tlbiel_all(unsigned int action)
+{
+       unsigned int is;
+
+       switch (action) {
+       case TLB_INVAL_SCOPE_GLOBAL:
+               is = 3;
+               break;
+       case TLB_INVAL_SCOPE_LPID:
+               is = 2;
+               break;
+       default:
+               BUG();
+       }
+
+       if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+               tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+       else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+               tlbiel_all_isa206(POWER8_TLB_SETS, is);
+       else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+               tlbiel_all_isa206(POWER7_TLB_SETS, is);
+       else
+               WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+
+       asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
 static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
                                                int apsize, int ssize)
 {
index 655a5a9a183d732dc332b2c926a87c8a4f714efa..7d07c7e17db6708334ea38cad711e1f5c32de1c6 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/memblock.h>
 #include <linux/context_tracking.h>
 #include <linux/libfdt.h>
+#include <linux/pkeys.h>
 
 #include <asm/debugfs.h>
 #include <asm/processor.h>
@@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
                 */
                rflags |= HPTE_R_M;
 
+       rflags |= pte_to_hpte_pkey_bits(pteflags);
        return rflags;
 }
 
@@ -606,7 +608,7 @@ static void init_hpte_page_sizes(void)
                        continue;       /* not a supported page size */
                for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
                        penc = mmu_psize_defs[bp].penc[ap];
-                       if (penc == -1)
+                       if (penc == -1 || !mmu_psize_defs[ap].shift)
                                continue;
                        shift = mmu_psize_defs[ap].shift - LP_SHIFT;
                        if (shift <= 0)
@@ -772,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
                int rc;
 
                rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
-               if (rc)
+               if (rc && (rc != -ENODEV))
                        printk(KERN_WARNING
                               "Unable to resize hash page table to target order %d: %d\n",
                               target_hpt_shift, rc);
@@ -979,8 +981,9 @@ void __init hash__early_init_devtree(void)
 
 void __init hash__early_init_mmu(void)
 {
+#ifndef CONFIG_PPC_64K_PAGES
        /*
-        * We have code in __hash_page_64K() and elsewhere, which assumes it can
+        * We have code in __hash_page_4K() and elsewhere, which assumes it can
         * do the following:
         *   new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
         *
@@ -991,6 +994,7 @@ void __init hash__early_init_mmu(void)
         * with a BUILD_BUG_ON().
         */
        BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul  << (H_PAGE_F_GIX_SHIFT + 3)));
+#endif /* CONFIG_PPC_64K_PAGES */
 
        htab_init_page_sizes();
 
@@ -1049,6 +1053,10 @@ void __init hash__early_init_mmu(void)
        pr_info("Initializing hash mmu with SLB\n");
        /* Initialize SLB management */
        slb_initialize();
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206)
+                       && cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }
 
 #ifdef CONFIG_SMP
@@ -1068,6 +1076,10 @@ void hash__early_init_mmu_secondary(void)
        }
        /* Initialize SLB */
        slb_initialize();
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206)
+                       && cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }
 #endif /* CONFIG_SMP */
 
@@ -1569,6 +1581,30 @@ out_exit:
        local_irq_restore(flags);
 }
 
+#ifdef CONFIG_PPC_MEM_KEYS
+/*
+ * Return the protection key associated with the given address and the
+ * mm_struct.
+ */
+u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+       pte_t *ptep;
+       u16 pkey = 0;
+       unsigned long flags;
+
+       if (!mm || !mm->pgd)
+               return 0;
+
+       local_irq_save(flags);
+       ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+       if (ptep)
+               pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+       local_irq_restore(flags);
+
+       return pkey;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
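This helper feeds the key-fault reporting path seen in the fault-handler hunk
earlier; a hedged sketch of such a call site (illustrative only, the actual
caller lives in the fault handling code):

	/* Hypothetical: report which key blocked an access at @address. */
	static int demo_report_key_fault(struct pt_regs *regs,
					 unsigned long address)
	{
		u16 pkey = get_mm_addr_key(current->mm, address);

		return bad_key_fault_exception(regs, address, pkey);
	}
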
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 static inline void tm_flush_hash_page(int local)
 {
@@ -1592,29 +1628,42 @@ static inline void tm_flush_hash_page(int local)
 }
 #endif
 
+/*
+ * Return the global hash slot that contains the HPTE corresponding to the
+ * given PTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+               int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+       unsigned long hash, gslot, hidx;
+
+       hash = hpt_hash(vpn, shift, ssize);
+       hidx = __rpte_to_hidx(rpte, subpg_index);
+       if (hidx & _PTEIDX_SECONDARY)
+               hash = ~hash;
+       gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+       gslot += hidx & _PTEIDX_GROUP_IX;
+       return gslot;
+}
+
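A sketch of how the new helper collapses the old open-coded hash/hidx dance
into one step, in the style of flush_hash_page() below (the wrapper function
and its arguments are illustrative):

	/* Invalidate the HPTE behind 4K subpage 0 of @rpte (sketch). */
	static void demo_invalidate(unsigned long vpn, real_pte_t rpte)
	{
		unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
		unsigned long gslot;

		gslot = pte_get_hash_gslot(vpn, shift, mmu_kernel_ssize, rpte, 0);
		mmu_hash_ops.hpte_invalidate(gslot, vpn, MMU_PAGE_4K, MMU_PAGE_4K,
					     mmu_kernel_ssize, 0);
	}
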
 /* WARNING: This is called from hash_low_64.S, if you change this prototype,
  *          do not forget to update the assembly call site !
  */
 void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
                     unsigned long flags)
 {
-       unsigned long hash, index, shift, hidx, slot;
+       unsigned long index, shift, gslot;
        int local = flags & HPTE_LOCAL_UPDATE;
 
        DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
        pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
-               hash = hpt_hash(vpn, shift, ssize);
-               hidx = __rpte_to_hidx(pte, index);
-               if (hidx & _PTEIDX_SECONDARY)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += hidx & _PTEIDX_GROUP_IX;
-               DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+               gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+               DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
                /*
                 * We use the same base page size and actual psize, because we
                 * don't use these functions for hugepages
                 */
-               mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+               mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
                                             ssize, local);
        } pte_iterate_hashed_end();
 
@@ -1825,16 +1874,24 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
         */
        BUG_ON(first_memblock_base != 0);
 
-       /* On LPAR systems, the first entry is our RMA region,
-        * non-LPAR 64-bit hash MMU systems don't have a limitation
-        * on real mode access, but using the first entry works well
-        * enough. We also clamp it to 1G to avoid some funky things
-        * such as RTAS bugs etc...
+       /*
+        * On virtualized systems the first entry is our RMA region aka VRMA;
+        * non-virtualized 64-bit hash MMU systems don't have a limitation
+        * on real mode access.
+        *
+        * For guests on platforms before POWER9, we clamp the limit to 1G
+        * to avoid some funky things such as RTAS bugs etc...
         */
-       ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+       if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
+               ppc64_rma_size = first_memblock_size;
+               if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
+                       ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
 
-       /* Finally limit subsequent allocations */
-       memblock_set_current_limit(ppc64_rma_size);
+               /* Finally limit subsequent allocations */
+               memblock_set_current_limit(ppc64_rma_size);
+       } else {
+               ppc64_rma_size = ULONG_MAX;
+       }
 }
 
 #ifdef CONFIG_DEBUG_FS
index 0c2a91df3210e23b24c1657b3cf722242b6e206c..12511f5a015fcfee349e9dd7ac00f6ecd3b8df90 100644 (file)
@@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                     pte_t *ptep, unsigned long trap, unsigned long flags,
                     int ssize, unsigned int shift, unsigned int mmu_psize)
 {
+       real_pte_t rpte;
        unsigned long vpn;
        unsigned long old_pte, new_pte;
        unsigned long rflags, pa, sz;
@@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
+       rpte = __real_pte(__pte(old_pte), ptep);
 
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        /* Check if pte already has an hpte (case 2) */
        if (unlikely(old_pte & H_PAGE_HASHPTE)) {
                /* There MIGHT be an HPTE for this pte */
-               unsigned long hash, slot;
+               unsigned long gslot;
 
-               hash = hpt_hash(vpn, shift, ssize);
-               if (old_pte & H_PAGE_F_SECOND)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
-               if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+               gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+               if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
                                               mmu_psize, ssize, flags) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }
@@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                        return -1;
                }
 
-               new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
-                       (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
        }
 
        /*
index a9b9083c5e4999bc398442ef3f8945040b47fac6..876da2bc1796ba1b8568ad9a26450dcd4c162ecd 100644 (file)
@@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                        *hpdp = __hugepd(__pa(new) |
                                         (shift_to_mmu_psize(pshift) << 2));
 #elif defined(CONFIG_PPC_8xx)
-                       *hpdp = __hugepd(__pa(new) |
+                       *hpdp = __hugepd(__pa(new) | _PMD_USER |
                                         (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
                                          _PMD_PAGE_512K) | _PMD_PRESENT);
 #else
@@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page)
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the page and take a ref on it.
  * This function needs to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
  */
 pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
                        bool *is_thp, unsigned *hpage_shift)
@@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 
        pte = READ_ONCE(*ptep);
 
-       if (!pte_present(pte) || !pte_read(pte))
-               return 0;
-       if (write && !pte_write(pte))
+       if (!pte_access_permitted(pte, write))
                return 0;
 
        /* hugepages are never "special" */
index a07722531b32e3dfb329e38460085ef789b0128e..f6eb7e8f4c938c9cb6966924e38525d47e2c33dd 100644 (file)
@@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
                rc = vmemmap_create_mapping(start, page_size, __pa(p));
                if (rc < 0) {
-                       pr_warning(
-                               "vmemmap_populate: Unable to create vmemmap mapping: %d\n",
-                               rc);
+                       pr_warn("%s: Unable to create vmemmap mapping: %d\n",
+                               __func__, rc);
                        return -EFAULT;
                }
        }
index 4362b86ef84c5daa404e806cf3802128b77b8532..1281c6eb3a85b705f09f8f93e4249d84a3716221 100644 (file)
@@ -138,8 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
        start = (unsigned long)__va(start);
        rc = create_section_mapping(start, start + size);
        if (rc) {
-               pr_warning(
-                       "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+               pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
                        start, start + size, rc);
                return -EFAULT;
        }
index 59c0766ae4e04d024cd33dad8faf577b7001c25c..929d9ef7083f1ac85155f349788713eafbd1ea09 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/pkeys.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
 #include <linux/export.h>
@@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 
        subpage_prot_init_new_context(mm);
 
+       pkey_mm_init(mm);
        return index;
 }
 
index adb6364f40911b984f05f5b989ca0c54af5efdd1..314d19ab9385e038a4f38c18a50364da4873eb86 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/hvcall.h>
 #include <asm/setup.h>
 #include <asm/vdso.h>
+#include <asm/drmem.h>
 
 static int numa_enabled = 1;
 
@@ -179,21 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev)
        return of_get_property(dev, "ibm,associativity", NULL);
 }
 
-/*
- * Returns the property linux,drconf-usable-memory if
- * it exists (the property exists only in kexec/kdump kernels,
- * added by kexec-tools)
- */
-static const __be32 *of_get_usable_memory(struct device_node *memory)
-{
-       const __be32 *prop;
-       u32 len;
-       prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
-       if (!prop || len < sizeof(unsigned int))
-               return NULL;
-       return prop;
-}
-
 int __node_distance(int a, int b)
 {
        int i;
@@ -387,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf)
        return result;
 }
 
-/*
- * Read the next memblock list entry from the ibm,dynamic-memory property
- * and return the information in the provided of_drconf_cell structure.
- */
-static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
-{
-       const __be32 *cp;
-
-       drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
-
-       cp = *cellp;
-       drmem->drc_index = of_read_number(cp, 1);
-       drmem->reserved = of_read_number(&cp[1], 1);
-       drmem->aa_index = of_read_number(&cp[2], 1);
-       drmem->flags = of_read_number(&cp[3], 1);
-
-       *cellp = cp + 4;
-}
-
-/*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
- *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries.  Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
- */
-static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
-{
-       const __be32 *prop;
-       u32 len, entries;
-
-       prop = of_get_property(memory, "ibm,dynamic-memory", &len);
-       if (!prop || len < sizeof(unsigned int))
-               return 0;
-
-       entries = of_read_number(prop++, 1);
-
-       /* Now that we know the number of entries, revalidate the size
-        * of the property read in to ensure we have everything
-        */
-       if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
-               return 0;
-
-       *dm = prop;
-       return entries;
-}
-
-/*
- * Retrieve and validate the ibm,lmb-size property for drconf memory
- * from the device tree.
- */
-static u64 of_get_lmb_size(struct device_node *memory)
-{
-       const __be32 *prop;
-       u32 len;
-
-       prop = of_get_property(memory, "ibm,lmb-size", &len);
-       if (!prop || len < sizeof(unsigned int))
-               return 0;
-
-       return read_n_cells(n_mem_size_cells, &prop);
-}
-
 struct assoc_arrays {
        u32     n_arrays;
        u32     array_sz;
@@ -466,19 +389,27 @@ struct assoc_arrays {
  * indicating the size of each associativity array, followed by a list
  * of N associativity arrays.
  */
-static int of_get_assoc_arrays(struct device_node *memory,
-                              struct assoc_arrays *aa)
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
 {
+       struct device_node *memory;
        const __be32 *prop;
        u32 len;
 
+       memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+       if (!memory)
+               return -1;
+
        prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
-       if (!prop || len < 2 * sizeof(unsigned int))
+       if (!prop || len < 2 * sizeof(unsigned int)) {
+               of_node_put(memory);
                return -1;
+       }
 
        aa->n_arrays = of_read_number(prop++, 1);
        aa->array_sz = of_read_number(prop++, 1);
 
+       of_node_put(memory);
+
        /* Now that we know the number of arrays and size of each array,
         * revalidate the size of the property read in.
         */
@@ -493,26 +424,30 @@ static int of_get_assoc_arrays(struct device_node *memory,
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
  */
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
-                                  struct assoc_arrays *aa)
+static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
 {
+       struct assoc_arrays aa = { .arrays = NULL };
        int default_nid = 0;
        int nid = default_nid;
-       int index;
+       int rc, index;
+
+       rc = of_get_assoc_arrays(&aa);
+       if (rc)
+               return default_nid;
 
-       if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
-           !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
-           drmem->aa_index < aa->n_arrays) {
-               index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
-               nid = of_read_number(&aa->arrays[index], 1);
+       if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
+           !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
+           lmb->aa_index < aa.n_arrays) {
+               index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
+               nid = of_read_number(&aa.arrays[index], 1);
 
                if (nid == 0xffff || nid >= MAX_NUMNODES)
                        nid = default_nid;
 
                if (nid > 0) {
-                       index = drmem->aa_index * aa->array_sz;
+                       index = lmb->aa_index * aa.array_sz;
                        initialize_distance_lookup_table(nid,
-                                                       &aa->arrays[index]);
+                                                       &aa.arrays[index]);
                }
        }
 
@@ -551,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu)
        nid = of_node_to_nid_single(cpu);
 
 out_present:
-       if (nid < 0 || !node_online(nid))
+       if (nid < 0 || !node_possible(nid))
                nid = first_online_node;
 
        map_cpu_to_node(lcpu, nid);
@@ -645,67 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm)
  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
  * node.  This assumes n_mem_{addr,size}_cells have been set.
  */
-static void __init parse_drconf_memory(struct device_node *memory)
+static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+                                       const __be32 **usm)
 {
-       const __be32 *uninitialized_var(dm), *usm;
-       unsigned int n, rc, ranges, is_kexec_kdump = 0;
-       unsigned long lmb_size, base, size, sz;
+       unsigned int ranges, is_kexec_kdump = 0;
+       unsigned long base, size, sz;
        int nid;
-       struct assoc_arrays aa = { .arrays = NULL };
-
-       n = of_get_drconf_memory(memory, &dm);
-       if (!n)
-               return;
-
-       lmb_size = of_get_lmb_size(memory);
-       if (!lmb_size)
-               return;
 
-       rc = of_get_assoc_arrays(memory, &aa);
-       if (rc)
+       /*
+        * Skip this block if the reserved bit is set in flags (0x80)
+        * or if the block is not assigned to this partition (0x8)
+        */
+       if ((lmb->flags & DRCONF_MEM_RESERVED)
+           || !(lmb->flags & DRCONF_MEM_ASSIGNED))
                return;
 
-       /* check if this is a kexec/kdump kernel */
-       usm = of_get_usable_memory(memory);
-       if (usm != NULL)
+       if (*usm)
                is_kexec_kdump = 1;
 
-       for (; n != 0; --n) {
-               struct of_drconf_cell drmem;
-
-               read_drconf_cell(&drmem, &dm);
+       base = lmb->base_addr;
+       size = drmem_lmb_size();
+       ranges = 1;
 
-               /* skip this block if the reserved bit is set in flags (0x80)
-                  or if the block is not assigned to this partition (0x8) */
-               if ((drmem.flags & DRCONF_MEM_RESERVED)
-                   || !(drmem.flags & DRCONF_MEM_ASSIGNED))
-                       continue;
-
-               base = drmem.base_addr;
-               size = lmb_size;
-               ranges = 1;
+       if (is_kexec_kdump) {
+               ranges = read_usm_ranges(usm);
+               if (!ranges) /* there are no (base, size) tuples */
+                       return;
+       }
 
+       do {
                if (is_kexec_kdump) {
-                       ranges = read_usm_ranges(&usm);
-                       if (!ranges) /* there are no (base, size) duple */
-                               continue;
+                       base = read_n_cells(n_mem_addr_cells, usm);
+                       size = read_n_cells(n_mem_size_cells, usm);
                }
-               do {
-                       if (is_kexec_kdump) {
-                               base = read_n_cells(n_mem_addr_cells, &usm);
-                               size = read_n_cells(n_mem_size_cells, &usm);
-                       }
-                       nid = of_drconf_to_nid_single(&drmem, &aa);
-                       fake_numa_create_new_node(
-                               ((base + size) >> PAGE_SHIFT),
-                                          &nid);
-                       node_set_online(nid);
-                       sz = numa_enforce_memory_limit(base, size);
-                       if (sz)
-                               memblock_set_node(base, sz,
-                                                 &memblock.memory, nid);
-               } while (--ranges);
-       }
+
+               nid = of_drconf_to_nid_single(lmb);
+               fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+                                         &nid);
+               node_set_online(nid);
+               sz = numa_enforce_memory_limit(base, size);
+               if (sz)
+                       memblock_set_node(base, sz, &memblock.memory, nid);
+       } while (--ranges);
 }
 
 static int __init parse_numa_properties(void)
@@ -800,8 +716,10 @@ new_range:
         * ibm,dynamic-reconfiguration-memory node.
         */
        memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-       if (memory)
-               parse_drconf_memory(memory);
+       if (memory) {
+               walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+               of_node_put(memory);
+       }
 
        return 0;
 }
@@ -892,6 +810,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
        NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
 
+static void __init find_possible_nodes(void)
+{
+       struct device_node *rtas;
+       u32 numnodes, i;
+
+       if (min_common_depth <= 0)
+               return;
+
+       rtas = of_find_node_by_path("/rtas");
+       if (!rtas)
+               return;
+
+       if (of_property_read_u32_index(rtas,
+                               "ibm,max-associativity-domains",
+                               min_common_depth, &numnodes))
+               goto out;
+
+       for (i = 0; i < numnodes; i++) {
+               if (!node_possible(i))
+                       node_set(i, node_possible_map);
+       }
+
+out:
+       of_node_put(rtas);
+}
+
 void __init initmem_init(void)
 {
        int nid, cpu;
@@ -905,12 +849,15 @@ void __init initmem_init(void)
        memblock_dump_all();
 
        /*
-        * Reduce the possible NUMA nodes to the online NUMA nodes,
-        * since we do not support node hotplug. This ensures that  we
-        * lower the maximum NUMA node ID to what is actually present.
+        * Modify the set of possible NUMA nodes to reflect information
+        * available about the set of online nodes, and the set of nodes
+        * that we expect to make use of for this platform's affinity
+        * calculations.
         */
        nodes_and(node_possible_map, node_possible_map, node_online_map);
 
+       find_possible_nodes();
+
        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;
 
@@ -979,43 +926,26 @@ early_param("topology_updates", early_topology_updates);
  * memory represented in the device tree by the property
  * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
  */
-static int hot_add_drconf_scn_to_nid(struct device_node *memory,
-                                    unsigned long scn_addr)
+static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
 {
-       const __be32 *dm;
-       unsigned int drconf_cell_cnt, rc;
+       struct drmem_lmb *lmb;
        unsigned long lmb_size;
-       struct assoc_arrays aa;
        int nid = -1;
 
-       drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
-       if (!drconf_cell_cnt)
-               return -1;
-
-       lmb_size = of_get_lmb_size(memory);
-       if (!lmb_size)
-               return -1;
-
-       rc = of_get_assoc_arrays(memory, &aa);
-       if (rc)
-               return -1;
-
-       for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
-               struct of_drconf_cell drmem;
-
-               read_drconf_cell(&drmem, &dm);
+       lmb_size = drmem_lmb_size();
 
+       for_each_drmem_lmb(lmb) {
                /* skip this block if it is reserved or not assigned to
                 * this partition */
-               if ((drmem.flags & DRCONF_MEM_RESERVED)
-                   || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+               if ((lmb->flags & DRCONF_MEM_RESERVED)
+                   || !(lmb->flags & DRCONF_MEM_ASSIGNED))
                        continue;
 
-               if ((scn_addr < drmem.base_addr)
-                   || (scn_addr >= (drmem.base_addr + lmb_size)))
+               if ((scn_addr < lmb->base_addr)
+                   || (scn_addr >= (lmb->base_addr + lmb_size)))
                        continue;
 
-               nid = of_drconf_to_nid_single(&drmem, &aa);
+               nid = of_drconf_to_nid_single(lmb);
                break;
        }
 
@@ -1080,7 +1010,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 
        memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (memory) {
-               nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+               nid = hot_add_drconf_scn_to_nid(scn_addr);
                of_node_put(memory);
        } else {
                nid = hot_add_node_scn_to_nid(scn_addr);
@@ -1096,11 +1026,7 @@ static u64 hot_add_drconf_memory_max(void)
 {
        struct device_node *memory = NULL;
        struct device_node *dn = NULL;
-       unsigned int drconf_cell_cnt = 0;
-       u64 lmb_size = 0;
-       const __be32 *dm = NULL;
        const __be64 *lrdr = NULL;
-       struct of_drconf_cell drmem;
 
        dn = of_find_node_by_path("/rtas");
        if (dn) {
@@ -1112,14 +1038,8 @@ static u64 hot_add_drconf_memory_max(void)
 
        memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (memory) {
-               drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
-               lmb_size = of_get_lmb_size(memory);
-
-               /* Advance to the last cell, each cell has 6 32 bit integers */
-               dm += (drconf_cell_cnt - 1) * 6;
-               read_drconf_cell(&drmem, &dm);
                of_node_put(memory);
-               return drmem.base_addr + lmb_size;
+               return drmem_lmb_memory_max();
        }
        return 0;
 }
@@ -1278,6 +1198,42 @@ static long vphn_get_associativity(unsigned long cpu,
        return rc;
 }
 
+int find_and_online_cpu_nid(int cpu)
+{
+       __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+       int new_nid;
+
+       /* Use associativity from first thread for all siblings */
+       vphn_get_associativity(cpu, associativity);
+       new_nid = associativity_to_nid(associativity);
+       if (new_nid < 0 || !node_possible(new_nid))
+               new_nid = first_online_node;
+
+       if (NODE_DATA(new_nid) == NULL) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+               /*
+                * Need to ensure that NODE_DATA is initialized for a node from
+                * available memory (see memblock_alloc_try_nid). If unable to
+                * init the node, then default to nearest node that has memory
+                * installed.
+                */
+               if (try_online_node(new_nid))
+                       new_nid = first_online_node;
+#else
+               /*
+                * Default to using the nearest node that has memory installed.
+                * Otherwise, it would be necessary to patch the kernel MM code
+                * to deal with more memoryless-node error conditions.
+                */
+               new_nid = first_online_node;
+#endif
+       }
+
+       pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
+               cpu, new_nid);
+       return new_nid;
+}
+
 /*
  * Update the CPU maps and sysfs entries for a single CPU when its NUMA
  * characteristics change. This function doesn't perform any locking and is
@@ -1345,7 +1301,6 @@ int numa_update_cpu_topology(bool cpus_locked)
 {
        unsigned int cpu, sibling, changed = 0;
        struct topology_update_data *updates, *ud;
-       __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
        cpumask_t updated_cpus;
        struct device *dev;
        int weight, new_nid, i = 0;
@@ -1383,11 +1338,7 @@ int numa_update_cpu_topology(bool cpus_locked)
                        continue;
                }
 
-               /* Use associativity from first thread for all siblings */
-               vphn_get_associativity(cpu, associativity);
-               new_nid = associativity_to_nid(associativity);
-               if (new_nid < 0 || !node_online(new_nid))
-                       new_nid = first_online_node;
+               new_nid = find_and_online_cpu_nid(cpu);
 
                if (new_nid == numa_cpu_lookup_table[cpu]) {
                        cpumask_andnot(&cpu_associativity_changes_mask,
index cfbbee941a76b186cc2cccdc4c7ec1b2e0a19f10..573a9a2ee4555c53ab2416e70fb4fdd05fb464da 100644 (file)
@@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void)
 
        radix_init_iamr();
        radix_init_pgtable();
+
+       if (cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }
 
 void radix__early_init_mmu_secondary(void)
@@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void)
                radix_init_amor();
        }
        radix_init_iamr();
+
+       if (cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }
 
 void radix__mmu_cleanup_all(void)
@@ -622,22 +628,11 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
         * physical on those processors
         */
        BUG_ON(first_memblock_base != 0);
+
        /*
-        * We limit the allocation that depend on ppc64_rma_size
-        * to first_memblock_size. We also clamp it to 1GB to
-        * avoid some funky things such as RTAS bugs.
-        *
-        * On radix config we really don't have a limitation
-        * on real mode access. But keeping it as above works
-        * well enough.
-        */
-       ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
-       /*
-        * Finally limit subsequent allocations. We really don't want
-        * to limit the memblock allocations to rma_size. FIXME!! should
-        * we even limit at all ?
+        * Radix mode is not limited by RMA / VRMA addressing.
         */
-       memblock_set_current_limit(first_memblock_base + first_memblock_size);
+       ppc64_rma_size = ULONG_MAX;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
index a03ff3d99e0c420e87cf0f81cb64f43ba78f1998..9f361ae571e95229355abdc0d1c1b602573a9677 100644 (file)
@@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte)
        return 0;
 #else
        return (pte_val(pte) &
-               (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+               (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
+                _PAGE_PRIVILEGED)) ==
                (_PAGE_PRESENT | _PAGE_USER);
 #endif
 }
index f6c7f54c05157e226c5426798b6fa0f379f8becb..d35d9ad3c1cd686b3c48d57f51cd1d8bae66acc0 100644 (file)
@@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
 
        /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
        flags &= ~(_PAGE_USER | _PAGE_EXEC);
-
-#ifdef _PAGE_BAP_SR
-       /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
-        * which means that we just cleared supervisor access... oops ;-) This
-        * restores it
-        */
-       flags |= _PAGE_BAP_SR;
-#endif
+       flags |= _PAGE_PRIVILEGED;
 
        return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
index 813ea22c3e00d17e795d0c85065505a51308092b..c9a623c2d8a270a14966003258f035fb95d08cff 100644 (file)
@@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
        /*
         * Force kernel mapping.
         */
-#if defined(CONFIG_PPC_BOOK3S_64)
-       flags |= _PAGE_PRIVILEGED;
-#else
        flags &= ~_PAGE_USER;
-#endif
-
-
-#ifdef _PAGE_BAP_SR
-       /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
-        * which means that we just cleared supervisor access... oops ;-) This
-        * restores it
-        */
-       flags |= _PAGE_BAP_SR;
-#endif
+       flags |= _PAGE_PRIVILEGED;
 
        if (ppc_md.ioremap)
                return ppc_md.ioremap(addr, size, flags, caller);
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
new file mode 100644 (file)
index 0000000..ba71c54
--- /dev/null
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/setup.h>
+#include <linux/pkeys.h>
+#include <linux/of_device.h>
+
+DEFINE_STATIC_KEY_TRUE(pkey_disabled);
+bool pkey_execute_disable_supported;
+int  pkeys_total;              /* Total pkeys as per device tree */
+bool pkeys_devtree_defined;    /* pkey property exported by device tree */
+u32  initial_allocation_mask;  /* Bits set for reserved keys */
+u64  pkey_amr_uamor_mask;      /* Bits in AMR/UAMOR not to be touched */
+u64  pkey_iamr_mask;           /* Bits in IAMR not to be touched */
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64)*8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static void scan_pkey_feature(void)
+{
+       u32 vals[2];
+       struct device_node *cpu;
+
+       cpu = of_find_node_by_type(NULL, "cpu");
+       if (!cpu)
+               return;
+
+       if (of_property_read_u32_array(cpu,
+                       "ibm,processor-storage-keys", vals, 2))
+               return;
+
+       /*
+        * Since any pkey can be used for data or execute, we will just treat
+        * all keys as equal and track them as one entity.
+        */
+       pkeys_total = be32_to_cpu(vals[0]);
+       pkeys_devtree_defined = true;
+}
+
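For context, a sketch of the device-tree encoding scan_pkey_feature() parses.
The property layout beyond what the code reads is an assumption here: two u32
cells, of which only vals[0] is used as the key count:

	/*
	 * Hypothetical device-tree fragment:
	 *
	 *	cpu@0 {
	 *		ibm,processor-storage-keys = <0x20 0x0>;
	 *	};
	 *
	 * vals[0] (0x20 == 32) becomes pkeys_total; vals[1] is read but not
	 * otherwise used by this code.
	 */
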
+static inline bool pkey_mmu_enabled(void)
+{
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               return pkeys_total;
+       else
+               return cpu_has_feature(CPU_FTR_PKEY);
+}
+
+int pkey_initialize(void)
+{
+       int os_reserved, i;
+
+       /*
+        * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+        * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+        * Ensure that the bits are distinct.
+        */
+       BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+                    (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+       /*
+        * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+        * in the vma flags. Make sure that is really the case.
+        */
+       BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+                    __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+                               != (sizeof(u64) * BITS_PER_BYTE));
+
+       /* scan the device tree for pkey feature */
+       scan_pkey_feature();
+
+       /*
+        * Let's assume 32 pkeys on P8 bare metal, if it's not defined by the
+        * device tree. We make this exception since skiboot forgot to expose
+        * this property on power8.
+        */
+       if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
+                       cpu_has_feature(CPU_FTRS_POWER8))
+               pkeys_total = 32;
+
+       /*
+        * Adjust the upper limit, based on the number of bits supported by
+        * arch-neutral code.
+        */
+       pkeys_total = min_t(int, pkeys_total,
+                       (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+
+       if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
+               static_branch_enable(&pkey_disabled);
+       else
+               static_branch_disable(&pkey_disabled);
+
+       if (static_branch_likely(&pkey_disabled))
+               return 0;
+
+       /*
+        * The device tree cannot be relied on to indicate support for
+        * execute_disable. Instead we use a PVR check.
+        */
+       if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+               pkey_execute_disable_supported = false;
+       else
+               pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+       /*
+        * The OS can manage only 8 pkeys due to its inability to represent them
+        * in the Linux 4K PTE.
+        */
+       os_reserved = pkeys_total - 8;
+#else
+       os_reserved = 0;
+#endif
+       /*
+        * Bits are in LE format. NOTE: keys 0 and 1 are reserved.
+        * Key 0 is the default key, which allows read/write/execute.
+        * Key 1 is recommended not to be used; see the programming note
+        * in PowerISA(3.0), page 1015.
+        */
+       initial_allocation_mask = ~0x0;
+
+       /* register mask is in BE format */
+       pkey_amr_uamor_mask = ~0x0ul;
+       pkey_iamr_mask = ~0x0ul;
+
+       for (i = 2; i < (pkeys_total - os_reserved); i++) {
+               initial_allocation_mask &= ~(0x1 << i);
+               pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
+               pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
+       }
+       return 0;
+}
+
+arch_initcall(pkey_initialize);
+
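A worked example of the mask setup above (illustrative only): with
AMR_BITS_PER_PKEY = 2 and a 64-bit register, pkeyshift(pkey) evaluates to
64 - (pkey + 1) * 2, so key 2 owns the two AMR bits at shift 58 and the single
IAMR execute bit at the same shift:

	/* Sketch: the AMR bits owned by @pkey, mirroring pkeyshift(). */
	static unsigned long demo_amr_bits(int pkey)
	{
		int shift = 64 - ((pkey + 1) * 2);	/* key 2 -> 58 */

		return (AMR_RD_BIT | AMR_WR_BIT) << shift;	/* 0x3ul << 58 */
	}
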
+void pkey_mm_init(struct mm_struct *mm)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return;
+       mm_pkey_allocation_map(mm) = initial_allocation_mask;
+       /* -1 means unallocated or invalid */
+       mm->context.execute_only_pkey = -1;
+}
+
+static inline u64 read_amr(void)
+{
+       return mfspr(SPRN_AMR);
+}
+
+static inline void write_amr(u64 value)
+{
+       mtspr(SPRN_AMR, value);
+}
+
+static inline u64 read_iamr(void)
+{
+       if (unlikely(!pkey_execute_disable_supported))
+               return 0x0UL;
+
+       return mfspr(SPRN_IAMR);
+}
+
+static inline void write_iamr(u64 value)
+{
+       if (unlikely(!pkey_execute_disable_supported))
+               return;
+
+       mtspr(SPRN_IAMR, value);
+}
+
+static inline u64 read_uamor(void)
+{
+       return mfspr(SPRN_UAMOR);
+}
+
+static inline void write_uamor(u64 value)
+{
+       mtspr(SPRN_UAMOR, value);
+}
+
+static bool is_pkey_enabled(int pkey)
+{
+       u64 uamor = read_uamor();
+       u64 pkey_bits = 0x3ul << pkeyshift(pkey);
+       u64 uamor_pkey_bits = (uamor & pkey_bits);
+
+       /*
+        * Both the bits in UAMOR corresponding to the key should be set or
+        * reset.
+        */
+       WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
+       return !!(uamor_pkey_bits);
+}
+
+static inline void init_amr(int pkey, u8 init_bits)
+{
+       u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
+       u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
+
+       write_amr(old_amr | new_amr_bits);
+}
+
+static inline void init_iamr(int pkey, u8 init_bits)
+{
+       u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
+       u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
+
+       write_iamr(old_iamr | new_iamr_bits);
+}
+
+static void pkey_status_change(int pkey, bool enable)
+{
+       u64 old_uamor;
+
+       /* Reset the AMR and IAMR bits for this key */
+       init_amr(pkey, 0x0);
+       init_iamr(pkey, 0x0);
+
+       /* Enable/disable key */
+       old_uamor = read_uamor();
+       if (enable)
+               old_uamor |= (0x3ul << pkeyshift(pkey));
+       else
+               old_uamor &= ~(0x3ul << pkeyshift(pkey));
+       write_uamor(old_uamor);
+}
+
+void __arch_activate_pkey(int pkey)
+{
+       pkey_status_change(pkey, true);
+}
+
+void __arch_deactivate_pkey(int pkey)
+{
+       pkey_status_change(pkey, false);
+}
+
+/*
+ * Set the access rights in the AMR, IAMR and UAMOR registers for @pkey to
+ * those specified in @init_val.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+                               unsigned long init_val)
+{
+       u64 new_amr_bits = 0x0ul;
+       u64 new_iamr_bits = 0x0ul;
+
+       if (!is_pkey_enabled(pkey))
+               return -EINVAL;
+
+       if (init_val & PKEY_DISABLE_EXECUTE) {
+               if (!pkey_execute_disable_supported)
+                       return -EINVAL;
+               new_iamr_bits |= IAMR_EX_BIT;
+       }
+       init_iamr(pkey, new_iamr_bits);
+
+       /* Set the bits we need in AMR: */
+       if (init_val & PKEY_DISABLE_ACCESS)
+               new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+       else if (init_val & PKEY_DISABLE_WRITE)
+               new_amr_bits |= AMR_WR_BIT;
+
+       init_amr(pkey, new_amr_bits);
+       return 0;
+}
+
+void thread_pkey_regs_save(struct thread_struct *thread)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return;
+
+       /*
+        * TODO: Skip saving registers if @thread hasn't used any keys yet.
+        */
+       thread->amr = read_amr();
+       thread->iamr = read_iamr();
+       thread->uamor = read_uamor();
+}
+
+void thread_pkey_regs_restore(struct thread_struct *new_thread,
+                             struct thread_struct *old_thread)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return;
+
+       /*
+        * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
+        */
+       if (old_thread->amr != new_thread->amr)
+               write_amr(new_thread->amr);
+       if (old_thread->iamr != new_thread->iamr)
+               write_iamr(new_thread->iamr);
+       if (old_thread->uamor != new_thread->uamor)
+               write_uamor(new_thread->uamor);
+}
+
+void thread_pkey_regs_init(struct thread_struct *thread)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return;
+
+       write_amr(read_amr() & pkey_amr_uamor_mask);
+       write_iamr(read_iamr() & pkey_iamr_mask);
+       write_uamor(read_uamor() & pkey_amr_uamor_mask);
+}
+
+static inline bool pkey_allows_readwrite(int pkey)
+{
+       int pkey_shift = pkeyshift(pkey);
+
+       if (!is_pkey_enabled(pkey))
+               return true;
+
+       return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
+}
+
+int __execute_only_pkey(struct mm_struct *mm)
+{
+       bool need_to_set_mm_pkey = false;
+       int execute_only_pkey = mm->context.execute_only_pkey;
+       int ret;
+
+       /* Do we need to assign a pkey for mm's execute-only maps? */
+       if (execute_only_pkey == -1) {
+               /* Go allocate one to use, which might fail */
+               execute_only_pkey = mm_pkey_alloc(mm);
+               if (execute_only_pkey < 0)
+                       return -1;
+               need_to_set_mm_pkey = true;
+       }
+
+       /*
+        * We do not want to go through the relatively costly dance to set AMR
+        * if we do not need to. Check it first and assume that if the
+        * execute-only pkey is readwrite-disabled then we do not have to set it
+        * ourselves.
+        */
+       if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
+               return execute_only_pkey;
+
+       /*
+        * Set up AMR so that it denies access for everything other than
+        * execution.
+        */
+       ret = __arch_set_user_pkey_access(current, execute_only_pkey,
+                                         PKEY_DISABLE_ACCESS |
+                                         PKEY_DISABLE_WRITE);
+       /*
+        * If the AMR-set operation failed somehow, just return 0 and
+        * effectively disable execute-only support.
+        */
+       if (ret) {
+               mm_pkey_free(mm, execute_only_pkey);
+               return -1;
+       }
+
+       /* We got one, store it and use it from here on out */
+       if (need_to_set_mm_pkey)
+               mm->context.execute_only_pkey = execute_only_pkey;
+       return execute_only_pkey;
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
+{
+       /* Do this check first since the vm_flags should be hot */
+       if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
+               return false;
+
+       return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
+}
+
+/*
+ * This should only be called for *plain* mprotect calls.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+                                 int pkey)
+{
+       /*
+        * If the currently associated pkey is execute-only, but the requested
+        * protection requires read or write, move it back to the default pkey.
+        */
+       if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
+               return 0;
+
+       /*
+        * The requested protection is execute-only. Hence let's use an
+        * execute-only pkey.
+        */
+       if (prot == PROT_EXEC) {
+               pkey = execute_only_pkey(vma->vm_mm);
+               if (pkey > 0)
+                       return pkey;
+       }
+
+       /* Nothing to override. */
+       return vma_pkey(vma);
+}
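
This override is what lets a plain mprotect(addr, len, PROT_EXEC) pick up the
execute-only key transparently, without the caller ever naming a pkey. A
user-space sketch (illustrative helper, assuming a pkey-capable system):

    #include <string.h>
    #include <sys/mman.h>

    /* Stage code into an RW mapping, then flip it to execute-only. After
     * the mprotect() the kernel backs the VMA with the execute-only pkey,
     * so data reads fault while instruction fetches proceed. */
    void *demo_make_exec_only(void *buf, size_t len,
                              const void *code, size_t n)
    {
            memcpy(buf, code, n);
            if (mprotect(buf, len, PROT_EXEC))
                    return NULL;
            return buf;
    }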
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute)
+{
+       int pkey_shift;
+       u64 amr;
+
+       if (!pkey)
+               return true;
+
+       if (!is_pkey_enabled(pkey))
+               return true;
+
+       pkey_shift = pkeyshift(pkey);
+       if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
+               return true;
+
+       amr = read_amr(); /* Delay reading amr until absolutely needed */
+       return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) ||
+               (write &&  !(amr & (AMR_WR_BIT << pkey_shift))));
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return true;
+
+       return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+}
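
pkeyshift() and the AMR bit constants are defined outside this hunk. As a
standalone model of the read test above (the slot layout here is an
assumption for illustration, not the kernel's exact macros), each key owns a
two-bit read/write slot in the AMR, with keys numbered from the
most-significant end:

    #include <stdbool.h>
    #include <stdint.h>

    #define DEMO_AMR_RD_BIT 0x1UL   /* illustrative in-slot bit positions */
    #define DEMO_AMR_WR_BIT 0x2UL

    /* Model of pkey_access_permitted() for a read: permitted when the
     * key's access-disable bit is clear in the given AMR image. */
    static bool demo_pkey_read_ok(uint64_t amr, int pkey, int nr_pkeys)
    {
            int shift = (nr_pkeys - 1 - pkey) * 2;  /* assumed 2 bits/key */

            return !(amr & (DEMO_AMR_RD_BIT << shift));
    }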
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+       if (!current->mm)
+               return true;
+
+       /* if it is not our ->mm, it has to be foreign */
+       if (current->mm != vma->vm_mm)
+               return true;
+
+       return false;
+}
+
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+                              bool execute, bool foreign)
+{
+       if (static_branch_likely(&pkey_disabled))
+               return true;
+       /*
+        * Do not enforce our key-permissions on a foreign vma.
+        */
+       if (foreign || vma_is_foreign(vma))
+               return true;
+
+       return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
index 781532d7bc4d29683ab2ac72cff6f0ad7d4d01f4..f14a07c2fb9030aa7d350a4a3977d83d22fff983 100644 (file)
@@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
        unsigned long next, limit;
        int err;
 
+       if (radix_enabled())
+               return -ENOENT;
+
        /* Check parameters */
        if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
            addr >= mm->task_size || len >= mm->task_size ||
index 884f4b705b572ddc66444079c9ed596f10a20312..71d1b19ad1c0da190ecaf5cd394e918a771b01f9 100644 (file)
 #define RIC_FLUSH_PWC 1
 #define RIC_FLUSH_ALL 2
 
+/*
+ * tlbiel instruction for radix, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+                                       unsigned int pid,
+                                       unsigned int ric, unsigned int prs)
+{
+       unsigned long rb;
+       unsigned long rs;
+       unsigned int r = 1; /* radix format */
+
+       rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+       rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+       asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+                    : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+                    : "memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+       unsigned int set;
+
+       asm volatile("ptesync": : :"memory");
+
+       /*
+        * Flush the first set of the TLB, and the entire Page Walk Cache
+        * and partition table entries. Then flush the remaining sets of the
+        * TLB.
+        */
+       tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+       for (set = 1; set < num_sets; set++)
+               tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+
+       /* Do the same for process scoped entries. */
+       tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+       for (set = 1; set < num_sets; set++)
+               tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+
+       asm volatile("ptesync": : :"memory");
+}
+
+void radix__tlbiel_all(unsigned int action)
+{
+       unsigned int is;
+
+       switch (action) {
+       case TLB_INVAL_SCOPE_GLOBAL:
+               is = 3;
+               break;
+       case TLB_INVAL_SCOPE_LPID:
+               is = 2;
+               break;
+       default:
+               BUG();
+       }
+
+       if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+               tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+       else
+               WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
+
+       asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
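
The rb/rs operands built in tlbiel_radix_set_isa300() use IBM
(most-significant-bit-is-0) bit numbering. Assuming PPC_BITLSHIFT(b) expands
to 63 - b on 64-bit powerpc, the field placement reduces to ordinary shifts:

    #include <stdint.h>

    /* Sketch of the operand encoding above with IBM bit numbers
     * converted to conventional left shifts. */
    static uint64_t demo_tlbiel_rb(unsigned int set, unsigned int is)
    {
            return ((uint64_t)set << (63 - 51)) |   /* set field, shift 12 */
                   ((uint64_t)is  << (63 - 53));    /* is field, shift 10 */
    }

    static uint64_t demo_tlbiel_rs(unsigned int pid)
    {
            return (uint64_t)pid << (63 - 31);      /* PID in the upper word */
    }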
+
 static inline void __tlbiel_pid(unsigned long pid, int set,
                                unsigned long ric)
 {
@@ -600,14 +666,12 @@ void radix__flush_tlb_all(void)
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
-       trace_tlbie(0, 0, rb, rs, ric, prs, r);
        /*
         * now flush host entries by passing PRS = 0 and LPID == 0
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
-       trace_tlbie(0, 0, rb, 0, ric, prs, r);
 }
 
 void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
index bfc4a086960927a3a164eecceb69ab0c4089c365..15fe5f0c8665b0774e2d9e76f487d9a6d9b5978c 100644 (file)
@@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
 
 {
-       flush_tlb_mm(vma->vm_mm);
+       if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
+               flush_tlb_page(vma, start);
+       else
+               flush_tlb_mm(vma->vm_mm);
 }
 EXPORT_SYMBOL(flush_tlb_range);
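
The fast path added above fires only for an exactly one-page, page-aligned
range; the masking test is easy to misread, so here is the arithmetic in
isolation (demo constants; the kernel's PAGE_SIZE/PAGE_MASK behave the same
way):

    #include <stdbool.h>
    #include <stdint.h>

    #define DEMO_PAGE_SIZE 4096UL
    #define DEMO_PAGE_MASK (~(DEMO_PAGE_SIZE - 1))

    /* True when the range covers exactly one page and start is aligned:
     * (start & ~DEMO_PAGE_MASK) isolates the offset within the page. */
    static bool demo_is_single_aligned_page(uintptr_t start, uintptr_t end)
    {
            return (end - start) == DEMO_PAGE_SIZE &&
                   !(start & ~DEMO_PAGE_MASK);
    }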
 
index 3c39f05f0af32b8a01e16b384c3659f00263f1e6..6c0020d1c5614c6610faa359ef87254e3a4c8107 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
+#include <asm/code-patching.h>
 
 #define PERF_8xx_ID_CPU_CYCLES         1
 #define PERF_8xx_ID_HW_INSTRUCTIONS    2
 
 extern unsigned long itlb_miss_counter, dtlb_miss_counter;
 extern atomic_t instruction_counter;
+extern unsigned int itlb_miss_perf, dtlb_miss_perf;
+extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
+extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
 
 static atomic_t insn_ctr_ref;
+static atomic_t itlb_miss_ref;
+static atomic_t dtlb_miss_ref;
 
 static s64 get_insn_ctr(void)
 {
@@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
                val = get_insn_ctr();
                break;
        case PERF_8xx_ID_ITLB_LOAD_MISS:
+               if (atomic_inc_return(&itlb_miss_ref) == 1) {
+                       unsigned long target = (unsigned long)&itlb_miss_perf;
+
+                       patch_branch(&itlb_miss_exit_1, target, 0);
+#ifndef CONFIG_PIN_TLB_TEXT
+                       patch_branch(&itlb_miss_exit_2, target, 0);
+#endif
+               }
                val = itlb_miss_counter;
                break;
        case PERF_8xx_ID_DTLB_LOAD_MISS:
+               if (atomic_inc_return(&dtlb_miss_ref) == 1) {
+                       unsigned long target = (unsigned long)&dtlb_miss_perf;
+
+                       patch_branch(&dtlb_miss_exit_1, target, 0);
+                       patch_branch(&dtlb_miss_exit_2, target, 0);
+                       patch_branch(&dtlb_miss_exit_3, target, 0);
+               }
                val = dtlb_miss_counter;
                break;
        }
@@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)
 
 static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 {
+       /* mfspr r10, SPRN_SPRG_SCRATCH0 */
+       unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
+                           __PPC_SPR(SPRN_SPRG_SCRATCH0);
+
        mpc8xx_pmu_read(event);
-       if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
-               return;
 
        /* If it was the last user, stop counting to avoid useless overhead */
-       if (atomic_dec_return(&insn_ctr_ref) == 0)
-               mtspr(SPRN_ICTRL, 7);
+       switch (event_type(event)) {
+       case PERF_8xx_ID_CPU_CYCLES:
+               break;
+       case PERF_8xx_ID_HW_INSTRUCTIONS:
+               if (atomic_dec_return(&insn_ctr_ref) == 0)
+                       mtspr(SPRN_ICTRL, 7);
+               break;
+       case PERF_8xx_ID_ITLB_LOAD_MISS:
+               if (atomic_dec_return(&itlb_miss_ref) == 0) {
+                       patch_instruction(&itlb_miss_exit_1, insn);
+#ifndef CONFIG_PIN_TLB_TEXT
+                       patch_instruction(&itlb_miss_exit_2, insn);
+#endif
+               }
+               break;
+       case PERF_8xx_ID_DTLB_LOAD_MISS:
+               if (atomic_dec_return(&dtlb_miss_ref) == 0) {
+                       patch_instruction(&dtlb_miss_exit_1, insn);
+                       patch_instruction(&dtlb_miss_exit_2, insn);
+                       patch_instruction(&dtlb_miss_exit_3, insn);
+               }
+               break;
+       }
 }
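
Both hunks in this file use the same refcounted live-patching idiom: the
first user of a counter patches a branch into the TLB-miss exit so execution
falls into the counting code, and the last user patches the saved mfspr back
over it. A stripped-down sketch of the idiom (illustrative names;
patch_branch()/patch_instruction() are the kernel helpers used above):

    #include <linux/atomic.h>
    #include <asm/code-patching.h>

    static atomic_t demo_ref;

    /* First user redirects the exit site into the counting code. */
    static void demo_counter_enable(unsigned int *exit_site,
                                    unsigned long target)
    {
            if (atomic_inc_return(&demo_ref) == 1)
                    patch_branch(exit_site, target, 0);
    }

    /* Last user restores the instruction the branch replaced. */
    static void demo_counter_disable(unsigned int *exit_site,
                                     unsigned int saved_insn)
    {
            if (atomic_dec_return(&demo_ref) == 0)
                    patch_instruction(exit_site, saved_insn);
    }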
 
 static struct pmu mpc8xx_pmu = {
index 225c9c86d7c07becaf5bf4ecd3e987cb64263253..57ebc655d2accd1b7d7e8b5c009990cd7017ddbd 100644 (file)
@@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
 obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
 
-obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o
+obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
 
 obj-$(CONFIG_PPC64)            += $(obj64-y)
 obj-$(CONFIG_PPC32)            += $(obj32-y)
index fce545774d50afc6093c28ad2f4127c24ed5331c..f89bbd54ecec54b777b987292e7dbb48ae138b57 100644 (file)
@@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
  */
 static inline int perf_intr_is_nmi(struct pt_regs *regs)
 {
-       return !regs->softe;
+       return (regs->softe & IRQS_DISABLED);
 }
 
 /*
index be4e7f84f70a59db60e92a9bfe845678f71cc608..d7532e7b9ab5ccd4c37607b46bb43f7a9e75e5e5 100644 (file)
@@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu;
 /* Thread IMC data structures and variables */
 
 static DEFINE_PER_CPU(u64 *, thread_imc_mem);
-static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st
        return &attr->attr.attr;
 }
 
-struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
-                                 const char *unit, const char *prefix, u32 base)
+static int imc_parse_event(struct device_node *np, const char *scale,
+                                 const char *unit, const char *prefix,
+                                 u32 base, struct imc_events *event)
 {
-       struct imc_events *event;
        const char *s;
        u32 reg;
 
-       event = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
-       if (!event)
-               return NULL;
-
        if (of_property_read_u32(np, "reg", &reg))
                goto error;
        /* Add the base_reg value to the "reg" */
@@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
                        goto error;
        }
 
-       return event;
+       return 0;
 error:
        kfree(event->unit);
        kfree(event->scale);
        kfree(event->name);
-       kfree(event);
+       return -EINVAL;
+}
+
+/*
+ * imc_free_events: Free the name/unit/scale strings of the first
+ *                 "nr_entries" events, then the events array itself.
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+       int i;
 
-       return NULL;
+       /* Nothing to clean, return */
+       if (!events)
+               return;
+       for (i = 0; i < nr_entries; i++) {
+               kfree(events[i].unit);
+               kfree(events[i].scale);
+               kfree(events[i].name);
+       }
+
+       kfree(events);
 }
 
 /*
@@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
        struct attribute_group *attr_group;
        struct attribute **attrs, *dev_str;
        struct device_node *np, *pmu_events;
-       struct imc_events *ev;
        u32 handle, base_reg;
-       int i=0, j=0, ct;
+       int i = 0, j = 0, ct, ret;
        const char *prefix, *g_scale, *g_unit;
        const char *ev_val_str, *ev_scale_str, *ev_unit_str;
 
@@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
        ct = 0;
        /* Parse the events and update the struct */
        for_each_child_of_node(pmu_events, np) {
-               ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
-               if (ev)
-                       pmu->events[ct++] = ev;
+               ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+               if (!ret)
+                       ct++;
        }
 
        /* Allocate memory for attribute group */
        attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
-       if (!attr_group)
+       if (!attr_group) {
+               imc_free_events(pmu->events, ct);
                return -ENOMEM;
+       }
 
        /*
         * Allocate memory for attributes.
@@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
        attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
        if (!attrs) {
                kfree(attr_group);
-               kfree(pmu->events);
+               imc_free_events(pmu->events, ct);
                return -ENOMEM;
        }
 
        attr_group->name = "events";
        attr_group->attrs = attrs;
        do {
-               ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value);
-               dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str);
+               ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+               dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
                if (!dev_str)
                        continue;
 
                attrs[j++] = dev_str;
-               if (pmu->events[i]->scale) {
-                       ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name);
-                       dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale);
+               if (pmu->events[i].scale) {
+                       ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+                       dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
                        if (!dev_str)
                                continue;
 
                        attrs[j++] = dev_str;
                }
 
-               if (pmu->events[i]->unit) {
-                       ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name);
-                       dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit);
+               if (pmu->events[i].unit) {
+                       ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+                       dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
                        if (!dev_str)
                                continue;
 
@@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
        /* Save the event attribute */
        pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
 
-       kfree(pmu->events);
        return 0;
 }
 
@@ -611,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu)
 
 static int ppc_core_imc_cpu_offline(unsigned int cpu)
 {
-       unsigned int ncpu, core_id;
+       unsigned int core_id;
+       int ncpu;
        struct imc_pmu_ref *ref;
 
        /*
@@ -1171,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void)
        }
 }
 
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+       if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+               kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+       kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+       kfree(pmu_ptr);
+}
+
 /*
  * Common function to unregister cpu hotplug callback and
  * free the memory.
@@ -1203,13 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
                cleanup_all_thread_imc_memory();
        }
-
-       /* Only free the attr_groups which are dynamically allocated  */
-       if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
-               kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
-       kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
-       kfree(pmu_ptr);
-       return;
 }
 
 
@@ -1258,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
                core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
                                                                GFP_KERNEL);
 
-               if (!core_imc_refc)
+               if (!core_imc_refc) {
+                       kfree(pmu_ptr->mem_info);
                        return -ENOMEM;
+               }
 
                core_imc_pmu = pmu_ptr;
                break;
@@ -1272,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
                thread_imc_mem_size = pmu_ptr->counter_mem_size;
                for_each_online_cpu(cpu) {
                        res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
-                       if (res)
+                       if (res) {
+                               cleanup_all_thread_imc_memory();
                                return res;
+                       }
                }
 
-               thread_imc_pmu = pmu_ptr;
                break;
        default:
                return -EINVAL;
@@ -1300,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
        int ret;
 
        ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
-       if (ret)
-               goto err_free;
+       if (ret) {
+               imc_common_mem_free(pmu_ptr);
+               return ret;
+       }
 
        switch (pmu_ptr->domain) {
        case IMC_DOMAIN_NEST:
@@ -1368,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
        return 0;
 
 err_free:
+       imc_common_mem_free(pmu_ptr);
        imc_common_cpuhp_mem_free(pmu_ptr);
        return ret;
 }
index 92e98048404ff411e30d4e11c2a862db81e2ddb6..04f0c73a9b4fc335d7899bb102c39e31634b6a8e 100644 (file)
 #include <asm/time.h>
 #include <asm/uic.h>
 #include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR   "ibm,bus-error-irq"
+#define FSP2_CMU_ERR   "ibm,cmu-error-irq"
+#define FSP2_CONF_ERR  "ibm,conf-error-irq"
+#define FSP2_OPBD_ERR  "ibm,opbd-error-irq"
+#define FSP2_MCUE      "ibm,mc-ue-irq"
+#define FSP2_RST_WRN   "ibm,reset-warning-irq"
 
 static __initdata struct of_device_id fsp2_of_bus[] = {
        { .compatible = "ibm,plb4", },
@@ -35,6 +46,194 @@ static __initdata struct of_device_id fsp2_of_bus[] = {
        {},
 };
 
+static void l2regs(void)
+{
+       pr_err("L2 Controller:\n");
+       pr_err("MCK:      0x%08x\n", mfl2(L2MCK));
+       pr_err("INT:      0x%08x\n", mfl2(L2INT));
+       pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+       pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+       pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+       pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+       pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+       pr_err("CPUSTAT:  0x%08x\n", mfl2(L2CPUSTAT));
+       pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+       pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+       pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+       pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+       pr_err("WDFSTAT:  0x%08x\n", mfl2(L2WDFSTAT));
+       pr_err("LOG0:     0x%08x\n", mfl2(L2LOG0));
+       pr_err("LOG1:     0x%08x\n", mfl2(L2LOG1));
+       pr_err("LOG2:     0x%08x\n", mfl2(L2LOG2));
+       pr_err("LOG3:     0x%08x\n", mfl2(L2LOG3));
+       pr_err("LOG4:     0x%08x\n", mfl2(L2LOG4));
+       pr_err("LOG5:     0x%08x\n", mfl2(L2LOG5));
+}
+
+static void show_plbopb_regs(u32 base, int num)
+{
+       pr_err("\nPLBOPB Bridge %d:\n", num);
+       pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+       pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+       pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+       pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+       pr_err("GEAR:  0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+static irqreturn_t bus_err_handler(int irq, void *data)
+{
+       pr_err("Bus Error\n");
+
+       l2regs();
+
+       pr_err("\nPLB6 Controller:\n");
+       pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+       pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+       pr_err("\nPLB6-to-PLB4 Bridge:\n");
+       pr_err("ESR:  0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+       pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+       pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+       pr_err("\nPLB4-to-PLB6 Bridge:\n");
+       pr_err("ESR:  0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+       pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+       pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+       pr_err("\nPLB6-to-MCIF Bridge:\n");
+       pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+       pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+       pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+       pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+       pr_err("\nPLB4 Arbiter:\n");
+       pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+       pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+       pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+       pr_err("P0EARL 0x%08x\n", mfdcr(DCRN_PLB4_P0EARL));
+       pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+       pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+       pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+       pr_err("P1EARL 0x%08x\n", mfdcr(DCRN_PLB4_P1EARL));
+
+       show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+       show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+       show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+       show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+       pr_err("\nPLB4-to-AHB Bridge:\n");
+       pr_err("ESR:   0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+       pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+       pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+       pr_err("\nAHB-to-PLB4 Bridge:\n");
+       pr_err("ESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+       pr_err("EAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+       panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data)
+{
+       pr_err("CMU Error\n");
+       pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+       panic("CMU Error\n");
+}
+
+static irqreturn_t conf_err_handler(int irq, void *data)
+{
+       pr_err("Configuration Logic Error\n");
+       pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+       pr_err("RPERR0:   0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+       pr_err("RPERR1:   0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+       panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data)
+{
+       panic("OPBD Error\n");
+}
+
+static irqreturn_t mcue_handler(int irq, void *data)
+{
+       pr_err("DDR: Uncorrectable Error\n");
+       pr_err("MCSTAT:            0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+       pr_err("MCOPT1:            0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+       pr_err("MCOPT2:            0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+       pr_err("PHYSTAT:           0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+       pr_err("CFGR0:             0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+       pr_err("CFGR1:             0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+       pr_err("CFGR2:             0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+       pr_err("CFGR3:             0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+       pr_err("SCRUB_CNTL:        0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+       pr_err("ECCERR_PORT0:      0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+       pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+       pr_err("ECCERR_CNT_PORT0:  0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+       pr_err("ECC_CHECK_PORT0:   0x%08x\n",
+               mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+       pr_err("MCER0:            0x%08x\n",
+               mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+       pr_err("MCER1:            0x%08x\n",
+               mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+       pr_err("BESR:             0x%08x\n",
+               mfdcr(DCRN_PLB6MCIF_BESR0));
+       pr_err("BEARL:            0x%08x\n",
+               mfdcr(DCRN_PLB6MCIF_BEARL));
+       pr_err("BEARH:            0x%08x\n",
+               mfdcr(DCRN_PLB6MCIF_BEARH));
+       panic("DDR: Uncorrectable Error\n");
+}
+
+static irqreturn_t rst_wrn_handler(int irq, void *data)
+{
+       u32 crcs = mfcmu(CMUN_CRCS);
+
+       switch (crcs & CRCS_STAT_MASK) {
+       case CRCS_STAT_CHIP_RST_B:
+               panic("Received chassis-initiated reset request");
+       default:
+               panic("Unknown external reset: CRCS=0x%x", crcs);
+       }
+}
+
+static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+       struct device_node *np;
+       unsigned int irq;
+       int rc;
+
+       for_each_compatible_node(np, NULL, compat) {
+               irq = irq_of_parse_and_map(np, 0);
+               if (irq == NO_IRQ) {
+                       pr_err("device tree node %s is missing an interrupt\n",
+                             np->name);
+                       return;
+               }
+
+               rc = request_irq(irq, errirq_handler, 0, np->name, np);
+               if (rc) {
+                       pr_err("node_irq_request: request_irq failed: np=%s rc=%d\n",
+                             np->full_name, rc);
+                       return;
+               }
+       }
+}
+
+static void critical_irq_setup(void)
+{
+       node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+       node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+       node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+       node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+       node_irq_request(FSP2_MCUE, mcue_handler);
+       node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
 static int __init fsp2_device_probe(void)
 {
        of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
@@ -44,18 +243,76 @@ machine_device_initcall(fsp2, fsp2_device_probe);
 
 static int __init fsp2_probe(void)
 {
+       u32 val;
        unsigned long root = of_get_flat_dt_root();
 
        if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
                return 0;
+
+       /* Clear BC_ERR and mask snoopable request plb errors. */
+       val = mfdcr(DCRN_PLB6_CR0);
+       val |= 0x20000000;
+       mtdcr(DCRN_PLB6_CR0, val);
+       mtdcr(DCRN_PLB6_HD, 0xffff0000);
+       mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+       /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS
+        * sleep config bit. As a consequence, TVSENSE will provide erratic
+        * sensor values, which may result in spurious (parity) errors
+        * recorded in the CMU FIR and leading to erroneous interrupt requests
+        * once the CMU interrupt is unmasked.
+        */
+
+       /* 1. set TVS1[UNDOZE] */
+       val = mfcmu(CMUN_TVS1);
+       val |= 0x4;
+       mtcmu(CMUN_TVS1, val);
+
+       /* 2. clear FIR[TVS] and FIR[TVSPAR] */
+       val = mfcmu(CMUN_FIR0);
+       val |= 0x30000000;
+       mtcmu(CMUN_FIR0, val);
+
+       /* L2 machine checks */
+       mtl2(L2PLBMCKEN0, 0xffffffff);
+       mtl2(L2PLBMCKEN1, 0x0000ffff);
+       mtl2(L2ARRMCKEN0, 0xffffffff);
+       mtl2(L2ARRMCKEN1, 0xffffffff);
+       mtl2(L2ARRMCKEN2, 0xfffff000);
+       mtl2(L2CPUMCKEN,  0xffffffff);
+       mtl2(L2RACMCKEN0, 0xffffffff);
+       mtl2(L2WACMCKEN0, 0xffffffff);
+       mtl2(L2WACMCKEN1, 0xffffffff);
+       mtl2(L2WACMCKEN2, 0xffffffff);
+       mtl2(L2WDFMCKEN,  0xffffffff);
+
+       /* L2 interrupts */
+       mtl2(L2PLBINTEN1, 0xffff0000);
+
+       /*
+        * At a global level, enable all L2 machine checks and interrupts
+        * reported by the L2 subsystems, except for the external machine check
+        * input (UIC0.1).
+        */
+       mtl2(L2MCKEN, 0x000007ff);
+       mtl2(L2INTEN, 0x000004ff);
+
+       /* Enable FSP-2 configuration logic parity errors */
+       mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
        return 1;
 }
 
+static void __init fsp2_irq_init(void)
+{
+       uic_init_tree();
+       critical_irq_setup();
+}
+
 define_machine(fsp2) {
        .name                   = "FSP-2",
        .probe                  = fsp2_probe,
        .progress               = udbg_progress,
-       .init_IRQ               = uic_init_tree,
+       .init_IRQ               = fsp2_irq_init,
        .get_irq                = uic_get_irq,
        .restart                = ppc4xx_reset_system,
        .calibrate_decr         = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644 (file)
index 0000000..9e1d527
--- /dev/null
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+#define DCRN_CMU_ADDR          0x00C   /* Chip management unit addr */
+#define DCRN_CMU_DATA          0x00D   /* Chip management unit data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI         0x010   /* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR                0x011   /* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL       0x012   /* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH       0x013   /* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL       0x014   /* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH       0x015   /* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS      0x016   /* PLB0 Error Status Register Low Set */
+#define DCRN_PLB4_P0ESRHS      0x017   /* PLB0 Error Status Register High Set */
+#define DCRN_PLB4_PCBC         0x018   /* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR                0x019   /* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL       0x01A   /* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH       0x01B   /* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL       0x01C   /* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH       0x01D   /* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS      0x01E   /* PLB1 Error Status Register Low Set */
+#define DCRN_PLB4_P1ESRHS      0x01F   /* PLB1 Error Status Register High Set */
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE     0x020
+#define DCRN_PLB4OPB1_BASE     0x030
+#define DCRN_PLB4OPB2_BASE     0x040
+#define DCRN_PLB4OPB3_BASE     0x050
+
+#define PLB4OPB_GESR0          0x0     /* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR           0x2     /* Error Address Register */
+#define PLB4OPB_GEARU          0x3     /* Error Upper Address Register */
+#define PLB4OPB_GESR1          0x4     /* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2          0xC     /* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE      0x400
+#define DCRN_PLB4AHB_SEUAR     (DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR     (DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR       (DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR       (DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR       (DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE         0x11111300
+#define DCRN_PLB6_CR0          (DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR          (DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD           (DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD          (DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE     0x11111320
+#define DCRN_PLB4PLB6_ESR      (DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH     (DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL     (DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE     0x11111350
+#define DCRN_PLB6PLB4_ESR      (DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH     (DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL     (DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE     0x11111380
+#define DCRN_PLB6MCIF_BESR0    (DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1    (DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL    (DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH    (DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE         0x11111400
+#define DCRN_CONF_FIR_RWC      (DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS       (DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0       (DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1       (DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI          0x11111100
+#define DCRN_L2CDCRDI          0x11111104
+/* L2 indirect addresses */
+#define L2MCK          0x120
+#define L2MCKEN                0x130
+#define L2INT          0x150
+#define L2INTEN                0x160
+#define L2LOG0         0x180
+#define L2LOG1         0x184
+#define L2LOG2         0x188
+#define L2LOG3         0x18C
+#define L2LOG4         0x190
+#define L2LOG5         0x194
+#define L2PLBSTAT0     0x300
+#define L2PLBSTAT1     0x304
+#define L2PLBMCKEN0    0x330
+#define L2PLBMCKEN1    0x334
+#define L2PLBINTEN0    0x360
+#define L2PLBINTEN1    0x364
+#define L2ARRSTAT0     0x500
+#define L2ARRSTAT1     0x504
+#define L2ARRSTAT2     0x508
+#define L2ARRMCKEN0    0x530
+#define L2ARRMCKEN1    0x534
+#define L2ARRMCKEN2    0x538
+#define L2ARRINTEN0    0x560
+#define L2ARRINTEN1    0x564
+#define L2ARRINTEN2    0x568
+#define L2CPUSTAT      0x700
+#define L2CPUMCKEN     0x730
+#define L2CPUINTEN     0x760
+#define L2RACSTAT0     0x900
+#define L2RACMCKEN0    0x930
+#define L2RACINTEN0    0x960
+#define L2WACSTAT0     0xD00
+#define L2WACSTAT1     0xD04
+#define L2WACSTAT2     0xD08
+#define L2WACMCKEN0    0xD30
+#define L2WACMCKEN1    0xD34
+#define L2WACMCKEN2    0xD38
+#define L2WACINTEN0    0xD60
+#define L2WACINTEN1    0xD64
+#define L2WACINTEN2    0xD68
+#define L2WDFSTAT      0xF00
+#define L2WDFMCKEN     0xF30
+#define L2WDFINTEN     0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE                        0x11120000
+#define DCRN_DDR34_MCSTAT              0x10
+#define DCRN_DDR34_MCOPT1              0x20
+#define DCRN_DDR34_MCOPT2              0x21
+#define DCRN_DDR34_PHYSTAT             0x32
+#define DCRN_DDR34_CFGR0               0x40
+#define DCRN_DDR34_CFGR1               0x41
+#define DCRN_DDR34_CFGR2               0x42
+#define DCRN_DDR34_CFGR3               0x43
+#define DCRN_DDR34_SCRUB_CNTL          0xAA
+#define DCRN_DDR34_SCRUB_INT           0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR    0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR      0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0   0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1   0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2   0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3   0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0  0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1  0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2  0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3  0xE7
+#define DCRN_DDR34_ECCERR_PORT0                0xF0
+#define DCRN_DDR34_ECCERR_PORT1                0xF2
+#define DCRN_DDR34_ECCERR_PORT2                0xF4
+#define DCRN_DDR34_ECCERR_PORT3                0xF6
+#define DCRN_DDR34_ECC_CHECK_PORT0     0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1     0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT2     0xFA
+#define DCRN_DDR34_ECC_CHECK_PORT3     0xFB
+
+#define DDR34_SCRUB_CNTL_STOP          0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB         0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP       0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP       0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN       0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE                   0x11111800
+#define DCRN_CW_MCER0                  0x00
+#define DCRN_CW_MCER1                  0x01
+#define DCRN_CW_MCER_AND0              0x02
+#define DCRN_CW_MCER_AND1              0x03
+#define DCRN_CW_MCER_OR0               0x04
+#define DCRN_CW_MCER_OR1               0x05
+#define DCRN_CW_MCER_MASK0             0x06
+#define DCRN_CW_MCER_MASK1             0x07
+#define DCRN_CW_MCER_MASK_AND0         0x08
+#define DCRN_CW_MCER_MASK_AND1         0x09
+#define DCRN_CW_MCER_MASK_OR0          0x0A
+#define DCRN_CW_MCER_MASK_OR1          0x0B
+#define DCRN_CW_MCER_ACTION0           0x0C
+#define DCRN_CW_MCER_ACTION1           0x0D
+#define DCRN_CW_MCER_WOF0              0x0E
+#define DCRN_CW_MCER_WOF1              0x0F
+#define DCRN_CW_LFIR                   0x10
+#define DCRN_CW_LFIR_AND               0x11
+#define DCRN_CW_LFIR_OR                        0x12
+#define DCRN_CW_LFIR_MASK              0x13
+#define DCRN_CW_LFIR_MASK_AND          0x14
+#define DCRN_CW_LFIR_MASK_OR           0x15
+
+#define CW_MCER0_MEM_CE                        0x00020000
+/* CMU addresses */
+#define CMUN_CRCS              0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0          0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1          0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2          0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3          0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS          0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C           0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P           0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0               0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P           0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P           0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P           0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW            0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S             0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C             0x32 /* Code Load Status (Clear) */
+#define CMUN_URCR2_RS          0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C           0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0            0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1            0x36 /* Clock Enable 1 */
+#define CMUN_PCD0              0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1              0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0              0x39 /* Reset Timer */
+#define CMUN_TVS0              0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1              0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR              0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0              0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0              0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB             0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK         0xF0000000
+#define CRCS_STAT_POR          0x10000000
+#define CRCS_STAT_PHR          0x20000000
+#define CRCS_STAT_PCIE         0x30000000
+#define CRCS_STAT_CRCS_SYS     0x40000000
+#define CRCS_STAT_DBCR_SYS     0x50000000
+#define CRCS_STAT_HOST_SYS     0x60000000
+#define CRCS_STAT_CHIP_RST_B   0x70000000
+#define CRCS_STAT_CRCS_CHIP    0x80000000
+#define CRCS_STAT_DBCR_CHIP    0x90000000
+#define CRCS_STAT_HOST_CHIP    0xA0000000
+#define CRCS_STAT_PSI_CHIP     0xB0000000
+#define CRCS_STAT_CRCS_CORE    0xC0000000
+#define CRCS_STAT_DBCR_CORE    0xD0000000
+#define CRCS_STAT_HOST_CORE    0xE0000000
+#define CRCS_STAT_PCIE_HOT     0xF0000000
+#define CRCS_STAT_SELF_CORE    0x40000000
+#define CRCS_STAT_SELF_CHIP    0x50000000
+#define CRCS_WATCHE            0x08000000
+#define CRCS_CORE              0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP              0x02000000 /* Chip Reset */
+#define CRCS_SYS               0x01000000 /* System Reset */
+#define CRCS_WRCR              0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR             0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK             0x00000002 /* PLL Locked */
+
+#define mtcmu(reg, data)               \
+do {                                   \
+       mtdcr(DCRN_CMU_ADDR, reg);      \
+       mtdcr(DCRN_CMU_DATA, data);     \
+} while (0)
+
+#define mfcmu(reg) \
+       ({u32 data;                     \
+       mtdcr(DCRN_CMU_ADDR, reg);      \
+       data = mfdcr(DCRN_CMU_DATA);    \
+       data; })
+
+#define mtl2(reg, data)                        \
+do {                                   \
+       mtdcr(DCRN_L2CDCRAI, reg);      \
+       mtdcr(DCRN_L2CDCRDI, data);     \
+} while (0)
+
+#define mfl2(reg)                      \
+       ({u32 data;                     \
+       mtdcr(DCRN_L2CDCRAI, reg);      \
+       data = mfdcr(DCRN_L2CDCRDI);    \
+       data; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP_DCR_H_ */
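
Every CMU and L2 access in this platform code goes through the address/data
indirection these macro pairs encapsulate. A usage sketch mirroring the
TVS1[UNDOZE] sequence in fsp2_probe():

    static void demo_cmu_set_undoze(void)
    {
            u32 val = mfcmu(CMUN_TVS1);     /* latch address, read data */

            mtcmu(CMUN_TVS1, val | 0x4);    /* write back with UNDOZE set */
    }

The indirection itself is unlocked; that appears safe here presumably because
the callers run single-threaded at early boot or in critical-error handlers
that never return.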
index f99e79ee060e11169c3ef72522324fd23b5627c1..48abb4cb304c48562653807e9594a9ce17a7716b 100644 (file)
@@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
        if (fp)
                return *fp;
 
-       pr_warning("no %s property in %pOF node, defaulting to %d\n",
-                  prop_name, np, DEFAULT_FIFO_SIZE);
+       pr_warn("no %s property in %pOF node, defaulting to %d\n",
+               prop_name, np, DEFAULT_FIFO_SIZE);
 
        return DEFAULT_FIFO_SIZE;
 }
index 9e974b1e169727726a86937a72e222b93710a7b0..17cf249b18eea9b6e78644c6aa3be3e6631baa6a 100644 (file)
@@ -90,7 +90,7 @@ struct mpc52xx_gpt_priv {
        struct list_head list;          /* List of all GPT devices */
        struct device *dev;
        struct mpc52xx_gpt __iomem *regs;
-       spinlock_t lock;
+       raw_spinlock_t lock;
        struct irq_domain *irqhost;
        u32 ipb_freq;
        u8 wdt_mode;
@@ -141,9 +141,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
        struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
        unsigned long flags;
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 }
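
The raw_spinlock_t conversion matters on PREEMPT_RT, where plain spinlock_t
becomes a sleeping lock: irq_chip callbacks like this unmask hook run with
interrupts hard-disabled, so the lock guarding the mode register must remain
a true spinning lock. The general pattern (sketch with illustrative names):

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_hw_lock);
    static u32 demo_mode_reg;       /* stand-in for a device register */

    /* Safe from hard-irq context even on PREEMPT_RT: raw spinlocks are
     * never substituted with sleeping rt-mutexes. */
    static void demo_irq_unmask(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&demo_hw_lock, flags);
            demo_mode_reg |= 0x1;   /* set the IRQ-enable bit */
            raw_spin_unlock_irqrestore(&demo_hw_lock, flags);
    }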
 
 static void mpc52xx_gpt_irq_mask(struct irq_data *d)
@@ -151,9 +151,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d)
        struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
        unsigned long flags;
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 }
 
 static void mpc52xx_gpt_irq_ack(struct irq_data *d)
@@ -171,14 +171,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
 
        dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK;
        if (flow_type & IRQF_TRIGGER_RISING)
                reg |= MPC52xx_GPT_MODE_ICT_RISING;
        if (flow_type & IRQF_TRIGGER_FALLING)
                reg |= MPC52xx_GPT_MODE_ICT_FALLING;
        out_be32(&gpt->regs->mode, reg);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 
        return 0;
 }
@@ -264,11 +264,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
        /* If the GPT is currently disabled, then change it to be in Input
         * Capture mode.  If the mode is non-zero, then the pin could be
         * already in use for something. */
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        mode = in_be32(&gpt->regs->mode);
        if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
                out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 
        dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
 }
@@ -295,9 +295,9 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
        dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
        r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW;
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 }
 
 static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -307,9 +307,9 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 
        dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 
        return 0;
 }
@@ -436,16 +436,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
        }
 
        /* Set and enable the timer, reject an attempt to use a wdt as gpt */
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        if (as_wdt)
                gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
        else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
-               spin_unlock_irqrestore(&gpt->lock, flags);
+               raw_spin_unlock_irqrestore(&gpt->lock, flags);
                return -EBUSY;
        }
        out_be32(&gpt->regs->count, prescale << 16 | clocks);
        clrsetbits_be32(&gpt->regs->mode, clear, set);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 
        return 0;
 }
@@ -476,14 +476,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
        unsigned long flags;
 
        /* reject the operation if the timer is used as watchdog (gpt 0 only) */
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
-               spin_unlock_irqrestore(&gpt->lock, flags);
+               raw_spin_unlock_irqrestore(&gpt->lock, flags);
                return -EBUSY;
        }
 
        clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
        return 0;
 }
 EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
@@ -500,9 +500,9 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
        u64 prescale;
        unsigned long flags;
 
-       spin_lock_irqsave(&gpt->lock, flags);
+       raw_spin_lock_irqsave(&gpt->lock, flags);
        period = in_be32(&gpt->regs->count);
-       spin_unlock_irqrestore(&gpt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt->lock, flags);
 
        prescale = period >> 16;
        period &= 0xffff;
@@ -532,9 +532,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&gpt_wdt->lock, flags);
+       raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
        out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
-       spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
 }
 
 /* wdt misc device api */
@@ -638,11 +638,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
        struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
        unsigned long flags;
 
-       spin_lock_irqsave(&gpt_wdt->lock, flags);
+       raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
        clrbits32(&gpt_wdt->regs->mode,
                  MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
        gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
-       spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+       raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
 #endif
        clear_bit(0, &wdt_is_active);
        return 0;
@@ -723,7 +723,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
        if (!gpt)
                return -ENOMEM;
 
-       spin_lock_init(&gpt->lock);
+       raw_spin_lock_init(&gpt->lock);
        gpt->dev = &ofdev->dev;
        gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
        gpt->regs = of_iomap(ofdev->dev.of_node, 0);
index 96bb55ca61d39884b56550112791e9e6ac0dd7f5..d2ef39f0edc8fb05e384b706fc5405a90662796d 100644 (file)
@@ -84,7 +84,7 @@ static ssize_t show_status(struct device *d,
 
        return sprintf(buf, "%02x\n", ret);
 }
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
 
 static void mcu_power_off(void)
 {
index bb7b25acf26ffd800d0ead5173c70031b5a488d2..74c154e67c8bbf82a627a84074febbafea42eeb4 100644 (file)
@@ -75,7 +75,7 @@ static void __init mpc832x_sys_setup_arch(void)
                par_io_init(np);
                of_node_put(np);
 
-               for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+               for_each_node_by_name(np, "ucc")
                        par_io_of_config(np);
        }
 
index a4539c5accb0bc68f29aa814d5845eb24a13c241..43898659387371b1db60a6d4372ea3c0ffbb1f6f 100644 (file)
@@ -204,7 +204,7 @@ static void __init mpc832x_rdb_setup_arch(void)
                par_io_init(np);
                of_node_put(np);
 
-               for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+               for_each_node_by_name(np, "ucc")
                        par_io_of_config(np);
        }
 #endif                         /* CONFIG_QUICC_ENGINE */
index 4fc3051c2b2eee0bd4c463dec489dcab21d8f099..fd44dd03e1f3a4f90bf0efc3f81565c527dec204 100644 (file)
@@ -83,7 +83,7 @@ static void __init mpc836x_mds_setup_arch(void)
                par_io_init(np);
                of_node_put(np);
 
-               for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+               for_each_node_by_name(np, "ucc")
                        par_io_of_config(np);
 #ifdef CONFIG_QE_USB
                /* Must fixup Par IO before QE GPIO chips are registered. */
index 82f8490b5aa7d9d5494394f9a426243f56881cb8..38d4ba9f37b58acd0b7dd377859a29d238d2baa6 100644 (file)
@@ -252,8 +252,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
                /* type is configurable */
                if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
                    intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
-                       pr_warning("FPGA PIC: invalid irq type, "
-                                  "setting default active low\n");
+                       pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
                        *out_flags = IRQ_TYPE_LEVEL_LOW;
                } else {
                        *out_flags = intspec[1];
@@ -267,7 +266,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
        if (intspec[2] <= 2)
                fpga_irq->irq_line = intspec[2];
        else
-               pr_warning("FPGA PIC: invalid irq routing\n");
+               pr_warn("FPGA PIC: invalid irq routing\n");
 
        return 0;
 }
@@ -293,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic)
        for (i = 0; i < 3; i++) {
                socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
                if (!socrates_fpga_irqs[i]) {
-                       pr_warning("FPGA PIC: can't get irq%d.\n", i);
+                       pr_warn("FPGA PIC: can't get irq%d\n", i);
                        continue;
                }
                irq_set_chained_handler(socrates_fpga_irqs[i],
index a0e989ed4b6f425036e880d886b6a15476bf9e85..17c6cd3d02e6727bb13abb419ef13d72661d749d 100644 (file)
@@ -101,7 +101,7 @@ static int __init mpc86xx_hpcn_probe(void)
 
        /* Be nice and don't give silent boot death.  Delete this in 2.6.27 */
        if (of_machine_is_compatible("mpc86xx")) {
-               pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n");
+               pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n");
                return 1;
        }
 
index e2089d3de00c8dc82c7e4f1cf6e0b33e8af29adb..d408162d5af451472caf14fee7d641104084c2a5 100644 (file)
@@ -116,18 +116,6 @@ config 8xx_GPIO
 
          If in doubt, say Y here.
 
-config 8xx_CPU6
-       bool "CPU6 Silicon Errata (860 Pre Rev. C)"
-       help
-         MPC860 CPUs, prior to Rev C have some bugs in the silicon, which
-         require workarounds for Linux (and most other OSes to work).  If you
-         get a BUG() very early in boot, this might fix the problem.  For
-         more details read the document entitled "MPC860 Family Device Errata
-         Reference" on Freescale's website.  This option also incurs a
-         performance hit.
-
-         If in doubt, say N here.
-
 config 8xx_CPU15
        bool "CPU15 Silicon Errata"
        depends on !HUGETLB_PAGE
index 5a96a2763e4ae176cc5c6215db5da793f2fb81d5..14ef17e10ec9a2af2330ffd551e2ca778dad1ecb 100644 (file)
@@ -293,17 +293,6 @@ config CPM2
          you wish to build a kernel for a machine with a CPM2 coprocessor
          on it (826x, 827x, 8560).
 
-config AXON_RAM
-       tristate "Axon DDR2 memory device driver"
-       depends on PPC_IBM_CELL_BLADE && BLOCK
-       select DAX
-       default m
-       help
-         It registers one block device per Axon's DDR2 memory bank found
-         on a system. Block devices are called axonram?, their major and
-         minor numbers are available in /proc/devices, /proc/partitions or
-         in /sys/block/axonram?/dev.
-
 config FSL_ULI1575
        bool
        default n
index ae07470fde3c92bcf8d5c0b9da8e7abe8467e1cc..a429d859f15d7ae04635453b0089131a596a56fc 100644 (file)
@@ -33,7 +33,6 @@ config PPC_85xx
 config PPC_8xx
        bool "Freescale 8xx"
        select FSL_SOC
-       select PPC_LIB_RHEAP
        select SYS_SUPPORTS_HUGETLBFS
 
 config 40x
@@ -168,13 +167,6 @@ config PPC_FPU
        bool
        default y if PPC64
 
-config PPC_8xx_PERF_EVENT
-       bool "PPC 8xx perf events"
-       depends on PPC_8xx && PERF_EVENTS
-       help
-         This is Performance Events support for PPC 8xx. The 8xx doesn't
-         have a PMU but some events are emulated using 8xx features.
-
 config FSL_EMB_PERFMON
        bool "Freescale Embedded Perfmon"
        depends on E500 || PPC_83xx
index 6fc85e29dc082a5a00a79298dc768acf9b99f0bd..5d4bf9aed51ae74da8f4e28763917acd8cba885c 100644 (file)
@@ -315,8 +315,7 @@ static int __init setup_iic(void)
        struct cbe_iic_regs __iomem *node_iic;
        const u32 *np;
 
-       for (dn = NULL;
-            (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+       for_each_node_by_name(dn, "interrupt-controller") {
                if (!of_device_is_compatible(dn,
                                     "IBM,CBEA-Internal-Interrupt-Controller"))
                        continue;
index d3543e68efe80afde410f40665bb17af61a4e5f7..7d31b8d146613e7c0b66919357c9ae9533836a00 100644 (file)
@@ -192,8 +192,7 @@ static void __init mpic_init_IRQ(void)
        struct device_node *dn;
        struct mpic *mpic;
 
-       for (dn = NULL;
-            (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+       for_each_node_by_name(dn, "interrupt-controller") {
                if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
                        continue;
 
index aa44bfc464677a3c3718d0f9e5febfb55b90a38a..c137f0cb4151757410d584d53c69c4a68dd2f369 100644 (file)
@@ -343,8 +343,7 @@ void __init spider_init_IRQ(void)
         * device-tree is bogus anyway) so all we can do is pray or maybe test
         * the address and deduce the node-id
         */
-       for (dn = NULL;
-            (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+       for_each_node_by_name(dn, "interrupt-controller") {
                if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
                        if (of_address_to_resource(dn, 0, &r)) {
                                printk(KERN_WARNING "spider-pic: Failed\n");
index f636ee22b20358097e6628c6e8ca7772d53ae9d2..5c409c98cca8a2da3b5e39881319f4c6b7c66de7 100644 (file)
@@ -292,12 +292,12 @@ static int __init of_enumerate_spus(int (*fn)(void *data))
        unsigned int n = 0;
 
        ret = -ENODEV;
-       for (node = of_find_node_by_type(NULL, "spe");
-                       node; node = of_find_node_by_type(node, "spe")) {
+       for_each_node_by_type(node, "spe") {
                ret = fn(node);
                if (ret) {
                        printk(KERN_WARNING "%s: Error initializing %s\n",
                                __func__, node->name);
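+                       /* drop the reference the iterator holds before breaking out */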
+                       of_node_put(node);
                        break;
                }
                n++;
index fc7772c3d068b489017a1c41dee5ccae8b8a94cb..c1be486da8993fc828e09d0ea678790c47994ecf 100644 (file)
@@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
 
        p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
 
-       return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
-                       (unsigned int) p->tstamp.tv_sec,
+       return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+                       (unsigned long long) p->tstamp.tv_sec,
                        (unsigned int) p->tstamp.tv_nsec,
                        p->spu_id,
                        (unsigned int) p->type,
@@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
                struct switch_log_entry *p;
 
                p = ctx->switch_log->log + ctx->switch_log->head;
-               ktime_get_ts(&p->tstamp);
+               ktime_get_ts64(&p->tstamp);
                p->timebase = get_tb();
                p->spu_id = spu ? spu->number : -1;
                p->type = type;
index 9558d725a99b57347ed8d7d106536360e806210e..db329d4bf1c30610836bedf072c49cc40edf0d00 100644 (file)
@@ -455,7 +455,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
                }
        }
 
-       ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+       ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
        if (ret)
                goto out_aff_unlock;
 
@@ -546,7 +546,7 @@ static int spufs_create_gang(struct inode *inode,
        struct path path = {.mnt = mnt, .dentry = dentry};
        int ret;
 
-       ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+       ret = spufs_mkgang(inode, dentry, mode & 0777);
        if (!ret) {
                ret = spufs_gang_open(&path);
                if (ret < 0) {
index 2d0479ad3af47894899f0a0f55ca03f51041faed..b5fc1b3fe53868f0ffee14f4fd02b7239981336f 100644 (file)
@@ -69,7 +69,7 @@ struct switch_log {
        unsigned long           head;
        unsigned long           tail;
        struct switch_log_entry {
-               struct timespec tstamp;
+               struct timespec64 tstamp;
                s32             spu_id;
                u32             type;
                u32             val;
index aafa01ba062f80f904a44fa8e7fd7db93763aed0..2c72263ad6abcda28cfb2e3dd0c491114175b999 100644 (file)
@@ -589,7 +589,7 @@ int pasemi_dma_init(void)
        pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0);
        while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) {
                if (time_after(jiffies, timeout)) {
-                       pr_warning("Warning: Could not disable RX section\n");
+                       pr_warn("Warning: Could not disable RX section\n");
                        break;
                }
        }
@@ -598,7 +598,7 @@ int pasemi_dma_init(void)
        pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0);
        while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) {
                if (time_after(jiffies, timeout)) {
-                       pr_warning("Warning: Could not disable TX section\n");
+                       pr_warn("Warning: Could not disable TX section\n");
                        break;
                }
        }
index a00096b1c713429be3cbe4a512e83cd66fcf6ee9..6b5dcccae1d30b6d5fc94b5c385efca66e61957b 100644 (file)
@@ -186,7 +186,7 @@ int pmac_backlight_set_legacy_brightness(int brightness)
        return __pmac_backlight_set_legacy_brightness(brightness);
 }
 
-int pmac_backlight_get_legacy_brightness()
+int pmac_backlight_get_legacy_brightness(void)
 {
        int result = -ENXIO;
 
@@ -205,12 +205,12 @@ int pmac_backlight_get_legacy_brightness()
        return result;
 }
 
-void pmac_backlight_disable()
+void pmac_backlight_disable(void)
 {
        atomic_inc(&kernel_backlight_disabled);
 }
 
-void pmac_backlight_enable()
+void pmac_backlight_enable(void)
 {
        atomic_dec(&kernel_backlight_disabled);
 }
index 9e3f39d36e88ff32dfd2c7585ef635ec2daf5e2d..466b842346831947e9af8480694361859e6fd150 100644 (file)
@@ -2641,7 +2641,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
        phys_addr_t             addr;
        u64                     size;
 
-       for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) {
+       for_each_node_by_name(node, name) {
                if (!compat)
                        break;
                if (of_device_is_compatible(node, compat))
@@ -2853,7 +2853,6 @@ set_initial_features(void)
                }
 
                /* Enable ATA-100 before PCI probe. */
-               np = of_find_node_by_name(NULL, "ata-6");
                for_each_node_by_name(np, "ata-6") {
                        if (np->parent
                            && of_device_is_compatible(np->parent, "uni-north")
index 5e0719b2729488feff62f46ce1b03e6295b090e9..57bbff4659645c8edddc64e95a6674c0a33ac87b 100644 (file)
@@ -486,15 +486,16 @@ static int __init pmac_pic_probe_mpic(void)
        struct device_node *np, *master = NULL, *slave = NULL;
 
        /* We can have up to 2 MPICs cascaded */
-       for (np = NULL; (np = of_find_node_by_type(np, "open-pic"))
-                    != NULL;) {
+       for_each_node_by_type(np, "open-pic") {
                if (master == NULL &&
                    of_get_property(np, "interrupts", NULL) == NULL)
                        master = of_node_get(np);
                else if (slave == NULL)
                        slave = of_node_get(np);
-               if (master && slave)
+               if (master && slave) {
+                       of_node_put(np);
                        break;
+               }
        }
 
        /* Check for bogus setups */
@@ -604,6 +605,7 @@ static int pmacpic_find_viaint(void)
        if (np == NULL)
                goto not_found;
        viaint = irq_of_parse_and_map(np, 0);
+       of_node_put(np);
 
 not_found:
 #endif /* CONFIG_ADB_PMU */
index 2cd99eb307621fffa044adddc66361b86f84ec89..95275e0e2efa531677de320396301ae838666390 100644 (file)
@@ -774,8 +774,8 @@ static void __init smp_core99_probe(void)
        if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
 
        /* Count CPUs in the device-tree */
-               for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;)
-               ++ncpus;
+       for_each_node_by_type(cpus, "cpu")
+               ++ncpus;
 
        printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus);
 
index 3732118a04825f2d0b47cd7d658999eaf4c00314..6c9d5199a7e2a17f2f291572e88311fbbaf56713 100644 (file)
@@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)     += memtrace.o
 obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o
 obj-$(CONFIG_PPC_FTW)  += nx-ftw.o
+obj-$(CONFIG_OCXL_BASE)        += ocxl.o
index 4650fb294e7a5391187b23763b66a83283793f7b..33c86c1a17204445a231828c0b7467335a823098 100644 (file)
 
 static int eeh_event_irq = -EINVAL;
 
+void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+       struct pci_dn *pdn = pci_get_pdn(pdev);
+
+       if (!pdev->is_virtfn)
+               return;
+
+       /*
+        * The following operations will fail if VF's sysfs files
+        * aren't created or its resources aren't finalized.
+        */
+       eeh_add_device_early(pdn);
+       eeh_add_device_late(pdev);
+       eeh_sysfs_add_device(pdev);
+}
+
 static int pnv_eeh_init(void)
 {
        struct pci_controller *hose;
@@ -86,6 +102,7 @@ static int pnv_eeh_init(void)
        }
 
        eeh_set_pe_aux_size(max_diag_size);
+       ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
 
        return 0;
 }
@@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
        return ret;
 }
 
-static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
-{
-       struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-       u32 devctl, cmd, cap2, aer_capctl;
-       int old_mps;
-
-       if (edev->pcie_cap) {
-               /* Restore MPS */
-               old_mps = (ffs(pdn->mps) - 8) << 5;
-               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-                                    2, &devctl);
-               devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
-               devctl |= old_mps;
-               eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-                                     2, devctl);
-
-               /* Disable Completion Timeout */
-               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
-                                    4, &cap2);
-               if (cap2 & 0x10) {
-                       eeh_ops->read_config(pdn,
-                                            edev->pcie_cap + PCI_EXP_DEVCTL2,
-                                            4, &cap2);
-                       cap2 |= 0x10;
-                       eeh_ops->write_config(pdn,
-                                             edev->pcie_cap + PCI_EXP_DEVCTL2,
-                                             4, cap2);
-               }
-       }
-
-       /* Enable SERR and parity checking */
-       eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
-       cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
-       eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
-       /* Enable report various errors */
-       if (edev->pcie_cap) {
-               eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-                                    2, &devctl);
-               devctl &= ~PCI_EXP_DEVCTL_CERE;
-               devctl |= (PCI_EXP_DEVCTL_NFERE |
-                          PCI_EXP_DEVCTL_FERE |
-                          PCI_EXP_DEVCTL_URRE);
-               eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-                                     2, devctl);
-       }
-
-       /* Enable ECRC generation and check */
-       if (edev->pcie_cap && edev->aer_cap) {
-               eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
-                                    4, &aer_capctl);
-               aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
-               eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
-                                     4, aer_capctl);
-       }
-
-       return 0;
-}
-
 static int pnv_eeh_restore_config(struct pci_dn *pdn)
 {
        struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
        struct pnv_phb *phb;
-       s64 ret;
+       s64 ret = 0;
        int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
        if (!edev)
@@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
         * to be exported by firmware in extendible way.
         */
        if (edev->physfn) {
-               ret = pnv_eeh_restore_vf_config(pdn);
+               ret = eeh_restore_vf_config(pdn);
        } else {
                phb = pdn->phb->private_data;
                ret = opal_pci_reinit(phb->opal_id,
@@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
                return -EIO;
        }
 
-       return 0;
+       return ret;
 }
 
 static struct eeh_ops pnv_eeh_ops = {
@@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = {
        .read_config            = pnv_eeh_read_config,
        .write_config           = pnv_eeh_write_config,
        .next_error             = pnv_eeh_next_error,
-       .restore_config         = pnv_eeh_restore_config
+       .restore_config         = pnv_eeh_restore_config,
+       .notify_resume          = NULL
 };
 
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
-       struct pci_dn *pdn = pci_get_pdn(pdev);
-
-       if (!pdev->is_virtfn)
-               return;
-
-       /*
-        * The following operations will fail if VF's sysfs files
-        * aren't created or its resources aren't finalized.
-        */
-       eeh_add_device_early(pdn);
-       eeh_add_device_late(pdev);
-       eeh_sysfs_add_device(pdev);
-}
-
 #ifdef CONFIG_PCI_IOV
 static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
 {
index f6cbc1a7147242393182785459d4f5fafbb40724..0a253b64ac5fed049f81082183a35ef6e0fdf76b 100644 (file)
  */
 static struct pci_dev *get_pci_dev(struct device_node *dn)
 {
-       return PCI_DN(dn)->pcidev;
+       struct pci_dn *pdn = PCI_DN(dn);
+
+       return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
+                                          pdn->busno, pdn->devfn);
 }
 
 /* Given a NPU device get the associated PCI device. */
@@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
        int64_t rc = 0;
        phys_addr_t top = memblock_end_of_DRAM();
 
-       if (phb->type != PNV_PHB_NPU || !npe->pdev)
+       if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
                return -EINVAL;
 
        rc = pnv_npu_unset_window(npe, 0);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644 (file)
index 0000000..fa9b53a
--- /dev/null
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <asm/xive.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP                0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX             64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS            15
+#define PNV_OCXL_PASID_MAX             ((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1 << 31)
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+struct actag_range {
+       u16 start;
+       u16 count;
+};
+
+struct npu_link {
+       struct list_head list;
+       int domain;
+       int bus;
+       int dev;
+       u16 fn_desired_actags[8];
+       struct actag_range fn_actags[8];
+       bool assignment_done;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * of the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and adding them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of the same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+       int vsec = pos;
+       u16 vendor, id;
+
+       while ((vsec = pci_find_next_ext_capability(dev, vsec,
+                                                   OCXL_EXT_CAP_ID_DVSEC))) {
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+                               &vendor);
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+               if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+                       return vsec;
+       }
+       return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+       int vsec = 0;
+       u8 idx;
+
+       while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
+                                          vsec))) {
+               pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+                               &idx);
+               if (idx == afu_idx)
+                       return vsec;
+       }
+       return 0;
+}
+
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+       int pos;
+       u32 val;
+
+       pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
+       if (!pos)
+               return -ESRCH;
+
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+       if (val & AFU_PRESENT)
+               *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+       else
+               *afu_idx = -1;
+       return 0;
+}
+
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+       int pos;
+       u16 actag_sup;
+
+       pos = find_dvsec_afu_ctrl(dev, afu_idx);
+       if (!pos)
+               return -ESRCH;
+
+       pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+                       &actag_sup);
+       *actag = actag_sup & ACTAG_MASK;
+       return 0;
+}
+
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+       struct npu_link *link;
+
+       list_for_each_entry(link, &links_list, list) {
+               /* The functions of a device all share the same link */
+               if (link->domain == pci_domain_nr(dev->bus) &&
+                       link->bus == dev->bus->number &&
+                       link->dev == PCI_SLOT(dev->devfn)) {
+                       return link;
+               }
+       }
+
+       /* link doesn't exist yet. Allocate one */
+       link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
+       if (!link)
+               return NULL;
+       link->domain = pci_domain_nr(dev->bus);
+       link->bus = dev->bus->number;
+       link->dev = PCI_SLOT(dev->devfn);
+       list_add(&link->list, &links_list);
+       return link;
+}
+
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+       struct pnv_phb *phb = hose->private_data;
+       struct npu_link *link;
+       int rc, afu_idx = -1, i, actag;
+
+       if (!machine_is(powernv))
+               return;
+
+       if (phb->type != PNV_PHB_NPU_OCAPI)
+               return;
+
+       mutex_lock(&links_list_lock);
+
+       link = find_link(dev);
+       if (!link) {
+               dev_warn(&dev->dev, "couldn't update actag information\n");
+               mutex_unlock(&links_list_lock);
+               return;
+       }
+
+       /*
+        * Check how many actags are desired for the AFUs under that
+        * function and add it to the count for the link
+        */
+       rc = get_max_afu_index(dev, &afu_idx);
+       if (rc) {
+               /* Most likely an invalid config space */
+               dev_dbg(&dev->dev, "couldn't find AFU information\n");
+               afu_idx = -1;
+       }
+
+       link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
+       for (i = 0; i <= afu_idx; i++) {
+               /*
+                * AFU index 'holes' are allowed. So don't fail if we
+                * can't read the actag info for an index
+                */
+               rc = get_actag_count(dev, i, &actag);
+               if (rc)
+                       continue;
+               link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
+       }
+       dev_dbg(&dev->dev, "total actags for function: %d\n",
+               link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
+
+       mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+       u16 count;
+
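+       /*
+        * Worked example: if the device's functions together desire 80
+        * actags, a function desiring 50 is granted 64 * 50 / 80 = 40.
+        */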
+       if (total <= PNV_OCXL_ACTAG_MAX)
+               count = desired;
+       else
+               count = PNV_OCXL_ACTAG_MAX * desired / total;
+
+       return count;
+}
+
+static void assign_actags(struct npu_link *link)
+{
+       u16 actag_count, range_start = 0, total_desired = 0;
+       int i;
+
+       for (i = 0; i < 8; i++)
+               total_desired += link->fn_desired_actags[i];
+
+       for (i = 0; i < 8; i++) {
+               if (link->fn_desired_actags[i]) {
+                       actag_count = assign_fn_actags(
+                               link->fn_desired_actags[i],
+                               total_desired);
+                       link->fn_actags[i].start = range_start;
+                       link->fn_actags[i].count = actag_count;
+                       range_start += actag_count;
+                       WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
+               }
+               pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+                       link->domain, link->bus, link->dev, i,
+                       link->fn_actags[i].start, link->fn_actags[i].count,
+                       link->fn_desired_actags[i]);
+       }
+       link->assignment_done = true;
+}
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+               u16 *supported)
+{
+       struct npu_link *link;
+
+       mutex_lock(&links_list_lock);
+
+       link = find_link(dev);
+       if (!link) {
+               dev_err(&dev->dev, "actag information not found\n");
+               mutex_unlock(&links_list_lock);
+               return -ENODEV;
+       }
+       /*
+        * On p9, we only have 64 actags per link, so they must be
+        * shared by all the functions of the same adapter. We counted
+        * the desired actag counts during PCI enumeration, so that we
+        * can allocate a pro-rated number of actags to each function.
+        */
+       if (!link->assignment_done)
+               assign_actags(link);
+
+       *base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
+       *enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
+       *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
+
+       mutex_unlock(&links_list_lock);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+       struct npu_link *link;
+       int i, rc = -EINVAL;
+
+       /*
+        * The number of PASIDs (process address space IDs) which can
+        * be used by a function depends on how many functions exist
+        * on the device. The NPU needs to be configured to know how
+        * many bits are available to PASIDs and how many are to be
+        * used by the function BDF identifier.
+        *
+        * We only support one AFU-carrying function for now.
+        */
+       mutex_lock(&links_list_lock);
+
+       link = find_link(dev);
+       if (!link) {
+               dev_err(&dev->dev, "actag information not found\n");
+               mutex_unlock(&links_list_lock);
+               return -ENODEV;
+       }
+
+       for (i = 0; i < 8; i++)
+               if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
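+                       /* i.e. 2^15 - 1 = 32767, given the 15-bit limit above */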
+                       *count = PNV_OCXL_PASID_MAX;
+                       rc = 0;
+                       break;
+               }
+
+       mutex_unlock(&links_list_lock);
+       dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+               rc ? 0 : *count);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+       int shift, idx;
+
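+       /*
+        * Two template rates per byte, 4 bits each, highest-numbered
+        * template first. Assuming PNV_OCXL_TL_MAX_TEMPLATE is 63, as
+        * defined in pnv-ocxl.h, template 2 lands in the low nibble of
+        * buf[(63 - 2) / 2] = buf[30].
+        */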
+       WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
+       idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
+       shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
+       buf[idx] |= rate << shift;
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+                       char *rate_buf, int rate_buf_size)
+{
+       if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+               return -EINVAL;
+       /*
+        * The TL capabilities are a characteristic of the NPU, so
+        * we go with hard-coded values.
+        *
+        * The receiving rate of each template is encoded on 4 bits.
+        *
+        * On P9:
+        * - templates 0 -> 3 are supported
+        * - templates 0, 1 and 3 have a 0 receiving rate
+        * - template 2 has receiving rate of 1 (extra cycle)
+        */
+       memset(rate_buf, 0, rate_buf_size);
+       set_templ_rate(2, 1, rate_buf);
+       *cap = PNV_OCXL_TL_P9_RECV_CAP;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+                       uint64_t rate_buf_phys, int rate_buf_size)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+       struct pnv_phb *phb = hose->private_data;
+       int rc;
+
+       if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+               return -EINVAL;
+
+       rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+                       rate_buf_phys, rate_buf_size);
+       if (rc) {
+               dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
+               return -EINVAL;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+       int rc;
+
+       rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
+       if (rc) {
+               dev_err(&dev->dev,
+                       "Can't get translation interrupt for device\n");
+               return rc;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+                       void __iomem *tfc, void __iomem *pe_handle)
+{
+       iounmap(dsisr);
+       iounmap(dar);
+       iounmap(tfc);
+       iounmap(pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+                       void __iomem **dar, void __iomem **tfc,
+                       void __iomem **pe_handle)
+{
+       u64 reg;
+       int i, j, rc = 0;
+       void __iomem *regs[4];
+
+       /*
+        * opal stores the mmio addresses of the DSISR, DAR, TFC and
+        * PE_HANDLE registers in a device tree property, in that
+        * order
+        */
+       for (i = 0; i < 4; i++) {
+               rc = of_property_read_u64_index(dev->dev.of_node,
+                                               "ibm,opal-xsl-mmio", i, &reg);
+               if (rc)
+                       break;
+               regs[i] = ioremap(reg, 8);
+               if (!regs[i]) {
+                       rc = -EINVAL;
+                       break;
+               }
+       }
+       if (rc) {
+               dev_err(&dev->dev, "Can't map translation mmio registers\n");
+               for (j = i - 1; j >= 0; j--)
+                       iounmap(regs[j]);
+       } else {
+               *dsisr = regs[0];
+               *dar = regs[1];
+               *tfc = regs[2];
+               *pe_handle = regs[3];
+       }
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data {
+       u64 phb_opal_id;
+       u32 bdfn;
+};
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+               void **platform_data)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+       struct pnv_phb *phb = hose->private_data;
+       struct spa_data *data;
+       u32 bdfn;
+       int rc;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       bdfn = (dev->bus->number << 8) | dev->devfn;
+       rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
+                               PE_mask);
+       if (rc) {
+               dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+               kfree(data);
+               return rc;
+       }
+       data->phb_opal_id = phb->opal_id;
+       data->bdfn = bdfn;
+       *platform_data = (void *) data;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+void pnv_ocxl_spa_release(void *platform_data)
+{
+       struct spa_data *data = (struct spa_data *) platform_data;
+       int rc;
+
+       rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
+       WARN_ON(rc);
+       kfree(data);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
+{
+       struct spa_data *data = (struct spa_data *) platform_data;
+       int rc;
+
+       rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
+
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
+{
+       __be64 flags, trigger_page;
+       s64 rc;
+       u32 hwirq;
+
+       hwirq = xive_native_alloc_irq();
+       if (!hwirq)
+               return -ENOENT;
+
+       rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
+                               NULL);
+       if (rc || !trigger_page) {
+               xive_native_free_irq(hwirq);
+               return -ENOENT;
+       }
+       *irq = hwirq;
+       *trigger_addr = be64_to_cpu(trigger_page);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
+
+void pnv_ocxl_free_xive_irq(u32 irq)
+{
+       xive_native_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
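As a rough usage sketch (not part of this patch): a hypothetical ocxl driver
probe path could consume the actag and PASID exports above as follows. Only
the pnv_ocxl_* signatures are taken from this file; the caller, its name and
its error handling are invented for illustration:

	/* hypothetical consumer of pnv_ocxl_get_actag()/get_pasid_count() */
	static int example_probe(struct pci_dev *dev)
	{
		u16 base, enabled, supported;
		int pasid_count, rc;

		rc = pnv_ocxl_get_actag(dev, &base, &enabled, &supported);
		if (rc)
			return rc;

		rc = pnv_ocxl_get_pasid_count(dev, &pasid_count);
		if (rc)
			return rc;

		/* this function may use actags base .. base + enabled - 1 */
		dev_dbg(&dev->dev, "actags %u..%u, %d PASIDs\n",
			base, base + enabled - 1, pasid_count);
		return 0;
	}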
index 4c827826c05eb13b09f4e0cd9f9c73d6967ec9a9..0dc8fa4e0af2a9d0015c3e8ad654e9280454e5b1 100644 (file)
@@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
  * due to the dynamic size of the dump
  */
 static struct dump_attribute id_attribute =
-       __ATTR(id, S_IRUGO, dump_id_show, NULL);
+       __ATTR(id, 0444, dump_id_show, NULL);
 static struct dump_attribute type_attribute =
-       __ATTR(type, S_IRUGO, dump_type_show, NULL);
+       __ATTR(type, 0444, dump_type_show, NULL);
 static struct dump_attribute ack_attribute =
        __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
 
index ecd6d9177d13f31e35f2ef4c67bdd8d9e47bffa3..ba6e437abb4bb96fa8f78f18517208bb570a08c6 100644 (file)
@@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
 }
 
 static struct elog_attribute id_attribute =
-       __ATTR(id, S_IRUGO, elog_id_show, NULL);
+       __ATTR(id, 0444, elog_id_show, NULL);
 static struct elog_attribute type_attribute =
-       __ATTR(type, S_IRUGO, elog_type_show, NULL);
+       __ATTR(type, 0444, elog_type_show, NULL);
 static struct elog_attribute ack_attribute =
        __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
 
index 465ea105b7710ecf0ff7320dcec8fd2b9775cf2e..dd4c9b8b8a81e6967b29061014918b4f591921df 100644 (file)
 #include <asm/io.h>
 #include <asm/imc-pmu.h>
 #include <asm/cputhreads.h>
+#include <asm/debugfs.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
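+       /* the control block memory is big-endian; swap to/from host order */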
+       *val = cpu_to_be64(*(u64 *)data);
+       return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+       *(u64 *)data = cpu_to_be64(val);
+       return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode,
+                                            struct dentry *parent, u64  *value)
+{
+       return debugfs_create_file_unsafe(name, mode, parent,
+                                         value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ *                     for imc_cmd and imc_mode
+ *                     for each node in the system.
+ *  imc_mode and imc_cmd can be changed by echoing into
+ *  this interface.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+                                   struct imc_pmu *pmu_ptr)
+{
+       static u64 loc, *imc_mode_addr, *imc_cmd_addr;
+       int chip = 0, nid;
+       char mode[16], cmd[16];
+       u32 cb_offset;
+
+       imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+
+       /*
+        * Return here if the 'imc' directory already exists or if
+        * creating a new one failed.
+        */
+       if (!imc_debugfs_parent)
+               return;
+
+       if (of_property_read_u32(node, "cb_offset", &cb_offset))
+               cb_offset = IMC_CNTL_BLK_OFFSET;
+
+       for_each_node(nid) {
+               loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+               imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+               sprintf(mode, "imc_mode_%d", nid);
+               if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+                                           imc_mode_addr))
+                       goto err;
+
+               imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+               sprintf(cmd, "imc_cmd_%d", nid);
+               if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+                                           imc_cmd_addr))
+                       goto err;
+               chip++;
+       }
+       return;
+
+err:
+       debugfs_remove_recursive(imc_debugfs_parent);
+}
 
 /*
  * imc_get_mem_addr_nest: Function to get nest counter memory region
@@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
        }
 
        pmu_ptr->imc_counter_mmaped = true;
+       export_imc_mode_and_cmd(node, pmu_ptr);
        kfree(base_addr_arr);
        kfree(chipid_arr);
        return 0;
@@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
                }
        }
 
+       /* If none of the nest units are registered, remove debugfs interface */
+       if (pmu_count == 0)
+               debugfs_remove_recursive(imc_debugfs_parent);
+
        return 0;
 }
 
index 23fb6647dced6b49967f1e977dd57cf91257526d..6fd4092798d5f5b415cd73e26211ddfc26e0359f 100644 (file)
@@ -260,13 +260,13 @@ void __init opal_sys_param_init(void)
                /* If the parameter is read-only or read-write */
                switch (perm[i] & 3) {
                case OPAL_SYSPARAM_READ:
-                       attr[i].kobj_attr.attr.mode = S_IRUGO;
+                       attr[i].kobj_attr.attr.mode = 0444;
                        break;
                case OPAL_SYSPARAM_WRITE:
-                       attr[i].kobj_attr.attr.mode = S_IWUSR;
+                       attr[i].kobj_attr.attr.mode = 0200;
                        break;
                case OPAL_SYSPARAM_RW:
-                       attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
+                       attr[i].kobj_attr.attr.mode = 0644;
                        break;
                default:
                        break;
index 6f4b00a2ac46e26f6a766fbc20e5dc8a5e4b6afd..1b2936ba604087cc33673ebe5a564a5dca93a5e5 100644 (file)
@@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap,                        OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,          OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,          OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear,             OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_npu_spa_setup,                  OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache,            OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set,                     OPAL_NPU_TL_SET);
index 041ddbd1fc577db3f6d95b28ac3f0fc110c6563f..c15182765ff59a4809ac5777a8bdeb4fae0b89e9 100644 (file)
@@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
 
        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
                powerpc_firmware_features |= FW_FEATURE_OPAL;
-               pr_info("OPAL detected !\n");
+               pr_debug("OPAL detected !\n");
        } else {
                panic("OPAL != V3 detected, no longer supported.\n");
        }
@@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
                                        struct notifier_block *nb)
 {
        if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
-               pr_warning("%s: Invalid arguments, msg_type:%d\n",
-                          __func__, msg_type);
+               pr_warn("%s: Invalid arguments, msg_type:%d\n",
+                       __func__, msg_type);
                return -EINVAL;
        }
 
@@ -281,8 +281,8 @@ static void opal_handle_message(void)
 
        /* check for errors. */
        if (ret) {
-               pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
-                               __func__, ret);
+               pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+                       __func__, ret);
                return;
        }
 
@@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs,
 
 void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 {
-       /*
-        * This is mostly taken from kernel/panic.c, but tries to do
-        * relatively minimal work. Don't use delay functions (TB may
-        * be broken), don't crash dump (need to set a firmware log),
-        * don't run notifiers. We do want to get some information to
-        * Linux console.
-        */
-       console_verbose();
-       bust_spinlocks(1);
+       panic_flush_kmsg_start();
+
        pr_emerg("Hardware platform error: %s\n", msg);
        if (regs)
                show_regs(regs);
        smp_send_stop();
-       printk_safe_flush_on_panic();
-       kmsg_dump(KMSG_DUMP_PANIC);
-       bust_spinlocks(0);
-       debug_locks_off();
-       console_flush_on_panic();
+
+       panic_flush_kmsg_end();
 
        /*
         * Don't bother to shut things down because this will
index 9582aeb1fe4c772e00c1dd394e77502ff4917510..496e47696ed0c57c413b6ce4c920c7c3beda745f 100644 (file)
@@ -54,7 +54,8 @@
 #define POWERNV_IOMMU_DEFAULT_LEVELS   1
 #define POWERNV_IOMMU_MAX_LEVELS       5
 
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
+                                             "NPU_OCAPI" };
 static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
 
 void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 }
 
 static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
 
 static int __init iommu_setup(char *str)
 {
@@ -110,6 +112,14 @@ static int __init iommu_setup(char *str)
 }
 early_param("iommu", iommu_setup);
 
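+/*
+ * Booting with "ppc_pci_reset_phbs" on the kernel command line forces
+ * the same fundamental PHB reset that a kdump kernel performs.
+ */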
+static int __init pci_reset_phbs_setup(char *str)
+{
+       pci_reset_phbs = true;
+       return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
 static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
 {
        /*
@@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
         * Configure PELTV. NPUs don't have a PELTV table so skip
         * configuration on them.
         */
-       if (phb->type != PNV_PHB_NPU)
+       if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
                pnv_ioda_set_peltv(phb, pe, true);
 
        /* Setup reverse map */
@@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
        pe = pnv_ioda_alloc_pe(phb);
        if (!pe) {
-               pr_warning("%s: Not enough PE# available, disabling device\n",
-                          pci_name(dev));
+               pr_warn("%s: Not enough PE# available, disabling device\n",
+                       pci_name(dev));
                return NULL;
        }
 
@@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
         * At some point we want to remove the PDN completely anyways
         */
        pci_dev_get(dev);
-       pdn->pcidev = dev;
        pdn->pe_number = pe->pe_number;
        pe->flags = PNV_IODA_PE_DEV;
        pe->pdev = dev;
@@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
                        continue;
 
                pe->device_count++;
-               pdn->pcidev = dev;
                pdn->pe_number = pe->pe_number;
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
                        pnv_ioda_setup_same_PE(dev->subordinate, pe);
@@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
                pe = pnv_ioda_alloc_pe(phb);
 
        if (!pe) {
-               pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+               pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                        __func__, pci_domain_nr(bus), bus->number);
                return NULL;
        }
@@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
                        pci_dev_get(npu_pdev);
                        npu_pdn = pci_get_pdn(npu_pdev);
                        rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
-                       npu_pdn->pcidev = npu_pdev;
                        npu_pdn->pe_number = pe_num;
                        phb->ioda.pe_rmap[rid] = pe->pe_number;
 
@@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void)
 {
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;
+       struct pci_bus *bus;
+       struct pci_dev *pdev;
 
        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
-               if (phb->type == PNV_PHB_NPU) {
+               if (phb->type == PNV_PHB_NPU_NVLINK) {
                        /* PE#0 is needed for error reporting */
                        pnv_ioda_reserve_pe(phb, 0);
                        pnv_ioda_setup_npu_PEs(hose->bus);
                        if (phb->model == PNV_PHB_MODEL_NPU2)
                                pnv_npu2_init(phb);
                }
+               if (phb->type == PNV_PHB_NPU_OCAPI) {
+                       bus = hose->bus;
+                       list_for_each_entry(pdev, &bus->devices, bus_list)
+                               pnv_ioda_setup_dev_PE(pdev);
+               }
        }
 }
 
@@ -1692,7 +1706,7 @@ m64_failed:
        return ret;
 }
 
-int pcibios_sriov_disable(struct pci_dev *pdev)
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
 {
        pnv_pci_sriov_disable(pdev);
 
@@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
        return 0;
 }
 
-int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
        /* Allocate PCI data */
        add_dev_pci_data(pdev);
@@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
        unsigned long direct_table_size;
 
        if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
-                       (window_size > memory_hotplug_max()) ||
                        !is_power_of_2(window_size))
                return 0;
 
@@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
 
        hose = pci_bus_to_host(pdev->bus);
        phb = hose->private_data;
-       if (phb->type != PNV_PHB_NPU)
+       if (phb->type != PNV_PHB_NPU_NVLINK)
                return 0;
 
        *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
@@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void)
        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
 
-               if (phb->type != PNV_PHB_NPU)
+               if (phb->type != PNV_PHB_NPU_NVLINK)
                        continue;
 
                list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
                sprintf(name, "PCI%04x", hose->global_number);
                phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
                if (!phb->dbgfs) {
-                       pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+                       pr_warn("%s: Error on creating debugfs on PHB#%x\n",
                                __func__, hose->global_number);
                        continue;
                }
@@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
        .shutdown               = pnv_pci_ioda_shutdown,
 };
 
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+       .enable_device_hook     = pnv_pci_enable_device_hook,
+       .window_alignment       = pnv_pci_window_alignment,
+       .reset_secondary_bus    = pnv_pci_reset_secondary_bus,
+       .shutdown               = pnv_pci_ioda_shutdown,
+};
+
 #ifdef CONFIG_CXL_BASE
 const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
        .dma_dev_setup          = pnv_pci_dma_dev_setup,
@@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
         */
        ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 
-       if (phb->type == PNV_PHB_NPU) {
+       switch (phb->type) {
+       case PNV_PHB_NPU_NVLINK:
                hose->controller_ops = pnv_npu_ioda_controller_ops;
-       } else {
+               break;
+       case PNV_PHB_NPU_OCAPI:
+               hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+               break;
+       default:
                phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
                hose->controller_ops = pnv_pci_ioda_controller_ops;
        }
@@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 #ifdef CONFIG_PCI_IOV
        ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
        ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+       ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+       ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
 #endif
 
        pci_add_flags(PCI_REASSIGN_ALL_RSRC);
@@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        /* Reset IODA tables to a clean state */
        rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
        if (rc)
-               pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
+               pr_warn("  OPAL Error %ld performing IODA table reset !\n", rc);
 
        /*
         * If we're running in kdump kernel, the previous kernel never
         * shutdown PCI devices correctly. We already got IODA table
         * cleaned out. So we have to issue PHB reset to stop all PCI
-        * transactions from previous kernel.
+        * transactions from the previous kernel. The ppc_pci_reset_phbs
+        * kernel parameter will force this reset too.
         */
-       if (is_kdump_kernel()) {
+       if (is_kdump_kernel() || pci_reset_phbs) {
                pr_info("  Issue PHB reset ...\n");
                pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
                pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
@@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 
 void __init pnv_pci_init_npu_phb(struct device_node *np)
 {
-       pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+       pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
+}
+
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+       pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
+}
+
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+       struct pnv_phb *phb = hose->private_data;
+
+       if (!machine_is(powernv))
+               return;
+
+       if (phb->type == PNV_PHB_NPU_OCAPI)
+               dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
 }
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
 
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
 {
index 5422f4a6317cadf06335c4c1a18e3d7dcccd6db6..69d102cbf48f2b386f58cb88223ee99f25da797e 100644 (file)
@@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void)
        for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
                pnv_pci_init_npu_phb(np);
 
+       /* Look for NPU2 OpenCAPI PHBs */
+       for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
+               pnv_pci_init_npu2_opencapi_phb(np);
+
        /* Configure IOMMU DMA hooks */
        set_pci_dma_ops(&dma_iommu_ops);
 }
index b772d747389634f9713d1d4f70aaabcfffca630c..eada4b6068cb172301ee89f08c25726355dfb2e6 100644 (file)
@@ -12,9 +12,10 @@ struct pci_dn;
 #define NV_NMMU_ATSD_REGS 8
 
 enum pnv_phb_type {
-       PNV_PHB_IODA1   = 0,
-       PNV_PHB_IODA2   = 1,
-       PNV_PHB_NPU     = 2,
+       PNV_PHB_IODA1           = 0,
+       PNV_PHB_IODA2           = 1,
+       PNV_PHB_NPU_NVLINK      = 2,
+       PNV_PHB_NPU_OCAPI       = 3,
 };
 
 /* Precise PHB model for error management */
@@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_init_npu_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
index ba030669eca1ed769842e7977d2f3cb30b7d886e..9664c8461f032c1eed34d28660034b5a1adcc873 100644 (file)
@@ -37,6 +37,8 @@
 #include <asm/kvm_ppc.h>
 #include <asm/ppc-opcode.h>
 #include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
 
 #include "powernv.h"
 
@@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
                } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
                        unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
                        asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+               } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+                       irq_set_pending_from_srr1(srr1);
+                       /* Does not return */
                }
+
                smp_mb();
 
+               /*
+                * For kdump kernels, we process the IPI and jump to
+                * crash_ipi_callback
+                */
+               if (kdump_in_progress()) {
+                       /*
+                        * If we got to this point, we've not used
+                        * NMIs; otherwise we would have gone
+                        * via the SRR1_WAKERESET path. We are
+                        * using regular IPIs for waking up offline
+                        * threads.
+                        */
+                       struct pt_regs regs;
+
+                       ppc_save_regs(&regs);
+                       crash_ipi_callback(&regs);
+                       /* Does not return */
+               }
+
                if (cpu_core_split_required())
                        continue;
 
@@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
        ppc_md.cpu_die  = pnv_smp_cpu_kill_self;
+#ifdef CONFIG_KEXEC_CORE
+       crash_wake_offline = 1;
+#endif
 #endif
 }
index e48462447ff0049af5cf27ac24355f8929078b6f..e7075aaff1bb62094f8d8f1df706c7d5157496e3 100644 (file)
@@ -663,8 +663,8 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id)
                if (rem)
                        break;
        }
-       pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__,
-                  bus_id, dev_id);
+       pr_warn("%s:%u: device %llu:%llu not found\n",
+               __func__, __LINE__, bus_id, dev_id);
        return;
 
 found:
@@ -859,11 +859,9 @@ static int ps3_probe_thread(void *data)
 
                if (notify_event->event_type != notify_region_probe ||
                    notify_event->bus_id != dev.sbd.bus_id) {
-                       pr_warning("%s:%u: bad notify_event: event %llu, "
-                                  "dev_id %llu, dev_type %llu\n",
-                                  __func__, __LINE__, notify_event->event_type,
-                                  notify_event->dev_id,
-                                  notify_event->dev_type);
+                       pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
+                               __func__, __LINE__, notify_event->event_type,
+                               notify_event->dev_id, notify_event->dev_type);
                        continue;
                }
 
index b0f34663b1aecf388a5e6cf6e09bebbf7ae76bc3..7f870ec29daf0675e366daff6552096de03adb41 100644 (file)
@@ -607,8 +607,8 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
                                       r->ioid,
                                       iopte_flag);
                if (result) {
-                       pr_warning("%s:%d: lv1_put_iopte failed: %s\n",
-                                  __func__, __LINE__, ps3_result(result));
+                       pr_warn("%s:%d: lv1_put_iopte failed: %s\n",
+                               __func__, __LINE__, ps3_result(result));
                        goto fail_map;
                }
                DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__,
index 3db53e8aff9279cfe761ac9f43926559e347eaf4..cdbfc5cfd6f38ee85620288c14d2288808572e1e 100644 (file)
@@ -699,7 +699,7 @@ static void os_area_queue_work_handler(struct work_struct *work)
 
        error = update_flash_db();
        if (error)
-               pr_warning("%s: Could not update FLASH ROM\n", __func__);
+               pr_warn("%s: Could not update FLASH ROM\n", __func__);
 
        pr_debug(" <- %s:%d\n", __func__, __LINE__);
 }
index 6244bc849469e33af7dcc5a4ac2b21dd970ac1c6..77a37520068ded6cd49b4e5d60d0982ae69a5c38 100644 (file)
@@ -113,6 +113,7 @@ static void ps3_panic(char *str)
        printk("   System does not reboot automatically.\n");
        printk("   Please press POWER button.\n");
        printk("\n");
+       panic_flush_kmsg_end();
 
        while(1)
                lv1_pause(1);
index 560aefde06c0240a8c6071e3a92ff062337afc1d..25427a48feae3a2dcdee2182cd3bb6c5765066d9 100644 (file)
@@ -72,20 +72,20 @@ MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(CMM_DRIVER_VERSION);
 
-module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(delay, delay, uint, 0644);
 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
                 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
-module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
                 "before loaning resumes. "
                 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
-module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+module_param_named(oom_kb, oom_kb, uint, 0644);
 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
                 "[Default=" __stringify(CMM_OOM_KB) "]");
-module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
                 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
-module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, cmm_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
                 "[Default=" __stringify(CMM_DEBUG) "]");
 
@@ -385,7 +385,7 @@ static int cmm_thread(void *dummy)
        {                                                       \
                return sprintf(buf, format, ##args);            \
        }                                                       \
-       static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+       static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
@@ -411,7 +411,7 @@ static ssize_t store_oom_pages(struct device *dev,
        return count;
 }
 
-static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(oom_freed_kb, 0644,
                   show_oom_pages, store_oom_pages);
 
 static struct device_attribute *cmm_attrs[] = {
@@ -765,7 +765,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
 }
 
 module_param_call(disable, cmm_set_disable, param_get_uint,
-                 &cmm_disabled, S_IRUGO | S_IWUSR);
+                 &cmm_disabled, 0644);
 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
                 "[Default=" __stringify(CMM_DISABLE) "]");
 
index 6b812ad990e48318803c0d1d5d6d02e569c2ee9c..823cb27efa8b88a76f3fdd0d495fb7b6f6f0b0c2 100644 (file)
@@ -55,6 +55,43 @@ static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_pe;
 
+#ifdef CONFIG_PCI_IOV
+void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+       struct pci_dn *pdn = pci_get_pdn(pdev);
+       struct pci_dn *physfn_pdn;
+       struct eeh_dev *edev;
+
+       if (!pdev->is_virtfn)
+               return;
+
+       pdn->device_id  =  pdev->device;
+       pdn->vendor_id  =  pdev->vendor;
+       pdn->class_code =  pdev->class;
+       /*
+        * The last allow-unfreeze return code is kept so user
+        * space can retrieve it via eeh-sysfs and see the
+        * completion status of the last command on the platform.
+        */
+       pdn->last_allow_rc =  0;
+       physfn_pdn      =  pci_get_pdn(pdev->physfn);
+       pdn->pe_number  =  physfn_pdn->pe_num_map[pdn->vf_index];
+       edev = pdn_to_eeh_dev(pdn);
+
+       /*
+        * The following operations will fail if the VF's sysfs files
+        * have not been created or its resources are not finalized.
+        */
+       eeh_add_device_early(pdn);
+       eeh_add_device_late(pdev);
+       edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
+       eeh_rmv_from_parent_pe(edev); /* Remove it from the bus PE it was added to */
+       eeh_add_to_parent_pe(edev);   /* Add as VF PE type */
+       eeh_sysfs_add_device(pdev);
+
+}
+#endif
+
 /*
  * Buffer for reporting slot-error-detail rtas calls. It's here
  * in BSS, and not dynamically allocated, so that it ends up in
@@ -120,6 +157,11 @@ static int pseries_eeh_init(void)
        /* Set EEH probe mode */
        eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
 
+#ifdef CONFIG_PCI_IOV
+       /* Set EEH machine dependent code */
+       ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+#endif
+
        return 0;
 }
 
@@ -684,6 +726,121 @@ static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32
        return rtas_write_config(pdn, where, size, val);
 }
 
+static int pseries_eeh_restore_config(struct pci_dn *pdn)
+{
+       struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+       s64 ret = 0;
+
+       if (!edev)
+               return -EEXIST;
+
+       /*
+        * FIXME: The MPS, error routing rules and timeout settings
+        * are worth exporting from firmware in an extensible way.
+        */
+       if (edev->physfn)
+               ret = eeh_restore_vf_config(pdn);
+
+       if (ret) {
+               pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+                       __func__, edev->pe_config_addr, ret);
+               return -EIO;
+       }
+
+       return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+int pseries_send_allow_unfreeze(struct pci_dn *pdn,
+                               u16 *vf_pe_array, int cur_vfs)
+{
+       int rc;
+       int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze");
+       unsigned long buid, addr;
+
+       addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+       buid = pdn->phb->buid;
+       spin_lock(&rtas_data_buf_lock);
+       memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+       rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL,
+                      addr,
+                      BUID_HI(buid),
+                      BUID_LO(buid),
+                      rtas_data_buf, cur_vfs * sizeof(u16));
+       spin_unlock(&rtas_data_buf_lock);
+       if (rc)
+               pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n",
+                       __func__,
+                       pdn->phb->global_number, addr, rc);
+       return rc;
+}
+
+static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
+{
+       struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
+       int cur_vfs = 0, rc = 0, vf_index, bus, devfn;
+       u16 *vf_pe_array;
+
+       vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+       if (!vf_pe_array)
+               return -ENOMEM;
+       if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) {
+               if (edev->pdev->is_physfn) {
+                       cur_vfs = pci_num_vf(edev->pdev);
+                       pdn = eeh_dev_to_pdn(edev);
+                       parent = pdn->parent;
+                       for (vf_index = 0; vf_index < cur_vfs; vf_index++)
+                               vf_pe_array[vf_index] =
+                                       cpu_to_be16(pdn->pe_num_map[vf_index]);
+                       rc = pseries_send_allow_unfreeze(pdn, vf_pe_array,
+                                                        cur_vfs);
+                       pdn->last_allow_rc = rc;
+                       for (vf_index = 0; vf_index < cur_vfs; vf_index++) {
+                               list_for_each_entry_safe(pdn, tmp,
+                                                        &parent->child_list,
+                                                        list) {
+                                       bus = pci_iov_virtfn_bus(edev->pdev,
+                                                                vf_index);
+                                       devfn = pci_iov_virtfn_devfn(edev->pdev,
+                                                                    vf_index);
+                                       if (pdn->busno != bus ||
+                                           pdn->devfn != devfn)
+                                               continue;
+                                       pdn->last_allow_rc = rc;
+                               }
+                       }
+               } else {
+                       pdn = pci_get_pdn(edev->pdev);
+                       vf_pe_array[0] = cpu_to_be16(pdn->pe_number);
+                       physfn_pdn = pci_get_pdn(edev->physfn);
+                       rc = pseries_send_allow_unfreeze(physfn_pdn,
+                                                        vf_pe_array, 1);
+                       pdn->last_allow_rc = rc;
+               }
+       }
+
+       kfree(vf_pe_array);
+       return rc;
+}
+
+static int pseries_notify_resume(struct pci_dn *pdn)
+{
+       struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+       if (!edev)
+               return -EEXIST;
+
+       if (rtas_token("ibm,open-sriov-allow-unfreeze")
+           == RTAS_UNKNOWN_SERVICE)
+               return -EINVAL;
+
+       if (edev->pdev->is_physfn || edev->pdev->is_virtfn)
+               return pseries_call_allow_unfreeze(edev);
+
+       return 0;
+}
+#endif
+
 static struct eeh_ops pseries_eeh_ops = {
        .name                   = "pseries",
        .init                   = pseries_eeh_init,
@@ -699,7 +856,10 @@ static struct eeh_ops pseries_eeh_ops = {
        .read_config            = pseries_eeh_read_config,
        .write_config           = pseries_eeh_write_config,
        .next_error             = NULL,
-       .restore_config         = NULL
+       .restore_config         = pseries_eeh_restore_config,
+#ifdef CONFIG_PCI_IOV
+       .notify_resume          = pseries_notify_resume
+#endif
 };
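
pseries_call_allow_unfreeze() above marshals the PE numbers as big-endian 16-bit values before copying them into the RTAS data buffer, since RTAS expects big-endian data regardless of the kernel's endianness. A standalone sketch of that marshalling, using the userspace endian helpers in place of cpu_to_be16():

#include <endian.h>  /* htobe16(): glibc/musl extension */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RTAS_DATA_BUF_SIZE 4096 /* matches the kernel constant */

int main(void)
{
	uint16_t pe_nums[3] = { 0x0001, 0x0102, 0x0203 };
	uint16_t vf_pe_array[3];
	unsigned char rtas_data_buf[RTAS_DATA_BUF_SIZE] = { 0 };
	int i;

	/* Store big-endian values, as the hunk does with cpu_to_be16(). */
	for (i = 0; i < 3; i++)
		vf_pe_array[i] = htobe16(pe_nums[i]);

	memcpy(rtas_data_buf, vf_pe_array, sizeof(vf_pe_array));
	/* First two bytes are 00 01 on any host, big or little endian. */
	printf("first buffer bytes: %02x %02x\n",
	       rtas_data_buf[0], rtas_data_buf[1]);
	return 0;
}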
 
 /**
index 63cc82ad58ac2ed766bd374b3f0ef754c52b5160..a3bbeb43689e6e95d76c881b83d704e68de06979 100644 (file)
@@ -114,6 +114,8 @@ static __initdata struct vec5_fw_feature
 vec5_fw_features_table[] = {
        {FW_FEATURE_TYPE1_AFFINITY,     OV5_TYPE1_AFFINITY},
        {FW_FEATURE_PRRN,               OV5_PRRN},
+       {FW_FEATURE_DRMEM_V2,           OV5_DRMEM_V2},
+       {FW_FEATURE_DRC_INFO,           OV5_DRC_INFO},
 };
 
 static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
index a7d14aa7bb7c5c2d67fdd0e522d271f71f05a738..dceb51454d8d212a5cbc78ef891322bddf499800 100644 (file)
@@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node *np)
        cpu_maps_update_done();
 }
 
+extern int find_and_online_cpu_nid(int cpu);
+
 static int dlpar_online_cpu(struct device_node *dn)
 {
        int rc = 0;
@@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn)
                                        != CPU_STATE_OFFLINE);
                        cpu_maps_update_done();
                        timed_topology_update(1);
+                       find_and_online_cpu_nid(cpu);
                        rc = device_online(get_cpu_device(cpu));
                        if (rc)
                                goto out;
index 1d48ab424bd9029a1da7ec310a76bd119c582961..c1578f54c62610df5d09f93e170259131044b7ce 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/prom.h>
 #include <asm/sparsemem.h>
 #include <asm/fadump.h>
+#include <asm/drmem.h>
 #include "pseries.h"
 
 static bool rtas_hp_event;
@@ -100,100 +101,6 @@ static struct property *dlpar_clone_property(struct property *prop,
        return new_prop;
 }
 
-static struct property *dlpar_clone_drconf_property(struct device_node *dn)
-{
-       struct property *prop, *new_prop;
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
-       int i;
-
-       prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
-       if (!prop)
-               return NULL;
-
-       new_prop = dlpar_clone_property(prop, prop->length);
-       if (!new_prop)
-               return NULL;
-
-       /* Convert the property to cpu endian-ness */
-       p = new_prop->value;
-       *p = be32_to_cpu(*p);
-
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
-       for (i = 0; i < num_lmbs; i++) {
-               lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
-               lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
-               lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
-               lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
-       }
-
-       return new_prop;
-}
-
-static void dlpar_update_drconf_property(struct device_node *dn,
-                                        struct property *prop)
-{
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
-       int i;
-
-       /* Convert the property back to BE */
-       p = prop->value;
-       num_lmbs = *p;
-       *p = cpu_to_be32(*p);
-       p++;
-
-       lmbs = (struct of_drconf_cell *)p;
-       for (i = 0; i < num_lmbs; i++) {
-               lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
-               lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
-               lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
-               lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
-       }
-
-       rtas_hp_event = true;
-       of_update_property(dn, prop);
-       rtas_hp_event = false;
-}
-
-static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
-{
-       struct device_node *dn;
-       struct property *prop;
-       struct of_drconf_cell *lmbs;
-       u32 *p, num_lmbs;
-       int i;
-
-       dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-       if (!dn)
-               return -ENODEV;
-
-       prop = dlpar_clone_drconf_property(dn);
-       if (!prop) {
-               of_node_put(dn);
-               return -ENODEV;
-       }
-
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
-       for (i = 0; i < num_lmbs; i++) {
-               if (lmbs[i].drc_index == lmb->drc_index) {
-                       lmbs[i].flags = lmb->flags;
-                       lmbs[i].aa_index = lmb->aa_index;
-
-                       dlpar_update_drconf_property(dn, prop);
-                       break;
-               }
-       }
-
-       of_node_put(dn);
-       return 0;
-}
-
 static u32 find_aa_index(struct device_node *dr_node,
                         struct property *ala_prop, const u32 *lmb_assoc)
 {
@@ -256,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
        return aa_index;
 }
 
-static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
 {
        struct device_node *parent, *lmb_node, *dr_node;
        struct property *ala_prop;
@@ -299,9 +206,9 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
        return aa_index;
 }
 
-static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
 {
-       int aa_index;
+       int rc, aa_index;
 
        lmb->flags |= DRCONF_MEM_ASSIGNED;
 
@@ -313,17 +220,29 @@ static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
        }
 
        lmb->aa_index = aa_index;
-       return dlpar_update_device_tree_lmb(lmb);
+
+       rtas_hp_event = true;
+       rc = drmem_update_dt();
+       rtas_hp_event = false;
+
+       return rc;
 }
 
-static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
 {
+       int rc;
+
        lmb->flags &= ~DRCONF_MEM_ASSIGNED;
        lmb->aa_index = 0xffffffff;
-       return dlpar_update_device_tree_lmb(lmb);
+
+       rtas_hp_event = true;
+       rc = drmem_update_dt();
+       rtas_hp_event = false;
+
+       return rc;
 }
 
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
 {
        unsigned long section_nr;
        struct mem_section *mem_sect;
@@ -336,7 +255,36 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
        return mem_block;
 }
 
-static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+static int get_lmb_range(u32 drc_index, int n_lmbs,
+                        struct drmem_lmb **start_lmb,
+                        struct drmem_lmb **end_lmb)
+{
+       struct drmem_lmb *lmb, *start, *end;
+       struct drmem_lmb *last_lmb;
+
+       start = NULL;
+       for_each_drmem_lmb(lmb) {
+               if (lmb->drc_index == drc_index) {
+                       start = lmb;
+                       break;
+               }
+       }
+
+       if (!start)
+               return -EINVAL;
+
+       end = &start[n_lmbs - 1];
+
+       last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+       if (end > last_lmb)
+               return -EINVAL;
+
+       *start_lmb = start;
+       *end_lmb = end;
+       return 0;
+}
+
+static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
 {
        struct memory_block *mem_block;
        int rc;
@@ -357,13 +305,13 @@ static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
        return rc;
 }
 
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+static int dlpar_online_lmb(struct drmem_lmb *lmb)
 {
        return dlpar_change_lmb_state(lmb, true);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+static int dlpar_offline_lmb(struct drmem_lmb *lmb)
 {
        return dlpar_change_lmb_state(lmb, false);
 }
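
The remainder of this file converts the open-coded of_drconf_cell walks to the new drmem_lmb API. The iterators and reservation helpers it relies on live in arch/powerpc/include/asm/drmem.h; their approximate shape, reproduced from memory and so not authoritative, is:

/* Approximate sketch of the drmem helpers used below; see
 * arch/powerpc/include/asm/drmem.h for the real definitions. */
struct drmem_lmb {
	u64	base_addr;
	u32	drc_index;
	u32	aa_index;
	u32	flags;
};

struct drmem_lmb_info {
	struct drmem_lmb	*lmbs;
	int			n_lmbs;
	u32			lmb_size;
};

extern struct drmem_lmb_info *drmem_info;

#define DRMEM_LMB_RESERVED	0x80000000

#define for_each_drmem_lmb_in_range(lmb, start, end)	\
	for ((lmb) = (start); (lmb) <= (end); (lmb)++)

#define for_each_drmem_lmb(lmb)				\
	for_each_drmem_lmb_in_range((lmb),		\
		&drmem_info->lmbs[0],			\
		&drmem_info->lmbs[drmem_info->n_lmbs - 1])

static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
{
	lmb->flags |= DRMEM_LMB_RESERVED;
}

static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
{
	lmb->flags &= ~DRMEM_LMB_RESERVED;
}

static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
{
	return lmb->flags & DRMEM_LMB_RESERVED;
}

Keeping the reserved mark in a flags bit, rather than in the device tree property itself, is what lets the code below drop the clone/update dance around "ibm,dynamic-memory".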
@@ -426,7 +374,7 @@ static int pseries_remove_mem_node(struct device_node *np)
        return 0;
 }
 
-static bool lmb_is_removable(struct of_drconf_cell *lmb)
+static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
        int i, scns_per_block;
        int rc = 1;
@@ -458,9 +406,9 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
        return rc ? true : false;
 }
 
-static int dlpar_add_lmb(struct of_drconf_cell *);
+static int dlpar_add_lmb(struct drmem_lmb *);
 
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
        unsigned long block_sz;
        int nid, rc;
@@ -484,28 +432,25 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
        return 0;
 }
 
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
-                                       struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 {
-       struct of_drconf_cell *lmbs;
+       struct drmem_lmb *lmb;
        int lmbs_removed = 0;
        int lmbs_available = 0;
-       u32 num_lmbs, *p;
-       int i, rc;
+       int rc;
 
        pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
 
        if (lmbs_to_remove == 0)
                return -EINVAL;
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
        /* Validate that there are enough LMBs to satisfy the request */
-       for (i = 0; i < num_lmbs; i++) {
-               if (lmb_is_removable(&lmbs[i]))
+       for_each_drmem_lmb(lmb) {
+               if (lmb_is_removable(lmb))
                        lmbs_available++;
+
+               if (lmbs_available == lmbs_to_remove)
+                       break;
        }
 
        if (lmbs_available < lmbs_to_remove) {
@@ -514,45 +459,47 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
                return -EINVAL;
        }
 
-       for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) {
-               rc = dlpar_remove_lmb(&lmbs[i]);
+       for_each_drmem_lmb(lmb) {
+               rc = dlpar_remove_lmb(lmb);
                if (rc)
                        continue;
 
-               lmbs_removed++;
-
                /* Mark this lmb so we can add it later if all of the
                 * requested LMBs cannot be removed.
                 */
-               lmbs[i].reserved = 1;
+               drmem_mark_lmb_reserved(lmb);
+
+               lmbs_removed++;
+               if (lmbs_removed == lmbs_to_remove)
+                       break;
        }
 
        if (lmbs_removed != lmbs_to_remove) {
                pr_err("Memory hot-remove failed, adding LMB's back\n");
 
-               for (i = 0; i < num_lmbs; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb(lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       rc = dlpar_add_lmb(&lmbs[i]);
+                       rc = dlpar_add_lmb(lmb);
                        if (rc)
                                pr_err("Failed to add LMB back, drc index %x\n",
-                                      lmbs[i].drc_index);
+                                      lmb->drc_index);
 
-                       lmbs[i].reserved = 0;
+                       drmem_remove_lmb_reservation(lmb);
                }
 
                rc = -EINVAL;
        } else {
-               for (i = 0; i < num_lmbs; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb(lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       dlpar_release_drc(lmbs[i].drc_index);
+                       dlpar_release_drc(lmb->drc_index);
                        pr_info("Memory at %llx was hot-removed\n",
-                               lmbs[i].base_addr);
+                               lmb->base_addr);
 
-                       lmbs[i].reserved = 0;
+                       drmem_remove_lmb_reservation(lmb);
                }
                rc = 0;
        }
@@ -560,26 +507,21 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
        return rc;
 }
 
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
+       struct drmem_lmb *lmb;
        int lmb_found;
-       int i, rc;
+       int rc;
 
        pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
        lmb_found = 0;
-       for (i = 0; i < num_lmbs; i++) {
-               if (lmbs[i].drc_index == drc_index) {
+       for_each_drmem_lmb(lmb) {
+               if (lmb->drc_index == drc_index) {
                        lmb_found = 1;
-                       rc = dlpar_remove_lmb(&lmbs[i]);
+                       rc = dlpar_remove_lmb(lmb);
                        if (!rc)
-                               dlpar_release_drc(lmbs[i].drc_index);
+                               dlpar_release_drc(lmb->drc_index);
 
                        break;
                }
@@ -590,35 +532,30 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
 
        if (rc)
                pr_info("Failed to hot-remove memory at %llx\n",
-                       lmbs[i].base_addr);
+                       lmb->base_addr);
        else
-               pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr);
+               pr_info("Memory at %llx was hot-removed\n", lmb->base_addr);
 
        return rc;
 }
 
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
+       struct drmem_lmb *lmb;
        int lmb_found;
-       int i, rc;
+       int rc;
 
        pr_info("Attempting to update LMB, drc index %x\n", drc_index);
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
        lmb_found = 0;
-       for (i = 0; i < num_lmbs; i++) {
-               if (lmbs[i].drc_index == drc_index) {
+       for_each_drmem_lmb(lmb) {
+               if (lmb->drc_index == drc_index) {
                        lmb_found = 1;
-                       rc = dlpar_remove_lmb(&lmbs[i]);
+                       rc = dlpar_remove_lmb(lmb);
                        if (!rc) {
-                               rc = dlpar_add_lmb(&lmbs[i]);
+                               rc = dlpar_add_lmb(lmb);
                                if (rc)
-                                       dlpar_release_drc(lmbs[i].drc_index);
+                                       dlpar_release_drc(lmb->drc_index);
                        }
                        break;
                }
@@ -629,20 +566,18 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
 
        if (rc)
                pr_info("Failed to update memory at %llx\n",
-                       lmbs[i].base_addr);
+                       lmb->base_addr);
        else
-               pr_info("Memory at %llx was updated\n", lmbs[i].base_addr);
+               pr_info("Memory at %llx was updated\n", lmb->base_addr);
 
        return rc;
 }
 
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
-                                    struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
-       int i, rc, start_lmb_found;
-       int lmbs_available = 0, start_index = 0, end_index;
+       struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+       int lmbs_available = 0;
+       int rc;
 
        pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
                lmbs_to_remove, drc_index);
@@ -650,29 +585,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
        if (lmbs_to_remove == 0)
                return -EINVAL;
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-       start_lmb_found = 0;
-
-       /* Navigate to drc_index */
-       while (start_index < num_lmbs) {
-               if (lmbs[start_index].drc_index == drc_index) {
-                       start_lmb_found = 1;
-                       break;
-               }
-
-               start_index++;
-       }
-
-       if (!start_lmb_found)
+       rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb);
+       if (rc)
                return -EINVAL;
 
-       end_index = start_index + lmbs_to_remove;
-
        /* Validate that there are enough LMBs to satisfy the request */
-       for (i = start_index; i < end_index; i++) {
-               if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+       for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+               if (lmb->flags & DRCONF_MEM_RESERVED)
                        break;
 
                lmbs_available++;
@@ -681,42 +600,43 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
        if (lmbs_available < lmbs_to_remove)
                return -EINVAL;
 
-       for (i = start_index; i < end_index; i++) {
-               if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+       for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+               if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
                        continue;
 
-               rc = dlpar_remove_lmb(&lmbs[i]);
+               rc = dlpar_remove_lmb(lmb);
                if (rc)
                        break;
 
-               lmbs[i].reserved = 1;
+               drmem_mark_lmb_reserved(lmb);
        }
 
        if (rc) {
                pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n");
 
-               for (i = start_index; i < end_index; i++) {
-                       if (!lmbs[i].reserved)
+
+               for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       rc = dlpar_add_lmb(&lmbs[i]);
+                       rc = dlpar_add_lmb(lmb);
                        if (rc)
                                pr_err("Failed to add LMB, drc index %x\n",
-                                      be32_to_cpu(lmbs[i].drc_index));
+                                      lmb->drc_index);
 
-                       lmbs[i].reserved = 0;
+                       drmem_remove_lmb_reservation(lmb);
                }
                rc = -EINVAL;
        } else {
-               for (i = start_index; i < end_index; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       dlpar_release_drc(lmbs[i].drc_index);
+                       dlpar_release_drc(lmb->drc_index);
                        pr_info("Memory at %llx (drc index %x) was hot-removed\n",
-                               lmbs[i].base_addr, lmbs[i].drc_index);
+                               lmb->base_addr, lmb->drc_index);
 
-                       lmbs[i].reserved = 0;
+                       drmem_remove_lmb_reservation(lmb);
                }
        }
 
@@ -737,32 +657,30 @@ static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
 {
        return -EOPNOTSUPP;
 }
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
        return -EOPNOTSUPP;
 }
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
-                                       struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 {
        return -EOPNOTSUPP;
 }
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
 {
        return -EOPNOTSUPP;
 }
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
 {
        return -EOPNOTSUPP;
 }
 
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
-                                    struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
        return -EOPNOTSUPP;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb(struct drmem_lmb *lmb)
 {
        unsigned long block_sz;
        int nid, rc;
@@ -801,77 +719,79 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
        return rc;
 }
 
-static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
+static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
+       struct drmem_lmb *lmb;
        int lmbs_available = 0;
        int lmbs_added = 0;
-       int i, rc;
+       int rc;
 
        pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
 
        if (lmbs_to_add == 0)
                return -EINVAL;
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
        /* Validate that there are enough LMBs to satisfy the request */
-       for (i = 0; i < num_lmbs; i++) {
-               if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+       for_each_drmem_lmb(lmb) {
+               if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
                        lmbs_available++;
+
+               if (lmbs_available == lmbs_to_add)
+                       break;
        }
 
        if (lmbs_available < lmbs_to_add)
                return -EINVAL;
 
-       for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
-               if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+       for_each_drmem_lmb(lmb) {
+               if (lmb->flags & DRCONF_MEM_ASSIGNED)
                        continue;
 
-               rc = dlpar_acquire_drc(lmbs[i].drc_index);
+               rc = dlpar_acquire_drc(lmb->drc_index);
                if (rc)
                        continue;
 
-               rc = dlpar_add_lmb(&lmbs[i]);
+               rc = dlpar_add_lmb(lmb);
                if (rc) {
-                       dlpar_release_drc(lmbs[i].drc_index);
+                       dlpar_release_drc(lmb->drc_index);
                        continue;
                }
 
-               lmbs_added++;
-
                /* Mark this lmb so we can remove it later if all of the
                 * requested LMBs cannot be added.
                 */
-               lmbs[i].reserved = 1;
+               drmem_mark_lmb_reserved(lmb);
+
+               lmbs_added++;
+               if (lmbs_added == lmbs_to_add)
+                       break;
        }
 
        if (lmbs_added != lmbs_to_add) {
                pr_err("Memory hot-add failed, removing any added LMBs\n");
 
-               for (i = 0; i < num_lmbs; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb(lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       rc = dlpar_remove_lmb(&lmbs[i]);
+                       rc = dlpar_remove_lmb(lmb);
                        if (rc)
                                pr_err("Failed to remove LMB, drc index %x\n",
-                                      be32_to_cpu(lmbs[i].drc_index));
+                                      lmb->drc_index);
                        else
-                               dlpar_release_drc(lmbs[i].drc_index);
+                               dlpar_release_drc(lmb->drc_index);
+
+                       drmem_remove_lmb_reservation(lmb);
                }
                rc = -EINVAL;
        } else {
-               for (i = 0; i < num_lmbs; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb(lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
                        pr_info("Memory at %llx (drc index %x) was hot-added\n",
-                               lmbs[i].base_addr, lmbs[i].drc_index);
-                       lmbs[i].reserved = 0;
+                               lmb->base_addr, lmb->drc_index);
+                       drmem_remove_lmb_reservation(lmb);
                }
                rc = 0;
        }
@@ -879,28 +799,22 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
        return rc;
 }
 
-static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_add_by_index(u32 drc_index)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
-       int i, lmb_found;
-       int rc;
+       struct drmem_lmb *lmb;
+       int rc, lmb_found;
 
        pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-
        lmb_found = 0;
-       for (i = 0; i < num_lmbs; i++) {
-               if (lmbs[i].drc_index == drc_index) {
+       for_each_drmem_lmb(lmb) {
+               if (lmb->drc_index == drc_index) {
                        lmb_found = 1;
-                       rc = dlpar_acquire_drc(lmbs[i].drc_index);
+                       rc = dlpar_acquire_drc(lmb->drc_index);
                        if (!rc) {
-                               rc = dlpar_add_lmb(&lmbs[i]);
+                               rc = dlpar_add_lmb(lmb);
                                if (rc)
-                                       dlpar_release_drc(lmbs[i].drc_index);
+                                       dlpar_release_drc(lmb->drc_index);
                        }
 
                        break;
@@ -914,18 +828,16 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
                pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
        else
                pr_info("Memory at %llx (drc index %x) was hot-added\n",
-                       lmbs[i].base_addr, drc_index);
+                       lmb->base_addr, drc_index);
 
        return rc;
 }
 
-static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
-                                 struct property *prop)
+static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 {
-       struct of_drconf_cell *lmbs;
-       u32 num_lmbs, *p;
-       int i, rc, start_lmb_found;
-       int lmbs_available = 0, start_index = 0, end_index;
+       struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+       int lmbs_available = 0;
+       int rc;
 
        pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
                lmbs_to_add, drc_index);
@@ -933,29 +845,13 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
        if (lmbs_to_add == 0)
                return -EINVAL;
 
-       p = prop->value;
-       num_lmbs = *p++;
-       lmbs = (struct of_drconf_cell *)p;
-       start_lmb_found = 0;
-
-       /* Navigate to drc_index */
-       while (start_index < num_lmbs) {
-               if (lmbs[start_index].drc_index == drc_index) {
-                       start_lmb_found = 1;
-                       break;
-               }
-
-               start_index++;
-       }
-
-       if (!start_lmb_found)
+       rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb);
+       if (rc)
                return -EINVAL;
 
-       end_index = start_index + lmbs_to_add;
-
        /* Validate that the LMBs in this range are not reserved */
-       for (i = start_index; i < end_index; i++) {
-               if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+       for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+               if (lmb->flags & DRCONF_MEM_RESERVED)
                        break;
 
                lmbs_available++;
@@ -964,46 +860,48 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
        if (lmbs_available < lmbs_to_add)
                return -EINVAL;
 
-       for (i = start_index; i < end_index; i++) {
-               if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+       for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+               if (lmb->flags & DRCONF_MEM_ASSIGNED)
                        continue;
 
-               rc = dlpar_acquire_drc(lmbs[i].drc_index);
+               rc = dlpar_acquire_drc(lmb->drc_index);
                if (rc)
                        break;
 
-               rc = dlpar_add_lmb(&lmbs[i]);
+               rc = dlpar_add_lmb(lmb);
                if (rc) {
-                       dlpar_release_drc(lmbs[i].drc_index);
+                       dlpar_release_drc(lmb->drc_index);
                        break;
                }
 
-               lmbs[i].reserved = 1;
+               drmem_mark_lmb_reserved(lmb);
        }
 
        if (rc) {
                pr_err("Memory indexed-count-add failed, removing any added LMBs\n");
 
-               for (i = start_index; i < end_index; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
-                       rc = dlpar_remove_lmb(&lmbs[i]);
+                       rc = dlpar_remove_lmb(lmb);
                        if (rc)
                                pr_err("Failed to remove LMB, drc index %x\n",
-                                      be32_to_cpu(lmbs[i].drc_index));
+                                      lmb->drc_index);
                        else
-                               dlpar_release_drc(lmbs[i].drc_index);
+                               dlpar_release_drc(lmb->drc_index);
+
+                       drmem_remove_lmb_reservation(lmb);
                }
                rc = -EINVAL;
        } else {
-               for (i = start_index; i < end_index; i++) {
-                       if (!lmbs[i].reserved)
+               for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+                       if (!drmem_lmb_reserved(lmb))
                                continue;
 
                        pr_info("Memory at %llx (drc index %x) was hot-added\n",
-                               lmbs[i].base_addr, lmbs[i].drc_index);
-                       lmbs[i].reserved = 0;
+                               lmb->base_addr, lmb->drc_index);
+                       drmem_remove_lmb_reservation(lmb);
                }
        }
 
@@ -1012,37 +910,23 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
 
 int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
 {
-       struct device_node *dn;
-       struct property *prop;
        u32 count, drc_index;
        int rc;
 
        lock_device_hotplug();
 
-       dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-       if (!dn) {
-               rc = -EINVAL;
-               goto dlpar_memory_out;
-       }
-
-       prop = dlpar_clone_drconf_property(dn);
-       if (!prop) {
-               rc = -EINVAL;
-               goto dlpar_memory_out;
-       }
-
        switch (hp_elog->action) {
        case PSERIES_HP_ELOG_ACTION_ADD:
                if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
                        count = hp_elog->_drc_u.drc_count;
-                       rc = dlpar_memory_add_by_count(count, prop);
+                       rc = dlpar_memory_add_by_count(count);
                } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
                        drc_index = hp_elog->_drc_u.drc_index;
-                       rc = dlpar_memory_add_by_index(drc_index, prop);
+                       rc = dlpar_memory_add_by_index(drc_index);
                } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
                        count = hp_elog->_drc_u.ic.count;
                        drc_index = hp_elog->_drc_u.ic.index;
-                       rc = dlpar_memory_add_by_ic(count, drc_index, prop);
+                       rc = dlpar_memory_add_by_ic(count, drc_index);
                } else {
                        rc = -EINVAL;
                }
@@ -1051,14 +935,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
        case PSERIES_HP_ELOG_ACTION_REMOVE:
                if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
                        count = hp_elog->_drc_u.drc_count;
-                       rc = dlpar_memory_remove_by_count(count, prop);
+                       rc = dlpar_memory_remove_by_count(count);
                } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
                        drc_index = hp_elog->_drc_u.drc_index;
-                       rc = dlpar_memory_remove_by_index(drc_index, prop);
+                       rc = dlpar_memory_remove_by_index(drc_index);
                } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
                        count = hp_elog->_drc_u.ic.count;
                        drc_index = hp_elog->_drc_u.ic.index;
-                       rc = dlpar_memory_remove_by_ic(count, drc_index, prop);
+                       rc = dlpar_memory_remove_by_ic(count, drc_index);
                } else {
                        rc = -EINVAL;
                }
@@ -1066,7 +950,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
                break;
        case PSERIES_HP_ELOG_ACTION_READD:
                drc_index = hp_elog->_drc_u.drc_index;
-               rc = dlpar_memory_readd_by_index(drc_index, prop);
+               rc = dlpar_memory_readd_by_index(drc_index);
                break;
        default:
                pr_err("Invalid action (%d) specified\n", hp_elog->action);
@@ -1074,10 +958,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
                break;
        }
 
-       dlpar_free_property(prop);
-
-dlpar_memory_out:
-       of_node_put(dn);
        unlock_device_hotplug();
        return rc;
 }
@@ -1116,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np)
 
 static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
 {
-       struct of_drconf_cell *new_drmem, *old_drmem;
+       struct of_drconf_cell_v1 *new_drmem, *old_drmem;
        unsigned long memblock_size;
        u32 entries;
        __be32 *p;
@@ -1139,11 +1019,11 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
         * of_drconf_cell's.
         */
        entries = be32_to_cpu(*p++);
-       old_drmem = (struct of_drconf_cell *)p;
+       old_drmem = (struct of_drconf_cell_v1 *)p;
 
        p = (__be32 *)pr->prop->value;
        p++;
-       new_drmem = (struct of_drconf_cell *)p;
+       new_drmem = (struct of_drconf_cell_v1 *)p;
 
        for (i = 0; i < entries; i++) {
                if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&
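
The rename from of_drconf_cell to of_drconf_cell_v1 distinguishes the original PAPR "ibm,dynamic-memory" encoding from the new v2 property. For reference, the v1 entry layout this function still parses should look like the following (reproduced from memory; see asm/drmem.h for the real definition):

/* PAPR "ibm,dynamic-memory" v1 entry; one per LMB, preceded in the
 * property by a single cell holding the entry count. */
struct of_drconf_cell_v1 {
	__be64	base_addr;
	__be32	drc_index;
	__be32	reserved;
	__be32	aa_index;
	__be32	flags;
};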
index 957ae347b0b31e6e46d82e0a904017626a4b1b7f..89b7ce807e70c950aa17e5f3de87f75aa36e6feb 100644 (file)
@@ -163,7 +163,7 @@ static int __init hcall_inst_init(void)
 
        for_each_possible_cpu(cpu) {
                snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
-               hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+               hcall_file = debugfs_create_file(cpu_name_buf, 0444,
                                                 hcall_root,
                                                 per_cpu(hcall_stats, cpu),
                                                 &hcall_inst_seq_fops);
index 408a8604413300bd0085158f75247752318db8a6..c7c1140c13b6cb8707c4204c9266e2bd217022d3 100644 (file)
@@ -298,7 +298,7 @@ out:
                return rc;
        return count;
 }
-static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
+static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe);
 
 static ssize_t ibmebus_store_remove(struct bus_type *bus,
                                    const char *buf, size_t count)
@@ -325,7 +325,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
                return -ENODEV;
        }
 }
-static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
+static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove);
 
 static struct attribute *ibmbus_bus_attrs[] = {
        &bus_attr_probe.attr,
index eaa11334fc8c3cd435c7e55f3a3977b66576b0e8..06f02960b4392386375b5b4d87eb604cf7465438 100644 (file)
@@ -816,15 +816,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
        ret = tce_clearrange_multi_pSeriesLP(0,
                1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
        if (ret)
-               pr_warning("%pOF failed to clear tces in window.\n",
-                        np);
+               pr_warn("%pOF failed to clear tces in window.\n",
+                       np);
        else
                pr_debug("%pOF successfully cleared tces in window.\n",
                         np);
 
        ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
        if (ret)
-               pr_warning("%pOF: failed to remove direct window: rtas returned "
+               pr_warn("%pOF: failed to remove direct window: rtas returned "
                        "%d to ibm,remove-pe-dma-window(%x) %llx\n",
                        np, ret, ddw_avail[2], liobn);
        else
@@ -836,7 +836,7 @@ delprop:
        if (remove_prop)
                ret = of_remove_property(np, win64);
        if (ret)
-               pr_warning("%pOF: failed to remove direct window property: %d\n",
+               pr_warn("%pOF: failed to remove direct window property: %d\n",
                        np, ret);
 }
 
index b2706c4830672ff51f5bb97e22c2adc720b538bb..c508c938dc71e0bc0d76d0764ffe81ddcd5f8b8f 100644 (file)
@@ -370,10 +370,10 @@ static void parse_system_parameter_string(struct seq_file *m)
  */
 static int lparcfg_count_active_processors(void)
 {
-       struct device_node *cpus_dn = NULL;
+       struct device_node *cpus_dn;
        int count = 0;
 
-       while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+       for_each_node_by_type(cpus_dn, "cpu") {
 #ifdef LPARCFG_DEBUG
                printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
 #endif
@@ -697,11 +697,11 @@ static const struct file_operations lparcfg_fops = {
 
 static int __init lparcfg_init(void)
 {
-       umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+       umode_t mode = 0444;
 
        /* Allow writing if we have FW_FEATURE_SPLPAR */
        if (firmware_has_feature(FW_FEATURE_SPLPAR))
-               mode |= S_IWUSR;
+               mode |= 0200;
 
        if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
                printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
index f7042ad492bafba5ac21e3db9ce24977fd527169..0f7fb7170b03ce5646aa13b9b9b55f5965f09aae 100644 (file)
@@ -384,7 +384,7 @@ static ssize_t migration_store(struct class *class,
 #define MIGRATION_API_VERSION  1
 
 static CLASS_ATTR_WO(migration);
-static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
+static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
 
 static int __init mobility_sysfs_init(void)
 {
index 7e75101fa52229bbb327ebeee2b0caa410a1f60e..6df192f38f8005c5edde2d72fa68eae91078c5ba 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <asm/prom.h>
 
 #include "of_helpers.h"
 
@@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path)
                kfree(parent_path);
        return parent ? parent : ERR_PTR(-EINVAL);
 }
+
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+                       struct of_drc_info *data)
+{
+       const char *p;
+       const __be32 *p2;
+
+       if (!data)
+               return -EINVAL;
+
+       /* Get drc-type:encode-string */
+       p = data->drc_type = (char*) (*curval);
+       p = of_prop_next_string(*prop, p);
+       if (!p)
+               return -EINVAL;
+
+       /* Get drc-name-prefix:encode-string */
+       data->drc_name_prefix = (char *)p;
+       p = of_prop_next_string(*prop, p);
+       if (!p)
+               return -EINVAL;
+
+       /* Get drc-index-start:encode-int */
+       p2 = (const __be32 *)p;
+       p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
+       if (!p2)
+               return -EINVAL;
+
+       /* Get drc-name-suffix-start:encode-int */
+       p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+       if (!p2)
+               return -EINVAL;
+
+       /* Get number-sequential-elements:encode-int */
+       p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+       if (!p2)
+               return -EINVAL;
+
+       /* Get sequential-increment:encode-int */
+       p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+       if (!p2)
+               return -EINVAL;
+
+       /* Get drc-power-domain:encode-int */
+       p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+       if (!p2)
+               return -EINVAL;
+
+       /* Should now know end of current entry */
+       (*curval) = (void *)p2;
+       data->last_drc_index = data->drc_index_start +
+               ((data->num_sequential_elems - 1) * data->sequential_inc);
+
+       return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
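
of_read_drc_info_cell() consumes one entry of the self-describing "ibm,drc-info" property, advancing *curval past it. A hedged usage sketch, modelled on how the pseries callers appear to consume it; the node path and the elided error handling are simplifications:

/* Sketch: walk every entry of "ibm,drc-info" under /cpus.
 * of_node_put() and most error handling are elided for brevity. */
static void dump_drc_info(void)
{
	struct device_node *dn = of_find_node_by_path("/cpus");
	struct property *info;
	struct of_drc_info drc;
	const __be32 *value;
	u32 num_entries;
	int i;

	info = of_find_property(dn, "ibm,drc-info", NULL);
	if (!info)
		return;

	/* The first cell holds the number of entries that follow. */
	value = of_prop_next_u32(info, NULL, &num_entries);
	for (i = 0; value && i < num_entries; i++) {
		if (of_read_drc_info_cell(&info, &value, &drc))
			break;
		pr_info("type %s: drc 0x%x..0x%x, step 0x%x\n",
			drc.drc_type, drc.drc_index_start,
			drc.last_drc_index, drc.sequential_inc);
	}
}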
index 09eba5a9929afc6818dac60bac41079c89f6330b..eab96637d6cf30d068e67a1664ef9b7d0b7a254b 100644 (file)
@@ -3,17 +3,17 @@
  * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
  *
  * pSeries specific routines for PCI.
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- *    
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
@@ -54,10 +54,174 @@ void pcibios_name_device(struct pci_dev *dev)
                        }
                }
        }
-}   
+}
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
 #endif
 
+#ifdef CONFIG_PCI_IOV
+#define MAX_VFS_FOR_MAP_PE 256
+struct pe_map_bar_entry {
+       __be64     bar;       /* Input:  Virtual Function BAR */
+       __be16     rid;       /* Input:  Virtual Function Router ID */
+       __be16     pe_num;    /* Output: Virtual Function PE Number */
+       __be32     reserved;  /* Reserved Space */
+};
+
+int pseries_send_map_pe(struct pci_dev *pdev,
+                       u16 num_vfs,
+                       struct pe_map_bar_entry *vf_pe_array)
+{
+       struct pci_dn *pdn;
+       int rc;
+       unsigned long buid, addr;
+       int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number");
+
+       if (ibm_map_pes == RTAS_UNKNOWN_SERVICE)
+               return -EINVAL;
+
+       pdn = pci_get_pdn(pdev);
+       addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+       buid = pdn->phb->buid;
+       spin_lock(&rtas_data_buf_lock);
+       memcpy(rtas_data_buf, vf_pe_array,
+              RTAS_DATA_BUF_SIZE);
+       rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr,
+                      BUID_HI(buid), BUID_LO(buid),
+                      rtas_data_buf,
+                      num_vfs * sizeof(struct pe_map_bar_entry));
+       memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+       spin_unlock(&rtas_data_buf_lock);
+
+       if (rc)
+               dev_err(&pdev->dev,
+                       "%s: Failed to associate pes PE#%lx, rc=%x\n",
+                       __func__,  addr, rc);
+
+       return rc;
+}
+
+void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num)
+{
+       struct pci_dn *pdn;
+
+       pdn = pci_get_pdn(pdev);
+       pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num);
+       dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n",
+               pci_domain_nr(pdev->bus),
+               pdev->bus->number,
+               PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+               PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)),
+               pdn->pe_num_map[vf_index]);
+}
+
+int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs)
+{
+       struct pci_dn *pdn;
+       int i, rc, vf_index;
+       struct pe_map_bar_entry *vf_pe_array;
+       struct resource *res;
+       u64 size;
+
+       vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+       if (!vf_pe_array)
+               return -ENOMEM;
+
+       pdn = pci_get_pdn(pdev);
+       /* Create the firmware structure used to associate PEs */
+       for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+               pdn->pe_num_map[vf_index] = IODA_INVALID_PE;
+               for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+                       res = &pdev->resource[i + PCI_IOV_RESOURCES];
+                       if (!res->parent)
+                               continue;
+                       size = pcibios_iov_resource_alignment(pdev, i +
+                                       PCI_IOV_RESOURCES);
+                       vf_pe_array[vf_index].bar =
+                               cpu_to_be64(res->start + size * vf_index);
+                       vf_pe_array[vf_index].rid =
+                               cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index)
+                                           << 8) | pci_iov_virtfn_devfn(pdev,
+                                           vf_index));
+                       vf_pe_array[vf_index].pe_num =
+                               cpu_to_be16(IODA_INVALID_PE);
+               }
+       }
+
+       rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array);
+       /* Only zero is success */
+       if (!rc)
+               for (vf_index = 0; vf_index < num_vfs; vf_index++)
+                       pseries_set_pe_num(pdev, vf_index,
+                                          vf_pe_array[vf_index].pe_num);
+
+       kfree(vf_pe_array);
+       return rc;
+}
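
The rid field built in the loop above packs the VF's bus number and
devfn into the standard 16-bit PCI requester-ID layout: bus in bits
15:8, 5-bit device in bits 7:3, 3-bit function in bits 2:0. A small
sketch of the encoding, with the pci_iov_virtfn_* helpers replaced by
plain parameters:

    #include <stdint.h>

    static uint16_t vf_rid(uint8_t bus, uint8_t devfn)
    {
            return ((uint16_t)bus << 8) | devfn;
    }

    /* e.g. bus 0x21, device 0x10, function 2: devfn = 0x82, RID = 0x2182 */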
+
+int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+       struct pci_dn         *pdn;
+       int                    rc;
+       const int *max_vfs;
+       int max_config_vfs;
+       struct device_node *dn = pci_device_to_OF_node(pdev);
+
+       max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL);
+
+       if (!max_vfs)
+               return -EINVAL;
+
+       /* The first integer holds the max number of configurable VFs */
+       max_config_vfs = of_read_number(&max_vfs[0], 1);
+       if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) {
+               dev_err(&pdev->dev,
+                       "Num VFs %x > %x Configurable VFs\n",
+                       num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ?
+                       MAX_VFS_FOR_MAP_PE : max_config_vfs);
+               return -EINVAL;
+       }
+
+       pdn = pci_get_pdn(pdev);
+       pdn->pe_num_map = kmalloc_array(num_vfs,
+                                       sizeof(*pdn->pe_num_map),
+                                       GFP_KERNEL);
+       if (!pdn->pe_num_map)
+               return -ENOMEM;
+
+       rc = pseries_associate_pes(pdev, num_vfs);
+
+       /* Anything other than zero is failure */
+       if (rc) {
+               dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc);
+               kfree(pdn->pe_num_map);
+       } else {
+               pci_vf_drivers_autoprobe(pdev, false);
+       }
+
+       return rc;
+}
+
+int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+       /* Allocate PCI data */
+       add_dev_pci_data(pdev);
+       return pseries_pci_sriov_enable(pdev, num_vfs);
+}
+
+int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+       struct pci_dn         *pdn;
+
+       pdn = pci_get_pdn(pdev);
+       /* Release pe_num_map */
+       kfree(pdn->pe_num_map);
+       /* Release PCI data */
+       remove_dev_pci_data(pdev);
+       pci_vf_drivers_autoprobe(pdev, true);
+       return 0;
+}
+#endif
+
 static void __init pSeries_request_regions(void)
 {
        if (!isa_io_base)
@@ -76,6 +240,11 @@ void __init pSeries_final_fixup(void)
        pSeries_request_regions();
 
        eeh_addr_cache_build();
+
+#ifdef CONFIG_PCI_IOV
+       ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
+       ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
+#endif
 }
 
 /*
index 35c891aabef0e0347cd5b231c73d991786f920e9..6ed22127391b6d0a7789bb363476452bf0991a65 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/page.h>
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
+#include <asm/prom.h>
 
 
 #define MODULE_VERS "1.0"
@@ -38,26 +39,58 @@ static int sysfs_entries;
 static u32 cpu_to_drc_index(int cpu)
 {
        struct device_node *dn = NULL;
-       const int *indexes;
-       int i;
+       int thread_index;
        int rc = 1;
        u32 ret = 0;
 
        dn = of_find_node_by_path("/cpus");
        if (dn == NULL)
                goto err;
-       indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
-       if (indexes == NULL)
-               goto err_of_node_put;
+
        /* Convert logical cpu number to core number */
-       i = cpu_core_index_of_thread(cpu);
-       /*
-        * The first element indexes[0] is the number of drc_indexes
-        * returned in the list.  Hence i+1 will get the drc_index
-        * corresponding to core number i.
-        */
-       WARN_ON(i > indexes[0]);
-       ret = indexes[i + 1];
+       thread_index = cpu_core_index_of_thread(cpu);
+
+       if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+               struct property *info = NULL;
+               struct of_drc_info drc;
+               int j;
+               u32 num_set_entries;
+               const __be32 *value;
+
+               info = of_find_property(dn, "ibm,drc-info", NULL);
+               if (info == NULL)
+                       goto err_of_node_put;
+
+               value = of_prop_next_u32(info, NULL, &num_set_entries);
+               if (!value)
+                       goto err_of_node_put;
+
+               for (j = 0; j < num_set_entries; j++) {
+
+                       of_read_drc_info_cell(&info, &value, &drc);
+                       if (strncmp(drc.drc_type, "CPU", 3))
+                               goto err;
+
+                       if (thread_index < drc.last_drc_index)
+                               break;
+               }
+
+               ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+       } else {
+               const __be32 *indexes;
+
+               indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+               if (indexes == NULL)
+                       goto err_of_node_put;
+
+               /*
+                * The first element indexes[0] is the number of drc_indexes
+                * returned in the list.  Hence thread_index+1 will get the
+                * drc_index corresponding to core number thread_index.
+                */
+               ret = indexes[thread_index + 1];
+       }
+
        rc = 0;
 
 err_of_node_put:
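
With ibm,drc-info each set describes a run of sequential DRC indexes,
so the index for a core is computed rather than looked up: find the set
whose last_drc_index covers the core, then take drc_index_start +
thread_index * sequential_inc. A simplified stand-alone sketch of that
arithmetic (the struct fields follow of_drc_info as used above):

    #include <stdint.h>

    struct drc_info_set {
            uint32_t drc_index_start;
            uint32_t sequential_inc;
            uint32_t last_drc_index;
            uint32_t num_sequential_elems;
    };

    static uint32_t core_to_drc_index(const struct drc_info_set *sets,
                                      int nsets, uint32_t thread_index)
    {
            for (int i = 0; i < nsets; i++) {
                    if (thread_index < sets[i].last_drc_index)
                            return sets[i].drc_index_start +
                                   thread_index * sets[i].sequential_inc;
            }
            return 0;       /* no covering set found */
    }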
@@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index)
 {
        struct device_node *dn = NULL;
        const int *indexes;
-       int i, cpu = 0;
+       int thread_index = 0, cpu = 0;
        int rc = 1;
 
        dn = of_find_node_by_path("/cpus");
        if (dn == NULL)
                goto err;
-       indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
-       if (indexes == NULL)
-               goto err_of_node_put;
-       /*
-        * First element in the array is the number of drc_indexes
-        * returned.  Search through the list to find the matching
-        * drc_index and get the core number
-        */
-       for (i = 0; i < indexes[0]; i++) {
-               if (indexes[i + 1] == drc_index)
+
+       if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+               struct property *info = NULL;
+               struct of_drc_info drc;
+               int j;
+               u32 num_set_entries;
+               const __be32 *value;
+
+               info = of_find_property(dn, "ibm,drc-info", NULL);
+               if (info == NULL)
+                       goto err_of_node_put;
+
+               value = of_prop_next_u32(info, NULL, &num_set_entries);
+               if (!value)
+                       goto err_of_node_put;
+
+               for (j = 0; j < num_set_entries; j++) {
+
+                       of_read_drc_info_cell(&info, &value, &drc);
+                       if (strncmp(drc.drc_type, "CPU", 3))
+                               goto err;
+
+                       if (drc_index > drc.last_drc_index) {
+                               cpu += drc.num_sequential_elems;
+                               continue;
+                       }
+                       cpu += ((drc_index - drc.drc_index_start) /
+                               drc.sequential_inc);
+
+                       thread_index = cpu_first_thread_of_core(cpu);
+                       rc = 0;
                        break;
+               }
+       } else {
+               unsigned long int i;
+
+               indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+               if (indexes == NULL)
+                       goto err_of_node_put;
+               /*
+                * First element in the array is the number of drc_indexes
+                * returned.  Search through the list to find the matching
+                * drc_index and get the core number
+                */
+               for (i = 0; i < indexes[0]; i++) {
+                       if (indexes[i + 1] == drc_index)
+                               break;
+               }
+               /* Convert core number to logical cpu number */
+               thread_index = cpu_first_thread_of_core(i);
+               rc = 0;
        }
-       /* Convert core number to logical cpu number */
-       cpu = cpu_first_thread_of_core(i);
-       rc = 0;
 
 err_of_node_put:
        of_node_put(dn);
 err:
        if (rc)
                printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
-       return cpu;
+       return thread_index;
 }
 
 /*
index f24d8159c9e1eed31a53f248c61ccdbdaec578b3..0e0208117e772407f33d772d7cfd8845926c4e98 100644 (file)
@@ -405,7 +405,7 @@ static int proc_ppc64_create_ofdt(void)
 {
        struct proc_dir_entry *ent;
 
-       ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
+       ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops);
        if (ent)
                proc_set_size(ent, 0);
 
index c47585a78b69158e8fb880689bd38d44d1d29db5..054ce7a16fc336b42b1c05243be5a45a68b91d5a 100644 (file)
@@ -179,7 +179,7 @@ static int __init scanlog_init(void)
        if (!scanlog_buffer)
                goto err;
 
-       ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL,
+       ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
                          &scanlog_fops);
        if (!ent)
                goto err;
index ae4f596273b51a836e5d27307f81bfe6438590bf..372d7ada1a0c115aa9078cb1ba0d01d69a80a73d 100644 (file)
@@ -371,8 +371,8 @@ void pseries_disable_reloc_on_exc(void)
                mdelay(get_longbusy_msecs(rc));
        }
        if (rc != H_SUCCESS)
-               pr_warning("Warning: Failed to disable relocation on "
-                          "exceptions: %ld\n", rc);
+               pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
+                       rc);
 }
 EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
 
@@ -492,6 +492,162 @@ static void pseries_setup_rfi_flush(void)
        setup_rfi_flush(types, enable);
 }
 
+#ifdef CONFIG_PCI_IOV
+enum rtas_iov_fw_value_map {
+       NUM_RES_PROPERTY  = 0, /* Number of Resources */
+       LOW_INT           = 1, /* Lowest 32 bits of Address */
+       START_OF_ENTRIES  = 2, /* Always start of entry */
+       APERTURE_PROPERTY = 2, /* Offset from entry start to Aperture Size */
+       WDW_SIZE_PROPERTY = 4, /* Offset from entry start to Window Size */
+       NEXT_ENTRY        = 7  /* Stride to the next entry in the array */
+};
+
+enum get_iov_fw_value_index {
+       BAR_ADDRS     = 1,    /*  Get Bar Address */
+       APERTURE_SIZE = 2,    /*  Get Aperture Size */
+       WDW_SIZE      = 3     /*  Get Window Size */
+};
+
+resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
+                                        enum get_iov_fw_value_index value)
+{
+       const int *indexes;
+       struct device_node *dn = pci_device_to_OF_node(dev);
+       int i, num_res, ret = 0;
+
+       indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+       if (!indexes)
+               return  0;
+
+       /*
+        * The first element in the array is the number of BARs
+        * returned.  Search through the list to find the matching
+        * BAR.
+        */
+       num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+       if (resno >= num_res)
+               return 0; /* or an error */
+
+       i = START_OF_ENTRIES + NEXT_ENTRY * resno;
+       switch (value) {
+       case BAR_ADDRS:
+               ret = of_read_number(&indexes[i], 2);
+               break;
+       case APERTURE_SIZE:
+               ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+               break;
+       case WDW_SIZE:
+               ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+               break;
+       }
+
+       return ret;
+}
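
Each entry in ibm,open-sriov-vf-bar-info is NEXT_ENTRY (7) cells long:
a two-cell BAR address first, the two-cell aperture size at offset
APERTURE_PROPERTY and the two-cell window size at offset
WDW_SIZE_PROPERTY, with entries starting at cell START_OF_ENTRIES after
the leading cell that holds the number of BARs. A sketch of the
indexing, with of_read_number replaced by an explicit two-cell read
(cells assumed already converted to host byte order):

    #include <stdint.h>

    #define START_OF_ENTRIES  2
    #define APERTURE_PROPERTY 2
    #define WDW_SIZE_PROPERTY 4
    #define NEXT_ENTRY        7

    static uint64_t read2(const uint32_t *cells)
    {
            return ((uint64_t)cells[0] << 32) | cells[1];
    }

    static uint64_t vf_aperture_size(const uint32_t *prop, int resno)
    {
            int i = START_OF_ENTRIES + NEXT_ENTRY * resno;

            return read2(&prop[i + APERTURE_PROPERTY]);
    }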
+
+void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
+{
+       struct resource *res;
+       resource_size_t base, size;
+       int i, r, num_res;
+
+       num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+       num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
+       for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+            i += NEXT_ENTRY, r++) {
+               res = &dev->resource[r + PCI_IOV_RESOURCES];
+               base = of_read_number(&indexes[i], 2);
+               size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+               res->flags = pci_parse_of_flags(of_read_number
+                                               (&indexes[i + LOW_INT], 1), 0);
+               res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
+               res->name = pci_name(dev);
+               res->start = base;
+               res->end = base + size - 1;
+       }
+}
+
+void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
+{
+       struct resource *res, *root, *conflict;
+       resource_size_t base, size;
+       int i, r, num_res;
+
+       /*
+        * The first element in the array is the number of BARs
+        * returned.  Walk the list and assign the firmware-provided
+        * addresses into the matching resource structures.
+        */
+       num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+       for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+            i += NEXT_ENTRY, r++) {
+               res = &dev->resource[r + PCI_IOV_RESOURCES];
+               base = of_read_number(&indexes[i], 2);
+               size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+               res->name = pci_name(dev);
+               res->start = base;
+               res->end = base + size - 1;
+               root = &iomem_resource;
+               dev_dbg(&dev->dev,
+                       "pSeries IOV BAR %d: trying firmware assignment %pR\n",
+                        r + PCI_IOV_RESOURCES, res);
+               conflict = request_resource_conflict(root, res);
+               if (conflict) {
+                       dev_info(&dev->dev,
+                                "BAR %d: %pR conflicts with %s %pR\n",
+                                r + PCI_IOV_RESOURCES, res,
+                                conflict->name, conflict);
+                       res->flags |= IORESOURCE_UNSET;
+               }
+       }
+}
+
+static void pseries_pci_fixup_resources(struct pci_dev *pdev)
+{
+       const int *indexes;
+       struct device_node *dn = pci_device_to_OF_node(pdev);
+
+       /* Firmware must support open SR-IOV, otherwise don't configure */
+       indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+       if (!indexes)
+               return;
+       /* Assign the addresses from the device tree */
+       of_pci_set_vf_bar_size(pdev, indexes);
+}
+
+static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
+{
+       const int *indexes;
+       struct device_node *dn = pci_device_to_OF_node(pdev);
+
+       if (!pdev->is_physfn || pdev->is_added)
+               return;
+       /* Firmware must support open SR-IOV, otherwise don't configure */
+       indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+       if (!indexes)
+               return;
+       /* Assign the addresses from the device tree */
+       of_pci_parse_iov_addrs(pdev, indexes);
+}
+
+static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
+                                                         int resno)
+{
+       const __be32 *reg;
+       struct device_node *dn = pci_device_to_OF_node(pdev);
+
+       /* Firmware must support open SR-IOV, otherwise report regular alignment */
+       reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
+       if (!reg)
+               return pci_iov_resource_size(pdev, resno);
+
+       if (!pdev->is_physfn)
+               return 0;
+       return pseries_get_iov_fw_value(pdev,
+                                       resno - PCI_IOV_RESOURCES,
+                                       APERTURE_SIZE);
+}
+#endif
+
 static void __init pSeries_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -525,6 +681,14 @@ static void __init pSeries_setup_arch(void)
                vpa_init(boot_cpuid);
                ppc_md.power_save = pseries_lpar_idle;
                ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+#ifdef CONFIG_PCI_IOV
+               ppc_md.pcibios_fixup_resources =
+                       pseries_pci_fixup_resources;
+               ppc_md.pcibios_fixup_sriov =
+                       pseries_pci_fixup_iov_resources;
+               ppc_md.pcibios_iov_resource_alignment =
+                       pseries_pci_iov_resource_alignment;
+#endif
        } else {
                /* No special idle routine */
                ppc_md.enable_pmcs = power4_enable_pmcs;
@@ -533,6 +697,12 @@ static void __init pSeries_setup_arch(void)
        ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
 }
 
+static void pseries_panic(char *str)
+{
+       panic_flush_kmsg_end();
+       rtas_os_term(str);
+}
+
 static int __init pSeries_init_panel(void)
 {
        /* Manually leave the kernel version on the panel. */
@@ -761,7 +931,7 @@ define_machine(pseries) {
        .pcibios_fixup          = pSeries_final_fixup,
        .restart                = rtas_restart,
        .halt                   = rtas_halt,
-       .panic                  = rtas_os_term,
+       .panic                  = pseries_panic,
        .get_boot_time          = rtas_get_boot_time,
        .get_rtc_time           = rtas_get_rtc_time,
        .set_rtc_time           = rtas_set_rtc_time,
index 89726f07d2492fd17beb05eea48c476fd2652550..52a021e1f86bfe6950d450ba03c3f7c881dec22a 100644 (file)
@@ -214,8 +214,7 @@ static ssize_t show_hibernate(struct device *dev,
        return sprintf(buf, "%d\n", KERN_DT_UPDATE);
 }
 
-static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
-                  show_hibernate, store_hibernate);
+static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate);
 
 static struct bus_type suspend_subsys = {
        .name = "power",
index 0baba21404dcc0ec3f0342fca8f16d418be8fec2..9861407d644aacb61b2a4d52043f364141bb6c82 100644 (file)
@@ -32,7 +32,6 @@ mv64x60-$(CONFIG_PCI)         += mv64x60_pci.o
 obj-$(CONFIG_MV64X60)          += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
                                   mv64x60_udbg.o
 obj-$(CONFIG_RTC_DRV_CMOS)     += rtc_cmos_setup.o
-obj-$(CONFIG_AXON_RAM)         += axonram.o
 
 obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o
 obj-$(CONFIG_PPC_I8259)                += i8259.o
@@ -43,7 +42,8 @@ obj-$(CONFIG_OF_RTC)          += of_rtc.o
 
 obj-$(CONFIG_CPM)              += cpm_common.o
 obj-$(CONFIG_CPM1)             += cpm1.o
-obj-$(CONFIG_CPM2)             += cpm2.o cpm2_pic.o
+obj-$(CONFIG_CPM2)             += cpm2.o cpm2_pic.o cpm_gpio.o
+obj-$(CONFIG_8xx_GPIO)         += cpm_gpio.o
 obj-$(CONFIG_QUICC_ENGINE)     += cpm_common.o
 obj-$(CONFIG_PPC_DCR)          += dcr.o
 obj-$(CONFIG_UCODE_PATCH)      += micropatch.o
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
deleted file mode 100644 (file)
index 1b307c8..0000000
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
- *
- * Author: Maxim Shchetynin <maxim@de.ibm.com>
- *
- * Axon DDR2 device driver.
- * It registers one block device per Axon's DDR2 memory bank found on a system.
- * Block devices are called axonram?, their major and minor numbers are
- * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/dax.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/irq.h>
-#include <linux/irqreturn.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/pfn_t.h>
-#include <linux/uio.h>
-
-#include <asm/page.h>
-#include <asm/prom.h>
-
-#define AXON_RAM_MODULE_NAME           "axonram"
-#define AXON_RAM_DEVICE_NAME           "axonram"
-#define AXON_RAM_MINORS_PER_DISK       16
-#define AXON_RAM_BLOCK_SHIFT           PAGE_SHIFT
-#define AXON_RAM_BLOCK_SIZE            1 << AXON_RAM_BLOCK_SHIFT
-#define AXON_RAM_SECTOR_SHIFT          9
-#define AXON_RAM_SECTOR_SIZE           1 << AXON_RAM_SECTOR_SHIFT
-#define AXON_RAM_IRQ_FLAGS             IRQF_SHARED | IRQF_TRIGGER_RISING
-
-static int azfs_major, azfs_minor;
-
-struct axon_ram_bank {
-       struct platform_device  *device;
-       struct gendisk          *disk;
-       struct dax_device       *dax_dev;
-       unsigned int            irq_id;
-       unsigned long           ph_addr;
-       unsigned long           io_addr;
-       unsigned long           size;
-       unsigned long           ecc_counter;
-};
-
-static ssize_t
-axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct platform_device *device = to_platform_device(dev);
-       struct axon_ram_bank *bank = device->dev.platform_data;
-
-       BUG_ON(!bank);
-
-       return sprintf(buf, "%ld\n", bank->ecc_counter);
-}
-
-static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
-
-/**
- * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
- * @irq: interrupt ID
- * @dev: pointer to of_device
- */
-static irqreturn_t
-axon_ram_irq_handler(int irq, void *dev)
-{
-       struct platform_device *device = dev;
-       struct axon_ram_bank *bank = device->dev.platform_data;
-
-       BUG_ON(!bank);
-
-       dev_err(&device->dev, "Correctable memory error occurred\n");
-       bank->ecc_counter++;
-       return IRQ_HANDLED;
-}
-
-/**
- * axon_ram_make_request - make_request() method for block device
- * @queue, @bio: see blk_queue_make_request()
- */
-static blk_qc_t
-axon_ram_make_request(struct request_queue *queue, struct bio *bio)
-{
-       struct axon_ram_bank *bank = bio->bi_disk->private_data;
-       unsigned long phys_mem, phys_end;
-       void *user_mem;
-       struct bio_vec vec;
-       unsigned int transfered;
-       struct bvec_iter iter;
-
-       phys_mem = bank->io_addr + (bio->bi_iter.bi_sector <<
-                                   AXON_RAM_SECTOR_SHIFT);
-       phys_end = bank->io_addr + bank->size;
-       transfered = 0;
-       bio_for_each_segment(vec, bio, iter) {
-               if (unlikely(phys_mem + vec.bv_len > phys_end)) {
-                       bio_io_error(bio);
-                       return BLK_QC_T_NONE;
-               }
-
-               user_mem = page_address(vec.bv_page) + vec.bv_offset;
-               if (bio_data_dir(bio) == READ)
-                       memcpy(user_mem, (void *) phys_mem, vec.bv_len);
-               else
-                       memcpy((void *) phys_mem, user_mem, vec.bv_len);
-
-               phys_mem += vec.bv_len;
-               transfered += vec.bv_len;
-       }
-       bio_endio(bio);
-       return BLK_QC_T_NONE;
-}
-
-static const struct block_device_operations axon_ram_devops = {
-       .owner          = THIS_MODULE,
-};
-
-static long
-__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
-                      void **kaddr, pfn_t *pfn)
-{
-       resource_size_t offset = pgoff * PAGE_SIZE;
-
-       *kaddr = (void *) bank->io_addr + offset;
-       *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
-       return (bank->size - offset) / PAGE_SIZE;
-}
-
-static long
-axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
-                      void **kaddr, pfn_t *pfn)
-{
-       struct axon_ram_bank *bank = dax_get_private(dax_dev);
-
-       return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
-}
-
-static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
-               void *addr, size_t bytes, struct iov_iter *i)
-{
-       return copy_from_iter(addr, bytes, i);
-}
-
-static const struct dax_operations axon_ram_dax_ops = {
-       .direct_access = axon_ram_dax_direct_access,
-       .copy_from_iter = axon_ram_copy_from_iter,
-};
-
-/**
- * axon_ram_probe - probe() method for platform driver
- * @device: see platform_driver method
- */
-static int axon_ram_probe(struct platform_device *device)
-{
-       static int axon_ram_bank_id = -1;
-       struct axon_ram_bank *bank;
-       struct resource resource;
-       int rc;
-
-       axon_ram_bank_id++;
-
-       dev_info(&device->dev, "Found memory controller on %pOF\n",
-                       device->dev.of_node);
-
-       bank = kzalloc(sizeof(*bank), GFP_KERNEL);
-       if (!bank)
-               return -ENOMEM;
-
-       device->dev.platform_data = bank;
-
-       bank->device = device;
-
-       if (of_address_to_resource(device->dev.of_node, 0, &resource) != 0) {
-               dev_err(&device->dev, "Cannot access device tree\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       bank->size = resource_size(&resource);
-
-       if (bank->size == 0) {
-               dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
-                               AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-               rc = -ENODEV;
-               goto failed;
-       }
-
-       dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
-                       AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
-
-       bank->ph_addr = resource.start;
-       bank->io_addr = (unsigned long) ioremap_prot(
-                       bank->ph_addr, bank->size, _PAGE_NO_CACHE);
-       if (bank->io_addr == 0) {
-               dev_err(&device->dev, "ioremap() failed\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
-       if (bank->disk == NULL) {
-               dev_err(&device->dev, "Cannot register disk\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-
-       bank->disk->major = azfs_major;
-       bank->disk->first_minor = azfs_minor;
-       bank->disk->fops = &axon_ram_devops;
-       bank->disk->private_data = bank;
-
-       sprintf(bank->disk->disk_name, "%s%d",
-                       AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-
-       bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
-                       &axon_ram_dax_ops);
-       if (!bank->dax_dev) {
-               rc = -ENOMEM;
-               goto failed;
-       }
-
-       bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
-       if (bank->disk->queue == NULL) {
-               dev_err(&device->dev, "Cannot register disk queue\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
-       blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
-       blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-       device_add_disk(&device->dev, bank->disk);
-
-       bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
-       if (!bank->irq_id) {
-               dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       rc = request_irq(bank->irq_id, axon_ram_irq_handler,
-                       AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
-       if (rc != 0) {
-               dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
-               bank->irq_id = 0;
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       rc = device_create_file(&device->dev, &dev_attr_ecc);
-       if (rc != 0) {
-               dev_err(&device->dev, "Cannot create sysfs file\n");
-               rc = -EFAULT;
-               goto failed;
-       }
-
-       azfs_minor += bank->disk->minors;
-
-       return 0;
-
-failed:
-       if (bank->irq_id)
-               free_irq(bank->irq_id, device);
-       if (bank->disk != NULL) {
-               if (bank->disk->major > 0)
-                       unregister_blkdev(bank->disk->major,
-                                       bank->disk->disk_name);
-               if (bank->disk->flags & GENHD_FL_UP)
-                       del_gendisk(bank->disk);
-               put_disk(bank->disk);
-       }
-       kill_dax(bank->dax_dev);
-       put_dax(bank->dax_dev);
-       device->dev.platform_data = NULL;
-       if (bank->io_addr != 0)
-               iounmap((void __iomem *) bank->io_addr);
-       kfree(bank);
-       return rc;
-}
-
-/**
- * axon_ram_remove - remove() method for platform driver
- * @device: see of_platform_driver method
- */
-static int
-axon_ram_remove(struct platform_device *device)
-{
-       struct axon_ram_bank *bank = device->dev.platform_data;
-
-       BUG_ON(!bank || !bank->disk);
-
-       device_remove_file(&device->dev, &dev_attr_ecc);
-       free_irq(bank->irq_id, device);
-       kill_dax(bank->dax_dev);
-       put_dax(bank->dax_dev);
-       del_gendisk(bank->disk);
-       put_disk(bank->disk);
-       iounmap((void __iomem *) bank->io_addr);
-       kfree(bank);
-
-       return 0;
-}
-
-static const struct of_device_id axon_ram_device_id[] = {
-       {
-               .type   = "dma-memory"
-       },
-       {}
-};
-MODULE_DEVICE_TABLE(of, axon_ram_device_id);
-
-static struct platform_driver axon_ram_driver = {
-       .probe          = axon_ram_probe,
-       .remove         = axon_ram_remove,
-       .driver = {
-               .name = AXON_RAM_MODULE_NAME,
-               .of_match_table = axon_ram_device_id,
-       },
-};
-
-/**
- * axon_ram_init
- */
-static int __init
-axon_ram_init(void)
-{
-       azfs_major = register_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
-       if (azfs_major < 0) {
-               printk(KERN_ERR "%s cannot become block device major number\n",
-                               AXON_RAM_MODULE_NAME);
-               return -EFAULT;
-       }
-       azfs_minor = 0;
-
-       return platform_driver_register(&axon_ram_driver);
-}
-
-/**
- * axon_ram_exit
- */
-static void __exit
-axon_ram_exit(void)
-{
-       platform_driver_unregister(&axon_ram_driver);
-       unregister_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
-}
-
-module_init(axon_ram_init);
-module_exit(axon_ram_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
-MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
index c6f154b602fb8b005480c3a369c45a5068d524da..5240d3a74a1076772fc48be35656ee50f4cf27fb 100644 (file)
@@ -629,8 +629,9 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
        return 0;
 }
 
-int cpm1_gpiochip_add16(struct device_node *np)
+int cpm1_gpiochip_add16(struct device *dev)
 {
+       struct device_node *np = dev->of_node;
        struct cpm1_gpio16_chip *cpm1_gc;
        struct of_mm_gpio_chip *mm_gc;
        struct gpio_chip *gc;
@@ -660,6 +661,8 @@ int cpm1_gpiochip_add16(struct device_node *np)
        gc->get = cpm1_gpio16_get;
        gc->set = cpm1_gpio16_set;
        gc->to_irq = cpm1_gpio16_to_irq;
+       gc->parent = dev;
+       gc->owner = THIS_MODULE;
 
        return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
 }
@@ -755,8 +758,9 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
        return 0;
 }
 
-int cpm1_gpiochip_add32(struct device_node *np)
+int cpm1_gpiochip_add32(struct device *dev)
 {
+       struct device_node *np = dev->of_node;
        struct cpm1_gpio32_chip *cpm1_gc;
        struct of_mm_gpio_chip *mm_gc;
        struct gpio_chip *gc;
@@ -776,31 +780,10 @@ int cpm1_gpiochip_add32(struct device_node *np)
        gc->direction_output = cpm1_gpio32_dir_out;
        gc->get = cpm1_gpio32_get;
        gc->set = cpm1_gpio32_set;
+       gc->parent = dev;
+       gc->owner = THIS_MODULE;
 
        return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
 }
 
-static int cpm_init_par_io(void)
-{
-       struct device_node *np;
-
-       for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a")
-               cpm1_gpiochip_add16(np);
-
-       for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b")
-               cpm1_gpiochip_add32(np);
-
-       for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c")
-               cpm1_gpiochip_add16(np);
-
-       for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d")
-               cpm1_gpiochip_add16(np);
-
-       /* Port E uses CPM2 layout */
-       for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e")
-               cpm2_gpiochip_add32(np);
-       return 0;
-}
-arch_initcall(cpm_init_par_io);
-
 #endif /* CONFIG_8xx_GPIO */
index f78ff841652c2fd2006b977db12e28447661aa87..07718b9a2c99b4fdd6e22f75cbc0f4f78b7e6866 100644 (file)
@@ -354,14 +354,3 @@ void cpm2_set_pin(int port, int pin, int flags)
        else
                clrbits32(&iop[port].odr, pin);
 }
-
-static int cpm_init_par_io(void)
-{
-       struct device_node *np;
-
-       for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank")
-               cpm2_gpiochip_add32(np);
-       return 0;
-}
-arch_initcall(cpm_init_par_io);
-
index 51bf749a4f3a7b6c8a245c8d793cee0cc47f9a51..b74508175b6777a5196236a249c947afc413bc00 100644 (file)
@@ -190,8 +190,9 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
        return 0;
 }
 
-int cpm2_gpiochip_add32(struct device_node *np)
+int cpm2_gpiochip_add32(struct device *dev)
 {
+       struct device_node *np = dev->of_node;
        struct cpm2_gpio32_chip *cpm2_gc;
        struct of_mm_gpio_chip *mm_gc;
        struct gpio_chip *gc;
@@ -211,6 +212,8 @@ int cpm2_gpiochip_add32(struct device_node *np)
        gc->direction_output = cpm2_gpio32_dir_out;
        gc->get = cpm2_gpio32_get;
        gc->set = cpm2_gpio32_set;
+       gc->parent = dev;
+       gc->owner = THIS_MODULE;
 
        return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc);
 }
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
new file mode 100644 (file)
index 0000000..0badc90
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common CPM GPIO wrapper for the CPM GPIO ports
+ *
+ * Author: Christophe Leroy <christophe.leroy@c-s.fr>
+ *
+ * Copyright 2017 CS Systemes d'Information.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <asm/cpm.h>
+#ifdef CONFIG_8xx_GPIO
+#include <asm/cpm1.h>
+#endif
+
+static int cpm_gpio_probe(struct platform_device *ofdev)
+{
+       struct device *dev = &ofdev->dev;
+       int (*gp_add)(struct device *dev) = of_device_get_match_data(dev);
+
+       if (!gp_add)
+               return -ENODEV;
+
+       return gp_add(dev);
+}
+
+static const struct of_device_id cpm_gpio_match[] = {
+#ifdef CONFIG_8xx_GPIO
+       {
+               .compatible = "fsl,cpm1-pario-bank-a",
+               .data = cpm1_gpiochip_add16,
+       },
+       {
+               .compatible = "fsl,cpm1-pario-bank-b",
+               .data = cpm1_gpiochip_add32,
+       },
+       {
+               .compatible = "fsl,cpm1-pario-bank-c",
+               .data = cpm1_gpiochip_add16,
+       },
+       {
+               .compatible = "fsl,cpm1-pario-bank-d",
+               .data = cpm1_gpiochip_add16,
+       },
+       /* Port E uses CPM2 layout */
+       {
+               .compatible = "fsl,cpm1-pario-bank-e",
+               .data = cpm2_gpiochip_add32,
+       },
+#endif
+       {
+               .compatible = "fsl,cpm2-pario-bank",
+               .data = cpm2_gpiochip_add32,
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, cpm_gpio_match);
+
+static struct platform_driver cpm_gpio_driver = {
+       .probe          = cpm_gpio_probe,
+       .driver         = {
+               .name   = "cpm-gpio",
+               .owner  = THIS_MODULE,
+               .of_match_table = cpm_gpio_match,
+       },
+};
+
+static int __init cpm_gpio_init(void)
+{
+       return platform_driver_register(&cpm_gpio_driver);
+}
+arch_initcall(cpm_gpio_init);
+
+MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>");
+MODULE_DESCRIPTION("Driver for CPM GPIO");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cpm-gpio");
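
The probe path dispatches through the .data pointer of the matched
of_device_id, so supporting a new bank type is one more table entry. A
stripped-down illustration of the same pattern in plain C, with the
kernel device model stubbed out (all names here are hypothetical):

    #include <stdio.h>
    #include <string.h>

    struct device { const char *compatible; };

    struct of_device_id {
            const char *compatible;
            int (*add)(struct device *);   /* .data in the kernel table */
    };

    static int add16(struct device *d) { printf("16-bit: %s\n", d->compatible); return 0; }
    static int add32(struct device *d) { printf("32-bit: %s\n", d->compatible); return 0; }

    static const struct of_device_id match_table[] = {
            { "fsl,cpm1-pario-bank-a", add16 },
            { "fsl,cpm2-pario-bank",   add32 },
            { 0 }
    };

    static int probe(struct device *dev)
    {
            for (const struct of_device_id *m = match_table; m->compatible; m++)
                    if (!strcmp(m->compatible, dev->compatible))
                            return m->add(dev);
            return -1;      /* -ENODEV in the kernel */
    }

    int main(void)
    {
            struct device d = { "fsl,cpm2-pario-bank" };
            return probe(&d);
    }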
index 1707bf04dec6a7d55a4e13bc5eb8dc82a77d7e35..94278e8af1920c5a2d257c24f3c8d005d132ae8e 100644 (file)
@@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
-       struct timeval interval;
-       int val = 0;
+       time64_t interval = 0;
 
        mutex_lock(&sysfs_lock);
        if (fsl_wakeup->timer) {
                mpic_get_remain_time(fsl_wakeup->timer, &interval);
-               val = interval.tv_sec + 1;
+               interval++;
        }
        mutex_unlock(&sysfs_lock);
 
-       return sprintf(buf, "%d\n", val);
+       return sprintf(buf, "%lld\n", interval);
 }
 
 static ssize_t fsl_timer_wakeup_store(struct device *dev,
@@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
                                const char *buf,
                                size_t count)
 {
-       struct timeval interval;
+       time64_t interval;
        int ret;
 
-       interval.tv_usec = 0;
-       if (kstrtol(buf, 0, &interval.tv_sec))
+       if (kstrtoll(buf, 0, &interval))
                return -EINVAL;
 
        mutex_lock(&sysfs_lock);
@@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
                fsl_wakeup->timer = NULL;
        }
 
-       if (!interval.tv_sec) {
+       if (!interval) {
                mutex_unlock(&sysfs_lock);
                return count;
        }
 
        fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
-                                               fsl_wakeup, &interval);
+                                               fsl_wakeup, interval);
        if (!fsl_wakeup->timer) {
                mutex_unlock(&sysfs_lock);
                return -EINVAL;
index 61e07c78d64f649456fe21ef23b8e6b935e9b1a6..918be816b0977c5feba2a9ef79b57111de1901c3 100644 (file)
@@ -448,7 +448,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
 #endif
                /* adjusting outbound windows could reclaim space in mem map */
                if (paddr_hi < 0xffffffffull)
-                       pr_warning("%pOF: WARNING: Outbound window cfg leaves "
+                       pr_warn("%pOF: WARNING: Outbound window cfg leaves "
                                "gaps in memory map. Adjusting the memory map "
                                "could reduce unnecessary bounce buffering.\n",
                                hose->dn);
@@ -531,7 +531,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
        dev = pdev->dev.of_node;
 
        if (!of_device_is_available(dev)) {
-               pr_warning("%pOF: disabled\n", dev);
+               pr_warn("%pOF: disabled\n", dev);
                return -ENODEV;
        }
 
@@ -808,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
        is_mpc83xx_pci = 1;
 
        if (!of_device_is_available(dev)) {
-               pr_warning("%pOF: disabled by the firmware.\n",
-                          dev);
+               pr_warn("%pOF: disabled by the firmware.\n",
+                       dev);
                return -ENODEV;
        }
        pr_debug("Adding PCI host bridge %pOF\n", dev);
@@ -1070,7 +1070,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
        if (is_in_pci_mem_space(addr)) {
                if (user_mode(regs)) {
                        pagefault_disable();
-                       ret = get_user(regs->nip, &inst);
+                       ret = get_user(inst, (__u32 __user *)regs->nip);
                        pagefault_enable();
                } else {
                        ret = probe_kernel_address((void *)regs->nip, inst);
@@ -1304,10 +1304,8 @@ static int add_err_dev(struct platform_device *pdev)
                                                   pdev->resource,
                                                   pdev->num_resources,
                                                   &pd, sizeof(pd));
-       if (IS_ERR(errdev))
-               return PTR_ERR(errdev);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(errdev);
 }
 
 static int fsl_pci_probe(struct platform_device *pdev)
index ead3e2549ebfe2b891e54e37e8ef3e3f77e963d0..73067805300a26338b0868292cff95c8b78f5ce1 100644 (file)
@@ -1008,9 +1008,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
        if (hw == mpic->spurious_vec)
                return -EINVAL;
        if (mpic->protected && test_bit(hw, mpic->protected)) {
-               pr_warning("mpic: Mapping of source 0x%x failed, "
-                          "source protected by firmware !\n",\
-                          (unsigned int)hw);
+               pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n",
+                       (unsigned int)hw);
                return -EPERM;
        }
 
@@ -1040,9 +1039,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
                return 0;
 
        if (hw >= mpic->num_sources) {
-               pr_warning("mpic: Mapping of source 0x%x failed, "
-                          "source out of range !\n",\
-                          (unsigned int)hw);
+               pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n",
+                       (unsigned int)hw);
                return -EINVAL;
        }
 
index a418579591bed5e3de647afc2087e780d1d6c8ab..87e7c42777a8c919a959c0b264a14b96b8d33112 100644 (file)
@@ -47,9 +47,6 @@
 #define MAX_TICKS_CASCADE              (~0U)
 #define TIMER_OFFSET(num)              (1 << (TIMERS_PER_GROUP - 1 - num))
 
-/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */
-#define ONE_SECOND                     1000000
-
 struct timer_regs {
        u32     gtccr;
        u32     res0[3];
@@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = {
 static LIST_HEAD(timer_group_list);
 
 static void convert_ticks_to_time(struct timer_group_priv *priv,
-               const u64 ticks, struct timeval *time)
+               const u64 ticks, time64_t *time)
 {
-       u64 tmp_sec;
-
-       time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq);
-       tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
-
-       time->tv_usec = 0;
-
-       if (tmp_sec <= ticks)
-               time->tv_usec = (__kernel_suseconds_t)
-                       div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq);
-
-       return;
+       *time = (u64)div_u64(ticks, priv->timerfreq);
 }
 
 /* the time set by the user is converted to "ticks" */
 static int convert_time_to_ticks(struct timer_group_priv *priv,
-               const struct timeval *time, u64 *ticks)
+               time64_t time, u64 *ticks)
 {
        u64 max_value;          /* prevent u64 overflow */
-       u64 tmp = 0;
-
-       u64 tmp_sec;
-       u64 tmp_ms;
-       u64 tmp_us;
 
        max_value = div_u64(ULLONG_MAX, priv->timerfreq);
 
-       if (time->tv_sec > max_value ||
-                       (time->tv_sec == max_value && time->tv_usec > 0))
+       if (time > max_value)
                return -EINVAL;
 
-       tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
-       tmp += tmp_sec;
-
-       tmp_ms = time->tv_usec / 1000;
-       tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000);
-       tmp += tmp_ms;
-
-       tmp_us = time->tv_usec % 1000;
-       tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000);
-       tmp += tmp_us;
-
-       *ticks = tmp;
+       *ticks = (u64)time * (u64)priv->timerfreq;
 
        return 0;
 }
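
Dropping the microsecond field reduces the conversion to one multiply
with a single overflow guard: time * timerfreq fits in a u64 only while
time <= ULLONG_MAX / timerfreq. A quick stand-alone check of that bound
(25 MHz is an assumed example frequency):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t timerfreq = 25000000;               /* assumed 25 MHz */
            uint64_t max_secs  = UINT64_MAX / timerfreq; /* overflow bound */

            printf("max interval: %llu s\n", (unsigned long long)max_secs);
            printf("10 s = %llu ticks\n",
                   (unsigned long long)(UINT64_C(10) * timerfreq));
            return 0;
    }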
@@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
        return allocated_timer;
 }
 
-static struct mpic_timer *get_timer(const struct timeval *time)
+static struct mpic_timer *get_timer(time64_t time)
 {
        struct timer_group_priv *priv;
        struct mpic_timer *timer;
@@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time)
  * @handle: the timer to be started.
  *
  * It will do ->fn(->dev) callback from the hardware interrupt at
- * the ->timeval point in the future.
+ * the 'time64_t' point in the future.
  */
 void mpic_start_timer(struct mpic_timer *handle)
 {
@@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer);
  *
  * Query timer remaining time.
  */
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time)
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
 {
        struct timer_group_priv *priv = container_of(handle,
                        struct timer_group_priv, timer[handle->num]);
@@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer);
  * else "handle" on success.
  */
 struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
-                                       const struct timeval *time)
+                                     time64_t time)
 {
        struct mpic_timer *allocated_timer;
        int ret;
@@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
        if (list_empty(&timer_group_list))
                return NULL;
 
-       if (!(time->tv_sec + time->tv_usec) ||
-                       time->tv_sec < 0 || time->tv_usec < 0)
-               return NULL;
-
-       if (time->tv_usec > ONE_SECOND)
+       if (time < 0)
                return NULL;
 
        allocated_timer = get_timer(time);
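
After this change callers hand mpic_request_timer() a plain seconds
count instead of a filled-in struct timeval; the fsl_mpic_timer_wakeup
store hook earlier in this series is the in-tree user. A hedged sketch
of a caller against the new prototype (kernel-only code; assumes
linux/interrupt.h and asm/mpic_timer.h, and the handler and device
pointer are placeholders):

    /* Sketch only: assumes the mpic_timer API as declared above. */
    static irqreturn_t my_wake_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int arm_wakeup(void *dev, time64_t seconds)
    {
            struct mpic_timer *t;

            t = mpic_request_timer(my_wake_handler, dev, seconds);
            if (!t)
                    return -EINVAL;

            mpic_start_timer(t);
            return 0;
    }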
index d52b3b81e05fba706af2012804fcf410f5d8c2b4..50c411b1761e0105bbe504dcf3f7cb628d21701f 100644 (file)
@@ -73,7 +73,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
 static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
        .attr = {
                .name = "hs_reg",
-               .mode = S_IRUGO | S_IWUSR,
+               .mode = 0644,
        },
        .size  = MV64X60_VAL_LEN_MAX,
        .read  = mv64x60_hs_reg_read,
index 2bfb9968d56222ca6060177d9cf6f6a4829d0505..1459f4e8b698e440d524e5454460bc3f9206baa6 100644 (file)
@@ -241,18 +241,16 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
                          cpu, hw_id);
 
        if (!request_mem_region(addr, size, rname)) {
-               pr_warning("icp_native: Could not reserve ICP MMIO"
-                          " for CPU %d, interrupt server #0x%x\n",
-                          cpu, hw_id);
+               pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+                       cpu, hw_id);
                return -EBUSY;
        }
 
        icp_native_regs[cpu] = ioremap(addr, size);
        kvmppc_set_xics_phys(cpu, addr);
        if (!icp_native_regs[cpu]) {
-               pr_warning("icp_native: Failed ioremap for CPU %d, "
-                          "interrupt server #0x%x, addr %#lx\n",
-                          cpu, hw_id, addr);
+               pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+                       cpu, hw_id, addr);
                release_mem_region(addr, size);
                return -ENOMEM;
        }
index 1c6bf4b66f56854d0717a644c75dea70e3fabde7..f85f916ba432efaa7c3979ef2088d5886154fe96 100644 (file)
@@ -131,8 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d,
 
        wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
        if (wanted_server < 0) {
-               pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
-                          __func__, cpumask_pr_args(cpumask), d->irq);
+               pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+                       __func__, cpumask_pr_args(cpumask), d->irq);
                return -1;
        }
        server = ics_opal_mangle_server(wanted_server);
index 42e0c56ff81c3fcde6e4ad27c523b049cce38995..6aabc74688a6e20002f78326a0d593786c0f0b83 100644 (file)
@@ -141,8 +141,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 
        irq_server = xics_get_irq_server(d->irq, cpumask, 1);
        if (irq_server == -1) {
-               pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
-                          __func__, cpumask_pr_args(cpumask), d->irq);
+               pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+                       __func__, cpumask_pr_args(cpumask), d->irq);
                return -1;
        }
 
index ffe138b8b9dc458646aaa45b79f1c18410a56c74..77e864d5506d3d815a46188eddcc752bdecb1423 100644 (file)
@@ -243,8 +243,8 @@ void xics_migrate_irqs_away(void)
 
                /* This is expected during cpu offline. */
                if (cpu_online(cpu))
-                       pr_warning("IRQ %u affinity broken off cpu %u\n",
-                              virq, cpu);
+                       pr_warn("IRQ %u affinity broken off cpu %u\n",
+                               virq, cpu);
 
                /* Reset affinity to all cpus */
                raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -466,7 +466,7 @@ void __init xics_init(void)
                    rc = icp_opal_init();
        }
        if (rc < 0) {
-               pr_warning("XICS: Cannot find a Presentation Controller !\n");
+               pr_warn("XICS: Cannot find a Presentation Controller !\n");
                return;
        }
 
@@ -481,7 +481,7 @@ void __init xics_init(void)
        if (rc < 0)
                rc = ics_opal_init();
        if (rc < 0)
-               pr_warning("XICS: Cannot find a Source Controller !\n");
+               pr_warn("XICS: Cannot find a Source Controller !\n");
 
        /* Initialize common bits */
        xics_get_server_size();
index a3b8d7d1316eb1863f19ffa19ff34f0c761ada33..40c06110821c36221010fa221326950759c9c5c1 100644 (file)
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
         * EOI the source if it hasn't been disabled and hasn't
         * been passed-through to a KVM guest
         */
-       if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+       if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+           !(xd->flags & XIVE_IRQ_NO_EOI))
                xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
        /*
@@ -1269,11 +1270,6 @@ static void xive_setup_cpu(void)
 {
        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
 
-       /* Debug: Dump the TM state */
-       pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
-           smp_processor_id(), hard_smp_processor_id(),
-           in_8(xive_tima + xive_tima_offset + TM_WORD2));
-
        /* The backend might have additional things to do */
        if (xive_ops->setup_cpu)
                xive_ops->setup_cpu(smp_processor_id(), xc);
index 31db8c072acd03b80dcf177c842273d508409d0a..9deea5ee13f652cd6c8a8b7f1f7d58a9229f31ae 100644 (file)
@@ -93,10 +93,6 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect)
 {
   const struct powerpc_opcode *opcode;
   const struct powerpc_opcode *opcode_end;
-  unsigned long op;
-
-  /* Get the major opcode of the instruction.  */
-  op = PPC_OP (insn);
 
   opcode_end = powerpc_opcodes + powerpc_num_opcodes;
   /* Find the first match in the opcode table for this major opcode.  */
index 0ddc7ac6c5f13df2dd688a0dee38bfb53a2cdf80..82e1a3ee6e0fc0e8bf53ea22e8dd986ab2de508b 100644 (file)
@@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp)
        printf("  current = 0x%lx\n", current);
 #ifdef CONFIG_PPC64
        printf("  paca    = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n",
-              local_paca, local_paca->soft_enabled, local_paca->irq_happened);
+              local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
 #endif
        if (current) {
                printf("    pid   = %ld, comm = %s\n",
@@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu)
                printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
 
        DUMP(p, rfi_flush_fallback_area, "px");
-       DUMP(p, l1d_flush_congruence, "llx");
-       DUMP(p, l1d_flush_sets, "llx");
 #endif
        DUMP(p, dscr_default, "llx");
 #ifdef CONFIG_PPC_BOOK3E
@@ -2395,7 +2393,7 @@ static void dump_one_paca(int cpu)
        DUMP(p, stab_rr, "lx");
        DUMP(p, saved_r1, "lx");
        DUMP(p, trap_save, "x");
-       DUMP(p, soft_enabled, "x");
+       DUMP(p, irq_soft_mask, "x");
        DUMP(p, irq_happened, "x");
        DUMP(p, io_sync, "x");
        DUMP(p, irq_work_pending, "x");
index e06605b21841acae0d1d4b5d5d6ea963a7d732b8..1a8234e706bc7525683f34fa4e9294118987503d 100644 (file)
@@ -76,6 +76,8 @@ static int snooze_loop(struct cpuidle_device *dev,
        ppc64_runlatch_on();
        clear_thread_flag(TIF_POLLING_NRFLAG);
 
+       local_irq_disable();
+
        return index;
 }
 
index a187a39fb86628c958f9fed412de104f4ce8b4a9..9e56bc411061fd573bf06b56acf8dc4e6235ecd8 100644 (file)
@@ -51,8 +51,6 @@ static inline void idle_loop_epilog(unsigned long in_purr)
        get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
        get_lppaca()->idle = 0;
 
-       if (irqs_disabled())
-               local_irq_enable();
        ppc64_runlatch_on();
 }
 
@@ -87,6 +85,8 @@ static int snooze_loop(struct cpuidle_device *dev,
        HMT_medium();
        clear_thread_flag(TIF_POLLING_NRFLAG);
 
+       local_irq_disable();
+
        idle_loop_epilog(in_purr);
 
        return index;
@@ -121,6 +121,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev,
        HMT_medium();
        check_and_cede_processor();
 
+       local_irq_disable();
        get_lppaca()->donate_dedicated_cpu = 0;
 
        idle_loop_epilog(in_purr);
@@ -145,6 +146,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
         */
        check_and_cede_processor();
 
+       local_irq_disable();
        idle_loop_epilog(in_purr);
 
        return index;
@@ -172,11 +174,17 @@ static struct cpuidle_state dedicated_states[] = {
  * States for shared partition case.
  */
 static struct cpuidle_state shared_states[] = {
+       { /* Snooze */
+               .name = "snooze",
+               .desc = "snooze",
+               .exit_latency = 0,
+               .target_residency = 0,
+               .enter = &snooze_loop },
        { /* Shared Cede */
                .name = "Shared Cede",
                .desc = "Shared Cede",
-               .exit_latency = 0,
-               .target_residency = 0,
+               .exit_latency = 10,
+               .target_residency = 100,
                .enter = &shared_cede_loop },
 };
 
index 289800b5235dc0b20457e4136f0821e0464beda4..4c8097e0e6fe10f7df4b1553703c4357e50edbcd 100644 (file)
@@ -203,18 +203,17 @@ static int adb_scan_bus(void)
        }
 
        /* Now fill in the handler_id field of the adb_handler entries. */
-       printk(KERN_DEBUG "adb devices:");
+       pr_debug("adb devices:\n");
        for (i = 1; i < 16; i++) {
                if (adb_handler[i].original_address == 0)
                        continue;
                adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
                            (i << 4) | 0xf);
                adb_handler[i].handler_id = req.reply[2];
-               printk(" [%d]: %d %x", i, adb_handler[i].original_address,
-                      adb_handler[i].handler_id);
+               pr_debug(" [%d]: %d %x\n", i, adb_handler[i].original_address,
+                        adb_handler[i].handler_id);
                devmask |= 1 << i;
        }
-       printk("\n");
        return devmask;
 }
 
@@ -225,9 +224,9 @@ static int adb_scan_bus(void)
 static int
 adb_probe_task(void *x)
 {
-       printk(KERN_INFO "adb: starting probe task...\n");
+       pr_debug("adb: starting probe task...\n");
        do_adb_reset_bus();
-       printk(KERN_INFO "adb: finished probe task...\n");
+       pr_debug("adb: finished probe task...\n");
 
        up(&adb_probe_mutex);
 
@@ -337,7 +336,7 @@ static int __init adb_init(void)
            adb_controller->init())
                adb_controller = NULL;
        if (adb_controller == NULL) {
-               printk(KERN_WARNING "Warning: no ADB interface detected\n");
+               pr_warn("Warning: no ADB interface detected\n");
        } else {
 #ifdef CONFIG_PPC
                if (of_machine_is_compatible("AAPL,PowerBook1998") ||
@@ -480,8 +479,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
                    (!handler_id || (handler_id == adb_handler[i].handler_id) || 
                    try_handler_change(i, handler_id))) {
                        if (adb_handler[i].handler != 0) {
-                               printk(KERN_ERR
-                                      "Two handlers for ADB device %d\n",
+                               pr_err("Two handlers for ADB device %d\n",
                                       default_id);
                                continue;
                        }
@@ -535,10 +533,10 @@ adb_input(unsigned char *buf, int nb, int autopoll)
                
        id = buf[0] >> 4;
        if (dump_adb_input) {
-               printk(KERN_INFO "adb packet: ");
+               pr_info("adb packet: ");
                for (i = 0; i < nb; ++i)
-                       printk(" %x", buf[i]);
-               printk(", id = %d\n", id);
+                       pr_cont(" %x", buf[i]);
+               pr_cont(", id = %d\n", id);
        }
        write_lock_irqsave(&adb_handler_lock, flags);
        handler = adb_handler[id].handler;
@@ -884,7 +882,7 @@ static void __init
 adbdev_init(void)
 {
        if (register_chrdev(ADB_MAJOR, "adb", &adb_fops)) {
-               printk(KERN_ERR "adb: unable to get major %d\n", ADB_MAJOR);
+               pr_err("adb: unable to get major %d\n", ADB_MAJOR);
                return;
        }
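
The adb_input() hunk above also shows the continuation idiom these conversions standardize on: open a line with pr_info()/pr_debug(), extend it with pr_cont(), and terminate it with the newline in the final fragment. As a standalone sketch (dump_packet() is a hypothetical helper, not part of this patch):

static void dump_packet(const unsigned char *buf, int nb, int id)
{
        int i;

        /* Sketch of the pr_cont idiom used in adb_input() above */
        pr_info("adb packet:");         /* opens the line with a level */
        for (i = 0; i < nb; i++)
                pr_cont(" %x", buf[i]); /* appends, no new prefix */
        pr_cont(", id = %d\n", id);     /* terminates the line */
}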
 
index e091193104f741b724aaaf02e1a97037bf60c27f..a261892c03b3a487b0ef348a8a7fd8d4f2c9c4e1 100644 (file)
@@ -268,7 +268,7 @@ adbhid_keyboard_input(unsigned char *data, int nb, int apoll)
        int id = (data[0] >> 4) & 0x0f;
 
        if (!adbhid[id]) {
-               printk(KERN_ERR "ADB HID on ID %d not yet registered, packet %#02x, %#02x, %#02x, %#02x\n",
+               pr_err("ADB HID on ID %d not yet registered, packet %#02x, %#02x, %#02x, %#02x\n",
                       id, data[0], data[1], data[2], data[3]);
                return;
        }
@@ -320,8 +320,7 @@ adbhid_input_keycode(int id, int scancode, int repeat)
                                        ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE;
                                }
                        } else {
-                               printk(KERN_INFO "Spurious caps lock event "
-                                                "(scancode 0xff).\n");
+                               pr_info("Spurious caps lock event (scancode 0xff).\n");
                        }
                }
        }
@@ -397,8 +396,8 @@ adbhid_input_keycode(int id, int scancode, int repeat)
                input_report_key(adbhid[id]->input, key, !up_flag);
                input_sync(adbhid[id]->input);
        } else
-               printk(KERN_INFO "Unhandled ADB key (scancode %#02x) %s.\n", keycode,
-                      up_flag ? "released" : "pressed");
+               pr_info("Unhandled ADB key (scancode %#02x) %s.\n", keycode,
+                       up_flag ? "released" : "pressed");
 
 }
 
@@ -408,7 +407,7 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
        int id = (data[0] >> 4) & 0x0f;
 
        if (!adbhid[id]) {
-               printk(KERN_ERR "ADB HID on ID %d not yet registered\n", id);
+               pr_err("ADB HID on ID %d not yet registered\n", id);
                return;
        }
 
@@ -506,7 +505,7 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
        int id = (data[0] >> 4) & 0x0f;
 
        if (!adbhid[id]) {
-               printk(KERN_ERR "ADB HID on ID %d not yet registered\n", id);
+               pr_err("ADB HID on ID %d not yet registered\n", id);
                return;
        }
 
@@ -534,8 +533,8 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
                        break;
 
                default:
-                       printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
-                              data[0], data[1], data[2], data[3]);
+                       pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+                               data[0], data[1], data[2], data[3]);
                        break;
                }
          }
@@ -609,14 +608,14 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
                                break;
 
                        default:
-                               printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
-                                      data[0], data[1], data[2], data[3]);
+                               pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+                                       data[0], data[1], data[2], data[3]);
                                break;
                        }
                        break;
                default:
-                       printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
-                              data[0], data[1], data[2], data[3]);
+                       pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+                               data[0], data[1], data[2], data[3]);
                        break;
                }
          }
@@ -760,7 +759,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
        int i;
 
        if (adbhid[id]) {
-               printk(KERN_ERR "Trying to reregister ADB HID on ID %d\n", id);
+               pr_err("Trying to reregister ADB HID on ID %d\n", id);
                return -EEXIST;
        }
 
@@ -799,24 +798,24 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 
                memcpy(hid->keycode, adb_to_linux_keycodes, sizeof(adb_to_linux_keycodes));
 
-               printk(KERN_INFO "Detected ADB keyboard, type ");
+               pr_info("Detected ADB keyboard, type ");
                switch (original_handler_id) {
                default:
-                       printk("<unknown>.\n");
+                       pr_cont("<unknown>.\n");
                        input_dev->id.version = ADB_KEYBOARD_UNKNOWN;
                        break;
 
                case 0x01: case 0x02: case 0x03: case 0x06: case 0x08:
                case 0x0C: case 0x10: case 0x18: case 0x1B: case 0x1C:
                case 0xC0: case 0xC3: case 0xC6:
-                       printk("ANSI.\n");
+                       pr_cont("ANSI.\n");
                        input_dev->id.version = ADB_KEYBOARD_ANSI;
                        break;
 
                case 0x04: case 0x05: case 0x07: case 0x09: case 0x0D:
                case 0x11: case 0x14: case 0x19: case 0x1D: case 0xC1:
                case 0xC4: case 0xC7:
-                       printk("ISO, swapping keys.\n");
+                       pr_cont("ISO, swapping keys.\n");
                        input_dev->id.version = ADB_KEYBOARD_ISO;
                        i = hid->keycode[10];
                        hid->keycode[10] = hid->keycode[50];
@@ -825,7 +824,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 
                case 0x12: case 0x15: case 0x16: case 0x17: case 0x1A:
                case 0x1E: case 0xC2: case 0xC5: case 0xC8: case 0xC9:
-                       printk("JIS.\n");
+                       pr_cont("JIS.\n");
                        input_dev->id.version = ADB_KEYBOARD_JIS;
                        break;
                }
@@ -884,7 +883,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
                /* else fall through */
 
        default:
-               printk(KERN_INFO "Trying to register unknown ADB device to input layer.\n");
+               pr_info("Trying to register unknown ADB device to input layer.\n");
                err = -ENODEV;
                goto fail;
        }
@@ -1073,12 +1072,12 @@ adbhid_probe(void)
                            (req.reply[1] == 0x4b) && (req.reply[2] == 0x4f) &&
                            (req.reply[3] == 0x49) && (req.reply[4] == 0x54)) {
                                if (adb_try_handler_change(id, 0x42)) {
-                                       printk("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
+                                       pr_cont("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
                                        mouse_kind = ADBMOUSE_MACALLY2;
                                }
                        }
                }
-               printk("\n");
+               pr_cont("\n");
 
                adb_get_infos(id, &default_id, &cur_handler_id);
                reg |= adbhid_input_reregister(id, default_id, org_handler_id,
@@ -1093,12 +1092,12 @@ init_trackpad(int id)
        struct adb_request req;
        unsigned char r1_buffer[8];
 
-       printk(" (trackpad)");
+       pr_cont(" (trackpad)");
 
        adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
                    ADB_READREG(id,1));
        if (req.reply_len < 8)
-           printk("bad length for reg. 1\n");
+           pr_cont("bad length for reg. 1\n");
        else
        {
            memcpy(r1_buffer, &req.reply[1], 8);
@@ -1146,7 +1145,7 @@ init_trackball(int id)
 {
        struct adb_request req;
 
-       printk(" (trackman/mouseman)");
+       pr_cont(" (trackman/mouseman)");
 
        adb_request(&req, NULL, ADBREQ_SYNC, 3,
        ADB_WRITEREG(id,1), 00,0x81);
@@ -1178,7 +1177,7 @@ init_turbomouse(int id)
 {
        struct adb_request req;
 
-        printk(" (TurboMouse 5)");
+       pr_cont(" (TurboMouse 5)");
 
        adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
 
@@ -1214,7 +1213,7 @@ init_microspeed(int id)
 {
        struct adb_request req;
 
-        printk(" (Microspeed/MacPoint or compatible)");
+       pr_cont(" (Microspeed/MacPoint or compatible)");
 
        adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
 
@@ -1254,7 +1253,7 @@ init_ms_a3(int id)
 {
        struct adb_request req;
 
-       printk(" (Mouse Systems A3 Mouse, or compatible)");
+       pr_cont(" (Mouse Systems A3 Mouse, or compatible)");
        adb_request(&req, NULL, ADBREQ_SYNC, 3,
        ADB_WRITEREG(id, 0x2),
            0x00,
index 2edae7dfcab25407c4c677bcfad6e4f3bdf4e57f..fe248f6a30c1ac0ebe5907ab60194cf45c018544 100644 (file)
@@ -91,7 +91,7 @@ static int ams_input_enable(void)
                return error;
        }
 
-       joystick = 1;
+       joystick = true;
 
        return 0;
 }
@@ -104,7 +104,7 @@ static void ams_input_disable(void)
                ams_info.idev = NULL;
        }
 
-       joystick = 0;
+       joystick = false;
 }
 
 static ssize_t ams_input_show_joystick(struct device *dev,
index f433521a6f9d91d9ad6c5563866b3b3f30ee09ac..d7cd5afa38cd16ffd3d2906d5dfc957ec1369b2f 100644 (file)
@@ -230,7 +230,7 @@ static void update_fans_speed (struct thermostat *th)
 
        /* we don't care about local sensor, so we start at sensor 1 */
        for (i = 1; i < 3; i++) {
-               int started = 0;
+               bool started = false;
                int fan_number = (th->type == ADT7460 && i == 2);
                int var = th->temps[i] - th->limits[i];
 
@@ -243,7 +243,7 @@ static void update_fans_speed (struct thermostat *th)
                        if (abs(var - th->last_var[fan_number]) < 2)
                                continue;
 
-                       started = 1;
+                       started = true;
                        new_speed = fan_speed + ((var-1)*step);
 
                        if (new_speed < fan_speed)
index 89ed51571b62b003a69e17c0e66bb346e97b01ae..50ada02ae75d7deb4d9bd4af53b93d262653de73 100644 (file)
@@ -137,7 +137,7 @@ void pmu_backlight_set_sleep(int sleep)
 }
 #endif /* CONFIG_PM */
 
-void __init pmu_backlight_init()
+void __init pmu_backlight_init(void)
 {
        struct backlight_properties props;
        struct backlight_device *bd;
index 96d16fca68b2469f5a73c57369deaac3ed04e8ec..fec91db1142ec298de407917437f0db0072e086f 100644 (file)
@@ -96,14 +96,14 @@ static int cpu_last_target;
 static struct wf_pid_state backside_pid;
 static int backside_tick;
 static struct wf_pid_state slots_pid;
-static int slots_started;
+static bool slots_started;
 static struct wf_pid_state drive_bay_pid;
 static int drive_bay_tick;
 
 static int nr_cores;
 static int have_all_controls;
 static int have_all_sensors;
-static int started;
+static bool started;
 
 static int failure_state;
 #define FAILURE_SENSOR         1
@@ -462,7 +462,7 @@ static void slots_fan_tick(void)
                /* first time; initialize things */
                printk(KERN_INFO "windfarm: Slots control loop started.\n");
                wf_pid_init(&slots_pid, &slots_param);
-               slots_started = 1;
+               slots_started = true;
        }
 
        err = slots_power->ops->get_value(slots_power, &power);
@@ -506,7 +506,7 @@ static void pm112_tick(void)
        int i, last_failure;
 
        if (!started) {
-               started = 1;
+               started = true;
                printk(KERN_INFO "windfarm: CPUs control loops started.\n");
                for (i = 0; i < nr_cores; ++i) {
                        if (create_cpu_loop(i) < 0) {
index b350fb86ff08c93f9bc11c43d642d4c3feed765d..4d72d8f58cb6388d3e7f89111273d77d40816823 100644 (file)
@@ -246,7 +246,8 @@ enum {
 static struct wf_control *controls[N_CONTROLS] = {};
 
 /* Set to kick the control loop into life */
-static int pm121_all_controls_ok, pm121_all_sensors_ok, pm121_started;
+static int pm121_all_controls_ok, pm121_all_sensors_ok;
+static bool pm121_started;
 
 enum {
        FAILURE_FAN             = 1 << 0,
@@ -806,7 +807,7 @@ static void pm121_tick(void)
                        pm121_create_sys_fans(i);
 
                pm121_create_cpu_fans();
-               pm121_started = 1;
+               pm121_started = true;
        }
 
        /* skipping ticks */
index e88cfb36a74d139b035574a9b078e16a13f7d35b..833021508c05c879b2a8c929105bb2e90b0353cd 100644 (file)
@@ -611,7 +611,7 @@ static void pm72_tick(void)
        int i, last_failure;
 
        if (!started) {
-               started = 1;
+               started = true;
                printk(KERN_INFO "windfarm: CPUs control loops started.\n");
                for (i = 0; i < nr_chips; ++i) {
                        if (cpu_setup_pid(i) < 0) {
index 93faf298a3c56e39a5703eb014ea1512af06fc28..d9ea45581b9e879350a0b791f9f8c28ea07cc856 100644 (file)
@@ -140,7 +140,8 @@ static struct wf_control *fan_system;
 static struct wf_control *cpufreq_clamp;
 
 /* Set to kick the control loop into life */
-static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started;
+static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok;
+static bool wf_smu_started;
 
 /* Failure handling.. could be nicer */
 #define FAILURE_FAN            0x01
@@ -549,7 +550,7 @@ static void wf_smu_tick(void)
                DBG("wf: creating control loops !\n");
                wf_smu_create_sys_fans();
                wf_smu_create_cpu_fans();
-               wf_smu_started = 1;
+               wf_smu_started = true;
        }
 
        /* Skipping ticks */
index 81fdf40c5b82b6ee460de4008cf7a661ebc35de7..7fd73dcb2b0a43f1ce50407d4485e033e0862c2b 100644 (file)
@@ -75,7 +75,8 @@ static struct wf_control *fan_slots;
 static struct wf_control *cpufreq_clamp;
 
 /* Set to kick the control loop into life */
-static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started;
+static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok;
+static bool wf_smu_started;
 static bool wf_smu_overtemp;
 
 /* Failure handling.. could be nicer */
@@ -467,7 +468,7 @@ static void wf_smu_tick(void)
                wf_smu_create_drive_fans();
                wf_smu_create_slots_fans();
                wf_smu_create_cpu_fans();
-               wf_smu_started = 1;
+               wf_smu_started = true;
        }
 
        /* Skipping ticks */
index a0cd9c7f98351553625b62f63f5dcd03c8b95df5..9ce87cc0597f086204a8f8fc078e7344776f2322 100644 (file)
@@ -514,7 +514,7 @@ static void rm31_tick(void)
        int i, last_failure;
 
        if (!started) {
-               started = 1;
+               started = true;
                printk(KERN_INFO "windfarm: CPUs control loops started.\n");
                for (i = 0; i < nr_chips; ++i) {
                        if (cpu_setup_pid(i) < 0) {
index 6722073e339baea8863db71c217c2565212aa52a..03605f8fc0dc94490f9ee76fb69ec68c22528ec7 100644 (file)
@@ -512,5 +512,6 @@ source "drivers/misc/mic/Kconfig"
 source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
+source "drivers/misc/ocxl/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 endmenu
index 8d8cc096063bde0a75b9c0854dcbdeb787630479..c3c8624f4d9506efe0f21168a3ebf7c0ae4536ee 100644 (file)
@@ -55,7 +55,8 @@ obj-$(CONFIG_CXL_BASE)                += cxl/
 obj-$(CONFIG_ASPEED_LPC_CTRL)  += aspeed-lpc-ctrl.o
 obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
 obj-$(CONFIG_PCI_ENDPOINT_TEST)        += pci_endpoint_test.o
-obj-$(CONFIG_MISC_RTSX)        += cardreader/
+obj-$(CONFIG_OCXL)             += ocxl/
+obj-$(CONFIG_MISC_RTSX)                += cardreader/
 
 lkdtm-$(CONFIG_LKDTM)          += lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)          += lkdtm_bugs.o
index 12a41b2753f05721350982a5041d292ed5149d08..7ff315ad369281a5d166bd7563e45763ed0ea459 100644 (file)
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
        ctx->pid = NULL; /* Set in start work ioctl */
        mutex_init(&ctx->mapping_lock);
        ctx->mapping = NULL;
+       ctx->tidr = 0;
+       ctx->assign_tidr = false;
 
        if (cxl_is_power8()) {
                spin_lock_init(&ctx->sste_lock);
index a798c2ccd67d37ac147f78f23e6e6521b83f0e1e..4f015da78f283952ad506ae2ff2bf39297ef3dff 100644 (file)
@@ -630,6 +630,9 @@ struct cxl_context {
        struct list_head extra_irq_contexts;
 
        struct mm_struct *mm;
+
+       u16 tidr;
+       bool assign_tidr;
 };
 
 struct cxl_irq_info;
index dc9bc1807fdfa52aede0748308d16f5c36c6c95f..30ccba436b3b1cada1dd838632c4b8167769319f 100644 (file)
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
                 */
                attr->pid = mm->context.id;
                mmput(mm);
+               attr->tid = task->thread.tidr;
        } else {
                attr->pid = 0;
+               attr->tid = 0;
        }
-       attr->tid = 0;
        return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
index 90341ccda9bd3bf682adb4d44b8532d3dbf89271..0162516f5e57b1f5297f1a443da849f69407f91e 100644 (file)
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
         * flags are set it's invalid
         */
        if (work.reserved1 || work.reserved2 || work.reserved3 ||
-           work.reserved4 || work.reserved5 || work.reserved6 ||
+           work.reserved4 || work.reserved5 ||
            (work.flags & ~CXL_START_WORK_ALL)) {
                rc = -EINVAL;
                goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
                rc =  -EINVAL;
                goto out;
        }
+
        if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
                goto out;
 
        if (work.flags & CXL_START_WORK_AMR)
                amr = work.amr & mfspr(SPRN_UAMOR);
 
+       if (work.flags & CXL_START_WORK_TID)
+               ctx->assign_tidr = true;
+
        ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
 
        /*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
                goto out;
        }
 
-       ctx->status = STARTED;
        rc = 0;
+       if (work.flags & CXL_START_WORK_TID) {
+               work.tid = ctx->tidr;
+               if (copy_to_user(uwork, &work, sizeof(work)))
+                       rc = -EFAULT;
+       }
+
+       ctx->status = STARTED;
+
 out:
        mutex_unlock(&ctx->status_mutex);
        return rc;
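
Since the kernel now copies the work structure back to userspace before marking the context started, a process that sets CXL_START_WORK_TID can read its assigned thread id straight out of the returned structure. A hypothetical userspace fragment (flag and tid field per the hunk above; struct and ioctl from the cxl uapi header):

#include <stdio.h>
#include <sys/ioctl.h>
#include <misc/cxl.h>

/* Hypothetical fragment; treat the struct layout as an assumption. */
static int start_with_tid(int afu_fd)
{
        struct cxl_ioctl_start_work work = { 0 };

        work.flags = CXL_START_WORK_TID; /* ask the kernel for a TIDR */
        if (ioctl(afu_fd, CXL_IOCTL_START_WORK, &work))
                return -1;
        printf("assigned thread id: %u\n", (unsigned int)work.tid);
        return 0;
}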
index 02b6b45b4c204d8eb166c613ec560b550722ab1b..1b3d7c65ea3fe2a7f22307630302e29cd69f7b66 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <asm/synch.h>
+#include <asm/switch_to.h>
 #include <misc/cxl-base.h>
 
 #include "cxl.h"
@@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx)
 static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
 {
        u32 pid;
+       int rc;
 
        cxl_assign_psn_space(ctx);
 
@@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
                pid = ctx->mm->context.id;
        }
 
-       ctx->elem->common.tid = 0;
+       /* Assign a unique TIDR (thread id) for the current thread */
+       if (!(ctx->tidr) && (ctx->assign_tidr)) {
+               rc = set_thread_tidr(current);
+               if (rc)
+                       return -ENODEV;
+               ctx->tidr = current->thread.tidr;
+               pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+       }
+
+       ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
        ctx->elem->common.pid = cpu_to_be32(pid);
 
        ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
index 19969ee86d6f781c64f2acf632781a600c647c0d..758842f65a1b372ea8aecf48fe8a58d738dba0d7 100644 (file)
@@ -125,8 +125,6 @@ static const struct pci_device_id cxl_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
        { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
        { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
-       { PCI_DEVICE_CLASS(0x120000, ~0), },
-
        { }
 };
 MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
new file mode 100644 (file)
index 0000000..4bbdb0d
--- /dev/null
@@ -0,0 +1,31 @@
+#
+# Open Coherent Accelerator (OCXL) compatible devices
+#
+
+config OCXL_BASE
+       bool
+       default n
+       select PPC_COPRO_BASE
+
+config OCXL
+       tristate "OpenCAPI coherent accelerator support"
+       depends on PPC_POWERNV && PCI && EEH
+       select OCXL_BASE
+       default m
+       help
+         Select this option to enable the ocxl driver for Open
+         Coherent Accelerator Processor Interface (OpenCAPI) devices.
+
+         OpenCAPI allows FPGA and ASIC accelerators to be coherently
+         attached to a CPU over an OpenCAPI link.
+
+         The ocxl driver enables userspace programs to access these
+         accelerators through devices in /dev/ocxl/.
+
+         For more information, see http://opencapi.org.
+
+         This is not to be confused with the support for IBM CAPI
+         accelerators (CONFIG_CXL), which attach over PCI instead of a
+         dedicated OpenCAPI link and don't follow the same protocol.
+
+         If unsure, say N.
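
Given the dependencies above, an illustrative config fragment for building the driver as a module on a powernv kernel (not a tested defconfig):

# OCXL_BASE is selected automatically by CONFIG_OCXL
CONFIG_PPC_POWERNV=y
CONFIG_PCI=y
CONFIG_EEH=y
CONFIG_OCXL=m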
diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
new file mode 100644 (file)
index 0000000..5229dcd
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+ccflags-$(CONFIG_PPC_WERROR)   += -Werror
+
+ocxl-y                         += main.o pci.o config.o file.o pasid.o
+ocxl-y                         += link.o context.o afu_irq.o sysfs.o trace.o
+obj-$(CONFIG_OCXL)             += ocxl.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
+
+# ccflags-y += -DDEBUG
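
The -I$(src) flag lets trace.c pull in the driver-local trace.h when instantiating tracepoints. The usual shape of such a file, sketched here since trace.c itself is not shown in this diff:

// SPDX-License-Identifier: GPL-2.0+
// Instantiates the tracepoints declared in trace.h; CREATE_TRACE_POINTS
// must be defined in exactly one compilation unit of the module.
#define CREATE_TRACE_POINTS
#include "trace.h"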
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
new file mode 100644 (file)
index 0000000..e70cfa2
--- /dev/null
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+struct afu_irq {
+       int id;
+       int hw_irq;
+       unsigned int virq;
+       char *name;
+       u64 trigger_page;
+       struct eventfd_ctx *ev_ctx;
+};
+
+static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
+{
+       return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
+}
+
+static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)
+{
+       return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
+}
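+/*
+ * Example, assuming 64K pages (PAGE_SHIFT = 16) and an irq_base_offset
+ * of 0x100000: id 2 maps to offset 0x120000, and irq_offset_to_id()
+ * inverts it back to 2.
+ */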
+
+static irqreturn_t afu_irq_handler(int virq, void *data)
+{
+       struct afu_irq *irq = (struct afu_irq *) data;
+
+       trace_ocxl_afu_irq_receive(virq);
+       if (irq->ev_ctx)
+               eventfd_signal(irq->ev_ctx, 1);
+       return IRQ_HANDLED;
+}
+
+static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
+{
+       int rc;
+
+       irq->virq = irq_create_mapping(NULL, irq->hw_irq);
+       if (!irq->virq) {
+               pr_err("irq_create_mapping failed\n");
+               return -ENOMEM;
+       }
+       pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq);
+
+       irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq);
+       if (!irq->name) {
+               irq_dispose_mapping(irq->virq);
+               return -ENOMEM;
+       }
+
+       rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq);
+       if (rc) {
+               kfree(irq->name);
+               irq->name = NULL;
+               irq_dispose_mapping(irq->virq);
+               pr_err("request_irq failed: %d\n", rc);
+               return rc;
+       }
+       return 0;
+}
+
+static void release_afu_irq(struct afu_irq *irq)
+{
+       free_irq(irq->virq, irq);
+       irq_dispose_mapping(irq->virq);
+       kfree(irq->name);
+}
+
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
+{
+       struct afu_irq *irq;
+       int rc;
+
+       irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL);
+       if (!irq)
+               return -ENOMEM;
+
+       /*
+        * We limit the number of afu irqs per context and per link to
+        * avoid a single process or user depleting the pool of IPIs
+        */
+
+       mutex_lock(&ctx->irq_lock);
+
+       irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT,
+                       GFP_KERNEL);
+       if (irq->id < 0) {
+               rc = -ENOSPC;
+               goto err_unlock;
+       }
+
+       rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
+                               &irq->trigger_page);
+       if (rc)
+               goto err_idr;
+
+       rc = setup_afu_irq(ctx, irq);
+       if (rc)
+               goto err_alloc;
+
+       *irq_offset = irq_id_to_offset(ctx, irq->id);
+
+       trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
+                               *irq_offset);
+       mutex_unlock(&ctx->irq_lock);
+       return 0;
+
+err_alloc:
+       ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+err_idr:
+       idr_remove(&ctx->irq_idr, irq->id);
+err_unlock:
+       mutex_unlock(&ctx->irq_lock);
+       kfree(irq);
+       return rc;
+}
+
+static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
+{
+       trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
+       if (ctx->mapping)
+               unmap_mapping_range(ctx->mapping,
+                               irq_id_to_offset(ctx, irq->id),
+                               1 << PAGE_SHIFT, 1);
+       release_afu_irq(irq);
+       if (irq->ev_ctx)
+               eventfd_ctx_put(irq->ev_ctx);
+       ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+       kfree(irq);
+}
+
+int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
+{
+       struct afu_irq *irq;
+       int id = irq_offset_to_id(ctx, irq_offset);
+
+       mutex_lock(&ctx->irq_lock);
+
+       irq = idr_find(&ctx->irq_idr, id);
+       if (!irq) {
+               mutex_unlock(&ctx->irq_lock);
+               return -EINVAL;
+       }
+       idr_remove(&ctx->irq_idr, irq->id);
+       afu_irq_free(irq, ctx);
+       mutex_unlock(&ctx->irq_lock);
+       return 0;
+}
+
+void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
+{
+       struct afu_irq *irq;
+       int id;
+
+       mutex_lock(&ctx->irq_lock);
+       idr_for_each_entry(&ctx->irq_idr, irq, id)
+               afu_irq_free(irq, ctx);
+       mutex_unlock(&ctx->irq_lock);
+}
+
+int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)
+{
+       struct afu_irq *irq;
+       struct eventfd_ctx *ev_ctx;
+       int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
+
+       mutex_lock(&ctx->irq_lock);
+       irq = idr_find(&ctx->irq_idr, id);
+       if (!irq) {
+               rc = -EINVAL;
+               goto unlock;
+       }
+
+       ev_ctx = eventfd_ctx_fdget(eventfd);
+       if (IS_ERR(ev_ctx)) {
+               rc = -EINVAL;
+               goto unlock;
+       }
+
+       irq->ev_ctx = ev_ctx;
+unlock:
+       mutex_unlock(&ctx->irq_lock);
+       return rc;
+}
+
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
+{
+       struct afu_irq *irq;
+       int id = irq_offset_to_id(ctx, irq_offset);
+       u64 addr = 0;
+
+       mutex_lock(&ctx->irq_lock);
+       irq = idr_find(&ctx->irq_idr, id);
+       if (irq)
+               addr = irq->trigger_page;
+       mutex_unlock(&ctx->irq_lock);
+       return addr;
+}
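
Userspace consumes these interrupts through the eventfd registered via ocxl_afu_irq_set_fd(): afu_irq_handler() above signals the context, and a blocking read() returns the count of interrupts since the last read. A hypothetical consumer (fd setup through the driver's ioctls elided):

#include <stdint.h>
#include <unistd.h>

/* Hypothetical consumer; the eventfd was registered with the driver. */
static void wait_for_afu_irq(int irq_efd)
{
        uint64_t count;

        /* Blocks until eventfd_signal() has fired at least once;
         * count is the number of interrupts since the previous read. */
        if (read(irq_efd, &count, sizeof(count)) != sizeof(count))
                return;
        /* handle 'count' interrupts */
}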
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644 (file)
index 0000000..2e30de9
--- /dev/null
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/pci.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include <misc/ocxl-config.h>
+
+#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
+#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
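+/* For instance, EXTRACT_BITS(0xABCD, 4, 11) == 0xBC: bits s..e are
+ * kept (inclusive) and shifted down to bit 0.
+ */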
+
+#define OCXL_DVSEC_AFU_IDX_MASK              GENMASK(5, 0)
+#define OCXL_DVSEC_ACTAG_MASK                GENMASK(11, 0)
+#define OCXL_DVSEC_PASID_MASK                GENMASK(19, 0)
+#define OCXL_DVSEC_PASID_LOG_MASK            GENMASK(4, 0)
+
+#define OCXL_DVSEC_TEMPL_VERSION         0x0
+#define OCXL_DVSEC_TEMPL_NAME            0x4
+#define OCXL_DVSEC_TEMPL_AFU_VERSION     0x1C
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL     0x20
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ  0x28
+#define OCXL_DVSEC_TEMPL_MMIO_PP         0x30
+#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ      0x38
+#define OCXL_DVSEC_TEMPL_MEM_SZ          0x3C
+#define OCXL_DVSEC_TEMPL_WWID            0x40
+
+#define OCXL_MAX_AFU_PER_FUNCTION 64
+#define OCXL_TEMPL_LEN            0x58
+#define OCXL_TEMPL_NAME_LEN       24
+#define OCXL_CFG_TIMEOUT     3
+
+static int find_dvsec(struct pci_dev *dev, int dvsec_id)
+{
+       int vsec = 0;
+       u16 vendor, id;
+
+       while ((vsec = pci_find_next_ext_capability(dev, vsec,
+                                                   OCXL_EXT_CAP_ID_DVSEC))) {
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+                               &vendor);
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+               if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+                       return vsec;
+       }
+       return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+       int vsec = 0;
+       u16 vendor, id;
+       u8 idx;
+
+       while ((vsec = pci_find_next_ext_capability(dev, vsec,
+                                                   OCXL_EXT_CAP_ID_DVSEC))) {
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+                               &vendor);
+               pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+
+               if (vendor == PCI_VENDOR_ID_IBM &&
+                       id == OCXL_DVSEC_AFU_CTRL_ID) {
+                       pci_read_config_byte(dev,
+                                       vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+                                       &idx);
+                       if (idx == afu_idx)
+                               return vsec;
+               }
+       }
+       return 0;
+}
+
+static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       u16 val;
+       int pos;
+
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
+       if (!pos) {
+               /*
+                * PASID capability is not mandatory, but there
+                * shouldn't be any AFU
+                */
+               dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
+               fn->max_pasid_log = -1;
+               goto out;
+       }
+       pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
+       fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
+
+out:
+       dev_dbg(&dev->dev, "PASID capability:\n");
+       dev_dbg(&dev->dev, "  Max PASID log = %d\n", fn->max_pasid_log);
+       return 0;
+}
+
+static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       int pos;
+
+       pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
+       if (!pos && PCI_FUNC(dev->devfn) == 0) {
+               dev_err(&dev->dev, "Can't find TL DVSEC\n");
+               return -ENODEV;
+       }
+       if (pos && PCI_FUNC(dev->devfn) != 0) {
+               dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
+               return -ENODEV;
+       }
+       fn->dvsec_tl_pos = pos;
+       return 0;
+}
+
+static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       int pos, afu_present;
+       u32 val;
+
+       pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
+       if (!pos) {
+               dev_err(&dev->dev, "Can't find function DVSEC\n");
+               return -ENODEV;
+       }
+       fn->dvsec_function_pos = pos;
+
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+       afu_present = EXTRACT_BIT(val, 31);
+       if (!afu_present) {
+               fn->max_afu_index = -1;
+               dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
+               goto out;
+       }
+       fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
+
+out:
+       dev_dbg(&dev->dev, "Function DVSEC:\n");
+       dev_dbg(&dev->dev, "  Max AFU index = %d\n", fn->max_afu_index);
+       return 0;
+}
+
+static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       int pos;
+
+       if (fn->max_afu_index < 0) {
+               fn->dvsec_afu_info_pos = -1;
+               return 0;
+       }
+
+       pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
+       if (!pos) {
+               dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
+               return -ENODEV;
+       }
+       fn->dvsec_afu_info_pos = pos;
+       return 0;
+}
+
+static int read_dvsec_vendor(struct pci_dev *dev)
+{
+       int pos;
+       u32 cfg, tlx, dlx;
+
+       /*
+        * vendor specific DVSEC is optional
+        *
+        * It's currently only used on function 0 to specify the
+        * version of some logic blocks. Some older images may not
+        * even have it, so we ignore any errors
+        */
+       if (PCI_FUNC(dev->devfn) != 0)
+               return 0;
+
+       pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+       if (!pos)
+               return 0;
+
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+
+       dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
+       dev_dbg(&dev->dev, "  CFG version = 0x%x\n", cfg);
+       dev_dbg(&dev->dev, "  TLX version = 0x%x\n", tlx);
+       dev_dbg(&dev->dev, "  DLX version = 0x%x\n", dlx);
+       return 0;
+}
+
+static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
+               dev_err(&dev->dev,
+                       "AFUs are defined but no PASIDs are requested\n");
+               return -EINVAL;
+       }
+
+       if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
+               dev_err(&dev->dev,
+                       "Max AFU index out of architectural limit (%d vs %d)\n",
+                       fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+       int rc;
+
+       rc = read_pasid(dev, fn);
+       if (rc) {
+               dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
+               return -ENODEV;
+       }
+
+       rc = read_dvsec_tl(dev, fn);
+       if (rc) {
+               dev_err(&dev->dev,
+                       "Invalid Transaction Layer DVSEC configuration: %d\n",
+                       rc);
+               return -ENODEV;
+       }
+
+       rc = read_dvsec_function(dev, fn);
+       if (rc) {
+               dev_err(&dev->dev,
+                       "Invalid Function DVSEC configuration: %d\n", rc);
+               return -ENODEV;
+       }
+
+       rc = read_dvsec_afu_info(dev, fn);
+       if (rc) {
+               dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
+               return -ENODEV;
+       }
+
+       rc = read_dvsec_vendor(dev);
+       if (rc) {
+               dev_err(&dev->dev,
+                       "Invalid vendor specific DVSEC configuration: %d\n",
+                       rc);
+               return -ENODEV;
+       }
+
+       rc = validate_function(dev, fn);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_function);
+
+static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
+                       int offset, u32 *data)
+{
+       u32 val;
+       unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+       int pos = fn->dvsec_afu_info_pos;
+
+       /* Protect 'data valid' bit */
+       if (EXTRACT_BIT(offset, 31)) {
+               dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
+               return -EINVAL;
+       }
+
+       pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+       while (!EXTRACT_BIT(val, 31)) {
+               if (time_after_eq(jiffies, timeout)) {
+                       dev_err(&dev->dev,
+                               "Timeout while reading AFU info DVSEC (offset=%d)\n",
+                               offset);
+                       return -EBUSY;
+               }
+               cpu_relax();
+               pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+       }
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
+       return 0;
+}
+
+int ocxl_config_check_afu_index(struct pci_dev *dev,
+                               struct ocxl_fn_config *fn, int afu_idx)
+{
+       u32 val;
+       int rc, templ_major, templ_minor, len;
+
+       pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
+       if (rc)
+               return rc;
+
+       /* AFU index map can have holes */
+       if (!val)
+               return 0;
+
+       templ_major = EXTRACT_BITS(val, 8, 15);
+       templ_minor = EXTRACT_BITS(val, 0, 7);
+       dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
+               templ_major, templ_minor);
+
+       len = EXTRACT_BITS(val, 16, 31);
+       if (len != OCXL_TEMPL_LEN) {
+               dev_warn(&dev->dev,
+                       "Unexpected template length in AFU information (%#x)\n",
+                       len);
+       }
+       return 1;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
+
+static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
+                       struct ocxl_afu_config *afu)
+{
+       int i, rc;
+       u32 val, *ptr;
+
+       BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
+       for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
+               rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
+               if (rc)
+                       return rc;
+               ptr = (u32 *) &afu->name[i];
+               *ptr = val;
+       }
+       afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
+       return 0;
+}
+
+static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
+                       struct ocxl_afu_config *afu)
+{
+       int rc;
+       u32 val;
+
+       /*
+        * Global MMIO
+        */
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
+       if (rc)
+               return rc;
+       afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
+       afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
+       if (rc)
+               return rc;
+       afu->global_mmio_offset += (u64) val << 32;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
+       if (rc)
+               return rc;
+       afu->global_mmio_size = val;
+
+       /*
+        * Per-process MMIO
+        */
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
+       if (rc)
+               return rc;
+       afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
+       afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
+       if (rc)
+               return rc;
+       afu->pp_mmio_offset += (u64) val << 32;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
+       if (rc)
+               return rc;
+       afu->pp_mmio_stride = val;
+
+       return 0;
+}
+
+static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+       int pos;
+       u8 val8;
+       u16 val16;
+
+       pos = find_dvsec_afu_ctrl(dev, afu->idx);
+       if (!pos) {
+               dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
+                       afu->idx);
+               return -ENODEV;
+       }
+       afu->dvsec_afu_control_pos = pos;
+
+       pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
+       afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
+
+       pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
+       afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
+       return 0;
+}
+
+static bool char_allowed(int c)
+{
+       /*
+        * Permitted characters: alphanumeric, hyphen, underscore, comma
+        */
+       if ((c >= 0x30 && c <= 0x39) /* digits */ ||
+               (c >= 0x41 && c <= 0x5A) /* upper case */ ||
+               (c >= 0x61 && c <= 0x7A) /* lower case */ ||
+               c == 0 /* NULL */ ||
+               c == 0x2D /* - */ ||
+               c == 0x5F /* _ */ ||
+               c == 0x2C /* , */)
+               return true;
+       return false;
+}
+
+static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+       int i;
+
+       if (!afu->name[0]) {
+               dev_err(&dev->dev, "Empty AFU name\n");
+               return -EINVAL;
+       }
+       for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
+               if (!char_allowed(afu->name[i])) {
+                       dev_err(&dev->dev,
+                               "Invalid character in AFU name\n");
+                       return -EINVAL;
+               }
+       }
+
+       if (afu->global_mmio_bar != 0 &&
+               afu->global_mmio_bar != 2 &&
+               afu->global_mmio_bar != 4) {
+               dev_err(&dev->dev, "Invalid global MMIO bar number\n");
+               return -EINVAL;
+       }
+       if (afu->pp_mmio_bar != 0 &&
+               afu->pp_mmio_bar != 2 &&
+               afu->pp_mmio_bar != 4) {
+               dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
+                       struct ocxl_afu_config *afu, u8 afu_idx)
+{
+       int rc;
+       u32 val32;
+
+       /*
+        * First, we need to write the AFU idx for the AFU we want to
+        * access.
+        */
+       WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
+       afu->idx = afu_idx;
+       pci_write_config_byte(dev,
+                       fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+                       afu->idx);
+
+       rc = read_afu_name(dev, fn, afu);
+       if (rc)
+               return rc;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
+       if (rc)
+               return rc;
+       afu->version_major = EXTRACT_BITS(val32, 24, 31);
+       afu->version_minor = EXTRACT_BITS(val32, 16, 23);
+       afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
+       afu->afum_type = EXTRACT_BITS(val32, 12, 13);
+       afu->profile = EXTRACT_BITS(val32, 0, 7);
+
+       rc = read_afu_mmio(dev, fn, afu);
+       if (rc)
+               return rc;
+
+       rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
+       if (rc)
+               return rc;
+       afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
+
+       rc = read_afu_control(dev, afu);
+       if (rc)
+               return rc;
+
+       dev_dbg(&dev->dev, "AFU configuration:\n");
+       dev_dbg(&dev->dev, "  name = %s\n", afu->name);
+       dev_dbg(&dev->dev, "  version = %d.%d\n", afu->version_major,
+               afu->version_minor);
+       dev_dbg(&dev->dev, "  global mmio bar = %hhu\n", afu->global_mmio_bar);
+       dev_dbg(&dev->dev, "  global mmio offset = %#llx\n",
+               afu->global_mmio_offset);
+       dev_dbg(&dev->dev, "  global mmio size = %#x\n", afu->global_mmio_size);
+       dev_dbg(&dev->dev, "  pp mmio bar = %hhu\n", afu->pp_mmio_bar);
+       dev_dbg(&dev->dev, "  pp mmio offset = %#llx\n", afu->pp_mmio_offset);
+       dev_dbg(&dev->dev, "  pp mmio stride = %#x\n", afu->pp_mmio_stride);
+       dev_dbg(&dev->dev, "  mem size (log) = %hhu\n", afu->log_mem_size);
+       dev_dbg(&dev->dev, "  pasid supported (log) = %u\n",
+               afu->pasid_supported_log);
+       dev_dbg(&dev->dev, "  actag supported = %u\n",
+               afu->actag_supported);
+
+       rc = validate_afu(dev, afu);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_afu);
+
+int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
+                       u16 *supported)
+{
+       int rc;
+
+       /*
+        * This is really a simple wrapper around the platform API, so
+        * that an external driver using ocxl as a library doesn't call
+        * platform-dependent code directly
+        */
+       rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
+       if (rc) {
+               dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
+               return rc;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info);
+
+void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
+                       int actag_count)
+{
+       u16 val;
+
+       val = actag_count & OCXL_DVSEC_ACTAG_MASK;
+       pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
+
+       val = actag_base & OCXL_DVSEC_ACTAG_MASK;
+       pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag);
+
+int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
+{
+       return pnv_ocxl_get_pasid_count(dev, count);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
+
+void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
+                       u32 pasid_count_log)
+{
+       u8 val8;
+       u32 val32;
+
+       val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
+       pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
+
+       pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+                       &val32);
+       val32 &= ~OCXL_DVSEC_PASID_MASK;
+       val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
+       pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+                       val32);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid);
+
+void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
+{
+       u8 val;
+
+       pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
+       if (enable)
+               val |= 1;
+       else
+               val &= 0xFE;
+       pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
+
+int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+       u32 val;
+       __be32 *be32ptr;
+       u8 timers;
+       int i, rc;
+       long recv_cap;
+       char *recv_rate;
+
+       /*
+        * Skip on function != 0, as the TL can only be defined on 0
+        */
+       if (PCI_FUNC(dev->devfn) != 0)
+               return 0;
+
+       recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+       if (!recv_rate)
+               return -ENOMEM;
+       /*
+        * The spec defines 64 templates for messages in the
+        * Transaction Layer (TL).
+        *
+        * The host and device each support a subset, so we need to
+        * configure the transmitters on each side to send only
+        * templates the receiver understands, at a rate the receiver
+        * can process.  Per the spec, template 0 must be supported by
+        * everybody. That's the template which has been used by the
+        * host and device so far.
+        *
+        * The sending rate limit must be set before the template is
+        * enabled.
+        */
+
+       /*
+        * Device -> host
+        */
+       rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
+                               PNV_OCXL_TL_RATE_BUF_SIZE);
+       if (rc)
+               goto out;
+
+       for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+               be32ptr = (__be32 *) &recv_rate[i];
+               pci_write_config_dword(dev,
+                               tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+                               be32_to_cpu(*be32ptr));
+       }
+       val = recv_cap >> 32;
+       pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+       val = recv_cap & GENMASK(31, 0);
+       pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
+
+       /*
+        * Host -> device
+        */
+       for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+               pci_read_config_dword(dev,
+                               tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+                               &val);
+               be32ptr = (__be32 *) &recv_rate[i];
+               *be32ptr = cpu_to_be32(val);
+       }
+       pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+       recv_cap = (long) val << 32;
+       pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
+       recv_cap |= val;
+
+       rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
+                               PNV_OCXL_TL_RATE_BUF_SIZE);
+       if (rc)
+               goto out;
+
+       /*
+        * OpenCAPI commands needing to be retried are classified by
+        * the TL into 2 groups: short and long commands.
+        *
+        * The short back off timer is not used for now. It will be
+        * for opencapi 4.0.
+        *
+        * The long back off timer is typically used when an AFU hits
+        * a page fault but the NPU is already processing one. So the
+        * AFU needs to wait before it can resubmit. Having a value
+        * too low doesn't break anything, but can generate extra
+        * traffic on the link.
+        * We set it to 1.6 us for now. It's shorter than, but of the
+        * same order of magnitude as, the time spent to process a page
+        * fault.
+        */
+       timers = 0x2 << 4; /* long timer = 1.6 us */
+       pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+                       timers);
+
+       rc = 0;
+out:
+       kfree(recv_rate);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
+
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
+{
+       u32 val;
+       unsigned long timeout;
+
+       pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+                       &val);
+       if (EXTRACT_BIT(val, 20)) {
+               dev_err(&dev->dev,
+                       "Can't terminate PASID %#x, previous termination didn't complete\n",
+                       pasid);
+               return -EBUSY;
+       }
+
+       val &= ~OCXL_DVSEC_PASID_MASK;
+       val |= pasid & OCXL_DVSEC_PASID_MASK;
+       val |= BIT(20);
+       pci_write_config_dword(dev,
+                       afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+                       val);
+
+       timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+       pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+                       &val);
+       while (EXTRACT_BIT(val, 20)) {
+               if (time_after_eq(jiffies, timeout)) {
+                       dev_err(&dev->dev,
+                               "Timeout while waiting for AFU to terminate PASID %#x\n",
+                               pasid);
+                       return -EBUSY;
+               }
+               cpu_relax();
+               pci_read_config_dword(dev,
+                               afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+                               &val);
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid);
+
+void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
+                       u32 tag_count)
+{
+       u32 val;
+
+       val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
+       val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
+       pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
+                       val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_actag);
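
Taken together, these exported helpers suggest a natural probe sequence: read the function configuration, then walk the possibly sparse AFU index space. A condensed sketch of a caller (probe_function() is a hypothetical wrapper with error handling trimmed):

static int probe_function(struct pci_dev *dev)
{
        struct ocxl_fn_config fn;
        struct ocxl_afu_config afu;
        int i, rc;

        rc = ocxl_config_read_function(dev, &fn);
        if (rc)
                return rc;

        for (i = 0; i <= fn.max_afu_index; i++) {
                rc = ocxl_config_check_afu_index(dev, &fn, i);
                if (rc < 0)
                        return rc;
                if (rc == 1) {  /* index map can have holes */
                        rc = ocxl_config_read_afu(dev, &fn, &afu, i);
                        if (rc)
                                return rc;
                }
        }
        return 0;
}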
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644 (file)
index 0000000..909e880
--- /dev/null
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include "trace.h"
+#include "ocxl_internal.h"
+
+struct ocxl_context *ocxl_context_alloc(void)
+{
+       return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
+}
+
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+               struct address_space *mapping)
+{
+       int pasid;
+
+       ctx->afu = afu;
+       mutex_lock(&afu->contexts_lock);
+       pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
+                       afu->pasid_base + afu->pasid_max, GFP_KERNEL);
+       if (pasid < 0) {
+               mutex_unlock(&afu->contexts_lock);
+               return pasid;
+       }
+       afu->pasid_count++;
+       mutex_unlock(&afu->contexts_lock);
+
+       ctx->pasid = pasid;
+       ctx->status = OPENED;
+       mutex_init(&ctx->status_mutex);
+       ctx->mapping = mapping;
+       mutex_init(&ctx->mapping_lock);
+       init_waitqueue_head(&ctx->events_wq);
+       mutex_init(&ctx->xsl_error_lock);
+       mutex_init(&ctx->irq_lock);
+       idr_init(&ctx->irq_idr);
+       /*
+        * Keep a reference on the AFU to make sure it stays valid for
+        * the lifetime of the context
+        */
+       ocxl_afu_get(afu);
+       return 0;
+}
+
+/*
+ * Callback for when a translation fault triggers an error
+ * data:       a pointer to the context which triggered the fault
+ * addr:       the address that triggered the error
+ * dsisr:      the value of the PPC64 dsisr register
+ */
+static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
+{
+       struct ocxl_context *ctx = (struct ocxl_context *) data;
+
+       mutex_lock(&ctx->xsl_error_lock);
+       ctx->xsl_error.addr = addr;
+       ctx->xsl_error.dsisr = dsisr;
+       ctx->xsl_error.count++;
+       mutex_unlock(&ctx->xsl_error_lock);
+
+       wake_up_all(&ctx->events_wq);
+}
+
+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
+{
+       int rc;
+
+       mutex_lock(&ctx->status_mutex);
+       if (ctx->status != OPENED) {
+               rc = -EIO;
+               goto out;
+       }
+
+       rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
+                       current->mm->context.id, 0, amr, current->mm,
+                       xsl_fault_error, ctx);
+       if (rc)
+               goto out;
+
+       ctx->status = ATTACHED;
+out:
+       mutex_unlock(&ctx->status_mutex);
+       return rc;
+}
+
+static int map_afu_irq(struct vm_area_struct *vma, unsigned long address,
+               u64 offset, struct ocxl_context *ctx)
+{
+       u64 trigger_addr;
+
+       trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);
+       if (!trigger_addr)
+               return VM_FAULT_SIGBUS;
+
+       vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT);
+       return VM_FAULT_NOPAGE;
+}
+
+static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
+               u64 offset, struct ocxl_context *ctx)
+{
+       u64 pp_mmio_addr;
+       int pasid_off;
+
+       if (offset >= ctx->afu->config.pp_mmio_stride)
+               return VM_FAULT_SIGBUS;
+
+       mutex_lock(&ctx->status_mutex);
+       if (ctx->status != ATTACHED) {
+               mutex_unlock(&ctx->status_mutex);
+               pr_debug("%s: Context not attached, failing mmio mmap\n",
+                       __func__);
+               return VM_FAULT_SIGBUS;
+       }
+
+       pasid_off = ctx->pasid - ctx->afu->pasid_base;
+       pp_mmio_addr = ctx->afu->pp_mmio_start +
+               pasid_off * ctx->afu->config.pp_mmio_stride +
+               offset;
+
+       vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
+       mutex_unlock(&ctx->status_mutex);
+       return VM_FAULT_NOPAGE;
+}
+
+static int ocxl_mmap_fault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       struct ocxl_context *ctx = vma->vm_file->private_data;
+       u64 offset;
+       int rc;
+
+       offset = vmf->pgoff << PAGE_SHIFT;
+       pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
+               ctx->pasid, vmf->address, offset);
+
+       if (offset < ctx->afu->irq_base_offset)
+               rc = map_pp_mmio(vma, vmf->address, offset, ctx);
+       else
+               rc = map_afu_irq(vma, vmf->address, offset, ctx);
+       return rc;
+}
+
+static const struct vm_operations_struct ocxl_vmops = {
+       .fault = ocxl_mmap_fault,
+};
+
+static int check_mmap_afu_irq(struct ocxl_context *ctx,
+                       struct vm_area_struct *vma)
+{
+       /* only one page */
+       if (vma_pages(vma) != 1)
+               return -EINVAL;
+
+       /* check offset validity */
+       if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT))
+               return -EINVAL;
+
+       /*
+        * trigger page should only be accessible in write mode.
+        *
+        * It's a bit theoretical, as a page mmapped with only
+        * PROT_WRITE is currently readable, but it doesn't hurt.
+        */
+       if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) ||
+               !(vma->vm_flags & VM_WRITE))
+               return -EINVAL;
+       vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC);
+       return 0;
+}
+
+static int check_mmap_mmio(struct ocxl_context *ctx,
+                       struct vm_area_struct *vma)
+{
+       if ((vma_pages(vma) + vma->vm_pgoff) >
+               (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
+               return -EINVAL;
+       return 0;
+}
+
+int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
+{
+       int rc;
+
+       if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset)
+               rc = check_mmap_mmio(ctx, vma);
+       else
+               rc = check_mmap_afu_irq(ctx, vma);
+       if (rc)
+               return rc;
+
+       vma->vm_flags |= VM_IO | VM_PFNMAP;
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       vma->vm_ops = &ocxl_vmops;
+       return 0;
+}
+
+int ocxl_context_detach(struct ocxl_context *ctx)
+{
+       struct pci_dev *dev;
+       int afu_control_pos;
+       enum ocxl_context_status status;
+       int rc;
+
+       mutex_lock(&ctx->status_mutex);
+       status = ctx->status;
+       ctx->status = CLOSED;
+       mutex_unlock(&ctx->status_mutex);
+       if (status != ATTACHED)
+               return 0;
+
+       dev = to_pci_dev(ctx->afu->fn->dev.parent);
+       afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
+
+       mutex_lock(&ctx->afu->afu_control_lock);
+       rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
+       mutex_unlock(&ctx->afu->afu_control_lock);
+       trace_ocxl_terminate_pasid(ctx->pasid, rc);
+       if (rc) {
+               /*
+                * If we time out waiting for the AFU to terminate the
+                * pasid, then it's dangerous to clean up the Process
+                * Element entry in the SPA, as it may still be
+                * referenced by the AFU, in which case we would
+                * checkstop because of an invalid PE access (FIR
+                * register 2, bit 42). So leave the PE defined. The
+                * caller shouldn't free the context either, so that
+                * the PASID remains allocated.
+                *
+                * A link reset will be required to cleanup the AFU
+                * and the SPA.
+                */
+               if (rc == -EBUSY)
+                       return rc;
+       }
+       rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
+       if (rc) {
+               dev_warn(&ctx->afu->dev,
+                       "Couldn't remove PE entry cleanly: %d\n", rc);
+       }
+       return 0;
+}
+
+void ocxl_context_detach_all(struct ocxl_afu *afu)
+{
+       struct ocxl_context *ctx;
+       int tmp;
+
+       mutex_lock(&afu->contexts_lock);
+       idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+               ocxl_context_detach(ctx);
+               /*
+                * We are force detaching - remove any active mmio
+                * mappings so userspace cannot interfere with the
+                * card if it comes back.  Easiest way to exercise
+                * this is to unbind and rebind the driver via sysfs
+                * while it is in use.
+                */
+               mutex_lock(&ctx->mapping_lock);
+               if (ctx->mapping)
+                       unmap_mapping_range(ctx->mapping, 0, 0, 1);
+               mutex_unlock(&ctx->mapping_lock);
+       }
+       mutex_unlock(&afu->contexts_lock);
+}
+
+void ocxl_context_free(struct ocxl_context *ctx)
+{
+       mutex_lock(&ctx->afu->contexts_lock);
+       ctx->afu->pasid_count--;
+       idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
+       mutex_unlock(&ctx->afu->contexts_lock);
+
+       ocxl_afu_irq_free_all(ctx);
+       idr_destroy(&ctx->irq_idr);
+       /* reference to the AFU taken in ocxl_context_init */
+       ocxl_afu_put(ctx->afu);
+       kfree(ctx);
+}
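
Taken together, the context helpers above follow a fixed lifecycle, which file.c
below drives from the character device. A minimal kernel-side sketch (error
handling elided; my_afu, my_mapping and amr are placeholders):

	struct ocxl_context *ctx;

	ctx = ocxl_context_alloc();			/* zeroed allocation */
	ocxl_context_init(ctx, my_afu, my_mapping);	/* PASID allocated, status = OPENED */
	ocxl_context_attach(ctx, amr);			/* PE added to the SPA, status = ATTACHED */
	/* ... AFU works on behalf of the process ... */
	ocxl_context_detach(ctx);			/* PASID terminated, PE removed */
	ocxl_context_free(ctx);				/* PASID and AFU reference released */
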
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644 (file)
index 0000000..c90c1a5
--- /dev/null
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/ocxl.h>
+#include "ocxl_internal.h"
+
+
+#define OCXL_NUM_MINORS 256 /* Total to reserve */
+
+static dev_t ocxl_dev;
+static struct class *ocxl_class;
+static struct mutex minors_idr_lock;
+static struct idr minors_idr;
+
+static struct ocxl_afu *find_and_get_afu(dev_t devno)
+{
+       struct ocxl_afu *afu;
+       int afu_minor;
+
+       afu_minor = MINOR(devno);
+       /*
+        * We don't declare an RCU critical section here, as our AFU
+        * is protected by a reference counter on the device. By the time the
+        * minor number of a device is removed from the idr, the ref count of
+        * the device is already at 0, so no user API will access that AFU and
+        * this function can't return it.
+        */
+       afu = idr_find(&minors_idr, afu_minor);
+       if (afu)
+               ocxl_afu_get(afu);
+       return afu;
+}
+
+static int allocate_afu_minor(struct ocxl_afu *afu)
+{
+       int minor;
+
+       mutex_lock(&minors_idr_lock);
+       minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+       mutex_unlock(&minors_idr_lock);
+       return minor;
+}
+
+static void free_afu_minor(struct ocxl_afu *afu)
+{
+       mutex_lock(&minors_idr_lock);
+       idr_remove(&minors_idr, MINOR(afu->dev.devt));
+       mutex_unlock(&minors_idr_lock);
+}
+
+static int afu_open(struct inode *inode, struct file *file)
+{
+       struct ocxl_afu *afu;
+       struct ocxl_context *ctx;
+       int rc;
+
+       pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+
+       afu = find_and_get_afu(inode->i_rdev);
+       if (!afu)
+               return -ENODEV;
+
+       ctx = ocxl_context_alloc();
+       if (!ctx) {
+               rc = -ENOMEM;
+               goto put_afu;
+       }
+
+       rc = ocxl_context_init(ctx, afu, inode->i_mapping);
+       if (rc)
+               goto put_afu;
+       file->private_data = ctx;
+       ocxl_afu_put(afu);
+       return 0;
+
+put_afu:
+       ocxl_afu_put(afu);
+       return rc;
+}
+
+static long afu_ioctl_attach(struct ocxl_context *ctx,
+                       struct ocxl_ioctl_attach __user *uarg)
+{
+       struct ocxl_ioctl_attach arg;
+       u64 amr = 0;
+       int rc;
+
+       pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+       if (copy_from_user(&arg, uarg, sizeof(arg)))
+               return -EFAULT;
+
+       /* Make sure reserved fields are not set for forward compatibility */
+       if (arg.reserved1 || arg.reserved2 || arg.reserved3)
+               return -EINVAL;
+
+       amr = arg.amr & mfspr(SPRN_UAMOR);
+       rc = ocxl_context_attach(ctx, amr);
+       return rc;
+}
+
+#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :                        \
+                       x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :       \
+                       x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :         \
+                       x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :     \
+                       "UNKNOWN")
+
+static long afu_ioctl(struct file *file, unsigned int cmd,
+               unsigned long args)
+{
+       struct ocxl_context *ctx = file->private_data;
+       struct ocxl_ioctl_irq_fd irq_fd;
+       u64 irq_offset;
+       long rc;
+
+       pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
+               CMD_STR(cmd));
+
+       if (ctx->status == CLOSED)
+               return -EIO;
+
+       switch (cmd) {
+       case OCXL_IOCTL_ATTACH:
+               rc = afu_ioctl_attach(ctx,
+                               (struct ocxl_ioctl_attach __user *) args);
+               break;
+
+       case OCXL_IOCTL_IRQ_ALLOC:
+               rc = ocxl_afu_irq_alloc(ctx, &irq_offset);
+               if (!rc) {
+                       /*
+                        * copy_to_user() returns the number of bytes
+                        * not copied, so translate a failure into
+                        * -EFAULT instead of leaking that count to
+                        * userspace
+                        */
+                       if (copy_to_user((u64 __user *) args, &irq_offset,
+                                       sizeof(irq_offset))) {
+                               ocxl_afu_irq_free(ctx, irq_offset);
+                               rc = -EFAULT;
+                       }
+               }
+               break;
+
+       case OCXL_IOCTL_IRQ_FREE:
+               rc = copy_from_user(&irq_offset, (u64 __user *) args,
+                               sizeof(irq_offset));
+               if (rc)
+                       return -EFAULT;
+               rc = ocxl_afu_irq_free(ctx, irq_offset);
+               break;
+
+       case OCXL_IOCTL_IRQ_SET_FD:
+               rc = copy_from_user(&irq_fd, (u64 __user *) args,
+                               sizeof(irq_fd));
+               if (rc)
+                       return -EFAULT;
+               if (irq_fd.reserved)
+                       return -EINVAL;
+               rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset,
+                                       irq_fd.eventfd);
+               break;
+
+       default:
+               rc = -EINVAL;
+       }
+       return rc;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+                       unsigned long args)
+{
+       return afu_ioctl(file, cmd, args);
+}
+
+static int afu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct ocxl_context *ctx = file->private_data;
+
+       pr_debug("%s for context %d\n", __func__, ctx->pasid);
+       return ocxl_context_mmap(ctx, vma);
+}
+
+static bool has_xsl_error(struct ocxl_context *ctx)
+{
+       bool ret;
+
+       mutex_lock(&ctx->xsl_error_lock);
+       ret = !!ctx->xsl_error.addr;
+       mutex_unlock(&ctx->xsl_error_lock);
+
+       return ret;
+}
+
+/*
+ * Are there any events pending on the AFU?
+ * ctx: The AFU context
+ * Returns: true if there are events pending
+ */
+static bool afu_events_pending(struct ocxl_context *ctx)
+{
+       if (has_xsl_error(ctx))
+               return true;
+       return false;
+}
+
+static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
+{
+       struct ocxl_context *ctx = file->private_data;
+       unsigned int mask = 0;
+       bool closed;
+
+       pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+       poll_wait(file, &ctx->events_wq, wait);
+
+       mutex_lock(&ctx->status_mutex);
+       closed = (ctx->status == CLOSED);
+       mutex_unlock(&ctx->status_mutex);
+
+       if (afu_events_pending(ctx))
+               mask = POLLIN | POLLRDNORM;
+       else if (closed)
+               mask = POLLERR;
+
+       return mask;
+}
+
+/*
+ * Populate the supplied buffer with a single XSL error
+ * ctx:        The AFU context to report the error from
+ * header: the event header to populate
+ * buf: The buffer to write the body into (should be at least
+ *      AFU_EVENT_BODY_XSL_ERROR_SIZE)
+ * Return: the amount of buffer that was populated
+ */
+static ssize_t append_xsl_error(struct ocxl_context *ctx,
+                               struct ocxl_kernel_event_header *header,
+                               char __user *buf)
+{
+       struct ocxl_kernel_event_xsl_fault_error body;
+
+       memset(&body, 0, sizeof(body));
+
+       mutex_lock(&ctx->xsl_error_lock);
+       if (!ctx->xsl_error.addr) {
+               mutex_unlock(&ctx->xsl_error_lock);
+               return 0;
+       }
+
+       body.addr = ctx->xsl_error.addr;
+       body.dsisr = ctx->xsl_error.dsisr;
+       body.count = ctx->xsl_error.count;
+
+       ctx->xsl_error.addr = 0;
+       ctx->xsl_error.dsisr = 0;
+       ctx->xsl_error.count = 0;
+
+       mutex_unlock(&ctx->xsl_error_lock);
+
+       header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
+
+       if (copy_to_user(buf, &body, sizeof(body)))
+               return -EFAULT;
+
+       return sizeof(body);
+}
+
+#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
+
+/*
+ * Reports events on the AFU
+ * Format:
+ *     Header (struct ocxl_kernel_event_header)
+ *     Body (struct ocxl_kernel_event_*)
+ *     Header...
+ */
+static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+                       loff_t *off)
+{
+       struct ocxl_context *ctx = file->private_data;
+       struct ocxl_kernel_event_header header;
+       ssize_t rc;
+       ssize_t used = 0; /* signed: append_xsl_error() can return -EFAULT */
+       DEFINE_WAIT(event_wait);
+
+       memset(&header, 0, sizeof(header));
+
+       /* Require offset to be 0 */
+       if (*off != 0)
+               return -EINVAL;
+
+       if (count < (sizeof(struct ocxl_kernel_event_header) +
+                       AFU_EVENT_BODY_MAX_SIZE))
+               return -EINVAL;
+
+       for (;;) {
+               prepare_to_wait(&ctx->events_wq, &event_wait,
+                               TASK_INTERRUPTIBLE);
+
+               if (afu_events_pending(ctx))
+                       break;
+
+               if (ctx->status == CLOSED)
+                       break;
+
+               if (file->f_flags & O_NONBLOCK) {
+                       finish_wait(&ctx->events_wq, &event_wait);
+                       return -EAGAIN;
+               }
+
+               if (signal_pending(current)) {
+                       finish_wait(&ctx->events_wq, &event_wait);
+                       return -ERESTARTSYS;
+               }
+
+               schedule();
+       }
+
+       finish_wait(&ctx->events_wq, &event_wait);
+
+       if (has_xsl_error(ctx)) {
+               used = append_xsl_error(ctx, &header, buf + sizeof(header));
+               if (used < 0)
+                       return used;
+       }
+
+       if (!afu_events_pending(ctx))
+               header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
+
+       if (copy_to_user(buf, &header, sizeof(header)))
+               return -EFAULT;
+
+       used += sizeof(header);
+
+       rc = (ssize_t) used;
+       return rc;
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+       struct ocxl_context *ctx = file->private_data;
+       int rc;
+
+       pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+       rc = ocxl_context_detach(ctx);
+       mutex_lock(&ctx->mapping_lock);
+       ctx->mapping = NULL;
+       mutex_unlock(&ctx->mapping_lock);
+       wake_up_all(&ctx->events_wq);
+       if (rc != -EBUSY)
+               ocxl_context_free(ctx);
+       return 0;
+}
+
+static const struct file_operations ocxl_afu_fops = {
+       .owner          = THIS_MODULE,
+       .open           = afu_open,
+       .unlocked_ioctl = afu_ioctl,
+       .compat_ioctl   = afu_compat_ioctl,
+       .mmap           = afu_mmap,
+       .poll           = afu_poll,
+       .read           = afu_read,
+       .release        = afu_release,
+};
+
+int ocxl_create_cdev(struct ocxl_afu *afu)
+{
+       int rc;
+
+       cdev_init(&afu->cdev, &ocxl_afu_fops);
+       rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+       if (rc) {
+               dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+               return rc;
+       }
+       return 0;
+}
+
+void ocxl_destroy_cdev(struct ocxl_afu *afu)
+{
+       cdev_del(&afu->cdev);
+}
+
+int ocxl_register_afu(struct ocxl_afu *afu)
+{
+       int minor;
+
+       minor = allocate_afu_minor(afu);
+       if (minor < 0)
+               return minor;
+       afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
+       afu->dev.class = ocxl_class;
+       return device_register(&afu->dev);
+}
+
+void ocxl_unregister_afu(struct ocxl_afu *afu)
+{
+       free_afu_minor(afu);
+}
+
+static char *ocxl_devnode(struct device *dev, umode_t *mode)
+{
+       return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
+}
+
+int ocxl_file_init(void)
+{
+       int rc;
+
+       mutex_init(&minors_idr_lock);
+       idr_init(&minors_idr);
+
+       rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
+       if (rc) {
+               pr_err("Unable to allocate ocxl major number: %d\n", rc);
+               return rc;
+       }
+
+       ocxl_class = class_create(THIS_MODULE, "ocxl");
+       if (IS_ERR(ocxl_class)) {
+               pr_err("Unable to create ocxl class\n");
+               unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+               return PTR_ERR(ocxl_class);
+       }
+
+       ocxl_class->devnode = ocxl_devnode;
+       return 0;
+}
+
+void ocxl_file_exit(void)
+{
+       class_destroy(ocxl_class);
+       unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+       idr_destroy(&minors_idr);
+}
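
From userspace, the character device implemented above is typically driven as
follows. A hedged sketch: the device name under /dev/ocxl/ is a placeholder,
the uapi structures come from include/uapi/misc/ocxl.h, and error checking is
elided:

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <misc/ocxl.h>

	int fd = open("/dev/ocxl/AFU_NAME.LOCATION.0", O_RDWR);
	struct ocxl_ioctl_attach attach;

	memset(&attach, 0, sizeof(attach));	/* reserved fields must be 0 */
	ioctl(fd, OCXL_IOCTL_ATTACH, &attach);

	/* afu_read() insists on room for a header plus the largest body */
	char buf[sizeof(struct ocxl_kernel_event_header) +
		sizeof(struct ocxl_kernel_event_xsl_fault_error)];
	read(fd, buf, sizeof(buf));
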
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644 (file)
index 0000000..f307905
--- /dev/null
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include <linux/mutex.h>
+#include <linux/mmu_context.h>
+#include <asm/copro.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+
+#define SPA_PASID_BITS         15
+#define SPA_PASID_MAX          ((1 << SPA_PASID_BITS) - 1)
+#define SPA_PE_MASK            SPA_PASID_MAX
+#define SPA_SPA_SIZE_LOG       22 /* Each SPA is 4 MB */
+
+#define SPA_CFG_SF             (1ull << (63-0))
+#define SPA_CFG_TA             (1ull << (63-1))
+#define SPA_CFG_HV             (1ull << (63-3))
+#define SPA_CFG_UV             (1ull << (63-4))
+#define SPA_CFG_XLAT_hpt       (0ull << (63-6)) /* Hashed page table (HPT) mode */
+#define SPA_CFG_XLAT_roh       (2ull << (63-6)) /* Radix on HPT mode */
+#define SPA_CFG_XLAT_ror       (3ull << (63-6)) /* Radix on Radix mode */
+#define SPA_CFG_PR             (1ull << (63-49))
+#define SPA_CFG_TC             (1ull << (63-54))
+#define SPA_CFG_DR             (1ull << (63-59))
+
+#define SPA_XSL_TF             (1ull << (63-3))  /* Translation fault */
+#define SPA_XSL_S              (1ull << (63-38)) /* Store operation */
+
+#define SPA_PE_VALID           0x80000000
+
+
+struct pe_data {
+       struct mm_struct *mm;
+       /* callback to trigger when a translation fault occurs */
+       void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
+       /* opaque pointer to be passed to the above callback */
+       void *xsl_err_data;
+       struct rcu_head rcu;
+};
+
+struct spa {
+       struct ocxl_process_element *spa_mem;
+       int spa_order;
+       struct mutex spa_lock;
+       struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
+       char *irq_name;
+       int virq;
+       void __iomem *reg_dsisr;
+       void __iomem *reg_dar;
+       void __iomem *reg_tfc;
+       void __iomem *reg_pe_handle;
+       /*
+        * The following fields are used by the memory fault
+        * interrupt handler. We can only have one interrupt at a
+        * time. The NPU won't raise another interrupt until the
+        * previous one has been ack'd by writing to the TFC register.
+        */
+       struct xsl_fault {
+               struct work_struct fault_work;
+               u64 pe;
+               u64 dsisr;
+               u64 dar;
+               struct pe_data pe_data;
+       } xsl_fault;
+};
+
+/*
+ * An opencapi link can be used by several PCI functions. We have
+ * one link per device slot.
+ *
+ * A linked list of opencapi links should suffice, as there's a
+ * limited number of opencapi slots on a system and lookup is only
+ * done when the device is probed.
+ */
+struct link {
+       struct list_head list;
+       struct kref ref;
+       int domain;
+       int bus;
+       int dev;
+       atomic_t irq_available;
+       struct spa *spa;
+       void *platform_data;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+enum xsl_response {
+       CONTINUE,
+       ADDRESS_ERROR,
+       RESTART,
+};
+
+
+static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
+{
+       u64 reg;
+
+       *dsisr = in_be64(spa->reg_dsisr);
+       *dar = in_be64(spa->reg_dar);
+       reg = in_be64(spa->reg_pe_handle);
+       *pe = reg & SPA_PE_MASK;
+}
+
+static void ack_irq(struct spa *spa, enum xsl_response r)
+{
+       u64 reg = 0;
+
+       /* continue is not supported */
+       if (r == RESTART)
+               reg = PPC_BIT(31);
+       else if (r == ADDRESS_ERROR)
+               reg = PPC_BIT(30);
+       else
+               WARN(1, "Invalid irq response %d\n", r);
+
+       if (reg) {
+               trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
+                               spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
+               out_be64(spa->reg_tfc, reg);
+       }
+}
+
+static void xsl_fault_handler_bh(struct work_struct *fault_work)
+{
+       unsigned int flt = 0;
+       unsigned long access, flags, inv_flags = 0;
+       enum xsl_response r;
+       struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
+                                       fault_work);
+       struct spa *spa = container_of(fault, struct spa, xsl_fault);
+       int rc;
+
+       /*
+        * We need to release a reference on the mm whenever exiting this
+        * function (taken in the memory fault interrupt handler)
+        */
+       rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
+                               &flt);
+       if (rc) {
+               pr_debug("copro_handle_mm_fault failed: %d\n", rc);
+               if (fault->pe_data.xsl_err_cb) {
+                       fault->pe_data.xsl_err_cb(
+                               fault->pe_data.xsl_err_data,
+                               fault->dar, fault->dsisr);
+               }
+               r = ADDRESS_ERROR;
+               goto ack;
+       }
+
+       if (!radix_enabled()) {
+               /*
+                * update_mmu_cache() will not have loaded the hash
+                * since current->trap is not a 0x400 or 0x300, so
+                * just call hash_page_mm() here.
+                */
+               access = _PAGE_PRESENT | _PAGE_READ;
+               if (fault->dsisr & SPA_XSL_S)
+                       access |= _PAGE_WRITE;
+
+               if (REGION_ID(fault->dar) != USER_REGION_ID)
+                       access |= _PAGE_PRIVILEGED;
+
+               local_irq_save(flags);
+               hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
+                       inv_flags);
+               local_irq_restore(flags);
+       }
+       r = RESTART;
+ack:
+       mmdrop(fault->pe_data.mm);
+       ack_irq(spa, r);
+}
+
+static irqreturn_t xsl_fault_handler(int irq, void *data)
+{
+       struct link *link = (struct link *) data;
+       struct spa *spa = link->spa;
+       u64 dsisr, dar, pe_handle;
+       struct pe_data *pe_data;
+       struct ocxl_process_element *pe;
+       int lpid, pid, tid;
+
+       read_irq(spa, &dsisr, &dar, &pe_handle);
+       trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);
+
+       WARN_ON(pe_handle > SPA_PE_MASK);
+       pe = spa->spa_mem + pe_handle;
+       lpid = be32_to_cpu(pe->lpid);
+       pid = be32_to_cpu(pe->pid);
+       tid = be32_to_cpu(pe->tid);
+       /*
+        * We could be reading all null values here if the PE is being
+        * removed while an interrupt kicks in. It's not supposed to
+        * happen if the driver notified the AFU to terminate the
+        * PASID, and the AFU waited for pending operations before
+        * acknowledging. But even if it happens, we won't find a
+        * memory context below and will fail silently, so it should
+        * be OK.
+        */
+       if (!(dsisr & SPA_XSL_TF)) {
+               WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
+               ack_irq(spa, ADDRESS_ERROR);
+               return IRQ_HANDLED;
+       }
+
+       rcu_read_lock();
+       pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
+       if (!pe_data) {
+               /*
+                * Could only happen if the driver didn't notify the
+                * AFU about PASID termination before removing the PE,
+                * or the AFU didn't wait for all memory accesses to
+                * complete.
+                *
+                * Either way, we fail early, but we shouldn't log an
+                * error message, as it is a valid (if unexpected)
+                * scenario.
+                */
+               rcu_read_unlock();
+               pr_debug("Unknown mm context for xsl interrupt\n");
+               ack_irq(spa, ADDRESS_ERROR);
+               return IRQ_HANDLED;
+       }
+       WARN_ON(pe_data->mm->context.id != pid);
+
+       spa->xsl_fault.pe = pe_handle;
+       spa->xsl_fault.dar = dar;
+       spa->xsl_fault.dsisr = dsisr;
+       spa->xsl_fault.pe_data = *pe_data;
+       mmgrab(pe_data->mm); /* mm count is released by bottom half */
+
+       rcu_read_unlock();
+       schedule_work(&spa->xsl_fault.fault_work);
+       return IRQ_HANDLED;
+}
+
+static void unmap_irq_registers(struct spa *spa)
+{
+       pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
+                               spa->reg_pe_handle);
+}
+
+static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
+{
+       return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
+                               &spa->reg_tfc, &spa->reg_pe_handle);
+}
+
+static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
+{
+       struct spa *spa = link->spa;
+       int rc;
+       int hwirq;
+
+       rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
+       if (rc)
+               return rc;
+
+       rc = map_irq_registers(dev, spa);
+       if (rc)
+               return rc;
+
+       spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
+                               link->domain, link->bus, link->dev);
+       if (!spa->irq_name) {
+               unmap_irq_registers(spa);
+               dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
+               return -ENOMEM;
+       }
+       /*
+        * At some point, we'll need to look into allowing a higher
+        * number of interrupts. Could we have an IRQ domain per link?
+        */
+       spa->virq = irq_create_mapping(NULL, hwirq);
+       if (!spa->virq) {
+               kfree(spa->irq_name);
+               unmap_irq_registers(spa);
+               dev_err(&dev->dev,
+                       "irq_create_mapping failed for translation interrupt\n");
+               return -EINVAL;
+       }
+
+       dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
+
+       rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
+                       link);
+       if (rc) {
+               irq_dispose_mapping(spa->virq);
+               kfree(spa->irq_name);
+               unmap_irq_registers(spa);
+               dev_err(&dev->dev,
+                       "request_irq failed for translation interrupt: %d\n",
+                       rc);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static void release_xsl_irq(struct link *link)
+{
+       struct spa *spa = link->spa;
+
+       if (spa->virq) {
+               free_irq(spa->virq, link);
+               irq_dispose_mapping(spa->virq);
+       }
+       kfree(spa->irq_name);
+       unmap_irq_registers(spa);
+}
+
+static int alloc_spa(struct pci_dev *dev, struct link *link)
+{
+       struct spa *spa;
+
+       spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
+       if (!spa)
+               return -ENOMEM;
+
+       mutex_init(&spa->spa_lock);
+       INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
+       INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
+
+       spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
+       spa->spa_mem = (struct ocxl_process_element *)
+               __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
+       if (!spa->spa_mem) {
+               dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
+               kfree(spa);
+               return -ENOMEM;
+       }
+       pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
+               link->dev, spa->spa_mem);
+
+       link->spa = spa;
+       return 0;
+}
+
+static void free_spa(struct link *link)
+{
+       struct spa *spa = link->spa;
+
+       pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
+               link->dev);
+
+       if (spa && spa->spa_mem) {
+               free_pages((unsigned long) spa->spa_mem, spa->spa_order);
+               kfree(spa);
+               link->spa = NULL;
+       }
+}
+
+static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
+{
+       struct link *link;
+       int rc;
+
+       link = kzalloc(sizeof(struct link), GFP_KERNEL);
+       if (!link)
+               return -ENOMEM;
+
+       kref_init(&link->ref);
+       link->domain = pci_domain_nr(dev->bus);
+       link->bus = dev->bus->number;
+       link->dev = PCI_SLOT(dev->devfn);
+       atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
+
+       rc = alloc_spa(dev, link);
+       if (rc)
+               goto err_free;
+
+       rc = setup_xsl_irq(dev, link);
+       if (rc)
+               goto err_spa;
+
+       /* platform specific hook */
+       rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
+                               &link->platform_data);
+       if (rc)
+               goto err_xsl_irq;
+
+       *out_link = link;
+       return 0;
+
+err_xsl_irq:
+       release_xsl_irq(link);
+err_spa:
+       free_spa(link);
+err_free:
+       kfree(link);
+       return rc;
+}
+
+static void free_link(struct link *link)
+{
+       release_xsl_irq(link);
+       free_spa(link);
+       kfree(link);
+}
+
+int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
+{
+       int rc = 0;
+       struct link *link;
+
+       mutex_lock(&links_list_lock);
+       list_for_each_entry(link, &links_list, list) {
+               /* The functions of a device all share the same link */
+               if (link->domain == pci_domain_nr(dev->bus) &&
+                       link->bus == dev->bus->number &&
+                       link->dev == PCI_SLOT(dev->devfn)) {
+                       kref_get(&link->ref);
+                       *link_handle = link;
+                       goto unlock;
+               }
+       }
+       rc = alloc_link(dev, PE_mask, &link);
+       if (rc)
+               goto unlock;
+
+       list_add(&link->list, &links_list);
+       *link_handle = link;
+unlock:
+       mutex_unlock(&links_list_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_setup);
+
+static void release_xsl(struct kref *ref)
+{
+       struct link *link = container_of(ref, struct link, ref);
+
+       list_del(&link->list);
+       /* call platform code before releasing data */
+       pnv_ocxl_spa_release(link->platform_data);
+       free_link(link);
+}
+
+void ocxl_link_release(struct pci_dev *dev, void *link_handle)
+{
+       struct link *link = (struct link *) link_handle;
+
+       mutex_lock(&links_list_lock);
+       kref_put(&link->ref, release_xsl);
+       mutex_unlock(&links_list_lock);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_release);
+
+static u64 calculate_cfg_state(bool kernel)
+{
+       u64 state;
+
+       state = SPA_CFG_DR;
+       if (mfspr(SPRN_LPCR) & LPCR_TC)
+               state |= SPA_CFG_TC;
+       if (radix_enabled())
+               state |= SPA_CFG_XLAT_ror;
+       else
+               state |= SPA_CFG_XLAT_hpt;
+       state |= SPA_CFG_HV;
+       if (kernel) {
+               if (mfmsr() & MSR_SF)
+                       state |= SPA_CFG_SF;
+       } else {
+               state |= SPA_CFG_PR;
+               if (!test_tsk_thread_flag(current, TIF_32BIT))
+                       state |= SPA_CFG_SF;
+       }
+       return state;
+}
+
+int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+               u64 amr, struct mm_struct *mm,
+               void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+               void *xsl_err_data)
+{
+       struct link *link = (struct link *) link_handle;
+       struct spa *spa = link->spa;
+       struct ocxl_process_element *pe;
+       int pe_handle, rc = 0;
+       struct pe_data *pe_data;
+
+       BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
+       if (pasid > SPA_PASID_MAX)
+               return -EINVAL;
+
+       mutex_lock(&spa->spa_lock);
+       pe_handle = pasid & SPA_PE_MASK;
+       pe = spa->spa_mem + pe_handle;
+
+       if (pe->software_state) {
+               rc = -EBUSY;
+               goto unlock;
+       }
+
+       pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
+       if (!pe_data) {
+               rc = -ENOMEM;
+               goto unlock;
+       }
+
+       pe_data->mm = mm;
+       pe_data->xsl_err_cb = xsl_err_cb;
+       pe_data->xsl_err_data = xsl_err_data;
+
+       memset(pe, 0, sizeof(struct ocxl_process_element));
+       pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+       pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+       pe->pid = cpu_to_be32(pidr);
+       pe->tid = cpu_to_be32(tidr);
+       pe->amr = cpu_to_be64(amr);
+       pe->software_state = cpu_to_be32(SPA_PE_VALID);
+
+       mm_context_add_copro(mm);
+       /*
+        * Barrier is to make sure PE is visible in the SPA before it
+        * is used by the device. It also helps with the global TLBI
+        * invalidation
+        */
+       mb();
+       radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
+
+       /*
+        * The mm must stay valid for as long as the device uses it. We
+        * lower the count when the context is removed from the SPA.
+        *
+        * We grab mm_count (and not mm_users), as we don't want to
+        * end up in a circular dependency if a process mmaps its
+        * mmio (thereby incrementing the file ref count when
+        * calling mmap()) and forgets to unmap before exiting. In
+        * that scenario, when the kernel handles the death of the
+        * process, the file is not cleaned because unmap was not
+        * called, and the mm wouldn't be freed because we would still
+        * have a reference on mm_users. Incrementing mm_count solves
+        * the problem.
+        */
+       mmgrab(mm);
+       trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
+unlock:
+       mutex_unlock(&spa->spa_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
+
+int ocxl_link_remove_pe(void *link_handle, int pasid)
+{
+       struct link *link = (struct link *) link_handle;
+       struct spa *spa = link->spa;
+       struct ocxl_process_element *pe;
+       struct pe_data *pe_data;
+       int pe_handle, rc;
+
+       if (pasid > SPA_PASID_MAX)
+               return -EINVAL;
+
+       /*
+        * About synchronization with our memory fault handler:
+        *
+        * Before removing the PE, the driver is supposed to have
+        * notified the AFU, which should have cleaned up and made
+        * sure the PASID is no longer in use, including pending
+        * interrupts. However, there's no way to be sure...
+        *
+        * We clear the PE and remove the context from our radix
+        * tree. From that point on, any new interrupt for that
+        * context will fail silently, which is ok. As mentioned
+        * above, that's not expected, but it could happen if the
+        * driver or AFU didn't do the right thing.
+        *
+        * There could still be a bottom half running, but we don't
+        * need to wait/flush, as it is managing a reference count on
+        * the mm it reads from the radix tree.
+        */
+       pe_handle = pasid & SPA_PE_MASK;
+       pe = spa->spa_mem + pe_handle;
+
+       mutex_lock(&spa->spa_lock);
+
+       if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
+               rc = -EINVAL;
+               goto unlock;
+       }
+
+       trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
+                               be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));
+
+       memset(pe, 0, sizeof(struct ocxl_process_element));
+       /*
+        * The barrier makes sure the PE is removed from the SPA
+        * before we clear the NPU context cache below, so that the
+        * old PE cannot be reloaded erroneously.
+        */
+       mb();
+
+       /*
+        * hook to platform code
+        * On powerpc, the entry needs to be cleared from the context
+        * cache of the NPU.
+        */
+       rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
+       WARN_ON(rc);
+
+       pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
+       if (!pe_data) {
+               WARN(1, "Couldn't find pe data when removing PE\n");
+       } else {
+               mm_context_remove_copro(pe_data->mm);
+               mmdrop(pe_data->mm);
+               kfree_rcu(pe_data, rcu);
+       }
+unlock:
+       mutex_unlock(&spa->spa_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
+
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
+{
+       struct link *link = (struct link *) link_handle;
+       int rc, irq;
+       u64 addr;
+
+       if (atomic_dec_if_positive(&link->irq_available) < 0)
+               return -ENOSPC;
+
+       rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
+       if (rc) {
+               atomic_inc(&link->irq_available);
+               return rc;
+       }
+
+       *hw_irq = irq;
+       *trigger_addr = addr;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
+
+void ocxl_link_free_irq(void *link_handle, int hw_irq)
+{
+       struct link *link = (struct link *) link_handle;
+
+       pnv_ocxl_free_xive_irq(hw_irq);
+       atomic_inc(&link->irq_available);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_free_irq);
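
The exported link API above is what another kernel driver sitting on top of an
opencapi function would use. A sketch under assumptions — pdev, pasid, mm and
the fault callback are placeholders, and error paths are elided:

	static void my_xsl_err_cb(void *data, u64 addr, u64 dsisr)
	{
		/* report the translation fault to whoever owns the context */
	}

	void *link;
	int hwirq;
	u64 trigger_addr;

	ocxl_link_setup(pdev, 0, &link);	/* shared, refcounted per slot */
	ocxl_link_add_pe(link, pasid, mm->context.id, 0, 0, mm,
			my_xsl_err_cb, NULL);	/* writes a PE into the SPA */
	ocxl_link_irq_alloc(link, &hwirq, &trigger_addr);
	/* ... hand trigger_addr to the AFU ... */
	ocxl_link_free_irq(link, hwirq);
	ocxl_link_remove_pe(link, pasid);
	ocxl_link_release(pdev, link);
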
diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
new file mode 100644 (file)
index 0000000..7210d9e
--- /dev/null
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "ocxl_internal.h"
+
+static int __init init_ocxl(void)
+{
+       int rc = 0;
+
+       rc = ocxl_file_init();
+       if (rc)
+               return rc;
+
+       rc = pci_register_driver(&ocxl_pci_driver);
+       if (rc) {
+               ocxl_file_exit();
+               return rc;
+       }
+       return 0;
+}
+
+static void exit_ocxl(void)
+{
+       pci_unregister_driver(&ocxl_pci_driver);
+       ocxl_file_exit();
+}
+
+module_init(init_ocxl);
+module_exit(exit_ocxl);
+
+MODULE_DESCRIPTION("Open Coherent Accelerator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
new file mode 100644 (file)
index 0000000..5d42182
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_INTERNAL_H_
+#define _OCXL_INTERNAL_H_
+
+#include <linux/pci.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <misc/ocxl.h>
+
+#define MAX_IRQ_PER_LINK       2000
+#define MAX_IRQ_PER_CONTEXT    MAX_IRQ_PER_LINK
+
+#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
+#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
+
+extern struct pci_driver ocxl_pci_driver;
+
+
+struct ocxl_fn {
+       struct device dev;
+       int bar_used[3];
+       struct ocxl_fn_config config;
+       struct list_head afu_list;
+       int pasid_base;
+       int actag_base;
+       int actag_enabled;
+       int actag_supported;
+       struct list_head pasid_list;
+       struct list_head actag_list;
+       void *link;
+};
+
+struct ocxl_afu {
+       struct ocxl_fn *fn;
+       struct list_head list;
+       struct device dev;
+       struct cdev cdev;
+       struct ocxl_afu_config config;
+       int pasid_base;
+       int pasid_count; /* opened contexts */
+       int pasid_max; /* maximum number of contexts */
+       int actag_base;
+       int actag_enabled;
+       struct mutex contexts_lock;
+       struct idr contexts_idr;
+       struct mutex afu_control_lock;
+       u64 global_mmio_start;
+       u64 irq_base_offset;
+       void __iomem *global_mmio_ptr;
+       u64 pp_mmio_start;
+       struct bin_attribute attr_global_mmio;
+};
+
+enum ocxl_context_status {
+       CLOSED,
+       OPENED,
+       ATTACHED,
+};
+
+// Contains metadata about a translation fault
+struct ocxl_xsl_error {
+       u64 addr; // The address that triggered the fault
+       u64 dsisr; // The value of the dsisr register
+       u64 count; // The number of times this fault has been triggered
+};
+
+struct ocxl_context {
+       struct ocxl_afu *afu;
+       int pasid;
+       struct mutex status_mutex;
+       enum ocxl_context_status status;
+       struct address_space *mapping;
+       struct mutex mapping_lock;
+       wait_queue_head_t events_wq;
+       struct mutex xsl_error_lock;
+       struct ocxl_xsl_error xsl_error;
+       struct mutex irq_lock;
+       struct idr irq_idr;
+};
+
+struct ocxl_process_element {
+       __be64 config_state;
+       __be32 reserved1[11];
+       __be32 lpid;
+       __be32 tid;
+       __be32 pid;
+       __be32 reserved2[10];
+       __be64 amr;
+       __be32 reserved3[3];
+       __be32 software_state;
+};
+
+
+extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
+extern void ocxl_afu_put(struct ocxl_afu *afu);
+
+extern int ocxl_create_cdev(struct ocxl_afu *afu);
+extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
+extern int ocxl_register_afu(struct ocxl_afu *afu);
+extern void ocxl_unregister_afu(struct ocxl_afu *afu);
+
+extern int ocxl_file_init(void);
+extern void ocxl_file_exit(void);
+
+extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+
+extern struct ocxl_context *ocxl_context_alloc(void);
+extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+                       struct address_space *mapping);
+extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
+extern int ocxl_context_mmap(struct ocxl_context *ctx,
+                       struct vm_area_struct *vma);
+extern int ocxl_context_detach(struct ocxl_context *ctx);
+extern void ocxl_context_detach_all(struct ocxl_afu *afu);
+extern void ocxl_context_free(struct ocxl_context *ctx);
+
+extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
+extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
+
+extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
+extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
+extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
+extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
+                       int eventfd);
+extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
+
+#endif /* _OCXL_INTERNAL_H_ */
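
The hardware-defined process element must be exactly 128 bytes (link.c enforces
the total size with a BUILD_BUG_ON). The per-field offsets implied by the
declaration above can be double-checked the same way — a sketch to be dropped
into any function body:

	BUILD_BUG_ON(offsetof(struct ocxl_process_element, lpid) != 52);
	BUILD_BUG_ON(offsetof(struct ocxl_process_element, pid) != 60);
	BUILD_BUG_ON(offsetof(struct ocxl_process_element, amr) != 104);
	BUILD_BUG_ON(offsetof(struct ocxl_process_element, software_state) != 124);
	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
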
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
new file mode 100644 (file)
index 0000000..d14cb56
--- /dev/null
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include "ocxl_internal.h"
+
+
+struct id_range {
+       struct list_head list;
+       u32 start;
+       u32 end;
+};
+
+#ifdef DEBUG
+static void dump_list(struct list_head *head, char *type_str)
+{
+       struct id_range *cur;
+
+       pr_debug("%s ranges allocated:\n", type_str);
+       list_for_each_entry(cur, head, list) {
+               pr_debug("Range %d->%d\n", cur->start, cur->end);
+       }
+}
+#endif
+
+static int range_alloc(struct list_head *head, u32 size, int max_id,
+               char *type_str)
+{
+       struct list_head *pos;
+       struct id_range *cur, *new;
+       int rc, last_end;
+
+       new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       pos = head;
+       last_end = -1;
+       list_for_each_entry(cur, head, list) {
+               if ((cur->start - last_end) > size)
+                       break;
+               last_end = cur->end;
+               pos = &cur->list;
+       }
+
+       new->start = last_end + 1;
+       new->end = new->start + size - 1;
+
+       if (new->end > max_id) {
+               kfree(new);
+               rc = -ENOSPC;
+       } else {
+               list_add(&new->list, pos);
+               rc = new->start;
+       }
+
+#ifdef DEBUG
+       dump_list(head, type_str);
+#endif
+       return rc;
+}
+
+static void range_free(struct list_head *head, u32 start, u32 size,
+               char *type_str)
+{
+       bool found = false;
+       struct id_range *cur, *tmp;
+
+       list_for_each_entry_safe(cur, tmp, head, list) {
+               if (cur->start == start && cur->end == (start + size - 1)) {
+                       found = true;
+                       list_del(&cur->list);
+                       kfree(cur);
+                       break;
+               }
+       }
+       WARN_ON(!found);
+#ifdef DEBUG
+       dump_list(head, type_str);
+#endif
+}
+
+int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+       int max_pasid;
+
+       if (fn->config.max_pasid_log < 0)
+               return -ENOSPC;
+       max_pasid = 1 << fn->config.max_pasid_log;
+       return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
+}
+
+void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+       return range_free(&fn->pasid_list, start, size, "afu pasid");
+}
+
+int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+       int max_actag;
+
+       max_actag = fn->actag_enabled;
+       return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
+}
+
+void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+       return range_free(&fn->actag_list, start, size, "afu actag");
+}
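
range_alloc() above is a first-fit allocator over a list of ranges kept sorted
by construction. A worked example of the expected behaviour, assuming a freshly
initialised LIST_HEAD(head) and max_id = 15:

	range_alloc(&head, 4, 15, "demo");	/* returns 0; list: [0..3] */
	range_alloc(&head, 4, 15, "demo");	/* returns 4; list: [0..3] [4..7] */
	range_free(&head, 0, 4, "demo");	/* list: [4..7] */
	range_alloc(&head, 2, 15, "demo");	/* returns 0; reuses the hole: [0..1] [4..7] */
	range_alloc(&head, 16, 15, "demo");	/* returns -ENOSPC; would exceed max_id */
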
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
new file mode 100644 (file)
index 0000000..0051d9e
--- /dev/null
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+
+/*
+ * Any opencapi device which wants to use this 'generic' driver should
+ * use the 0x062B device ID. Vendors should define the subsystem
+ * vendor/device ID to help differentiate devices.
+ */
+static const struct pci_device_id ocxl_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
+       { }
+};
+MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
+
+
+static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
+{
+       return (get_device(&fn->dev) == NULL) ? NULL : fn;
+}
+
+static void ocxl_fn_put(struct ocxl_fn *fn)
+{
+       put_device(&fn->dev);
+}
+
+struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
+{
+       return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+void ocxl_afu_put(struct ocxl_afu *afu)
+{
+       put_device(&afu->dev);
+}
+
+static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
+{
+       struct ocxl_afu *afu;
+
+       afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
+       if (!afu)
+               return NULL;
+
+       mutex_init(&afu->contexts_lock);
+       mutex_init(&afu->afu_control_lock);
+       idr_init(&afu->contexts_idr);
+       afu->fn = fn;
+       ocxl_fn_get(fn);
+       return afu;
+}
+
+static void free_afu(struct ocxl_afu *afu)
+{
+       idr_destroy(&afu->contexts_idr);
+       ocxl_fn_put(afu->fn);
+       kfree(afu);
+}
+
+static void free_afu_dev(struct device *dev)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(dev);
+
+       ocxl_unregister_afu(afu);
+       free_afu(afu);
+}
+
+static int set_afu_device(struct ocxl_afu *afu, const char *location)
+{
+       struct ocxl_fn *fn = afu->fn;
+       int rc;
+
+       afu->dev.parent = &fn->dev;
+       afu->dev.release = free_afu_dev;
+       rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
+               afu->config.idx);
+       return rc;
+}
+
+static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+       struct ocxl_fn *fn = afu->fn;
+       int actag_count, actag_offset;
+
+       /*
+        * If the function was granted fewer actags than it supports,
+        * each AFU scales down its own count in the same proportion.
+        */
+       actag_count = afu->config.actag_supported *
+               fn->actag_enabled / fn->actag_supported;
+       actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
+       if (actag_offset < 0) {
+               dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+                       actag_count, actag_offset);
+               return actag_offset;
+       }
+       afu->actag_base = fn->actag_base + actag_offset;
+       afu->actag_enabled = actag_count;
+
+       ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
+                               afu->actag_base, afu->actag_enabled);
+       dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+               afu->actag_base, afu->actag_enabled);
+       return 0;
+}
+
+static void reclaim_afu_actag(struct ocxl_afu *afu)
+{
+       struct ocxl_fn *fn = afu->fn;
+       int start_offset, size;
+
+       start_offset = afu->actag_base - fn->actag_base;
+       size = afu->actag_enabled;
+       ocxl_actag_afu_free(afu->fn, start_offset, size);
+}
+
+static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+       struct ocxl_fn *fn = afu->fn;
+       int pasid_count, pasid_offset;
+
+       /*
+        * We only support the case where the function configuration
+        * requested enough PASIDs to cover all AFUs.
+        */
+       pasid_count = 1 << afu->config.pasid_supported_log;
+       pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
+       if (pasid_offset < 0) {
+               dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+                       pasid_count, pasid_offset);
+               return pasid_offset;
+       }
+       afu->pasid_base = fn->pasid_base + pasid_offset;
+       afu->pasid_count = 0;
+       afu->pasid_max = pasid_count;
+
+       ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
+                               afu->pasid_base,
+                               afu->config.pasid_supported_log);
+       dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
+               afu->pasid_base, pasid_count);
+       return 0;
+}
+
+static void reclaim_afu_pasid(struct ocxl_afu *afu)
+{
+       struct ocxl_fn *fn = afu->fn;
+       int start_offset, size;
+
+       start_offset = afu->pasid_base - fn->pasid_base;
+       size = 1 << afu->config.pasid_supported_log;
+       ocxl_pasid_afu_free(afu->fn, start_offset, size);
+}
+
+static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
+{
+       struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+       int rc, idx;
+
+       if (bar != 0 && bar != 2 && bar != 4)
+               return -EINVAL;
+
+       idx = bar >> 1;
+       if (fn->bar_used[idx]++ == 0) {
+               rc = pci_request_region(dev, bar, "ocxl");
+               if (rc)
+                       return rc;
+       }
+       return 0;
+}
+
+static void release_fn_bar(struct ocxl_fn *fn, int bar)
+{
+       struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+       int idx;
+
+       if (bar != 0 && bar != 2 && bar != 4)
+               return;
+
+       idx = bar >> 1;
+       if (--fn->bar_used[idx] == 0)
+               pci_release_region(dev, bar);
+       WARN_ON(fn->bar_used[idx] < 0);
+}
+
+static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+       int rc;
+
+       rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
+       if (rc)
+               return rc;
+
+       rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+       if (rc) {
+               release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+               return rc;
+       }
+
+       afu->global_mmio_start =
+               pci_resource_start(dev, afu->config.global_mmio_bar) +
+               afu->config.global_mmio_offset;
+       afu->pp_mmio_start =
+               pci_resource_start(dev, afu->config.pp_mmio_bar) +
+               afu->config.pp_mmio_offset;
+
+       afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
+                               afu->config.global_mmio_size);
+       if (!afu->global_mmio_ptr) {
+               release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+               release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+               dev_err(&dev->dev, "Error mapping global mmio area\n");
+               return -ENOMEM;
+       }
+
+       /*
+        * Leave an empty page between the per-process mmio area and
+        * the AFU interrupt mappings
+        */
+       afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
+       return 0;
+}
+
+static void unmap_mmio_areas(struct ocxl_afu *afu)
+{
+       if (afu->global_mmio_ptr) {
+               iounmap(afu->global_mmio_ptr);
+               afu->global_mmio_ptr = NULL;
+       }
+       afu->global_mmio_start = 0;
+       afu->pp_mmio_start = 0;
+       release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+       release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+}
+
+static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
+{
+       int rc;
+
+       rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
+       if (rc)
+               return rc;
+
+       rc = set_afu_device(afu, dev_name(&dev->dev));
+       if (rc)
+               return rc;
+
+       rc = assign_afu_actag(afu, dev);
+       if (rc)
+               return rc;
+
+       rc = assign_afu_pasid(afu, dev);
+       if (rc) {
+               reclaim_afu_actag(afu);
+               return rc;
+       }
+
+       rc = map_mmio_areas(afu, dev);
+       if (rc) {
+               reclaim_afu_pasid(afu);
+               reclaim_afu_actag(afu);
+               return rc;
+       }
+       return 0;
+}
+
+static void deconfigure_afu(struct ocxl_afu *afu)
+{
+       unmap_mmio_areas(afu);
+       reclaim_afu_pasid(afu);
+       reclaim_afu_actag(afu);
+}
+
+static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
+{
+       int rc;
+
+       ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
+       /*
+        * Char device creation is the last step, as processes can
+        * call our driver immediately, so all our inits must be finished.
+        */
+       rc = ocxl_create_cdev(afu);
+       if (rc)
+               return rc;
+       return 0;
+}
+
+static void deactivate_afu(struct ocxl_afu *afu)
+{
+       struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+       ocxl_destroy_cdev(afu);
+       ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
+}
+
+static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
+{
+       int rc;
+       struct ocxl_afu *afu;
+
+       afu = alloc_afu(fn);
+       if (!afu)
+               return -ENOMEM;
+
+       rc = configure_afu(afu, afu_idx, dev);
+       if (rc) {
+               free_afu(afu);
+               return rc;
+       }
+
+       rc = ocxl_register_afu(afu);
+       if (rc)
+               goto err;
+
+       rc = ocxl_sysfs_add_afu(afu);
+       if (rc)
+               goto err;
+
+       rc = activate_afu(dev, afu);
+       if (rc)
+               goto err_sys;
+
+       list_add_tail(&afu->list, &fn->afu_list);
+       return 0;
+
+err_sys:
+       ocxl_sysfs_remove_afu(afu);
+err:
+       deconfigure_afu(afu);
+       device_unregister(&afu->dev);
+       return rc;
+}
+
+static void remove_afu(struct ocxl_afu *afu)
+{
+       list_del(&afu->list);
+       ocxl_context_detach_all(afu);
+       deactivate_afu(afu);
+       ocxl_sysfs_remove_afu(afu);
+       deconfigure_afu(afu);
+       device_unregister(&afu->dev);
+}
+
+static struct ocxl_fn *alloc_function(struct pci_dev *dev)
+{
+       struct ocxl_fn *fn;
+
+       fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
+       if (!fn)
+               return NULL;
+
+       INIT_LIST_HEAD(&fn->afu_list);
+       INIT_LIST_HEAD(&fn->pasid_list);
+       INIT_LIST_HEAD(&fn->actag_list);
+       return fn;
+}
+
+static void free_function(struct ocxl_fn *fn)
+{
+       WARN_ON(!list_empty(&fn->afu_list));
+       WARN_ON(!list_empty(&fn->pasid_list));
+       kfree(fn);
+}
+
+static void free_function_dev(struct device *dev)
+{
+       struct ocxl_fn *fn = to_ocxl_function(dev);
+
+       free_function(fn);
+}
+
+static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+       int rc;
+
+       fn->dev.parent = &dev->dev;
+       fn->dev.release = free_function_dev;
+       rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
+       if (rc)
+               return rc;
+       pci_set_drvdata(dev, fn);
+       return 0;
+}
+
+static int assign_function_actag(struct ocxl_fn *fn)
+{
+       struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+       u16 base, enabled, supported;
+       int rc;
+
+       rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
+       if (rc)
+               return rc;
+
+       fn->actag_base = base;
+       fn->actag_enabled = enabled;
+       fn->actag_supported = supported;
+
+       ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
+                       fn->actag_base, fn->actag_enabled);
+       dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
+               fn->actag_base, fn->actag_enabled);
+       return 0;
+}
+
+static int set_function_pasid(struct ocxl_fn *fn)
+{
+       struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+       int rc, desired_count, max_count;
+
+       /* A function may not require any PASID */
+       if (fn->config.max_pasid_log < 0)
+               return 0;
+
+       rc = ocxl_config_get_pasid_info(dev, &max_count);
+       if (rc)
+               return rc;
+
+       desired_count = 1 << fn->config.max_pasid_log;
+
+       if (desired_count > max_count) {
+               dev_err(&fn->dev,
+                       "Function requires more PASIDs than is available (%d vs. %d)\n",
+                       desired_count, max_count);
+               return -ENOSPC;
+       }
+
+       fn->pasid_base = 0;
+       return 0;
+}
+
+static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+       int rc;
+
+       rc = pci_enable_device(dev);
+       if (rc) {
+               dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
+               return rc;
+       }
+
+       /*
+        * Once it has been confirmed to work on our hardware, we
+        * should reset the function, to force the adapter to restart
+        * from scratch.
+        * A function reset would also reset all its AFUs.
+        *
+        * Some hints for implementation:
+        *
+        * - there's no status bit to know when the reset is done. We
+        *   should try reading the config space to know when it's
+        *   done.
+        * - probably something like:
+        *      Reset
+        *      wait 100ms
+        *      issue config read
+        *      allow device up to 1 sec to return success on config
+        *      read before declaring it broken
+        *
+        * Some shared logic on the card (CFG, TLX) won't be reset, so
+        * there's no guarantee that it will be enough.
+        */
+       rc = ocxl_config_read_function(dev, &fn->config);
+       if (rc)
+               return rc;
+
+       rc = set_function_device(fn, dev);
+       if (rc)
+               return rc;
+
+       rc = assign_function_actag(fn);
+       if (rc)
+               return rc;
+
+       rc = set_function_pasid(fn);
+       if (rc)
+               return rc;
+
+       rc = ocxl_link_setup(dev, 0, &fn->link);
+       if (rc)
+               return rc;
+
+       rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
+       if (rc) {
+               ocxl_link_release(dev, fn->link);
+               return rc;
+       }
+       return 0;
+}
+
+static void deconfigure_function(struct ocxl_fn *fn)
+{
+       struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+
+       ocxl_link_release(dev, fn->link);
+       pci_disable_device(dev);
+}
+
+static struct ocxl_fn *init_function(struct pci_dev *dev)
+{
+       struct ocxl_fn *fn;
+       int rc;
+
+       fn = alloc_function(dev);
+       if (!fn)
+               return ERR_PTR(-ENOMEM);
+
+       rc = configure_function(fn, dev);
+       if (rc) {
+               free_function(fn);
+               return ERR_PTR(rc);
+       }
+
+       rc = device_register(&fn->dev);
+       if (rc) {
+               deconfigure_function(fn);
+               device_unregister(&fn->dev);
+               return ERR_PTR(rc);
+       }
+       return fn;
+}
+
+static void remove_function(struct ocxl_fn *fn)
+{
+       deconfigure_function(fn);
+       device_unregister(&fn->dev);
+}
+
+static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+       int rc, afu_count = 0;
+       u8 afu;
+       struct ocxl_fn *fn;
+
+       if (!radix_enabled()) {
+               dev_err(&dev->dev, "Unsupported memory model (hash)\n");
+               return -ENODEV;
+       }
+
+       fn = init_function(dev);
+       if (IS_ERR(fn)) {
+               dev_err(&dev->dev, "function init failed: %li\n",
+                       PTR_ERR(fn));
+               return PTR_ERR(fn);
+       }
+
+       for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
+               rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
+               if (rc > 0) {
+                       rc = init_afu(dev, fn, afu);
+                       if (rc) {
+                               dev_err(&dev->dev,
+                                       "Can't initialize AFU index %d\n", afu);
+                               continue;
+                       }
+                       afu_count++;
+               }
+       }
+       dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
+       return 0;
+}
+
+static void ocxl_remove(struct pci_dev *dev)
+{
+       struct ocxl_afu *afu, *tmp;
+       struct ocxl_fn *fn = pci_get_drvdata(dev);
+
+       list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
+               remove_afu(afu);
+       }
+       remove_function(fn);
+}
+
+struct pci_driver ocxl_pci_driver = {
+       .name = "ocxl",
+       .id_table = ocxl_pci_tbl,
+       .probe = ocxl_probe,
+       .remove = ocxl_remove,
+       .shutdown = ocxl_remove,
+};
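
How this structure gets hooked up is outside this hunk; a minimal
sketch of the usual wiring, with illustrative init/exit names (the
real module also sets up its char device and class elsewhere):

	static int __init init_ocxl(void)
	{
		/* hand the driver to the PCI core; probe() runs per device */
		return pci_register_driver(&ocxl_pci_driver);
	}

	static void __exit exit_ocxl(void)
	{
		pci_unregister_driver(&ocxl_pci_driver);
	}

	module_init(init_ocxl);
	module_exit(exit_ocxl);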
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
new file mode 100644 (file)
index 0000000..d9753a1
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sysfs.h>
+#include "ocxl_internal.h"
+
+static ssize_t global_mmio_size_show(struct device *device,
+                               struct device_attribute *attr,
+                               char *buf)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n",
+                       afu->config.global_mmio_size);
+}
+
+static ssize_t pp_mmio_size_show(struct device *device,
+                               struct device_attribute *attr,
+                               char *buf)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n",
+                       afu->config.pp_mmio_stride);
+}
+
+static ssize_t afu_version_show(struct device *device,
+                               struct device_attribute *attr,
+                               char *buf)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
+                       afu->config.version_major,
+                       afu->config.version_minor);
+}
+
+static ssize_t contexts_show(struct device *device,
+               struct device_attribute *attr,
+               char *buf)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
+                       afu->pasid_count, afu->pasid_max);
+}
+
+static struct device_attribute afu_attrs[] = {
+       __ATTR_RO(global_mmio_size),
+       __ATTR_RO(pp_mmio_size),
+       __ATTR_RO(afu_version),
+       __ATTR_RO(contexts),
+};
+
+static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
+                               struct bin_attribute *bin_attr, char *buf,
+                               loff_t off, size_t count)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+       if (count == 0 || off < 0 ||
+               off >= afu->config.global_mmio_size)
+               return 0;
+       memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
+       return count;
+}
+
+static int global_mmio_fault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       struct ocxl_afu *afu = vma->vm_private_data;
+       unsigned long offset;
+
+       if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
+               return VM_FAULT_SIGBUS;
+
+       offset = vmf->pgoff;
+       offset += (afu->global_mmio_start >> PAGE_SHIFT);
+       vm_insert_pfn(vma, vmf->address, offset);
+       return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct global_mmio_vmops = {
+       .fault = global_mmio_fault,
+};
+
+static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
+                       struct bin_attribute *bin_attr,
+                       struct vm_area_struct *vma)
+{
+       struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+       if ((vma_pages(vma) + vma->vm_pgoff) >
+               (afu->config.global_mmio_size >> PAGE_SHIFT))
+               return -EINVAL;
+
+       vma->vm_flags |= VM_IO | VM_PFNMAP;
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       vma->vm_ops = &global_mmio_vmops;
+       vma->vm_private_data = afu;
+       return 0;
+}
+
+int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
+{
+       int i, rc;
+
+       for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+               rc = device_create_file(&afu->dev, &afu_attrs[i]);
+               if (rc)
+                       goto err;
+       }
+
+       sysfs_attr_init(&afu->attr_global_mmio.attr);
+       afu->attr_global_mmio.attr.name = "global_mmio_area";
+       afu->attr_global_mmio.attr.mode = 0600;
+       afu->attr_global_mmio.size = afu->config.global_mmio_size;
+       afu->attr_global_mmio.read = global_mmio_read;
+       afu->attr_global_mmio.mmap = global_mmio_mmap;
+       rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
+       if (rc) {
+               dev_err(&afu->dev,
+                       "Unable to create global mmio attr for afu: %d\n",
+                       rc);
+               goto err;
+       }
+
+       return 0;
+
+err:
+       for (i--; i >= 0; i--)
+               device_remove_file(&afu->dev, &afu_attrs[i]);
+       return rc;
+}
+
+void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
+               device_remove_file(&afu->dev, &afu_attrs[i]);
+       device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
+}
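
These attributes surface under the AFU device in sysfs; a minimal
userspace sketch reading one of them (the device name below is
illustrative, the real one depends on the AFU name and PCI location):

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		FILE *f = fopen("/sys/class/ocxl/IBM,AFU.0004:00:00.1.0/global_mmio_size", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("global mmio size: %s", buf);
		fclose(f);
		return 0;
	}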
diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c
new file mode 100644 (file)
index 0000000..1e69470
--- /dev/null
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
new file mode 100644 (file)
index 0000000..bcb7ff3
--- /dev/null
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ocxl
+
+#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OCXL_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(ocxl_context,
+       TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+       TP_ARGS(pid, spa, pasid, pidr, tidr),
+
+       TP_STRUCT__entry(
+               __field(pid_t, pid)
+               __field(void*, spa)
+               __field(int, pasid)
+               __field(u32, pidr)
+               __field(u32, tidr)
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+               __entry->spa = spa;
+               __entry->pasid = pasid;
+               __entry->pidr = pidr;
+               __entry->tidr = tidr;
+       ),
+
+       TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x",
+               __entry->pid,
+               __entry->spa,
+               __entry->pasid,
+               __entry->pidr,
+               __entry->tidr
+       )
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_add,
+       TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+       TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_remove,
+       TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+       TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+TRACE_EVENT(ocxl_terminate_pasid,
+       TP_PROTO(int pasid, int rc),
+       TP_ARGS(pasid, rc),
+
+       TP_STRUCT__entry(
+               __field(int, pasid)
+               __field(int, rc)
+       ),
+
+       TP_fast_assign(
+               __entry->pasid = pasid;
+               __entry->rc = rc;
+       ),
+
+       TP_printk("pasid=0x%x rc=%d",
+               __entry->pasid,
+               __entry->rc
+       )
+);
+
+DECLARE_EVENT_CLASS(ocxl_fault_handler,
+       TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+       TP_ARGS(spa, pe, dsisr, dar, tfc),
+
+       TP_STRUCT__entry(
+               __field(void *, spa)
+               __field(u64, pe)
+               __field(u64, dsisr)
+               __field(u64, dar)
+               __field(u64, tfc)
+       ),
+
+       TP_fast_assign(
+               __entry->spa = spa;
+               __entry->pe = pe;
+               __entry->dsisr = dsisr;
+               __entry->dar = dar;
+               __entry->tfc = tfc;
+       ),
+
+       TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx",
+               __entry->spa,
+               __entry->pe,
+               __entry->dsisr,
+               __entry->dar,
+               __entry->tfc
+       )
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault,
+       TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+       TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
+       TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+       TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+TRACE_EVENT(ocxl_afu_irq_alloc,
+       TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq,
+               u64 irq_offset),
+       TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset),
+
+       TP_STRUCT__entry(
+               __field(int, pasid)
+               __field(int, irq_id)
+               __field(unsigned int, virq)
+               __field(int, hw_irq)
+               __field(u64, irq_offset)
+       ),
+
+       TP_fast_assign(
+               __entry->pasid = pasid;
+               __entry->irq_id = irq_id;
+               __entry->virq = virq;
+               __entry->hw_irq = hw_irq;
+               __entry->irq_offset = irq_offset;
+       ),
+
+       TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx",
+               __entry->pasid,
+               __entry->irq_id,
+               __entry->virq,
+               __entry->hw_irq,
+               __entry->irq_offset
+       )
+);
+
+TRACE_EVENT(ocxl_afu_irq_free,
+       TP_PROTO(int pasid, int irq_id),
+       TP_ARGS(pasid, irq_id),
+
+       TP_STRUCT__entry(
+               __field(int, pasid)
+               __field(int, irq_id)
+       ),
+
+       TP_fast_assign(
+               __entry->pasid = pasid;
+               __entry->irq_id = irq_id;
+       ),
+
+       TP_printk("pasid=0x%x irq_id=%d",
+               __entry->pasid,
+               __entry->irq_id
+       )
+);
+
+TRACE_EVENT(ocxl_afu_irq_receive,
+       TP_PROTO(int virq),
+       TP_ARGS(virq),
+
+       TP_STRUCT__entry(
+               __field(int, virq)
+       ),
+
+       TP_fast_assign(
+               __entry->virq = virq;
+       ),
+
+       TP_printk("virq=%d",
+               __entry->virq
+       )
+);
+
+#endif /* _TRACE_OCXL_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
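
Including this header with CREATE_TRACE_POINTS defined (see trace.c
above) generates the trace_ocxl_*() helpers. A short sketch of how the
driver would emit one of these events (the surrounding handler is
illustrative):

	#include <linux/interrupt.h>
	#include "trace.h"

	static irqreturn_t afu_irq_handler(int virq, void *data)
	{
		trace_ocxl_afu_irq_receive(virq);
		/* ... process the AFU interrupt ... */
		return IRQ_HANDLED;
	}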
index a3449d717a992e2f149475d38a393a2fea6578bd..fc01d7d807f3f4d9e2b9d04bd02e6f6305375a20 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <asm/rtas.h>
 #include <asm/vio.h>
+#include <linux/firmware.h>
 
 #include "../pci.h"
 #include "rpaphp.h"
@@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char *drc_name)
 {
        struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
        struct device_node *dn = NULL;
-       char *name;
        int rc;
 
        if (!parent)
                return NULL;
 
        while ((dn = of_get_next_child(parent, dn))) {
-               rc = rpaphp_get_drc_props(dn, NULL, &name, NULL, NULL);
-               if ((rc == 0) && (!strcmp(drc_name, name)))
+               rc = rpaphp_check_drc_props(dn, drc_name, NULL);
+               if (rc == 0)
                        break;
        }
 
@@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char *drc_name,
                                                  char *drc_type)
 {
        struct device_node *np = NULL;
-       char *name;
-       char *type;
        int rc;
 
        while ((np = of_find_node_by_name(np, "pci"))) {
-               rc = rpaphp_get_drc_props(np, NULL, &name, &type, NULL);
+               rc = rpaphp_check_drc_props(np, drc_name, drc_type);
                if (rc == 0)
-                       if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
-                               break;
+                       break;
        }
 
        return np;
index edb5d8a53020dada20936454a7cd14a5ea5098d5..b806314349cfeaa6e38a413d8a5c892a79da271d 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
+#include "rpaphp.h"
 #include "rpadlpar.h"
 #include "../pci.h"
 
@@ -27,8 +28,6 @@
 #define ADD_SLOT_ATTR_NAME    add_slot
 #define REMOVE_SLOT_ATTR_NAME remove_slot
 
-#define MAX_DRC_NAME_LEN 64
-
 static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
                              const char *buf, size_t nbytes)
 {
index 7db024e68fe69e6378f847116a206ef284b427c8..bdb844b01a3dd412623bce06c1614382b9bad672 100644 (file)
@@ -64,6 +64,10 @@ extern bool rpaphp_debug;
 #define        CONFIGURED      1
 #define        EMPTY           0
 
+/* DRC constants */
+
+#define MAX_DRC_NAME_LEN 64
+
 /*
  * struct slot - slot information for each *physical* slot
  */
@@ -91,8 +95,8 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state);
 
 /* rpaphp_core.c */
 int rpaphp_add_slot(struct device_node *dn);
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
-               char **drc_name, char **drc_type, int *drc_power_domain);
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+               char *drc_type);
 
 /* rpaphp_slot.c */
 void dealloc_slot_struct(struct slot *slot);
index 1e29abaaea083e3649f70605c602a9b2070291d1..53902c7c38f26a2b009287fc793902711b82c645 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/vmalloc.h>
+#include <asm/firmware.h>
 #include <asm/eeh.h>       /* for eeh_add_device() */
 #include <asm/rtas.h>          /* rtas_call */
 #include <asm/pci-bridge.h>    /* for pci_controller */
@@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, const int **drc_indexes,
        return 0;
 }
 
-/* To get the DRC props describing the current node, first obtain it's
- * my-drc-index property.  Next obtain the DRC list from it's parent.  Use
- * the my-drc-index for correlation, and obtain the requested properties.
+
+/* Verify the existence of 'drc_name' and/or 'drc_type' within the
+ * current node.  First obtain its my-drc-index property.  Next,
+ * obtain the DRC info from its parent.  Use the my-drc-index for
+ * correlation, and obtain/validate the requested properties.
  */
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
-               char **drc_name, char **drc_type, int *drc_power_domain)
+
+static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
+                               char *drc_type, unsigned int my_index)
 {
+       char *name_tmp, *type_tmp;
        const int *indexes, *names;
        const int *types, *domains;
-       const unsigned int *my_index;
-       char *name_tmp, *type_tmp;
        int i, rc;
 
-       my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
-       if (!my_index) {
-               /* Node isn't DLPAR/hotplug capable */
-               return -EINVAL;
-       }
-
        rc = get_children_props(dn->parent, &indexes, &names, &types, &domains);
        if (rc < 0) {
                return -EINVAL;
@@ -225,24 +222,84 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
 
        /* Iterate through parent properties, looking for my-drc-index */
        for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
-               if ((unsigned int) indexes[i + 1] == *my_index) {
-                       if (drc_name)
-                               *drc_name = name_tmp;
-                       if (drc_type)
-                               *drc_type = type_tmp;
-                       if (drc_index)
-                               *drc_index = be32_to_cpu(*my_index);
-                       if (drc_power_domain)
-                               *drc_power_domain = be32_to_cpu(domains[i+1]);
-                       return 0;
-               }
+               if ((unsigned int) indexes[i + 1] == my_index)
+                       break;
+
                name_tmp += (strlen(name_tmp) + 1);
                type_tmp += (strlen(type_tmp) + 1);
        }
 
+       /*
+        * Bail out if my-drc-index was not found: name_tmp and
+        * type_tmp would otherwise point past the end of the
+        * property data.
+        */
+       if (i == be32_to_cpu(indexes[0]))
+               return -EINVAL;
+
+       if ((!drc_name || !strcmp(drc_name, name_tmp)) &&
+           (!drc_type || !strcmp(drc_type, type_tmp)))
+               return 0;
+
        return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(rpaphp_get_drc_props);
+
+static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
+                               char *drc_type, unsigned int my_index)
+{
+       struct property *info;
+       unsigned int entries;
+       struct of_drc_info drc;
+       const __be32 *value;
+       char cell_drc_name[MAX_DRC_NAME_LEN];
+       int j, fndit = 0;
+
+       info = of_find_property(dn->parent, "ibm,drc-info", NULL);
+       if (info == NULL)
+               return -EINVAL;
+
+       value = of_prop_next_u32(info, NULL, &entries);
+       if (!value)
+               return -EINVAL;
+
+       for (j = 0; j < entries; j++) {
+               of_read_drc_info_cell(&info, &value, &drc);
+
+               /* of_read_drc_info_cell() has advanced 'value' past this entry */
+
+               if (my_index > drc.last_drc_index)
+                       continue;
+
+               fndit = 1;
+               break;
+       }
+
+       /* Without a match, cell_drc_name would be left uninitialized */
+       if (!fndit)
+               return -EINVAL;
+
+       sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix,
+               my_index);
+
+       if ((!drc_name || !strcmp(drc_name, cell_drc_name)) &&
+           (!drc_type || !strcmp(drc_type, drc.drc_type)))
+               return 0;
+
+       return -EINVAL;
+}
+
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+                       char *drc_type)
+{
+       const unsigned int *my_index;
+
+       my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+       if (!my_index) {
+               /* Node isn't DLPAR/hotplug capable */
+               return -EINVAL;
+       }
+
+       if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+               return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
+                                               *my_index);
+       else
+               return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
+                                               *my_index);
+}
+EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
+
 
 static int is_php_type(char *drc_type)
 {
index 6bacb8995e9641dfc0ab98d6a5e9f032bbd7f98e..0b7b5da63d4e21a086e9211fd6f678728e13d994 100644 (file)
@@ -635,6 +635,17 @@ void pci_restore_iov_state(struct pci_dev *dev)
                sriov_restore_state(dev);
 }
 
+/**
+ * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
+ * @dev: the PCI device
+ * @auto_probe: whether drivers should be auto-probed for newly enabled VFs
+ */
+void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
+{
+       if (dev->is_physfn)
+               dev->sriov->drivers_autoprobe = auto_probe;
+}
+
 /**
  * pci_iov_bus_range - find bus range used by Virtual Function
  * @bus: the PCI bus
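
A PF driver that wants to bind VF drivers by hand can clear the flag
before enabling SR-IOV; a minimal sketch (illustrative helper, error
handling elided):

	static int pf_enable_vfs(struct pci_dev *pdev, int num_vfs)
	{
		/* keep the kernel from auto-probing drivers for the new VFs */
		pci_vf_drivers_autoprobe(pdev, false);
		return pci_enable_sriov(pdev, num_vfs);
	}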
index 744805232155ccb15c7020ca3fdcab08bd50af7c..8d7448063fd1168aaafff9e1c1a37456b12ddefe 100644 (file)
@@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data)
        } else {
                err_handler = dev->driver->err_handler;
                vote = err_handler->error_detected(dev, result_data->state);
+               pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
        }
 
        result_data->result = merge_result(result_data->result, vote);
@@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data)
 
        err_handler = dev->driver->err_handler;
        err_handler->resume(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 out:
        device_unlock(&dev->dev);
        return 0;
@@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity)
        return;
 
 failed:
+       pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
        /* TODO: Should kernel panic here? */
        dev_info(&dev->dev, "AER: Device recovery failed\n");
 }
index 437fc35beb7b706e130f115cfc433b5138002c43..e293606b03344d3ee0cc74b4fcee58a3669e714a 100644 (file)
@@ -44,7 +44,6 @@ static struct ps3av {
        struct mutex mutex;
        struct work_struct work;
        struct completion done;
-       struct workqueue_struct *wq;
        int open_count;
        struct ps3_system_bus_device *dev;
 
@@ -485,7 +484,7 @@ static int ps3av_set_videomode(void)
        ps3av_set_av_video_mute(PS3AV_CMD_MUTE_ON);
 
        /* wake up ps3avd to do the actual video mode setting */
-       queue_work(ps3av->wq, &ps3av->work);
+       schedule_work(&ps3av->work);
 
        return 0;
 }
@@ -956,11 +955,6 @@ static int ps3av_probe(struct ps3_system_bus_device *dev)
        INIT_WORK(&ps3av->work, ps3avd);
        init_completion(&ps3av->done);
        complete(&ps3av->done);
-       ps3av->wq = create_singlethread_workqueue("ps3avd");
-       if (!ps3av->wq) {
-               res = -ENOMEM;
-               goto fail;
-       }
 
        switch (ps3_os_area_get_av_multi_out()) {
        case PS3_PARAM_AV_MULTI_OUT_NTSC:
@@ -1018,8 +1012,7 @@ static int ps3av_remove(struct ps3_system_bus_device *dev)
        dev_dbg(&dev->core, " -> %s:%d\n", __func__, __LINE__);
        if (ps3av) {
                ps3av_cmd_fin();
-               if (ps3av->wq)
-                       destroy_workqueue(ps3av->wq);
+               flush_work(&ps3av->work);
                kfree(ps3av);
                ps3av = NULL;
        }
index e2a946c0e667e150e1029728134c6d20baf4d076..304e891e35fcb060a8ff26f78122fa3c63a1eb87 100644 (file)
@@ -58,6 +58,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 {
        long rc = OPAL_BUSY;
+       int retries = 10;
        u32 y_m_d;
        u64 h_m_s_ms;
        __be32 __y_m_d;
@@ -67,8 +68,11 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
                rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
-               else
+               else if (retries-- && (rc == OPAL_HARDWARE
+                                      || rc == OPAL_INTERNAL_ERROR))
                        msleep(10);
+               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+                       break;
        }
 
        if (rc != OPAL_SUCCESS)
@@ -84,6 +88,7 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
 {
        long rc = OPAL_BUSY;
+       int retries = 10;
        u32 y_m_d = 0;
        u64 h_m_s_ms = 0;
 
@@ -92,8 +97,11 @@ static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
                rc = opal_rtc_write(y_m_d, h_m_s_ms);
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
-               else
+               else if (retries-- && (rc == OPAL_HARDWARE
+                                      || rc == OPAL_INTERNAL_ERROR))
                        msleep(10);
+               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+                       break;
        }
 
        return rc == OPAL_SUCCESS ? 0 : -EIO;
index 0314e0716c3099ac0c06ccb3cef44b3d2ac2514c..ad35aac87971169bc69fca755fdfcff4dc289273 100644 (file)
@@ -1965,6 +1965,7 @@ int pci_vfs_assigned(struct pci_dev *dev);
 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
 int pci_sriov_get_totalvfs(struct pci_dev *dev);
 resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
+void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
 #else
 static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
 {
@@ -1992,6 +1993,7 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
 { return 0; }
 static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
 { return 0; }
+static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
 #endif
 
 #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
@@ -2279,6 +2281,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
        return false;
 }
 
+/**
+ * pci_uevent_ers - emit a uevent during the recovery path of a PCI device
+ * @pdev: pci device to check
+ * @err_type: type of error event
+ *
+ */
+static inline void pci_uevent_ers(struct pci_dev *pdev,
+                                 enum  pci_ers_result err_type)
+{
+       int idx = 0;
+       char *envp[3];
+
+       switch (err_type) {
+       case PCI_ERS_RESULT_NONE:
+       case PCI_ERS_RESULT_CAN_RECOVER:
+               envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=0";
+               break;
+       case PCI_ERS_RESULT_RECOVERED:
+               envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=1";
+               break;
+       case PCI_ERS_RESULT_DISCONNECT:
+               envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=0";
+               break;
+       default:
+               break;
+       }
+
+       if (idx > 0) {
+               envp[idx++] = NULL;
+               kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp);
+       }
+}
+
 /* provide the legacy pci_dma_* API */
 #include <linux/pci-dma-compat.h>
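
Error-recovery paths call this helper at each state transition, as the
AER changes above do; a minimal sketch of the expected call pattern
('recovered' is an illustrative flag):

	pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);      /* recovery begins */

	/* ... attempt to recover the device ... */

	if (recovered)
		pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
	else
		pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);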
 
diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h
new file mode 100644 (file)
index 0000000..3526fa9
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_CONFIG_H_
+#define _OCXL_CONFIG_H_
+
+/*
+ * This file lists the various constants used to read the
+ * configuration space of an opencapi adapter.
+ *
+ * It follows the specification for opencapi 3.0
+ */
+
+#define OCXL_EXT_CAP_ID_DVSEC                 0x23
+
+#define OCXL_DVSEC_VENDOR_OFFSET              0x4
+#define OCXL_DVSEC_ID_OFFSET                  0x8
+#define OCXL_DVSEC_TL_ID                      0xF000
+#define   OCXL_DVSEC_TL_BACKOFF_TIMERS          0x10
+#define   OCXL_DVSEC_TL_RECV_CAP                0x18
+#define   OCXL_DVSEC_TL_SEND_CAP                0x20
+#define   OCXL_DVSEC_TL_RECV_RATE               0x30
+#define   OCXL_DVSEC_TL_SEND_RATE               0x50
+#define OCXL_DVSEC_FUNC_ID                    0xF001
+#define   OCXL_DVSEC_FUNC_OFF_INDEX             0x08
+#define   OCXL_DVSEC_FUNC_OFF_ACTAG             0x0C
+#define OCXL_DVSEC_AFU_INFO_ID                0xF003
+#define   OCXL_DVSEC_AFU_INFO_AFU_IDX           0x0A
+#define   OCXL_DVSEC_AFU_INFO_OFF               0x0C
+#define   OCXL_DVSEC_AFU_INFO_DATA              0x10
+#define OCXL_DVSEC_AFU_CTRL_ID                0xF004
+#define   OCXL_DVSEC_AFU_CTRL_AFU_IDX           0x0A
+#define   OCXL_DVSEC_AFU_CTRL_TERM_PASID        0x0C
+#define   OCXL_DVSEC_AFU_CTRL_ENABLE            0x0F
+#define   OCXL_DVSEC_AFU_CTRL_PASID_SUP         0x10
+#define   OCXL_DVSEC_AFU_CTRL_PASID_EN          0x11
+#define   OCXL_DVSEC_AFU_CTRL_PASID_BASE        0x14
+#define   OCXL_DVSEC_AFU_CTRL_ACTAG_SUP         0x18
+#define   OCXL_DVSEC_AFU_CTRL_ACTAG_EN          0x1A
+#define   OCXL_DVSEC_AFU_CTRL_ACTAG_BASE        0x1C
+#define OCXL_DVSEC_VENDOR_ID                  0xF0F0
+#define   OCXL_DVSEC_VENDOR_CFG_VERS            0x0C
+#define   OCXL_DVSEC_VENDOR_TLX_VERS            0x10
+#define   OCXL_DVSEC_VENDOR_DLX_VERS            0x20
+
+#endif /* _OCXL_CONFIG_H_ */
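
A sketch of how these constants are typically used to locate an
opencapi DVSEC via the standard extended-capability walk (assuming the
IBM vendor ID; error handling elided):

	static int find_dvsec(struct pci_dev *dev, int dvsec_id)
	{
		int pos = 0;
		u16 vendor, id;

		while ((pos = pci_find_next_ext_capability(dev, pos,
						OCXL_EXT_CAP_ID_DVSEC))) {
			pci_read_config_word(dev, pos + OCXL_DVSEC_VENDOR_OFFSET,
					&vendor);
			pci_read_config_word(dev, pos + OCXL_DVSEC_ID_OFFSET, &id);
			if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
				return pos;
		}
		return 0;
	}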
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
new file mode 100644 (file)
index 0000000..51ccf76
--- /dev/null
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _MISC_OCXL_H_
+#define _MISC_OCXL_H_
+
+#include <linux/pci.h>
+
+/*
+ * Opencapi drivers all need some common facilities, like parsing the
+ * device configuration space, adding a Process Element to the Shared
+ * Process Area, etc...
+ *
+ * The ocxl module provides a kernel API, to allow other drivers to
+ * reuse common code. A bit like an in-kernel library.
+ */
+
+#define OCXL_AFU_NAME_SZ      (24+1)  /* add 1 for NULL termination */
+
+/*
+ * The following 2 structures are a fairly generic way of representing
+ * the configuration data for a function and AFU, as read from the
+ * configuration space.
+ */
+struct ocxl_afu_config {
+       u8 idx;
+       int dvsec_afu_control_pos; /* offset of AFU control DVSEC */
+       char name[OCXL_AFU_NAME_SZ];
+       u8 version_major;
+       u8 version_minor;
+       u8 afuc_type;
+       u8 afum_type;
+       u8 profile;
+       u8 global_mmio_bar;     /* global MMIO area */
+       u64 global_mmio_offset;
+       u32 global_mmio_size;
+       u8 pp_mmio_bar;         /* per-process MMIO area */
+       u64 pp_mmio_offset;
+       u32 pp_mmio_stride;
+       u8 log_mem_size;
+       u8 pasid_supported_log;
+       u16 actag_supported;
+};
+
+struct ocxl_fn_config {
+       int dvsec_tl_pos;       /* offset of the Transaction Layer DVSEC */
+       int dvsec_function_pos; /* offset of the Function DVSEC */
+       int dvsec_afu_info_pos; /* offset of the AFU information DVSEC */
+       s8 max_pasid_log;
+       s8 max_afu_index;
+};
+
+/*
+ * Read the configuration space of a function and fill in a
+ * ocxl_fn_config structure with all the function details
+ */
+extern int ocxl_config_read_function(struct pci_dev *dev,
+                               struct ocxl_fn_config *fn);
+
+/*
+ * Check if an AFU index is valid for the given function.
+ *
+ * AFU indexes can be sparse, so a driver should check all indexes up
+ * to the maximum found in the function description
+ */
+extern int ocxl_config_check_afu_index(struct pci_dev *dev,
+                               struct ocxl_fn_config *fn, int afu_idx);
+
+/*
+ * Read the configuration space of a function for the AFU specified by
+ * the index 'afu_idx'. Fills in a ocxl_afu_config structure
+ */
+extern int ocxl_config_read_afu(struct pci_dev *dev,
+                               struct ocxl_fn_config *fn,
+                               struct ocxl_afu_config *afu,
+                               u8 afu_idx);
+
+/*
+ * Get the max PASID value that can be used by the function
+ */
+extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
+
+/*
+ * Tell an AFU, by writing in the configuration space, the PASIDs that
+ * it can use. Range starts at 'pasid_base' and its size is a power
+ * of 2
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC which
+ * can be found in the function configuration
+ */
+extern void ocxl_config_set_afu_pasid(struct pci_dev *dev,
+                               int afu_control_offset,
+                               int pasid_base, u32 pasid_count_log);
+
+/*
+ * Get the actag configuration for the function:
+ * 'base' is the first actag value that can be used.
+ * 'enabled' is the number of actags available, starting from base.
+ * 'supported' is the total number of actags desired by all the AFUs
+ *             of the function.
+ */
+extern int ocxl_config_get_actag_info(struct pci_dev *dev,
+                               u16 *base, u16 *enabled, u16 *supported);
+
+/*
+ * Tell a function, by writing in the configuration space, the actags
+ * it can use.
+ *
+ * 'func_offset' is the offset of the Function DVSEC that can found in
+ * the function configuration
+ */
+extern void ocxl_config_set_actag(struct pci_dev *dev, int func_offset,
+                               u32 actag_base, u32 actag_count);
+
+/*
+ * Tell an AFU, by writing in the configuration space, the actags it
+ * can use.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern void ocxl_config_set_afu_actag(struct pci_dev *dev,
+                               int afu_control_offset,
+                               int actag_base, int actag_count);
+
+/*
+ * Enable/disable an AFU, by writing in the configuration space.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern void ocxl_config_set_afu_state(struct pci_dev *dev,
+                               int afu_control_offset, int enable);
+
+/*
+ * Set the Transaction Layer configuration in the configuration space.
+ * Only needed for function 0.
+ *
+ * It queries the host TL capabilities, finds some common ground
+ * between the host and device, and sets the Transaction Layer on both
+ * accordingly.
+ */
+extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
+
+/*
+ * Request an AFU to terminate a PASID.
+ * Returns once the AFU has acked the request, or with an error in
+ * case of timeout.
+ *
+ * The hardware can only terminate one PASID at a time, so caller must
+ * guarantee some kind of serialization.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern int ocxl_config_terminate_pasid(struct pci_dev *dev,
+                               int afu_control_offset, int pasid);
+
+/*
+ * Set up the opencapi link for the function.
+ *
+ * When called for the first time for a link, it sets up the Shared
+ * Process Area for the link and the interrupt handler to process
+ * translation faults.
+ *
+ * Returns a 'link handle' that should be used for further calls for
+ * the link
+ */
+extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask,
+                       void **link_handle);
+
+/*
+ * Remove the association between the function and its link.
+ */
+extern void ocxl_link_release(struct pci_dev *dev, void *link_handle);
+
+/*
+ * Add a Process Element to the Shared Process Area for a link.
+ * The process is defined by its PASID, pid, tid and its mm_struct.
+ *
+ * 'xsl_err_cb' is an optional callback if the driver wants to be
+ * notified when the translation fault interrupt handler detects an
+ * address error.
+ * 'xsl_err_data' is an argument passed to the above callback, if
+ * defined
+ */
+extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+               u64 amr, struct mm_struct *mm,
+               void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+               void *xsl_err_data);
+
+/*
+ * Remove a Process Element from the Shared Process Area for a link
+ */
+extern int ocxl_link_remove_pe(void *link_handle, int pasid);
+
+/*
+ * Allocate an AFU interrupt associated to the link.
+ *
+ * 'hw_irq' is the hardware interrupt number
+ * 'obj_handle' is the 64-bit object handle to be passed to the AFU to
+ * trigger the interrupt.
+ * On P9, 'obj_handle' is an address, which, if written, triggers the
+ * interrupt. It is an MMIO address which needs to be remapped (one
+ * page).
+ */
+extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
+                       u64 *obj_handle);
+
+/*
+ * Free a previously allocated AFU interrupt
+ */
+extern void ocxl_link_free_irq(void *link_handle, int hw_irq);
+
+#endif /* _MISC_OCXL_H_ */
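
A condensed sketch of how a client driver might consume this API at
probe time, mirroring the flow in drivers/misc/ocxl/pci.c (return-code
checks elided for brevity):

	struct ocxl_fn_config fn_config;
	struct ocxl_afu_config afu_config;
	void *link;
	int i;

	ocxl_config_read_function(dev, &fn_config);
	ocxl_link_setup(dev, 0, &link);
	for (i = 0; i <= fn_config.max_afu_index; i++) {
		if (ocxl_config_check_afu_index(dev, &fn_config, i) > 0)
			ocxl_config_read_afu(dev, &fn_config, &afu_config, i);
	}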
index bb6836986200d61d4c01fc24304966b54bbd9f94..3bf73fb58045da56562c5c790f80aed860827672 100644 (file)
@@ -396,6 +396,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_TM_CTAR 0x10d           /* TM checkpointed Target Address Register */
 #define NT_PPC_TM_CPPR 0x10e           /* TM checkpointed Program Priority Register */
 #define NT_PPC_TM_CDSCR        0x10f           /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY    0x110           /* Memory Protection Keys registers */
 #define NT_386_TLS     0x200           /* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM  0x201           /* x86 io permission bitmap (1=deny) */
 #define NT_X86_XSTATE  0x202           /* x86 extended state using xsave */
index 49e8fd08855a68886109890bf7fa7506fd318e5e..56376d3907d83c3bf3c79bc73448dfd16d2d83f0 100644 (file)
@@ -20,20 +20,22 @@ struct cxl_ioctl_start_work {
        __u64 work_element_descriptor;
        __u64 amr;
        __s16 num_interrupts;
-       __s16 reserved1;
-       __s32 reserved2;
+       __u16 tid;
+       __s32 reserved1;
+       __u64 reserved2;
        __u64 reserved3;
        __u64 reserved4;
        __u64 reserved5;
-       __u64 reserved6;
 };
 
 #define CXL_START_WORK_AMR             0x0000000000000001ULL
 #define CXL_START_WORK_NUM_IRQS                0x0000000000000002ULL
 #define CXL_START_WORK_ERR_FF          0x0000000000000004ULL
+#define CXL_START_WORK_TID             0x0000000000000008ULL
 #define CXL_START_WORK_ALL             (CXL_START_WORK_AMR |\
                                         CXL_START_WORK_NUM_IRQS |\
-                                        CXL_START_WORK_ERR_FF)
+                                        CXL_START_WORK_ERR_FF |\
+                                        CXL_START_WORK_TID)
 
 
 /* Possible modes that an afu can be in */
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
new file mode 100644 (file)
index 0000000..4b0b0b7
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2017 IBM Corp. */
+#ifndef _UAPI_MISC_OCXL_H
+#define _UAPI_MISC_OCXL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+enum ocxl_event_type {
+       OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0,
+};
+
+#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001  /* This is the last event pending */
+
+struct ocxl_kernel_event_header {
+       __u16 type;
+       __u16 flags;
+       __u32 reserved;
+};
+
+struct ocxl_kernel_event_xsl_fault_error {
+       __u64 addr;
+       __u64 dsisr;
+       __u64 count;
+       __u64 reserved;
+};
+
+struct ocxl_ioctl_attach {
+       __u64 amr;
+       __u64 reserved1;
+       __u64 reserved2;
+       __u64 reserved3;
+};
+
+struct ocxl_ioctl_irq_fd {
+       __u64 irq_offset;
+       __s32 eventfd;
+       __u32 reserved;
+};
+
+/* ioctl numbers */
+#define OCXL_MAGIC 0xCA
+/* AFU devices */
+#define OCXL_IOCTL_ATTACH      _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)
+#define OCXL_IOCTL_IRQ_ALLOC   _IOR(OCXL_MAGIC, 0x11, __u64)
+#define OCXL_IOCTL_IRQ_FREE    _IOW(OCXL_MAGIC, 0x12, __u64)
+#define OCXL_IOCTL_IRQ_SET_FD  _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
+
+#endif /* _UAPI_MISC_OCXL_H */
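
A minimal userspace sketch using this interface (the device path is
illustrative; actual names are derived from the AFU name and PCI
location):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/ocxl.h>

	int attach_context(void)
	{
		struct ocxl_ioctl_attach arg;
		int fd = open("/dev/ocxl/IBM,AFU.0004:00:00.1.0", O_RDWR);

		if (fd < 0)
			return -1;
		memset(&arg, 0, sizeof(arg));	/* reserved fields must be 0 */
		return ioctl(fd, OCXL_IOCTL_ATTACH, &arg);
	}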
index 16b22004e75f4526e3b527f410fa78a9ce4bcc44..083a48a008b4290882396313dd98d269eba9f550 100644 (file)
@@ -1,4 +1,5 @@
-TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
+TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \
+       paste_last_unaligned alignment_handler
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644 (file)
index 0000000..39fd362
--- /dev/null
@@ -0,0 +1,491 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ *   - Any FIXMEs below
+ *   - Test VSX regs < 32 and > 32
+ *   - Test all loads and stores
+ *   - Check update forms do update register
+ *   - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+       ucontext_t *ucp = ctx;
+
+       if (!testing) {
+               signal(sig, SIG_DFL);
+               kill(0, sig);
+       }
+       gotsig = sig;
+#ifdef __powerpc64__
+       ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+       ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n)  " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n)  " " #reg " ,0(%"#n") ;"
+
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg)         \
+       void test_##name(char *s, char *d)                      \
+       {                                                       \
+               asm volatile(                                   \
+                       #ld_op form(ld_reg, 0)                  \
+                       #st_op form(st_reg, 1)                  \
+                       :: "r"(s), "r"(d), "r"(0)               \
+                       : "memory", "vs0", "vs32", "r31");      \
+       }                                                       \
+       rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op)  TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+       char *c = dst;
+       int i;
+
+       c += offset;
+
+       for (i = 0 ; i < width ; i++)
+               c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+               void (*test_func)(char *, char *))
+{
+       char *s, *d;
+
+       s = src;
+       s += offset;
+       d = dst;
+       d += offset;
+
+       assert(size == 16);
+       gotsig = 0;
+       testing = 1;
+
+       test_func(s, d); /* run the actual test */
+
+       testing = 0;
+       if (gotsig) {
+               if (debug)
+                       printf("  Got signal %i\n", gotsig);
+               return 1;
+       }
+       return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+       int i;
+
+       printf("  %s: unexpected result:\n", test_name);
+       printf("    mem:");
+       for (i = 0; i < n; i++)
+               printf(" %02x", s1[i]);
+       printf("\n");
+       printf("    ci: ");
+       for (i = 0; i < n; i++)
+               printf(" %02x", s2[i]);
+       printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+       char *s1c, *s2c;
+
+       s1c = s1;
+       s1c += offset;
+       s2c = s2;
+       s2c += offset;
+
+       if (memcmp(s1c, s2c, n)) {
+               if (debug) {
+                       printf("\n  Compare failed. Offset:%i length:%i\n",
+                              offset, n);
+                       dumpdata(s1c, s2c, n, test_name);
+               }
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions: one over cacheable
+ * memory, the other over cache-inhibited memory.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+       int offset, width, fd, rc = 0, r;
+       void *mem0, *mem1, *ci0, *ci1;
+
+       printf("\tDoing %s:\t", test_name);
+
+       fd = open("/dev/fb0", O_RDWR);
+       if (fd < 0) {
+               printf("\n");
+               perror("Can't open /dev/fb0");
+               SKIP_IF(1);
+       }
+
+       ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+                  fd, 0x0);
+       ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+                  fd, bufsize);
+       if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+               printf("\n");
+               perror("mmap failed");
+               SKIP_IF(1);
+       }
+
+       rc = posix_memalign(&mem0, bufsize, bufsize);
+       if (rc) {
+               printf("\n");
+               return rc;
+       }
+
+       rc = posix_memalign(&mem1, bufsize, bufsize);
+       if (rc) {
+               printf("\n");
+               free(mem0);
+               return rc;
+       }
+
+       /* offset = 0 generates no alignment fault, so skip it */
+       for (offset = 1; offset < 16; offset++) {
+               width = 16; /* vsx == 16 bytes */
+               r = 0;
+
+               /* load pattern into memory byte by byte */
+               preload_data(ci0, offset, width);
+               preload_data(mem0, offset, width); // FIXME: remove??
+               memcpy(ci0, mem0, bufsize);
+               memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+               /* sanity check */
+               test_memcmp(mem0, ci0, width, offset, test_name);
+
+               r |= test_memcpy(ci1,  ci0,  width, offset, test_func);
+               r |= test_memcpy(mem1, mem0, width, offset, test_func);
+               if (r && !debug) {
+                       printf("FAILED: Got signal");
+                       break;
+               }
+
+               r |= test_memcmp(mem1, ci1, width, offset, test_name);
+               rc |= r;
+               if (r && !debug) {
+                       printf("FAILED: Wrong Data");
+                       break;
+               }
+       }
+       if (!r)
+               printf("PASSED");
+       printf("\n");
+
+       munmap(ci0, bufsize);
+       munmap(ci1, bufsize);
+       free(mem0);
+       free(mem1);
+
+       return rc;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+       int rc = 0;
+
+       printf("VSX: 2.06B\n");
+       LOAD_VSX_XFORM_TEST(lxvd2x);
+       LOAD_VSX_XFORM_TEST(lxvw4x);
+       LOAD_VSX_XFORM_TEST(lxsdx);
+       LOAD_VSX_XFORM_TEST(lxvdsx);
+       STORE_VSX_XFORM_TEST(stxvd2x);
+       STORE_VSX_XFORM_TEST(stxvw4x);
+       STORE_VSX_XFORM_TEST(stxsdx);
+       return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+       int rc = 0;
+
+       printf("VSX: 2.07B\n");
+       LOAD_VSX_XFORM_TEST(lxsspx);
+       LOAD_VSX_XFORM_TEST(lxsiwax);
+       LOAD_VSX_XFORM_TEST(lxsiwzx);
+       STORE_VSX_XFORM_TEST(stxsspx);
+       STORE_VSX_XFORM_TEST(stxsiwx);
+       return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+       int rc = 0;
+
+       SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+       printf("VSX: 3.00B\n");
+       LOAD_VMX_DFORM_TEST(lxsd);
+       LOAD_VSX_XFORM_TEST(lxsibzx);
+       LOAD_VSX_XFORM_TEST(lxsihzx);
+       LOAD_VMX_DFORM_TEST(lxssp);
+       LOAD_VSX_DFORM_TEST(lxv);
+       LOAD_VSX_XFORM_TEST(lxvb16x);
+       LOAD_VSX_XFORM_TEST(lxvh8x);
+       LOAD_VSX_XFORM_TEST(lxvx);
+       LOAD_VSX_XFORM_TEST(lxvwsx);
+       LOAD_VSX_XFORM_TEST(lxvl);
+       LOAD_VSX_XFORM_TEST(lxvll);
+       STORE_VMX_DFORM_TEST(stxsd);
+       STORE_VSX_XFORM_TEST(stxsibx);
+       STORE_VSX_XFORM_TEST(stxsihx);
+       STORE_VMX_DFORM_TEST(stxssp);
+       STORE_VSX_DFORM_TEST(stxv);
+       STORE_VSX_XFORM_TEST(stxvb16x);
+       STORE_VSX_XFORM_TEST(stxvh8x);
+       STORE_VSX_XFORM_TEST(stxvx);
+       STORE_VSX_XFORM_TEST(stxvl);
+       STORE_VSX_XFORM_TEST(stxvll);
+       return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+       int rc = 0;
+
+       printf("Integer\n");
+       LOAD_DFORM_TEST(lbz);
+       LOAD_DFORM_TEST(lbzu);
+       LOAD_XFORM_TEST(lbzx);
+       LOAD_XFORM_TEST(lbzux);
+       LOAD_DFORM_TEST(lhz);
+       LOAD_DFORM_TEST(lhzu);
+       LOAD_XFORM_TEST(lhzx);
+       LOAD_XFORM_TEST(lhzux);
+       LOAD_DFORM_TEST(lha);
+       LOAD_DFORM_TEST(lhau);
+       LOAD_XFORM_TEST(lhax);
+       LOAD_XFORM_TEST(lhaux);
+       LOAD_XFORM_TEST(lhbrx);
+       LOAD_DFORM_TEST(lwz);
+       LOAD_DFORM_TEST(lwzu);
+       LOAD_XFORM_TEST(lwzx);
+       LOAD_XFORM_TEST(lwzux);
+       LOAD_DFORM_TEST(lwa);
+       LOAD_XFORM_TEST(lwax);
+       LOAD_XFORM_TEST(lwaux);
+       LOAD_XFORM_TEST(lwbrx);
+       LOAD_DFORM_TEST(ld);
+       LOAD_DFORM_TEST(ldu);
+       LOAD_XFORM_TEST(ldx);
+       LOAD_XFORM_TEST(ldux);
+       LOAD_XFORM_TEST(ldbrx);
+       LOAD_DFORM_TEST(lmw);
+       STORE_DFORM_TEST(stb);
+       STORE_XFORM_TEST(stbx);
+       STORE_DFORM_TEST(stbu);
+       STORE_XFORM_TEST(stbux);
+       STORE_DFORM_TEST(sth);
+       STORE_XFORM_TEST(sthx);
+       STORE_DFORM_TEST(sthu);
+       STORE_XFORM_TEST(sthux);
+       STORE_XFORM_TEST(sthbrx);
+       STORE_DFORM_TEST(stw);
+       STORE_XFORM_TEST(stwx);
+       STORE_DFORM_TEST(stwu);
+       STORE_XFORM_TEST(stwux);
+       STORE_XFORM_TEST(stwbrx);
+       STORE_DFORM_TEST(std);
+       STORE_XFORM_TEST(stdx);
+       STORE_DFORM_TEST(stdu);
+       STORE_XFORM_TEST(stdux);
+       STORE_XFORM_TEST(stdbrx);
+       STORE_DFORM_TEST(stmw);
+       return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+       int rc = 0;
+
+       printf("VMX\n");
+       LOAD_VMX_XFORM_TEST(lvx);
+
+       /*
+        * FIXME: These loads only load part of the register, so our
+        * testing method doesn't work. Also they don't take alignment
+        * faults, so it's kinda pointless anyway
+        *
+        LOAD_VMX_XFORM_TEST(lvebx)
+        LOAD_VMX_XFORM_TEST(lvehx)
+        LOAD_VMX_XFORM_TEST(lvewx)
+        LOAD_VMX_XFORM_TEST(lvxl)
+       */
+       STORE_VMX_XFORM_TEST(stvx);
+       STORE_VMX_XFORM_TEST(stvebx);
+       STORE_VMX_XFORM_TEST(stvehx);
+       STORE_VMX_XFORM_TEST(stvewx);
+       STORE_VMX_XFORM_TEST(stvxl);
+       return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+       int rc = 0;
+
+       printf("Floating point\n");
+       LOAD_FLOAT_DFORM_TEST(lfd);
+       LOAD_FLOAT_XFORM_TEST(lfdx);
+       LOAD_FLOAT_DFORM_TEST(lfdp);
+       LOAD_FLOAT_XFORM_TEST(lfdpx);
+       LOAD_FLOAT_DFORM_TEST(lfdu);
+       LOAD_FLOAT_XFORM_TEST(lfdux);
+       LOAD_FLOAT_DFORM_TEST(lfs);
+       LOAD_FLOAT_XFORM_TEST(lfsx);
+       LOAD_FLOAT_DFORM_TEST(lfsu);
+       LOAD_FLOAT_XFORM_TEST(lfsux);
+       LOAD_FLOAT_XFORM_TEST(lfiwzx);
+       LOAD_FLOAT_XFORM_TEST(lfiwax);
+       STORE_FLOAT_DFORM_TEST(stfd);
+       STORE_FLOAT_XFORM_TEST(stfdx);
+       STORE_FLOAT_DFORM_TEST(stfdp);
+       STORE_FLOAT_XFORM_TEST(stfdpx);
+       STORE_FLOAT_DFORM_TEST(stfdu);
+       STORE_FLOAT_XFORM_TEST(stfdux);
+       STORE_FLOAT_DFORM_TEST(stfs);
+       STORE_FLOAT_XFORM_TEST(stfsx);
+       STORE_FLOAT_DFORM_TEST(stfsu);
+       STORE_FLOAT_XFORM_TEST(stfsux);
+       STORE_FLOAT_XFORM_TEST(stfiwx);
+
+       return rc;
+}
+
+void usage(char *prog)
+{
+       printf("Usage: %s [options]\n", prog);
+       printf("  -d    Enable debug error output\n");
+       printf("\n");
+       printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+       printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+       struct sigaction sa;
+       int rc = 0;
+       int option = 0;
+
+       while ((option = getopt(argc, argv, "d")) != -1) {
+               switch (option) {
+               case 'd':
+                       debug++;
+                       break;
+               default:
+                       usage(argv[0]);
+                       exit(1);
+               }
+       }
+
+       bufsize = getpagesize();
+
+       sa.sa_sigaction = sighandler;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_SIGINFO;
+       if (sigaction(SIGSEGV, &sa, NULL) == -1
+           || sigaction(SIGBUS, &sa, NULL) == -1
+           || sigaction(SIGILL, &sa, NULL) == -1) {
+               perror("sigaction");
+               exit(1);
+       }
+
+       rc |= test_harness(test_alignment_handler_vsx_206,
+                          "test_alignment_handler_vsx_206");
+       rc |= test_harness(test_alignment_handler_vsx_207,
+                          "test_alignment_handler_vsx_207");
+       rc |= test_harness(test_alignment_handler_vsx_300,
+                          "test_alignment_handler_vsx_300");
+       rc |= test_harness(test_alignment_handler_integer,
+                          "test_alignment_handler_integer");
+       rc |= test_harness(test_alignment_handler_vmx,
+                          "test_alignment_handler_vmx");
+       rc |= test_harness(test_alignment_handler_fp,
+                          "test_alignment_handler_fp");
+       return rc;
+}
index 8d084a2d6e7489c20347f327de2816920f4a25c1..7a0a462a22726361587567e724abf7306de6c90f 100644 (file)
@@ -7,17 +7,34 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <time.h>
+#include <getopt.h>
 
 #include "utils.h"
 
 #define ITERATIONS 5000000
 
-#define MEMSIZE (128 * 1024 * 1024)
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
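+/* 128MB in 64K pages: --pgfault touches CHUNK_COUNT (2048) pages per iteration */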
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+       { "pgfault", no_argument, &pg_fault, 1 },
+       { "iterations", required_argument, 0, 'i' },
+       { 0, },
+};
+
+static void usage(void)
+{
+       printf("mmap_bench <--pgfault> <--iterations count>\n");
+}
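+
+/* Example: ./mmap_bench --pgfault --iterations 100000 */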
 
 int test_mmap(void)
 {
        struct timespec ts_start, ts_end;
-       unsigned long i = ITERATIONS;
+       unsigned long i = iterations;
 
        clock_gettime(CLOCK_MONOTONIC, &ts_start);
 
@@ -25,6 +42,11 @@ int test_mmap(void)
                char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
                               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                FAIL_IF(c == MAP_FAILED);
+               if (pg_fault) {
+                       int count;
+                       for (count = 0; count < CHUNK_COUNT; count++)
+                               c[count << 16] = 'c';
+               }
                munmap(c, MEMSIZE);
        }
 
@@ -35,7 +57,32 @@ int test_mmap(void)
        return 0;
 }
 
-int main(void)
+int main(int argc, char *argv[])
 {
+       signed char c;
+       while (1) {
+               int option_index = 0;
+
+               c = getopt_long(argc, argv, "", options, &option_index);
+
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 0:
+                       if (options[option_index].flag != 0)
+                               break;
+
+                       usage();
+                       exit(1);
+                       break;
+               case 'i':
+                       iterations = atoi(optarg);
+                       break;
+               default:
+                       usage();
+                       exit(1);
+               }
+       }
        return test_harness(test_mmap, "mmap_bench");
 }
index e715a3f2fbf487e631b5fbe7c312f53d25d89e70..7d7c42ed6de904dc695a8762379da3b458a05a16 100644 (file)
@@ -1,4 +1,5 @@
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
-prot_sao
\ No newline at end of file
+prot_sao
+segv_errors
\ No newline at end of file
index bf315bcbe66393dbf32d58c48e79ff2338bcbe5c..8ebbe96d80a8452575fb234b6eba23a19e2c3a4e 100644 (file)
@@ -2,7 +2,7 @@
 noarg:
        $(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
 TEST_GEN_FILES := tempfile
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644 (file)
index 0000000..06ae76e
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+       ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+       struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+       faulted = true;
+       si_code = info->si_code;
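+       /*
+        * All powerpc instructions are 4 bytes: step the NIP past the
+        * faulting access so execution can continue after the handler.
+        */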
+       regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+       struct sigaction act = {
+               .sa_sigaction = segv_handler,
+               .sa_flags = SA_SIGINFO,
+       };
+       char c, *p = NULL;
+
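+       /*
+        * prot == 0 (PROT_NONE): the page is mapped but inaccessible, so any
+        * access should fault with si_code == SEGV_ACCERR (permission error)
+        * rather than SEGV_MAPERR (unmapped address).
+        */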
+       p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+       FAIL_IF(p == MAP_FAILED);
+
+       FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+       faulted = false;
+       si_code = 0;
+
+       /*
+        * We just need a compiler barrier, but mb() works and has the nice
+        * property of being easy to spot in the disassembly.
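+        * (In these selftests mb() presumably expands to a "sync" plus a
+        * "memory" clobber, i.e. a full barrier.)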
+        */
+       mb();
+       c = *p;
+       mb();
+
+       FAIL_IF(!faulted);
+       FAIL_IF(si_code != SEGV_ACCERR);
+
+       faulted = false;
+       si_code = 0;
+
+       mb();
+       *p = c;
+       mb();
+
+       FAIL_IF(!faulted);
+       FAIL_IF(si_code != SEGV_ACCERR);
+
+       return 0;
+}
+
+int main(void)
+{
+       return test_harness(test_segv_errors, "segv_errors");
+}
index 0df3c23b788828734bf04d44ee3898e50cf0c87c..277dade1b382f011c2915c6c2393da7948c74196 100644 (file)
@@ -79,8 +79,8 @@ trans:
                : [res] "=r" (result), [texasr] "=r" (texasr)
                : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
                [sprn_texasr] "i"  (SPRN_TEXASR)
-               : "memory", "r0", "r1", "r2", "r3", "r4",
-               "r8", "r9", "r10", "r11"
+               : "memory", "r0", "r1", "r3", "r4",
+               "r7", "r8", "r9", "r10", "r11"
                );
 
        if (result) {
index 94e57cb897698a4612aade2c395d866a485242a0..51427a2465f69ebb7cf806edc95fc5976e89af66 100644 (file)
@@ -76,8 +76,7 @@ trans:
                : [tfhar] "=r" (tfhar), [res] "=r" (result),
                [texasr] "=r" (texasr), [cptr1] "=r" (cptr1)
                : [sprn_texasr] "i"  (SPRN_TEXASR)
-               : "memory", "r0", "r1", "r2", "r3", "r4",
-               "r8", "r9", "r10", "r11", "r31"
+               : "memory", "r0", "r8", "r31"
                );
 
        /* There are 2 32bit instructions before tbegin. */
index b4081e2b22d5021dcaa442a6612604157c95b130..17c23cabac3ea3317ccd87df548146a1242ac0a2 100644 (file)
@@ -67,7 +67,7 @@ trans:
                : [res] "=r" (result), [texasr] "=r" (texasr)
                : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
                [sprn_texasr] "i"  (SPRN_TEXASR), [cptr1] "r" (&cptr[1])
-               : "memory", "r0", "r1", "r2", "r3", "r4",
+               : "memory", "r0", "r1", "r3", "r4",
                "r7", "r8", "r9", "r10", "r11"
                );
 
index 241a4a4ee0e4f43dc447eb98887e07b9c2868e07..bb90d4b79524eff3c50d9037814749a44cc8061c 100644 (file)
@@ -13,3 +13,4 @@ tm-signal-context-chk-vmx
 tm-signal-context-chk-vsx
 tm-vmx-unavail
 tm-unavailable
+tm-trap
index 8ed6f8c5723075d2dbc56e8fa058186fcc319ece..a23453943ad2b95538571015c066eaa2c2deb711 100644 (file)
@@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
        tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
-       tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \
+       tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
        $(SIGNAL_CONTEXT_CHK_TESTS)
 
 include ../../lib.mk
@@ -18,6 +18,7 @@ $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
 $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644 (file)
index 0000000..5d92c23
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * treated as an illegal instruction in the following sequence:
+ *     tbegin.
+ *     beq 1f
+ *     trap
+ *     tend.
+ * 1:  nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check on a BE machine, because there the MSR.LE bit
+ * is zero, i.e. BE, which is the native endianness, so nothing notably
+ * critical happens: no illegal instruction is observed immediately
+ * after returning from the signal handler (as happens on LE machines).
+ * Thus, to test it on BE machines, LE endianness is forced after a
+ * first trap, and the endianness is then verified on subsequent traps
+ * to determine whether it "flipped back" to the native endianness
+ * (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+       error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
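+/* MSR.LE is the least-significant bit of the MSR; 1 selects little-endian */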
+#define MSR_LE 1UL
+#define LE     1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+       ucontext_t *ucp = uc;
+       uint64_t thread_endianness;
+
+       /* Get thread endianness: extract bit LE from MSR */
+       thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+       /***
+        * Little-Endian Machine
+        */
+
+       if (le) {
+               /* First trap event */
+               if (trap_event == 0) {
+                       /*
+                        * Do nothing. It is on return from this trap event
+                        * that the bug flips the endianness, so just let the
+                        * process return from the signal handler and check on
+                        * the second trap event whether the endianness
+                        * flipped or not.
+                        */
+               }
+               /* Second trap event */
+               else if (trap_event == 1) {
+                       /*
+                        * The first trap was caught in TM. If the endianness
+                        * was still LE (not flipped inadvertently) after
+                        * returning from the signal handler, instruction (1)
+                        * is executed (basically a 'nop'), as it's located at
+                        * the address of tbegin. +4 (the rollback address).
+                        * Since (1) does effectively nothing in LE mode,
+                        * instruction (2) is then executed again as 'trap',
+                        * generating a second trap event (note that in this
+                        * case 'trap' is caught outside transactional mode).
+                        * On the other hand, if the endianness inadvertently
+                        * flipped after the return from the signal handler,
+                        * instruction (1) is treated as a branch, i.e. b .+8,
+                        * so instructions (3) and (4) are executed (tbegin.;
+                        * trap;) and we land in the trap signal handler
+                        * similarly, but now in TM mode. Either way, it's now
+                        * possible to check the MSR LE bit once in the trap
+                        * handler to verify whether the endianness flipped
+                        * after the return from the second trap event. If it
+                        * flipped, the bug is present. Finally, whether the
+                        * trap is taken in TM mode or not is worth noting
+                        * because it affects the offset added to the NIP on
+                        * return: the NIP for a trap caught in TM is the
+                        * rollback address, i.e. the next instruction after
+                        * 'tbegin.', whilst the NIP for a trap caught in
+                        * non-transactional mode is the address of the 'trap'
+                        * instruction itself.
+                        */
+
+                       if (thread_endianness == LE) {
+                               /* Go to 'success', i.e. instruction (6) */
+                               ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+                       } else {
+                               /*
+                                * Thread endianness is BE, so it flipped
+                                * inadvertently. Thus we flip back to LE and
+                                * set NIP to go to 'failure', instruction (5).
+                                */
+                               ucp->uc_mcontext.gp_regs[PT_MSR] |= MSR_LE;
+                               ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+                       }
+               }
+       }
+
+       /***
+        * Big-Endian Machine
+        */
+
+       else {
+               /* First trap event */
+               if (trap_event == 0) {
+                       /*
+                        * Force thread endianness to be LE. Instructions (1),
+                        * (3), and (4) will be executed, generating a second
+                        * trap in TM mode.
+                        */
+                       ucp->uc_mcontext.gp_regs[PT_MSR] |= MSR_LE;
+               }
+               /* Second trap event */
+               else if (trap_event == 1) {
+                       /*
+                        * Do nothing. If the bug is present, the endianness
+                        * will flip back "automatically" to BE on return from
+                        * this second trap event; otherwise the thread
+                        * endianness will stay LE, just as set above.
+                        */
+               }
+               /* A third trap event */
+               else {
+                       /*
+                        * Getting here means that after returning from the
+                        * second trap event, instruction (4) (trap) was
+                        * executed as LE, generating a third trap event. In
+                        * that case the endianness is still LE, as set on
+                        * return from the first trap event, hence no bug.
+                        * Otherwise, the endianness flipped back to BE on
+                        * return from the second trap event (the bug),
+                        * instruction (4) was executed as 'tdi' (basically a
+                        * 'nop'), the branch to 'failure' in instruction (5)
+                        * was taken to indicate failure, and we never get
+                        * here.
+                        */
+
+                       /*
+                        * Flip back to BE and go to instruction (6), i.e. go to
+                        * 'success'.
+                        */
+                       ucp->uc_mcontext.gp_regs[PT_MSR] &= ~MSR_LE;
+                       ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+               }
+       }
+
+       trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+       /* Got a USR1 signal from ping(), so just tell pong() to exit */
+       exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+       uint64_t i;
+
+       trap_event = 0;
+
+       /*
+        * Wait for a number of context switches so that load_fp and load_vec
+        * overflow and MSR[FP|VEC|VSX] is 0.
+        */
+       for (i = 0; i < 1024*1024*512; i++)
+               ;
+
+       asm goto(
+               /*
+                * [NA] means "Native Endianness", i.e. it tells how an
+                * instruction is executed in the machine's native endianness
+                * (in other words, the native endianness matches the kernel
+                * endianness). [OP] means "Opposite Endianness", i.e. on a BE
+                * machine it tells how an instruction is executed as a LE
+                * instruction; conversely, on a LE machine, it tells how an
+                * instruction is executed as a BE instruction. When [NA] is
+                * omitted, the native interpretation of a given instruction
+                * is not relevant for the test. Likewise when [OP] is omitted.
+                */
+
+               " tbegin.        ;" /* (0) tbegin. [NA]                    */
+               " tdi  0, 0, 0x48;" /* (1) nop     [NA]; b (3) [OP]        */
+               " trap           ;" /* (2) trap    [NA]                    */
+               ".long 0x1D05007C;" /* (3) tbegin. [OP]                    */
+               ".long 0x0800E07F;" /* (4) trap    [OP]; nop   [NA]        */
+               " b %l[failure]  ;" /* (5) b [NA]; MSR.LE flipped (bug)    */
+               " b %l[success]  ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+               : : : : failure, success);
+
+failure:
+       success = false;
+       goto exit_from_ping;
+
+success:
+       success = true;
+
+exit_from_ping:
+       /* Tell pong() to exit before leaving */
+       pthread_kill(t1_pong, SIGUSR1);
+       return NULL;
+}
+
+void *pong(void *not_used)
+{
+       while (!exit_from_pong)
+               /*
+                * Induce context switches on ping() thread
+                * until ping() finishes its job and signals
+                * this loop to exit.
+                */
+               sched_yield();
+
+       return NULL;
+}
+
+int tm_trap_test(void)
+{
+       uint16_t k = 1;
+
+       int rc;
+
+       pthread_attr_t attr;
+       cpu_set_t cpuset;
+
+       struct sigaction trap_sa;
+
+       trap_sa.sa_flags = SA_SIGINFO;
+       trap_sa.sa_sigaction = trap_signal_handler;
+       sigaction(SIGTRAP, &trap_sa, NULL);
+
+       struct sigaction usr1_sa;
+
+       usr1_sa.sa_flags = SA_SIGINFO;
+       usr1_sa.sa_sigaction = usr1_signal_handler;
+       sigaction(SIGUSR1, &usr1_sa, NULL);
+
+       /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+       CPU_ZERO(&cpuset);
+       CPU_SET(0, &cpuset);
+
+       /* Init pthread attribute */
+       rc = pthread_attr_init(&attr);
+       if (rc)
+               pr_error(rc, "pthread_attr_init()");
+
+       /*
+        * Bind both ping() and pong() to CPU 0 so they ping-pong, forcing
+        * frequent context switches on the ping() thread and speeding up the
+        * load_fp and load_vec overflow.
+        */
+       rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+       if (rc)
+               pr_error(rc, "pthread_attr_setaffinity_np()");
+
+       /* Figure out the machine endianness */
+       le = (int) *(uint8_t *)&k;
+
+       printf("%s machine detected. Checking if endianness flips %s",
+               le ? "Little-Endian" : "Big-Endian",
+               "inadvertently on trap in TM... ");
+
+       rc = fflush(0);
+       if (rc)
+               pr_error(rc, "fflush()");
+
+       /* Launch ping() */
+       rc = pthread_create(&t0_ping, &attr, ping, NULL);
+       if (rc)
+               pr_error(rc, "pthread_create()");
+
+       exit_from_pong = 0;
+
+       /* Launch pong() */
+       rc = pthread_create(&t1_pong, &attr, pong, NULL);
+       if (rc)
+               pr_error(rc, "pthread_create()");
+
+       rc = pthread_join(t0_ping, NULL);
+       if (rc)
+               pr_error(rc, "pthread_join()");
+
+       rc = pthread_join(t1_pong, NULL);
+       if (rc)
+               pr_error(rc, "pthread_join()");
+
+       if (success) {
+               printf("no.\n"); /* no, endianness did not flip inadvertently */
+               return EXIT_SUCCESS;
+       }
+
+       printf("yes!\n"); /* yes, endianness did flip inadvertently */
+       return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+       return test_harness(tm_trap_test, "tm_trap_test");
+}
index 96c37f84ce5436bae2b6b509099759bdaa5b8323..e6a0fad2bfd019beb398e9c024c4ccc584f8e4d7 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #define _GNU_SOURCE
+#include <error.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #define VSX_UNA_EXCEPTION      2
 
 #define NUM_EXCEPTIONS         3
+#define err_at_line(status, errnum, format, ...) \
+       error_at_line(status, errnum, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
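+/* error_at_line() exits when status != 0: pr_err() is fatal, pr_warn() is not */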
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
 
 struct Flags {
        int touch_fp;
@@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
         * checking if the failure cause is the one we expect.
         */
        do {
+               int rc;
+
                /* Bind 'ping' to CPU 0, as specified in 'attr'. */
-               pthread_create(&t0, attr, ping, (void *) &flags);
-               pthread_setname_np(t0, "ping");
-               pthread_join(t0, &ret_value);
+               rc = pthread_create(&t0, attr, ping, (void *) &flags);
+               if (rc)
+                       pr_err(rc, "pthread_create()");
+               rc = pthread_setname_np(t0, "ping");
+               if (rc)
+                       pr_warn(rc, "pthread_setname_np()");
+               rc = pthread_join(t0, &ret_value);
+               if (rc)
+                       pr_err(rc, "pthread_join()");
+
                retries--;
        } while (ret_value != NULL && retries);
 
@@ -320,7 +335,7 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 
 int main(int argc, char **argv)
 {
-       int exception; /* FP = 0, VEC = 1, VSX = 2 */
+       int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
        pthread_t t1;
        pthread_attr_t attr;
        cpu_set_t cpuset;
@@ -330,13 +345,23 @@ int main(int argc, char **argv)
        CPU_SET(0, &cpuset);
 
        /* Init pthread attribute. */
-       pthread_attr_init(&attr);
+       rc = pthread_attr_init(&attr);
+       if (rc)
+               pr_err(rc, "pthread_attr_init()");
 
        /* Set CPU 0 mask into the pthread attribute. */
-       pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
-
-       pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
-       pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */
+       rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+       if (rc)
+               pr_err(rc, "pthread_attr_setaffinity_np()");
+
+       rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+       if (rc)
+               pr_err(rc, "pthread_create()");
+
+       /* Name it for systemtap convenience */
+       rc = pthread_setname_np(t1, "pong");
+       if (rc)
+               pr_warn(rc, "pthread_setname_np()");
 
        flags.result = 0;