0 = don't trust, the image may be different (default)
1 = trust that the image will not change.
Users: https://github.com/ibm-capi/libcxl
+
+What: /sys/class/cxl/<card>/psl_timebase_synced
+Date: March 2016
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Returns 1 if the psl timebase register is synchronized
+ with the core timebase register, 0 otherwise.
+Users: https://github.com/ibm-capi/libcxl
</itemizedlist>
</para>
<para>
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
+ The function returns true when the fixup was successful,
+ otherwise false. The return value is used to update the
statistics.
</para>
<para>
</itemizedlist>
</para>
<para>
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
+ The function returns true when the fixup was successful,
+ otherwise false. The return value is used to update the
statistics.
</para>
<para>
statically initialized object or not. In case it is it calls
debug_object_init() and debug_object_activate() to make the
object known to the tracker and marked active. In this case
- the function should return 0 because this is not a real fixup.
+ the function should return false because this is not a real
+ fixup.
</para>
</sect1>
</itemizedlist>
</para>
<para>
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
+ The function returns true when the fixup was successful,
+ otherwise false. The return value is used to update the
statistics.
</para>
</sect1>
</itemizedlist>
</para>
<para>
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
+ The function returns true when the fixup was successful,
+ otherwise false. The return value is used to update the
statistics.
</para>
</sect1>
debug bucket.
</para>
<para>
- The function returns 1 when the fixup was successful,
- otherwise 0. The return value is used to update the
+ The function returns true when the fixup was successful,
+ otherwise false. The return value is used to update the
statistics.
</para>
<para>
case. The fixup function should check if this is a legitimate
case of a statically initialized object or not. In this case only
debug_object_init() should be called to make the object known to
- the tracker. Then the function should return 0 because this is not
+ the tracker. Then the function should return false because this
+ is not
a real fixup.
</para>
</sect1>
clocks = <&clk32k>;
};
+SHDWC SAMA5D2-Compatible Shutdown Controller
+
+1) shdwc node
+
+required properties:
+- compatible: should be "atmel,sama5d2-shdwc".
+- reg: should contain registers location and length
+- clocks: phandle to input clock.
+- #address-cells: should be one. The cell is the wake-up input index.
+- #size-cells: should be zero.
+
+optional properties:
+
+- debounce-delay-us: minimum wake-up inputs debouncer period in
+ microseconds. It's usually a board-related property.
+- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+
+The node contains child nodes for each wake-up input that the platform uses.
+
+2) input nodes
+
+Wake-up input nodes are usually described in the "board" part of the Device
+Tree. Note also that input 0 is linked to the wake-up pin and is frequently
+used.
+
+Required properties:
+- reg: should contain the wake-up input index [0 - 15].
+
+Optional properties:
+- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
+ by the child, forces the wake-up of the core power supply on a high level.
+ The default is to be active low.
+
+Example:
+
+On the SoC side:
+ shdwc@f8048010 {
+ compatible = "atmel,sama5d2-shdwc";
+ reg = <0xf8048010 0x10>;
+ clocks = <&clk32k>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ atmel,wakeup-rtc-timer;
+ };
+
+On the board side:
+ shdwc@f8048010 {
+ debounce-delay-us = <976>;
+
+ input@0 {
+ reg = <0>;
+ };
+
+ input@1 {
+ reg = <1>;
+ atmel,wakeup-active-high;
+ };
+ };
+
Special Function Registers (SFR)
Special Function Registers (SFR) manage specific aspects of the integrated
"arm,cci-400-pmu,r0"
"arm,cci-400-pmu,r1"
"arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
- secure acces to CCI registers
+ secure access to CCI registers
"arm,cci-500-pmu,r0"
"arm,cci-550-pmu,r0"
- reg:
- prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable),
<1> (forcibly enable), property absent (retain settings set by
firmware)
+- arm,dynamic-clock-gating : L2 dynamic clock gating. Value: <0> (forcibly
+ disable), <1> (forcibly enable), property absent (OS specific behavior,
+ preferrably retain firmware settings)
+- arm,standby-mode: L2 standby mode enable. Value <0> (forcibly disable),
+ <1> (forcibly enable), property absent (OS specific behavior,
+ preferrably retain firmware settings)
Example:
Consumer:
========
See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt and
-Documentation/devicetree/bindings/arm/gic.txt for further details.
+Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt for
+further details.
An interrupt consumer on an SoC using crossbar will use:
interrupts = <GIC_SPI request_number interrupt_level>
misc node required properties:
- compatible Should be "st,spear1340-misc", "syscon".
-- reg: Address range of misc space upto 8K
+- reg: Address range of misc space up to 8K
see binding for arm/scu.txt
interrupt-controller:
- see binding for arm/gic.txt
+ see binding for interrupt-controller/arm,gic.txt
timer:
see binding for arm/twd.txt
--- /dev/null
+Tegra124 SoC SATA AHCI controller
+
+Required properties :
+- compatible : For Tegra124, must contain "nvidia,tegra124-ahci". Otherwise,
+ must contain '"nvidia,<chip>-ahci", "nvidia,tegra124-ahci"', where <chip>
+ is tegra132.
+- reg : Should contain 2 entries:
+ - AHCI register set (SATA BAR5)
+ - SATA register set
+- interrupts : Defines the interrupt used by SATA
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - sata
+ - sata-oob
+ - cml1
+ - pll_e
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - sata
+ - sata-oob
+ - sata-cold
+- phys : Must contain an entry for each entry in phy-names.
+ See ../phy/phy-bindings.txt for details.
+- phy-names : Must include the following entries:
+ - sata-phy : XUSB PADCTL SATA PHY
+- hvdd-supply : Defines the SATA HVDD regulator
+- vddio-supply : Defines the SATA VDDIO regulator
+- avdd-supply : Defines the SATA AVDD regulator
+- target-5v-supply : Defines the SATA 5V power regulator
+- target-12v-supply : Defines the SATA 12V power regulator
+++ /dev/null
-Tegra124 SoC SATA AHCI controller
-
-Required properties :
-- compatible : For Tegra124, must contain "nvidia,tegra124-ahci". Otherwise,
- must contain '"nvidia,<chip>-ahci", "nvidia,tegra124-ahci"', where <chip>
- is tegra132.
-- reg : Should contain 2 entries:
- - AHCI register set (SATA BAR5)
- - SATA register set
-- interrupts : Defines the interrupt used by SATA
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- - sata
- - sata-oob
- - cml1
- - pll_e
-- resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
- - sata
- - sata-oob
- - sata-cold
-- phys : Must contain an entry for each entry in phy-names.
- See ../phy/phy-bindings.txt for details.
-- phy-names : Must include the following entries:
- - sata-phy : XUSB PADCTL SATA PHY
-- hvdd-supply : Defines the SATA HVDD regulator
-- vddio-supply : Defines the SATA VDDIO regulator
-- avdd-supply : Defines the SATA AVDD regulator
-- target-5v-supply : Defines the SATA 5V power regulator
-- target-12v-supply : Defines the SATA 12V power regulator
Example:
-clock@0,70110000 {
+clock@70110000 {
compatible = "nvidia,tegra124-dfll";
reg = <0 0x70110000 0 0x100>, /* DFLL control */
<0 0x70110000 0 0x100>, /* I2C output control */
Optional Properties:
- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changable, due to the missing pll lock status.
+ If missing pll rates are not changeable, due to the missing pll lock status.
Each clock is assigned an identifier and client nodes can use this identifier
to specify the clock which they consume. All available clocks are defined as
Optional Properties:
- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changable, due to the missing pll lock status.
+ If missing pll rates are not changeable, due to the missing pll lock status.
Each clock is assigned an identifier and client nodes can use this identifier
to specify the clock which they consume. All available clocks are defined as
};
This binding uses the common clock binding[1].
-Each subnode should use the binding discribe in [2]..[7]
+Each subnode should use the binding described in [2]..[7]
[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
[2] Documentation/devicetree/bindings/clock/st,clkgen-divmux.txt
--- /dev/null
+Tegra124 CPU frequency scaling driver bindings
+----------------------------------------------
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@0.
+
+Required properties:
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - cpu_g: Clock mux for the fast CPU cluster.
+ - cpu_lp: Clock mux for the low-power CPU cluster.
+ - pll_x: Fast PLL clocksource.
+ - pll_p: Auxiliary PLL used during fast PLL rate changes.
+ - dfll: Fast DFLL clocksource that also automatically scales CPU voltage.
+- vdd-cpu-supply: Regulator for CPU voltage
+
+Optional properties:
+- clock-latency: Specify the possible maximum transition latency for clock,
+ in unit of nanoseconds.
+
+Example:
+--------
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+
+ clocks = <&tegra_car TEGRA124_CLK_CCLK_G>,
+ <&tegra_car TEGRA124_CLK_CCLK_LP>,
+ <&tegra_car TEGRA124_CLK_PLL_X>,
+ <&tegra_car TEGRA124_CLK_PLL_P>,
+ <&dfll>;
+ clock-names = "cpu_g", "cpu_lp", "pll_x", "pll_p", "dfll";
+ clock-latency = <300000>;
+ vdd-cpu-supply: <&vdd_cpu>;
+ };
+
+ <...>
+};
+++ /dev/null
-Tegra124 CPU frequency scaling driver bindings
-----------------------------------------------
-
-Both required and optional properties listed below must be defined
-under node /cpus/cpu@0.
-
-Required properties:
-- clocks: Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
- - cpu_g: Clock mux for the fast CPU cluster.
- - cpu_lp: Clock mux for the low-power CPU cluster.
- - pll_x: Fast PLL clocksource.
- - pll_p: Auxiliary PLL used during fast PLL rate changes.
- - dfll: Fast DFLL clocksource that also automatically scales CPU voltage.
-- vdd-cpu-supply: Regulator for CPU voltage
-
-Optional properties:
-- clock-latency: Specify the possible maximum transition latency for clock,
- in unit of nanoseconds.
-
-Example:
---------
-cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0>;
-
- clocks = <&tegra_car TEGRA124_CLK_CCLK_G>,
- <&tegra_car TEGRA124_CLK_CCLK_LP>,
- <&tegra_car TEGRA124_CLK_PLL_X>,
- <&tegra_car TEGRA124_CLK_PLL_P>,
- <&dfll>;
- clock-names = "cpu_g", "cpu_lp", "pll_x", "pll_p", "dfll";
- clock-latency = <300000>;
- vdd-cpu-supply: <&vdd_cpu>;
- };
-
- <...>
-};
endpoint node connected from mic node (reg = 0):
- remote-endpoint: specifies the endpoint in mic node. This node is required
for Exynos5433 mipi dsi. So mic can access to panel node
- thoughout this dsi node.
+ throughout this dsi node.
endpoint node connected to panel node (reg = 1):
- remote-endpoint: specifies the endpoint in panel node. This node is
required in all kinds of exynos mipi dsi to represent
--- /dev/null
+* NVIDIA Tegra APB DMA controller
+
+Required properties:
+- compatible: Should be "nvidia,<chip>-apbdma"
+- reg: Should contain DMA registers location and length. This shuld include
+ all of the per-channel registers.
+- interrupts: Should contain all of the per-channel DMA interrupts.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - dma
+- #dma-cells : Must be <1>. This dictates the length of DMA specifiers in
+ client nodes' dmas properties. The specifier represents the DMA request
+ select value for the peripheral. For more details, consult the Tegra TRM's
+ documentation of the APB DMA channel control register REQ_SEL field.
+
+Examples:
+
+apbdma: dma@6000a000 {
+ compatible = "nvidia,tegra20-apbdma";
+ reg = <0x6000a000 0x1200>;
+ interrupts = < 0 136 0x04
+ 0 137 0x04
+ 0 138 0x04
+ 0 139 0x04
+ 0 140 0x04
+ 0 141 0x04
+ 0 142 0x04
+ 0 143 0x04
+ 0 144 0x04
+ 0 145 0x04
+ 0 146 0x04
+ 0 147 0x04
+ 0 148 0x04
+ 0 149 0x04
+ 0 150 0x04
+ 0 151 0x04 >;
+ clocks = <&tegra_car 34>;
+ resets = <&tegra_car 34>;
+ reset-names = "dma";
+ #dma-cells = <1>;
+};
+++ /dev/null
-* NVIDIA Tegra APB DMA controller
-
-Required properties:
-- compatible: Should be "nvidia,<chip>-apbdma"
-- reg: Should contain DMA registers location and length. This shuld include
- all of the per-channel registers.
-- interrupts: Should contain all of the per-channel DMA interrupts.
-- clocks: Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
-- resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
- - dma
-- #dma-cells : Must be <1>. This dictates the length of DMA specifiers in
- client nodes' dmas properties. The specifier represents the DMA request
- select value for the peripheral. For more details, consult the Tegra TRM's
- documentation of the APB DMA channel control register REQ_SEL field.
-
-Examples:
-
-apbdma: dma@6000a000 {
- compatible = "nvidia,tegra20-apbdma";
- reg = <0x6000a000 0x1200>;
- interrupts = < 0 136 0x04
- 0 137 0x04
- 0 138 0x04
- 0 139 0x04
- 0 140 0x04
- 0 141 0x04
- 0 142 0x04
- 0 143 0x04
- 0 144 0x04
- 0 145 0x04
- 0 146 0x04
- 0 147 0x04
- 0 148 0x04
- 0 149 0x04
- 0 150 0x04
- 0 151 0x04 >;
- clocks = <&tegra_car 34>;
- resets = <&tegra_car 34>;
- reset-names = "dma";
- #dma-cells = <1>;
-};
- compatible: Should be "xlnx,axi-dma-1.00.a"
- #dma-cells: Should be <1>, see "dmas" property below
- reg: Should contain DMA registers location and length.
-- dma-channel child node: Should have atleast one channel and can have upto
+- dma-channel child node: Should have at least one channel and can have up to
two channels per device. This node specifies the properties of each
DMA channel (see child node properties below).
--- /dev/null
+* IBM/AMCC/APM GPIO Controller for PowerPC 4XX series and compatible SoCs
+
+All GPIOs are pin-shared with other functions. DCRs control whether a
+particular pin that has GPIO capabilities acts as a GPIO or is used for
+another purpose. GPIO outputs are separately programmable to emulate
+an open-drain driver.
+
+Required properties:
+ - compatible: must be "ibm,ppc4xx-gpio"
+ - reg: address and length of the register set for the device
+ - #gpio-cells: must be set to 2. The first cell is the pin number
+ and the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+ - gpio-controller: marks the device node as a gpio controller.
+
+Example:
+
+GPIO0: gpio@ef600b00 {
+ compatible = "ibm,ppc4xx-gpio";
+ reg = <0xef600b00 0x00000048>;
+ #gpio-cells = <2>;
+ gpio-controller;
+};
ti,vref-delay-usecs vref supply delay in usecs, 0 for
external vref (u16).
ti,vref-mv The VREF voltage, in millivolts (u16).
- Set to 0 to use internal refernce
+ Set to 0 to use internal references
(ADS7846).
ti,keep-vref-on set to keep vref on for differential
measurements as well
- fsl,pen-debounce-ns: Pen debounce time in nanoseconds.
- fsl,pen-threshold: Pen-down threshold for the touchscreen. This is a value
between 1 and 4096. It is the ratio between the internal reference voltage
- and the measured voltage after the plate was precharged. Resistence between
+ and the measured voltage after the plate was precharged. Resistance between
plates and therefore the voltage decreases with pressure so that a smaller
value is equivalent to a higher pressure.
- fsl,settling-time-ns: Settling time in nanoseconds. The settling time is before
"mediatek,mt6577-sysirq"
"mediatek,mt2701-sysirq"
- interrupt-controller : Identifies the node as an interrupt controller
-- #interrupt-cells : Use the same format as specified by GIC in
- Documentation/devicetree/bindings/arm/gic.txt
+- #interrupt-cells : Use the same format as specified by GIC in arm,gic.txt.
- interrupt-parent: phandle of irq parent for sysirq. The parent must
use the same interrupt-cells format as GIC.
- reg: Physical base address of the intpol registers and length of memory
+++ /dev/null
-NVIDIA Legacy Interrupt Controller
-
-All Tegra SoCs contain a legacy interrupt controller that routes
-interrupts to the GIC, and also serves as a wakeup source. It is also
-referred to as "ictlr", hence the name of the binding.
-
-The HW block exposes a number of interrupt controllers, each
-implementing a set of 32 interrupts.
-
-Required properties:
-
-- compatible : should be: "nvidia,tegra<chip>-ictlr". The LIC on
- subsequent SoCs remained backwards-compatible with Tegra30, so on
- Tegra generations later than Tegra30 the compatible value should
- include "nvidia,tegra30-ictlr".
-- reg : Specifies base physical address and size of the registers.
- Each controller must be described separately (Tegra20 has 4 of them,
- whereas Tegra30 and later have 5"
-- interrupt-controller : Identifies the node as an interrupt controller.
-- #interrupt-cells : Specifies the number of cells needed to encode an
- interrupt source. The value must be 3.
-- interrupt-parent : a phandle to the GIC these interrupts are routed
- to.
-
-Notes:
-
-- Because this HW ultimately routes interrupts to the GIC, the
- interrupt specifier must be that of the GIC.
-- Only SPIs can use the ictlr as an interrupt parent. SGIs and PPIs
- are explicitly forbidden.
-
-Example:
-
- ictlr: interrupt-controller@60004000 {
- compatible = "nvidia,tegra20-ictlr", "nvidia,tegra-ictlr";
- reg = <0x60004000 64>,
- <0x60004100 64>,
- <0x60004200 64>,
- <0x60004300 64>;
- interrupt-controller;
- #interrupt-cells = <3>;
- interrupt-parent = <&intc>;
- };
--- /dev/null
+NVIDIA Legacy Interrupt Controller
+
+All Tegra SoCs contain a legacy interrupt controller that routes
+interrupts to the GIC, and also serves as a wakeup source. It is also
+referred to as "ictlr", hence the name of the binding.
+
+The HW block exposes a number of interrupt controllers, each
+implementing a set of 32 interrupts.
+
+Required properties:
+
+- compatible : should be: "nvidia,tegra<chip>-ictlr". The LIC on
+ subsequent SoCs remained backwards-compatible with Tegra30, so on
+ Tegra generations later than Tegra30 the compatible value should
+ include "nvidia,tegra30-ictlr".
+- reg : Specifies base physical address and size of the registers.
+ Each controller must be described separately (Tegra20 has 4 of them,
+ whereas Tegra30 and later have 5"
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value must be 3.
+- interrupt-parent : a phandle to the GIC these interrupts are routed
+ to.
+
+Notes:
+
+- Because this HW ultimately routes interrupts to the GIC, the
+ interrupt specifier must be that of the GIC.
+- Only SPIs can use the ictlr as an interrupt parent. SGIs and PPIs
+ are explicitly forbidden.
+
+Example:
+
+ ictlr: interrupt-controller@60004000 {
+ compatible = "nvidia,tegra20-ictlr", "nvidia,tegra-ictlr";
+ reg = <0x60004000 64>,
+ <0x60004100 64>,
+ <0x60004200 64>,
+ <0x60004300 64>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&intc>;
+ };
routes interrupts to the GIC, and also serves as a wakeup source. It
is also referred to as "WUGEN-MPU", hence the name of the binding.
-Reguired properties:
+Required properties:
- compatible : should contain at least "ti,omap4-wugen-mpu" or
"ti,omap5-wugen-mpu"
- Because this HW ultimately routes interrupts to the GIC, the
interrupt specifier must be that of the GIC.
- Only SPIs can use the WUGEN as an interrupt parent. SGIs and PPIs
- are explicitly forbiden.
+ are explicitly forbidden.
Example:
+++ /dev/null
-NVIDIA Tegra Memory Controller device tree bindings
-===================================================
-
-memory-controller node
-----------------------
-
-Required properties:
-- compatible: Should be "nvidia,tegra<chip>-mc"
-- reg: Physical base address and length of the controller's registers.
-- clocks: Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
- - mc: the module's clock input
-- interrupts: The interrupt outputs from the controller.
-- #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
- the SWGROUP of the master.
-
-This device implements an IOMMU that complies with the generic IOMMU binding.
-See ../iommu/iommu.txt for details.
-
-emc-timings subnode
--------------------
-
-The node should contain a "emc-timings" subnode for each supported RAM type (see field RAM_CODE in
-register PMC_STRAPPING_OPT_A).
-
-Required properties for "emc-timings" nodes :
-- nvidia,ram-code : Should contain the value of RAM_CODE this timing set is used for.
-
-timing subnode
---------------
-
-Each "emc-timings" node should contain a subnode for every supported EMC clock rate.
-
-Required properties for timing nodes :
-- clock-frequency : Should contain the memory clock rate in Hz.
-- nvidia,emem-configuration : Values to be written to the EMEM register block. For the Tegra124 SoC
-(see section "15.6.1 MC Registers" in the TRM), these are the registers whose values need to be
-specified, according to the board documentation:
-
- MC_EMEM_ARB_CFG
- MC_EMEM_ARB_OUTSTANDING_REQ
- MC_EMEM_ARB_TIMING_RCD
- MC_EMEM_ARB_TIMING_RP
- MC_EMEM_ARB_TIMING_RC
- MC_EMEM_ARB_TIMING_RAS
- MC_EMEM_ARB_TIMING_FAW
- MC_EMEM_ARB_TIMING_RRD
- MC_EMEM_ARB_TIMING_RAP2PRE
- MC_EMEM_ARB_TIMING_WAP2PRE
- MC_EMEM_ARB_TIMING_R2R
- MC_EMEM_ARB_TIMING_W2W
- MC_EMEM_ARB_TIMING_R2W
- MC_EMEM_ARB_TIMING_W2R
- MC_EMEM_ARB_DA_TURNS
- MC_EMEM_ARB_DA_COVERS
- MC_EMEM_ARB_MISC0
- MC_EMEM_ARB_MISC1
- MC_EMEM_ARB_RING1_THROTTLE
-
-Example SoC include file:
-
-/ {
- mc: memory-controller@0,70019000 {
- compatible = "nvidia,tegra124-mc";
- reg = <0x0 0x70019000 0x0 0x1000>;
- clocks = <&tegra_car TEGRA124_CLK_MC>;
- clock-names = "mc";
-
- interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
-
- #iommu-cells = <1>;
- };
-
- sdhci@0,700b0000 {
- compatible = "nvidia,tegra124-sdhci";
- ...
- iommus = <&mc TEGRA_SWGROUP_SDMMC1A>;
- };
-};
-
-Example board file:
-
-/ {
- memory-controller@0,70019000 {
- emc-timings-3 {
- nvidia,ram-code = <3>;
-
- timing-12750000 {
- clock-frequency = <12750000>;
-
- nvidia,emem-configuration = <
- 0x40040001 /* MC_EMEM_ARB_CFG */
- 0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
- 0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
- 0x00000001 /* MC_EMEM_ARB_TIMING_RP */
- 0x00000002 /* MC_EMEM_ARB_TIMING_RC */
- 0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
- 0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
- 0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
- 0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
- 0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
- 0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
- 0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
- 0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
- 0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
- 0x06030203 /* MC_EMEM_ARB_DA_TURNS */
- 0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
- 0x77e30303 /* MC_EMEM_ARB_MISC0 */
- 0x70000f03 /* MC_EMEM_ARB_MISC1 */
- 0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
- >;
- };
- };
- };
-};
--- /dev/null
+NVIDIA Tegra124 SoC EMC (external memory controller)
+====================================================
+
+Required properties :
+- compatible : Should be "nvidia,tegra124-emc".
+- reg : physical base address and length of the controller's registers.
+- nvidia,memory-controller : phandle of the MC driver.
+
+The node should contain a "emc-timings" subnode for each supported RAM type
+(see field RAM_CODE in register PMC_STRAPPING_OPT_A), with its unit address
+being its RAM_CODE.
+
+Required properties for "emc-timings" nodes :
+- nvidia,ram-code : Should contain the value of RAM_CODE this timing set is
+used for.
+
+Each "emc-timings" node should contain a "timing" subnode for every supported
+EMC clock rate. The "timing" subnodes should have the clock rate in Hz as
+their unit address.
+
+Required properties for "timing" nodes :
+- clock-frequency : Should contain the memory clock rate in Hz.
+- The following properties contain EMC timing characterization values
+(specified in the board documentation) :
+ - nvidia,emc-auto-cal-config : EMC_AUTO_CAL_CONFIG
+ - nvidia,emc-auto-cal-config2 : EMC_AUTO_CAL_CONFIG2
+ - nvidia,emc-auto-cal-config3 : EMC_AUTO_CAL_CONFIG3
+ - nvidia,emc-auto-cal-interval : EMC_AUTO_CAL_INTERVAL
+ - nvidia,emc-bgbias-ctl0 : EMC_BGBIAS_CTL0
+ - nvidia,emc-cfg : EMC_CFG
+ - nvidia,emc-cfg-2 : EMC_CFG_2
+ - nvidia,emc-ctt-term-ctrl : EMC_CTT_TERM_CTRL
+ - nvidia,emc-mode-1 : Mode Register 1
+ - nvidia,emc-mode-2 : Mode Register 2
+ - nvidia,emc-mode-4 : Mode Register 4
+ - nvidia,emc-mode-reset : Mode Register 0
+ - nvidia,emc-mrs-wait-cnt : EMC_MRS_WAIT_CNT
+ - nvidia,emc-sel-dpd-ctrl : EMC_SEL_DPD_CTRL
+ - nvidia,emc-xm2dqspadctrl2 : EMC_XM2DQSPADCTRL2
+ - nvidia,emc-zcal-cnt-long : EMC_ZCAL_WAIT_CNT after clock change
+ - nvidia,emc-zcal-interval : EMC_ZCAL_INTERVAL
+- nvidia,emc-configuration : EMC timing characterization data. These are the
+registers (see section "15.6.2 EMC Registers" in the TRM) whose values need to
+be specified, according to the board documentation:
+
+ EMC_RC
+ EMC_RFC
+ EMC_RFC_SLR
+ EMC_RAS
+ EMC_RP
+ EMC_R2W
+ EMC_W2R
+ EMC_R2P
+ EMC_W2P
+ EMC_RD_RCD
+ EMC_WR_RCD
+ EMC_RRD
+ EMC_REXT
+ EMC_WEXT
+ EMC_WDV
+ EMC_WDV_MASK
+ EMC_QUSE
+ EMC_QUSE_WIDTH
+ EMC_IBDLY
+ EMC_EINPUT
+ EMC_EINPUT_DURATION
+ EMC_PUTERM_EXTRA
+ EMC_PUTERM_WIDTH
+ EMC_PUTERM_ADJ
+ EMC_CDB_CNTL_1
+ EMC_CDB_CNTL_2
+ EMC_CDB_CNTL_3
+ EMC_QRST
+ EMC_QSAFE
+ EMC_RDV
+ EMC_RDV_MASK
+ EMC_REFRESH
+ EMC_BURST_REFRESH_NUM
+ EMC_PRE_REFRESH_REQ_CNT
+ EMC_PDEX2WR
+ EMC_PDEX2RD
+ EMC_PCHG2PDEN
+ EMC_ACT2PDEN
+ EMC_AR2PDEN
+ EMC_RW2PDEN
+ EMC_TXSR
+ EMC_TXSRDLL
+ EMC_TCKE
+ EMC_TCKESR
+ EMC_TPD
+ EMC_TFAW
+ EMC_TRPAB
+ EMC_TCLKSTABLE
+ EMC_TCLKSTOP
+ EMC_TREFBW
+ EMC_FBIO_CFG6
+ EMC_ODT_WRITE
+ EMC_ODT_READ
+ EMC_FBIO_CFG5
+ EMC_CFG_DIG_DLL
+ EMC_CFG_DIG_DLL_PERIOD
+ EMC_DLL_XFORM_DQS0
+ EMC_DLL_XFORM_DQS1
+ EMC_DLL_XFORM_DQS2
+ EMC_DLL_XFORM_DQS3
+ EMC_DLL_XFORM_DQS4
+ EMC_DLL_XFORM_DQS5
+ EMC_DLL_XFORM_DQS6
+ EMC_DLL_XFORM_DQS7
+ EMC_DLL_XFORM_DQS8
+ EMC_DLL_XFORM_DQS9
+ EMC_DLL_XFORM_DQS10
+ EMC_DLL_XFORM_DQS11
+ EMC_DLL_XFORM_DQS12
+ EMC_DLL_XFORM_DQS13
+ EMC_DLL_XFORM_DQS14
+ EMC_DLL_XFORM_DQS15
+ EMC_DLL_XFORM_QUSE0
+ EMC_DLL_XFORM_QUSE1
+ EMC_DLL_XFORM_QUSE2
+ EMC_DLL_XFORM_QUSE3
+ EMC_DLL_XFORM_QUSE4
+ EMC_DLL_XFORM_QUSE5
+ EMC_DLL_XFORM_QUSE6
+ EMC_DLL_XFORM_QUSE7
+ EMC_DLL_XFORM_ADDR0
+ EMC_DLL_XFORM_ADDR1
+ EMC_DLL_XFORM_ADDR2
+ EMC_DLL_XFORM_ADDR3
+ EMC_DLL_XFORM_ADDR4
+ EMC_DLL_XFORM_ADDR5
+ EMC_DLL_XFORM_QUSE8
+ EMC_DLL_XFORM_QUSE9
+ EMC_DLL_XFORM_QUSE10
+ EMC_DLL_XFORM_QUSE11
+ EMC_DLL_XFORM_QUSE12
+ EMC_DLL_XFORM_QUSE13
+ EMC_DLL_XFORM_QUSE14
+ EMC_DLL_XFORM_QUSE15
+ EMC_DLI_TRIM_TXDQS0
+ EMC_DLI_TRIM_TXDQS1
+ EMC_DLI_TRIM_TXDQS2
+ EMC_DLI_TRIM_TXDQS3
+ EMC_DLI_TRIM_TXDQS4
+ EMC_DLI_TRIM_TXDQS5
+ EMC_DLI_TRIM_TXDQS6
+ EMC_DLI_TRIM_TXDQS7
+ EMC_DLI_TRIM_TXDQS8
+ EMC_DLI_TRIM_TXDQS9
+ EMC_DLI_TRIM_TXDQS10
+ EMC_DLI_TRIM_TXDQS11
+ EMC_DLI_TRIM_TXDQS12
+ EMC_DLI_TRIM_TXDQS13
+ EMC_DLI_TRIM_TXDQS14
+ EMC_DLI_TRIM_TXDQS15
+ EMC_DLL_XFORM_DQ0
+ EMC_DLL_XFORM_DQ1
+ EMC_DLL_XFORM_DQ2
+ EMC_DLL_XFORM_DQ3
+ EMC_DLL_XFORM_DQ4
+ EMC_DLL_XFORM_DQ5
+ EMC_DLL_XFORM_DQ6
+ EMC_DLL_XFORM_DQ7
+ EMC_XM2CMDPADCTRL
+ EMC_XM2CMDPADCTRL4
+ EMC_XM2CMDPADCTRL5
+ EMC_XM2DQPADCTRL2
+ EMC_XM2DQPADCTRL3
+ EMC_XM2CLKPADCTRL
+ EMC_XM2CLKPADCTRL2
+ EMC_XM2COMPPADCTRL
+ EMC_XM2VTTGENPADCTRL
+ EMC_XM2VTTGENPADCTRL2
+ EMC_XM2VTTGENPADCTRL3
+ EMC_XM2DQSPADCTRL3
+ EMC_XM2DQSPADCTRL4
+ EMC_XM2DQSPADCTRL5
+ EMC_XM2DQSPADCTRL6
+ EMC_DSR_VTTGEN_DRV
+ EMC_TXDSRVTTGEN
+ EMC_FBIO_SPARE
+ EMC_ZCAL_WAIT_CNT
+ EMC_MRS_WAIT_CNT2
+ EMC_CTT
+ EMC_CTT_DURATION
+ EMC_CFG_PIPE
+ EMC_DYN_SELF_REF_CONTROL
+ EMC_QPOP
+
+Example SoC include file:
+
+/ {
+ emc@7001b000 {
+ compatible = "nvidia,tegra124-emc";
+ reg = <0x0 0x7001b000 0x0 0x1000>;
+
+ nvidia,memory-controller = <&mc>;
+ };
+};
+
+Example board file:
+
+/ {
+ emc@7001b000 {
+ emc-timings-3 {
+ nvidia,ram-code = <3>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emc-zcal-cnt-long = <0x00000042>;
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-ctt-term-ctrl = <0x00000802>;
+ nvidia,emc-cfg = <0x73240000>;
+ nvidia,emc-cfg-2 = <0x000008c5>;
+ nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+ nvidia,emc-bgbias-ctl0 = <0x00000008>;
+ nvidia,emc-auto-cal-config = <0xa1430000>;
+ nvidia,emc-auto-cal-config2 = <0x00000000>;
+ nvidia,emc-auto-cal-config3 = <0x00000000>;
+ nvidia,emc-mode-reset = <0x80001221>;
+ nvidia,emc-mode-1 = <0x80100003>;
+ nvidia,emc-mode-2 = <0x80200008>;
+ nvidia,emc-mode-4 = <0x00000000>;
+
+ nvidia,emc-configuration = <
+ 0x00000000 /* EMC_RC */
+ 0x00000003 /* EMC_RFC */
+ 0x00000000 /* EMC_RFC_SLR */
+ 0x00000000 /* EMC_RAS */
+ 0x00000000 /* EMC_RP */
+ 0x00000004 /* EMC_R2W */
+ 0x0000000a /* EMC_W2R */
+ 0x00000003 /* EMC_R2P */
+ 0x0000000b /* EMC_W2P */
+ 0x00000000 /* EMC_RD_RCD */
+ 0x00000000 /* EMC_WR_RCD */
+ 0x00000003 /* EMC_RRD */
+ 0x00000003 /* EMC_REXT */
+ 0x00000000 /* EMC_WEXT */
+ 0x00000006 /* EMC_WDV */
+ 0x00000006 /* EMC_WDV_MASK */
+ 0x00000006 /* EMC_QUSE */
+ 0x00000002 /* EMC_QUSE_WIDTH */
+ 0x00000000 /* EMC_IBDLY */
+ 0x00000005 /* EMC_EINPUT */
+ 0x00000005 /* EMC_EINPUT_DURATION */
+ 0x00010000 /* EMC_PUTERM_EXTRA */
+ 0x00000003 /* EMC_PUTERM_WIDTH */
+ 0x00000000 /* EMC_PUTERM_ADJ */
+ 0x00000000 /* EMC_CDB_CNTL_1 */
+ 0x00000000 /* EMC_CDB_CNTL_2 */
+ 0x00000000 /* EMC_CDB_CNTL_3 */
+ 0x00000004 /* EMC_QRST */
+ 0x0000000c /* EMC_QSAFE */
+ 0x0000000d /* EMC_RDV */
+ 0x0000000f /* EMC_RDV_MASK */
+ 0x00000060 /* EMC_REFRESH */
+ 0x00000000 /* EMC_BURST_REFRESH_NUM */
+ 0x00000018 /* EMC_PRE_REFRESH_REQ_CNT */
+ 0x00000002 /* EMC_PDEX2WR */
+ 0x00000002 /* EMC_PDEX2RD */
+ 0x00000001 /* EMC_PCHG2PDEN */
+ 0x00000000 /* EMC_ACT2PDEN */
+ 0x00000007 /* EMC_AR2PDEN */
+ 0x0000000f /* EMC_RW2PDEN */
+ 0x00000005 /* EMC_TXSR */
+ 0x00000005 /* EMC_TXSRDLL */
+ 0x00000004 /* EMC_TCKE */
+ 0x00000005 /* EMC_TCKESR */
+ 0x00000004 /* EMC_TPD */
+ 0x00000000 /* EMC_TFAW */
+ 0x00000000 /* EMC_TRPAB */
+ 0x00000005 /* EMC_TCLKSTABLE */
+ 0x00000005 /* EMC_TCLKSTOP */
+ 0x00000064 /* EMC_TREFBW */
+ 0x00000000 /* EMC_FBIO_CFG6 */
+ 0x00000000 /* EMC_ODT_WRITE */
+ 0x00000000 /* EMC_ODT_READ */
+ 0x106aa298 /* EMC_FBIO_CFG5 */
+ 0x002c00a0 /* EMC_CFG_DIG_DLL */
+ 0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+ 0x00064000 /* EMC_DLL_XFORM_DQS0 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS1 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS2 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS3 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS4 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS5 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS6 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS7 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS8 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS9 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS10 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS11 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS12 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS13 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS14 */
+ 0x00064000 /* EMC_DLL_XFORM_DQS15 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+ 0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+ 0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+ 0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+ 0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+ 0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+ 0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+ 0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+ 0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+ 0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+ 0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+ 0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+ 0x10000280 /* EMC_XM2CMDPADCTRL */
+ 0x00000000 /* EMC_XM2CMDPADCTRL4 */
+ 0x00111111 /* EMC_XM2CMDPADCTRL5 */
+ 0x00000000 /* EMC_XM2DQPADCTRL2 */
+ 0x00000000 /* EMC_XM2DQPADCTRL3 */
+ 0x77ffc081 /* EMC_XM2CLKPADCTRL */
+ 0x00000e0e /* EMC_XM2CLKPADCTRL2 */
+ 0x81f1f108 /* EMC_XM2COMPPADCTRL */
+ 0x07070004 /* EMC_XM2VTTGENPADCTRL */
+ 0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+ 0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+ 0x51451400 /* EMC_XM2DQSPADCTRL3 */
+ 0x00514514 /* EMC_XM2DQSPADCTRL4 */
+ 0x00514514 /* EMC_XM2DQSPADCTRL5 */
+ 0x51451400 /* EMC_XM2DQSPADCTRL6 */
+ 0x0000003f /* EMC_DSR_VTTGEN_DRV */
+ 0x00000007 /* EMC_TXDSRVTTGEN */
+ 0x00000000 /* EMC_FBIO_SPARE */
+ 0x00000042 /* EMC_ZCAL_WAIT_CNT */
+ 0x000e000e /* EMC_MRS_WAIT_CNT2 */
+ 0x00000000 /* EMC_CTT */
+ 0x00000003 /* EMC_CTT_DURATION */
+ 0x0000f2f3 /* EMC_CFG_PIPE */
+ 0x800001c5 /* EMC_DYN_SELF_REF_CONTROL */
+ 0x0000000a /* EMC_QPOP */
+ >;
+ };
+ };
+ };
+};
--- /dev/null
+NVIDIA Tegra Memory Controller device tree bindings
+===================================================
+
+memory-controller node
+----------------------
+
+Required properties:
+- compatible: Should be "nvidia,tegra<chip>-mc"
+- reg: Physical base address and length of the controller's registers.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - mc: the module's clock input
+- interrupts: The interrupt outputs from the controller.
+- #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
+ the SWGROUP of the master.
+
+This device implements an IOMMU that complies with the generic IOMMU binding.
+See ../iommu/iommu.txt for details.
+
+emc-timings subnode
+-------------------
+
+The node should contain a "emc-timings" subnode for each supported RAM type (see field RAM_CODE in
+register PMC_STRAPPING_OPT_A).
+
+Required properties for "emc-timings" nodes :
+- nvidia,ram-code : Should contain the value of RAM_CODE this timing set is used for.
+
+timing subnode
+--------------
+
+Each "emc-timings" node should contain a subnode for every supported EMC clock rate.
+
+Required properties for timing nodes :
+- clock-frequency : Should contain the memory clock rate in Hz.
+- nvidia,emem-configuration : Values to be written to the EMEM register block. For the Tegra124 SoC
+(see section "15.6.1 MC Registers" in the TRM), these are the registers whose values need to be
+specified, according to the board documentation:
+
+ MC_EMEM_ARB_CFG
+ MC_EMEM_ARB_OUTSTANDING_REQ
+ MC_EMEM_ARB_TIMING_RCD
+ MC_EMEM_ARB_TIMING_RP
+ MC_EMEM_ARB_TIMING_RC
+ MC_EMEM_ARB_TIMING_RAS
+ MC_EMEM_ARB_TIMING_FAW
+ MC_EMEM_ARB_TIMING_RRD
+ MC_EMEM_ARB_TIMING_RAP2PRE
+ MC_EMEM_ARB_TIMING_WAP2PRE
+ MC_EMEM_ARB_TIMING_R2R
+ MC_EMEM_ARB_TIMING_W2W
+ MC_EMEM_ARB_TIMING_R2W
+ MC_EMEM_ARB_TIMING_W2R
+ MC_EMEM_ARB_DA_TURNS
+ MC_EMEM_ARB_DA_COVERS
+ MC_EMEM_ARB_MISC0
+ MC_EMEM_ARB_MISC1
+ MC_EMEM_ARB_RING1_THROTTLE
+
+Example SoC include file:
+
+/ {
+ mc: memory-controller@70019000 {
+ compatible = "nvidia,tegra124-mc";
+ reg = <0x0 0x70019000 0x0 0x1000>;
+ clocks = <&tegra_car TEGRA124_CLK_MC>;
+ clock-names = "mc";
+
+ interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+ #iommu-cells = <1>;
+ };
+
+ sdhci@700b0000 {
+ compatible = "nvidia,tegra124-sdhci";
+ ...
+ iommus = <&mc TEGRA_SWGROUP_SDMMC1A>;
+ };
+};
+
+Example board file:
+
+/ {
+ memory-controller@70019000 {
+ emc-timings-3 {
+ nvidia,ram-code = <3>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emem-configuration = <
+ 0x40040001 /* MC_EMEM_ARB_CFG */
+ 0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
+ 0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+ 0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+ 0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+ 0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+ 0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+ 0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+ 0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+ 0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+ 0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+ 0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+ 0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+ 0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+ 0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+ 0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+ 0x77e30303 /* MC_EMEM_ARB_MISC0 */
+ 0x70000f03 /* MC_EMEM_ARB_MISC1 */
+ 0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+ >;
+ };
+ };
+ };
+};
+++ /dev/null
-NVIDIA Tegra124 SoC EMC (external memory controller)
-====================================================
-
-Required properties :
-- compatible : Should be "nvidia,tegra124-emc".
-- reg : physical base address and length of the controller's registers.
-- nvidia,memory-controller : phandle of the MC driver.
-
-The node should contain a "emc-timings" subnode for each supported RAM type
-(see field RAM_CODE in register PMC_STRAPPING_OPT_A), with its unit address
-being its RAM_CODE.
-
-Required properties for "emc-timings" nodes :
-- nvidia,ram-code : Should contain the value of RAM_CODE this timing set is
-used for.
-
-Each "emc-timings" node should contain a "timing" subnode for every supported
-EMC clock rate. The "timing" subnodes should have the clock rate in Hz as
-their unit address.
-
-Required properties for "timing" nodes :
-- clock-frequency : Should contain the memory clock rate in Hz.
-- The following properties contain EMC timing characterization values
-(specified in the board documentation) :
- - nvidia,emc-auto-cal-config : EMC_AUTO_CAL_CONFIG
- - nvidia,emc-auto-cal-config2 : EMC_AUTO_CAL_CONFIG2
- - nvidia,emc-auto-cal-config3 : EMC_AUTO_CAL_CONFIG3
- - nvidia,emc-auto-cal-interval : EMC_AUTO_CAL_INTERVAL
- - nvidia,emc-bgbias-ctl0 : EMC_BGBIAS_CTL0
- - nvidia,emc-cfg : EMC_CFG
- - nvidia,emc-cfg-2 : EMC_CFG_2
- - nvidia,emc-ctt-term-ctrl : EMC_CTT_TERM_CTRL
- - nvidia,emc-mode-1 : Mode Register 1
- - nvidia,emc-mode-2 : Mode Register 2
- - nvidia,emc-mode-4 : Mode Register 4
- - nvidia,emc-mode-reset : Mode Register 0
- - nvidia,emc-mrs-wait-cnt : EMC_MRS_WAIT_CNT
- - nvidia,emc-sel-dpd-ctrl : EMC_SEL_DPD_CTRL
- - nvidia,emc-xm2dqspadctrl2 : EMC_XM2DQSPADCTRL2
- - nvidia,emc-zcal-cnt-long : EMC_ZCAL_WAIT_CNT after clock change
- - nvidia,emc-zcal-interval : EMC_ZCAL_INTERVAL
-- nvidia,emc-configuration : EMC timing characterization data. These are the
-registers (see section "15.6.2 EMC Registers" in the TRM) whose values need to
-be specified, according to the board documentation:
-
- EMC_RC
- EMC_RFC
- EMC_RFC_SLR
- EMC_RAS
- EMC_RP
- EMC_R2W
- EMC_W2R
- EMC_R2P
- EMC_W2P
- EMC_RD_RCD
- EMC_WR_RCD
- EMC_RRD
- EMC_REXT
- EMC_WEXT
- EMC_WDV
- EMC_WDV_MASK
- EMC_QUSE
- EMC_QUSE_WIDTH
- EMC_IBDLY
- EMC_EINPUT
- EMC_EINPUT_DURATION
- EMC_PUTERM_EXTRA
- EMC_PUTERM_WIDTH
- EMC_PUTERM_ADJ
- EMC_CDB_CNTL_1
- EMC_CDB_CNTL_2
- EMC_CDB_CNTL_3
- EMC_QRST
- EMC_QSAFE
- EMC_RDV
- EMC_RDV_MASK
- EMC_REFRESH
- EMC_BURST_REFRESH_NUM
- EMC_PRE_REFRESH_REQ_CNT
- EMC_PDEX2WR
- EMC_PDEX2RD
- EMC_PCHG2PDEN
- EMC_ACT2PDEN
- EMC_AR2PDEN
- EMC_RW2PDEN
- EMC_TXSR
- EMC_TXSRDLL
- EMC_TCKE
- EMC_TCKESR
- EMC_TPD
- EMC_TFAW
- EMC_TRPAB
- EMC_TCLKSTABLE
- EMC_TCLKSTOP
- EMC_TREFBW
- EMC_FBIO_CFG6
- EMC_ODT_WRITE
- EMC_ODT_READ
- EMC_FBIO_CFG5
- EMC_CFG_DIG_DLL
- EMC_CFG_DIG_DLL_PERIOD
- EMC_DLL_XFORM_DQS0
- EMC_DLL_XFORM_DQS1
- EMC_DLL_XFORM_DQS2
- EMC_DLL_XFORM_DQS3
- EMC_DLL_XFORM_DQS4
- EMC_DLL_XFORM_DQS5
- EMC_DLL_XFORM_DQS6
- EMC_DLL_XFORM_DQS7
- EMC_DLL_XFORM_DQS8
- EMC_DLL_XFORM_DQS9
- EMC_DLL_XFORM_DQS10
- EMC_DLL_XFORM_DQS11
- EMC_DLL_XFORM_DQS12
- EMC_DLL_XFORM_DQS13
- EMC_DLL_XFORM_DQS14
- EMC_DLL_XFORM_DQS15
- EMC_DLL_XFORM_QUSE0
- EMC_DLL_XFORM_QUSE1
- EMC_DLL_XFORM_QUSE2
- EMC_DLL_XFORM_QUSE3
- EMC_DLL_XFORM_QUSE4
- EMC_DLL_XFORM_QUSE5
- EMC_DLL_XFORM_QUSE6
- EMC_DLL_XFORM_QUSE7
- EMC_DLL_XFORM_ADDR0
- EMC_DLL_XFORM_ADDR1
- EMC_DLL_XFORM_ADDR2
- EMC_DLL_XFORM_ADDR3
- EMC_DLL_XFORM_ADDR4
- EMC_DLL_XFORM_ADDR5
- EMC_DLL_XFORM_QUSE8
- EMC_DLL_XFORM_QUSE9
- EMC_DLL_XFORM_QUSE10
- EMC_DLL_XFORM_QUSE11
- EMC_DLL_XFORM_QUSE12
- EMC_DLL_XFORM_QUSE13
- EMC_DLL_XFORM_QUSE14
- EMC_DLL_XFORM_QUSE15
- EMC_DLI_TRIM_TXDQS0
- EMC_DLI_TRIM_TXDQS1
- EMC_DLI_TRIM_TXDQS2
- EMC_DLI_TRIM_TXDQS3
- EMC_DLI_TRIM_TXDQS4
- EMC_DLI_TRIM_TXDQS5
- EMC_DLI_TRIM_TXDQS6
- EMC_DLI_TRIM_TXDQS7
- EMC_DLI_TRIM_TXDQS8
- EMC_DLI_TRIM_TXDQS9
- EMC_DLI_TRIM_TXDQS10
- EMC_DLI_TRIM_TXDQS11
- EMC_DLI_TRIM_TXDQS12
- EMC_DLI_TRIM_TXDQS13
- EMC_DLI_TRIM_TXDQS14
- EMC_DLI_TRIM_TXDQS15
- EMC_DLL_XFORM_DQ0
- EMC_DLL_XFORM_DQ1
- EMC_DLL_XFORM_DQ2
- EMC_DLL_XFORM_DQ3
- EMC_DLL_XFORM_DQ4
- EMC_DLL_XFORM_DQ5
- EMC_DLL_XFORM_DQ6
- EMC_DLL_XFORM_DQ7
- EMC_XM2CMDPADCTRL
- EMC_XM2CMDPADCTRL4
- EMC_XM2CMDPADCTRL5
- EMC_XM2DQPADCTRL2
- EMC_XM2DQPADCTRL3
- EMC_XM2CLKPADCTRL
- EMC_XM2CLKPADCTRL2
- EMC_XM2COMPPADCTRL
- EMC_XM2VTTGENPADCTRL
- EMC_XM2VTTGENPADCTRL2
- EMC_XM2VTTGENPADCTRL3
- EMC_XM2DQSPADCTRL3
- EMC_XM2DQSPADCTRL4
- EMC_XM2DQSPADCTRL5
- EMC_XM2DQSPADCTRL6
- EMC_DSR_VTTGEN_DRV
- EMC_TXDSRVTTGEN
- EMC_FBIO_SPARE
- EMC_ZCAL_WAIT_CNT
- EMC_MRS_WAIT_CNT2
- EMC_CTT
- EMC_CTT_DURATION
- EMC_CFG_PIPE
- EMC_DYN_SELF_REF_CONTROL
- EMC_QPOP
-
-Example SoC include file:
-
-/ {
- emc@0,7001b000 {
- compatible = "nvidia,tegra124-emc";
- reg = <0x0 0x7001b000 0x0 0x1000>;
-
- nvidia,memory-controller = <&mc>;
- };
-};
-
-Example board file:
-
-/ {
- emc@0,7001b000 {
- emc-timings-3 {
- nvidia,ram-code = <3>;
-
- timing-12750000 {
- clock-frequency = <12750000>;
-
- nvidia,emc-zcal-cnt-long = <0x00000042>;
- nvidia,emc-auto-cal-interval = <0x001fffff>;
- nvidia,emc-ctt-term-ctrl = <0x00000802>;
- nvidia,emc-cfg = <0x73240000>;
- nvidia,emc-cfg-2 = <0x000008c5>;
- nvidia,emc-sel-dpd-ctrl = <0x00040128>;
- nvidia,emc-bgbias-ctl0 = <0x00000008>;
- nvidia,emc-auto-cal-config = <0xa1430000>;
- nvidia,emc-auto-cal-config2 = <0x00000000>;
- nvidia,emc-auto-cal-config3 = <0x00000000>;
- nvidia,emc-mode-reset = <0x80001221>;
- nvidia,emc-mode-1 = <0x80100003>;
- nvidia,emc-mode-2 = <0x80200008>;
- nvidia,emc-mode-4 = <0x00000000>;
-
- nvidia,emc-configuration = <
- 0x00000000 /* EMC_RC */
- 0x00000003 /* EMC_RFC */
- 0x00000000 /* EMC_RFC_SLR */
- 0x00000000 /* EMC_RAS */
- 0x00000000 /* EMC_RP */
- 0x00000004 /* EMC_R2W */
- 0x0000000a /* EMC_W2R */
- 0x00000003 /* EMC_R2P */
- 0x0000000b /* EMC_W2P */
- 0x00000000 /* EMC_RD_RCD */
- 0x00000000 /* EMC_WR_RCD */
- 0x00000003 /* EMC_RRD */
- 0x00000003 /* EMC_REXT */
- 0x00000000 /* EMC_WEXT */
- 0x00000006 /* EMC_WDV */
- 0x00000006 /* EMC_WDV_MASK */
- 0x00000006 /* EMC_QUSE */
- 0x00000002 /* EMC_QUSE_WIDTH */
- 0x00000000 /* EMC_IBDLY */
- 0x00000005 /* EMC_EINPUT */
- 0x00000005 /* EMC_EINPUT_DURATION */
- 0x00010000 /* EMC_PUTERM_EXTRA */
- 0x00000003 /* EMC_PUTERM_WIDTH */
- 0x00000000 /* EMC_PUTERM_ADJ */
- 0x00000000 /* EMC_CDB_CNTL_1 */
- 0x00000000 /* EMC_CDB_CNTL_2 */
- 0x00000000 /* EMC_CDB_CNTL_3 */
- 0x00000004 /* EMC_QRST */
- 0x0000000c /* EMC_QSAFE */
- 0x0000000d /* EMC_RDV */
- 0x0000000f /* EMC_RDV_MASK */
- 0x00000060 /* EMC_REFRESH */
- 0x00000000 /* EMC_BURST_REFRESH_NUM */
- 0x00000018 /* EMC_PRE_REFRESH_REQ_CNT */
- 0x00000002 /* EMC_PDEX2WR */
- 0x00000002 /* EMC_PDEX2RD */
- 0x00000001 /* EMC_PCHG2PDEN */
- 0x00000000 /* EMC_ACT2PDEN */
- 0x00000007 /* EMC_AR2PDEN */
- 0x0000000f /* EMC_RW2PDEN */
- 0x00000005 /* EMC_TXSR */
- 0x00000005 /* EMC_TXSRDLL */
- 0x00000004 /* EMC_TCKE */
- 0x00000005 /* EMC_TCKESR */
- 0x00000004 /* EMC_TPD */
- 0x00000000 /* EMC_TFAW */
- 0x00000000 /* EMC_TRPAB */
- 0x00000005 /* EMC_TCLKSTABLE */
- 0x00000005 /* EMC_TCLKSTOP */
- 0x00000064 /* EMC_TREFBW */
- 0x00000000 /* EMC_FBIO_CFG6 */
- 0x00000000 /* EMC_ODT_WRITE */
- 0x00000000 /* EMC_ODT_READ */
- 0x106aa298 /* EMC_FBIO_CFG5 */
- 0x002c00a0 /* EMC_CFG_DIG_DLL */
- 0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
- 0x00064000 /* EMC_DLL_XFORM_DQS0 */
- 0x00064000 /* EMC_DLL_XFORM_DQS1 */
- 0x00064000 /* EMC_DLL_XFORM_DQS2 */
- 0x00064000 /* EMC_DLL_XFORM_DQS3 */
- 0x00064000 /* EMC_DLL_XFORM_DQS4 */
- 0x00064000 /* EMC_DLL_XFORM_DQS5 */
- 0x00064000 /* EMC_DLL_XFORM_DQS6 */
- 0x00064000 /* EMC_DLL_XFORM_DQS7 */
- 0x00064000 /* EMC_DLL_XFORM_DQS8 */
- 0x00064000 /* EMC_DLL_XFORM_DQS9 */
- 0x00064000 /* EMC_DLL_XFORM_DQS10 */
- 0x00064000 /* EMC_DLL_XFORM_DQS11 */
- 0x00064000 /* EMC_DLL_XFORM_DQS12 */
- 0x00064000 /* EMC_DLL_XFORM_DQS13 */
- 0x00064000 /* EMC_DLL_XFORM_DQS14 */
- 0x00064000 /* EMC_DLL_XFORM_DQS15 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE0 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE1 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE2 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE3 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE4 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE5 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE6 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE7 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR0 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR1 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR2 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR3 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR4 */
- 0x00000000 /* EMC_DLL_XFORM_ADDR5 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE8 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE9 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE10 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE11 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE12 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE13 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE14 */
- 0x00000000 /* EMC_DLL_XFORM_QUSE15 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
- 0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
- 0x000fc000 /* EMC_DLL_XFORM_DQ0 */
- 0x000fc000 /* EMC_DLL_XFORM_DQ1 */
- 0x000fc000 /* EMC_DLL_XFORM_DQ2 */
- 0x000fc000 /* EMC_DLL_XFORM_DQ3 */
- 0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
- 0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
- 0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
- 0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
- 0x10000280 /* EMC_XM2CMDPADCTRL */
- 0x00000000 /* EMC_XM2CMDPADCTRL4 */
- 0x00111111 /* EMC_XM2CMDPADCTRL5 */
- 0x00000000 /* EMC_XM2DQPADCTRL2 */
- 0x00000000 /* EMC_XM2DQPADCTRL3 */
- 0x77ffc081 /* EMC_XM2CLKPADCTRL */
- 0x00000e0e /* EMC_XM2CLKPADCTRL2 */
- 0x81f1f108 /* EMC_XM2COMPPADCTRL */
- 0x07070004 /* EMC_XM2VTTGENPADCTRL */
- 0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
- 0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
- 0x51451400 /* EMC_XM2DQSPADCTRL3 */
- 0x00514514 /* EMC_XM2DQSPADCTRL4 */
- 0x00514514 /* EMC_XM2DQSPADCTRL5 */
- 0x51451400 /* EMC_XM2DQSPADCTRL6 */
- 0x0000003f /* EMC_DSR_VTTGEN_DRV */
- 0x00000007 /* EMC_TXDSRVTTGEN */
- 0x00000000 /* EMC_FBIO_SPARE */
- 0x00000042 /* EMC_ZCAL_WAIT_CNT */
- 0x000e000e /* EMC_MRS_WAIT_CNT2 */
- 0x00000000 /* EMC_CTT */
- 0x00000003 /* EMC_CTT_DURATION */
- 0x0000f2f3 /* EMC_CFG_PIPE */
- 0x800001c5 /* EMC_DYN_SELF_REF_CONTROL */
- 0x0000000a /* EMC_QPOP */
- >;
- };
- };
- };
-};
axp209 (X-Powers)
axp221 (X-Powers)
axp223 (X-Powers)
+axp809 (X-Powers)
Required properties:
- compatible: "x-powers,axp152", "x-powers,axp202", "x-powers,axp209",
- "x-powers,axp221", "x-powers,axp223"
+ "x-powers,axp221", "x-powers,axp223", "x-powers,axp809"
- reg: The I2C slave address or RSB hardware address for the AXP chip
- interrupt-parent: The parent interrupt controller
- interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin
Optional properties:
- x-powers,dcdc-freq: defines the work frequency of DC-DC in KHz
- (range: 750-1875). Default: 1.5MHz
+ AXP152/20X: range: 750-1875, Default: 1.5 MHz
+ AXP22X/80X: range: 1800-4050, Default: 3 MHz
+
- <input>-supply: a phandle to the regulator supply node. May be omitted if
inputs are unregulated, such as using the IPSOUT output
from the PMIC.
LDO_IO1 : LDO : ips-supply : GPIO 1
RTC_LDO : LDO : ips-supply : always on
+AXP809 regulators, type, and corresponding input supply names:
+
+Regulator Type Supply Name Notes
+--------- ---- ----------- -----
+DCDC1 : DC-DC buck : vin1-supply
+DCDC2 : DC-DC buck : vin2-supply
+DCDC3 : DC-DC buck : vin3-supply
+DCDC4 : DC-DC buck : vin4-supply
+DCDC5 : DC-DC buck : vin5-supply
+DC1SW : On/Off Switch : : DCDC1 secondary output
+DC5LDO : LDO : : input from DCDC5
+ALDO1 : LDO : aldoin-supply : shared supply
+ALDO2 : LDO : aldoin-supply : shared supply
+ALDO3 : LDO : aldoin-supply : shared supply
+DLDO1 : LDO : dldoin-supply : shared supply
+DLDO2 : LDO : dldoin-supply : shared supply
+ELDO1 : LDO : eldoin-supply : shared supply
+ELDO2 : LDO : eldoin-supply : shared supply
+ELDO3 : LDO : eldoin-supply : shared supply
+LDO_IO0 : LDO : ips-supply : GPIO 0
+LDO_IO1 : LDO : ips-supply : GPIO 1
+RTC_LDO : LDO : ips-supply : always on
+SW : On/Off Switch : swin-supply
+
Example:
axp209: pmic@34 {
--- /dev/null
+Hisilicon Hi655x Power Management Integrated Circuit (PMIC)
+
+The hardware layout for access PMIC Hi655x from AP SoC Hi6220.
+Between PMIC Hi655x and Hi6220, the physical signal channel is SSI.
+We can use memory-mapped I/O to communicate.
+
++----------------+ +-------------+
+| | | |
+| Hi6220 | SSI bus | Hi655x |
+| |-------------| |
+| |(REGMAP_MMIO)| |
++----------------+ +-------------+
+
+Required properties:
+- compatible: Should be "hisilicon,hi655x-pmic".
+- reg: Base address of PMIC on Hi6220 SoC.
+- interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
+- pmic-gpios: The GPIO used by PMIC IRQ.
+
+Example:
+ pmic: pmic@f8000000 {
+ compatible = "hisilicon,hi655x-pmic";
+ reg = <0x0 0xf8000000 0x0 0x1000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ }
--- /dev/null
+MAX77620 Power management IC from Maxim Semiconductor.
+
+Required properties:
+-------------------
+- compatible: Must be one of
+ "maxim,max77620"
+ "maxim,max20024".
+- reg: I2C device address.
+
+Optional properties:
+-------------------
+- interrupts: The interrupt on the parent the controller is
+ connected to.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: is <2> and their usage is compliant to the 2 cells
+ variant of <../interrupt-controller/interrupts.txt>
+ IRQ numbers for different interrupt source of MAX77620
+ are defined at dt-bindings/mfd/max77620.h.
+
+Optional subnodes and their properties:
+=======================================
+
+Flexible power sequence configurations:
+--------------------------------------
+The Flexible Power Sequencer (FPS) allows each regulator to power up under
+hardware or software control. Additionally, each regulator can power on
+independently or among a group of other regulators with an adjustable power-up
+and power-down delays (sequencing). GPIO1, GPIO2, and GPIO3 can be programmed
+to be part of a sequence allowing external regulators to be sequenced along
+with internal regulators. 32KHz clock can be programmed to be part of a
+sequence.
+
+The flexible sequencing structure consists of two hardware enable inputs
+(EN0, EN1), and 3 master sequencing timers called FPS0, FPS1 and FPS2.
+Each master sequencing timer is programmable through its configuration
+register to have a hardware enable source (EN1 or EN2) or a software enable
+source (SW). When enabled/disabled, the master sequencing timer generates
+eight sequencing events on different time periods called slots. The time
+period between each event is programmable within the configuration register.
+Each regulator, GPIO1, GPIO2, GPIO3, and 32KHz clock has a flexible power
+sequence slave register which allows its enable source to be specified as
+a flexible power sequencer timer or a software bit. When a FPS source of
+regulators, GPIOs and clocks specifies the enable source to be a flexible
+power sequencer, the power up and power down delays can be specified in
+the regulators, GPIOs and clocks flexible power sequencer configuration
+registers.
+
+When FPS event cleared (set to LOW), regulators, GPIOs and 32KHz
+clock are set into following state at the sequencing event that
+corresponds to its flexible sequencer configuration register.
+ Sleep state: In this state, regulators, GPIOs
+ and 32KHz clock get disabled at
+ the sequencing event.
+ Global Low Power Mode (GLPM): In this state, regulators are set in
+ low power mode at the sequencing event.
+
+The configuration parameters of FPS is provided through sub-node "fps"
+and their child for FPS specific. The child node name for FPS are "fps0",
+"fps1", and "fps2" for FPS0, FPS1 and FPS2 respectively.
+
+The FPS configurations like FPS source, power up and power down slots for
+regulators, GPIOs and 32kHz clocks are provided in their respective
+configuration nodes which is explained in respective sub-system DT
+binding document.
+
+There is need for different FPS configuration parameters based on system
+state like when system state changed from active to suspend or active to
+power off (shutdown).
+
+Optional properties:
+-------------------
+-maxim,fps-event-source: u32, FPS event source like external
+ hardware input to PMIC i.e. EN0, EN1 or
+ software (SW).
+ The macros are defined on
+ dt-bindings/mfd/max77620.h
+ for different control source.
+ - MAX77620_FPS_EVENT_SRC_EN0
+ for hardware input pin EN0.
+ - MAX77620_FPS_EVENT_SRC_EN1
+ for hardware input pin EN1.
+ - MAX77620_FPS_EVENT_SRC_SW
+ for software control.
+
+-maxim,shutdown-fps-time-period-us: u32, FPS time period in microseconds
+ when system enters in to shutdown
+ state.
+
+-maxim,suspend-fps-time-period-us: u32, FPS time period in microseconds
+ when system enters in to suspend state.
+
+-maxim,device-state-on-disabled-event: u32, describe the PMIC state when FPS
+ event cleared (set to LOW) whether it
+ should go to sleep state or low-power
+ state. Following are valid values:
+ - MAX77620_FPS_INACTIVE_STATE_SLEEP
+ to set the PMIC state to sleep.
+ - MAX77620_FPS_INACTIVE_STATE_LOW_POWER
+ to set the PMIC state to low
+ power.
+ Absence of this property or other value
+ will not change device state when FPS
+ event get cleared.
+
+Here supported time periods by device in microseconds are as follows:
+MAX77620 supports 40, 80, 160, 320, 640, 1280, 2560 and 5120 microseconds.
+MAX20024 supports 20, 40, 80, 160, 320, 640, 1280 and 2540 microseconds.
+
+For DT binding details of different sub modules like GPIO, pincontrol,
+regulator, power, please refer respective device-tree binding document
+under their respective sub-system directories.
+
+Example:
+--------
+#include <dt-bindings/mfd/max77620.h>
+
+max77620@3c {
+ compatible = "maxim,max77620";
+ reg = <0x3c>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <0 86 IRQ_TYPE_NONE>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ fps {
+ fps0 {
+ maxim,shutdown-fps-time-period-us = <1280>;
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_EN1>;
+ };
+
+ fps1 {
+ maxim,shutdown-fps-time-period-us = <1280>;
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_EN0>;
+ };
+
+ fps2 {
+ maxim,shutdown-fps-time-period-us = <1280>;
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_SW>;
+ };
+ };
+};
- qcom,force-mode:
Usage: optional (default if no other qcom,force-mode is specified)
Value type: <u32>
- Defintion: indicates that the regulator should be forced to a
+ Definition: indicates that the regulator should be forced to a
particular mode, valid values are:
QCOM_RPM_FORCE_MODE_NONE - do not force any mode
QCOM_RPM_FORCE_MODE_LPM - force into low power mode
- qcom,force-mode:
Usage: optional
Value type: <u32>
- Defintion: indicates that the regulator should not be forced to any
+ Definition: indicates that the regulator should not be forced to any
particular mode, valid values are:
QCOM_RPM_FORCE_MODE_NONE - do not force any mode
QCOM_RPM_FORCE_MODE_LPM - force into low power mode
* The simple eMMC hardware reset provider
The purpose of this driver is to perform standard eMMC hw reset
-procedure, as descibed by Jedec 4.4 specification. This procedure is
+procedure, as described by Jedec 4.4 specification. This procedure is
performed just after MMC core enabled power to the given mmc host (to
fix possible issues if bootloader has left eMMC card in initialized or
unknown state), and before performing complete system reboot (also in
v7.0. Use this property to describe the rare
earlier versions of this core that include WP
- -- Additonal SoC-specific NAND controller properties --
+ -- Additional SoC-specific NAND controller properties --
The NAND controller is integrated differently on the variety of SoCs on which it
is found. Part of this integration involves providing status and enable bits
specifies a reference to the associating hardware driver node.
see Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
- port-id: is the index of port provided by DSAF (the accelerator). DSAF can
- connect to 8 PHYs. Port 0 to 1 are both used for adminstration purpose. They
+ connect to 8 PHYs. Port 0 to 1 are both used for administration purpose. They
are called debug ports.
The remaining 6 PHYs are taken according to the mode of DSAF.
AXI register inside the DMA module:
- snps,lpi_en: enable Low Power Interface
- snps,xit_frm: unlock on WoL
- - snps,wr_osr_lmt: max write oustanding req. limit
- - snps,rd_osr_lmt: max read oustanding req. limit
+ - snps,wr_osr_lmt: max write outstanding req. limit
+ - snps,rd_osr_lmt: max read outstanding req. limit
- snps,kbbe: do not cross 1KiB boundary.
- snps,axi_all: align address
- snps,blen: this is a vector of supported burst length.
Required properties:
- reg - The ID number for the phy, usually a small integer
- - ti,rx-internal-delay - RGMII Recieve Clock Delay - see dt-bindings/net/ti-dp83867.h
+ - ti,rx-internal-delay - RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h
for applicable values
- ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
for applicable values
phandle to a OPP table in their DT node. The OPP core will use this phandle to
find the operating points for the device.
-If required, this can be extended for SoC vendor specfic bindings. Such bindings
+If required, this can be extended for SoC vendor specific bindings. Such bindings
should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
and should have a compatible description like: "operating-points-v2-<vendor>".
Example configuration:
- pcie: pcie@0xdffff000 {
+ pcie: pcie@dffff000 {
compatible = "snps,dw-pcie";
reg = <0xdffff000 0x1000>, /* Controller registers */
<0xd0000000 0x2000>; /* PCI config space */
ranges = <0x82000000 0 0x00000000 0x220 0x00000000 0 0x10000000>;
num-lanes = <8>;
port-id = <1>;
- #interrupts-cells = <1>;
- interrupts-map-mask = <0xf800 0 0 7>;
- interrupts-map = <0x0 0 0 1 &mbigen_pcie 1 10
- 0x0 0 0 2 &mbigen_pcie 2 11
- 0x0 0 0 3 &mbigen_pcie 3 12
- 0x0 0 0 4 &mbigen_pcie 4 13>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0xf800 0 0 7>;
+ interrupt-map = <0x0 0 0 1 &mbigen_pcie 1 10
+ 0x0 0 0 2 &mbigen_pcie 2 11
+ 0x0 0 0 3 &mbigen_pcie 3 12
+ 0x0 0 0 4 &mbigen_pcie 4 13>;
status = "ok";
};
usb2_phy: usb2phy@0 {
compatible = "st,stih416-usb-phy";
- #phy-cell = <0>;
+ #phy-cells = <0>;
st,syscfg = <&syscfg_rear>;
clocks = <&clk_sysin>;
clock-names = "osc_phy";
SoC file extract:
-----------------
- padctl@0,7009f000 {
+ padctl@7009f000 {
compatible = "nvidia,tegra124-xusb-padctl";
reg = <0x0 0x7009f000 0x0 0x1000>;
resets = <&tegra_car 142>;
Board file extract:
-------------------
- pcie-controller@0,01003000 {
+ pcie-controller@01003000 {
...
phys = <&padctl 0>;
...
- padctl: padctl@0,7009f000 {
+ padctl: padctl@7009f000 {
pinctrl-0 = <&padctl_default>;
pinctrl-names = "default";
2: 1.5uA (PMIC_GPIO_PULL_UP_1P5)
3: 31.5uA (PMIC_GPIO_PULL_UP_31P5)
4: 1.5uA + 30uA boost (PMIC_GPIO_PULL_UP_1P5_30)
- If this property is ommited 30uA strength will be used if
+ If this property is omitted 30uA strength will be used if
pull up is selected
- bias-high-impedance:
- qcom,charger-disable:
Usage: optional
Value type: <boolean>
- Definition: definining this property disables charging
+ Definition: defining this property disables charging
This charger is a sub-node of one of the 8941 PMIC blocks, and is specified
as a child node in DTS of that node. See ../mfd/qcom,spmi-pmic.txt and
* palmas regulator IP block devicetree bindings
+The tps659038 for the AM57x class have OTP spins that
+have different part numbers but the same functionality. There
+is not a need to add the OTP spins to the palmas driver. The
+spin devices should use the tps659038 as it's compatible value.
+This is the list of those devices:
+tps659037
+
Required properties:
- compatible : Should be from the list
ti,twl6035-pmic
ti,tps65913-pmic
ti,tps65914-pmic
ti,tps65917-pmic
+ ti,tps659038-pmic
and also the generic series names
ti,palmas-pmic
- interrupt-parent : The parent interrupt controller which is palmas.
battery is chargeable or not. If charging battery then driver can
enable the charging.
- ti,backup-battery-charge-high-current: Enable high current charging in
- backup battery. Device supports the < 100mA and > 100mA charging.
- The high current will be > 100mA. Absence of this property will
- charge battery to lower current i.e. < 100mA.
+ backup battery. Device supports the < 100uA and > 100uA charging.
+ The high current will be > 100uA. Absence of this property will
+ charge battery to lower current i.e. < 100uA.
Example:
palmas: tps65913@58 {
--- /dev/null
+* Marvell UART : Non standard UART used in some of Marvell EBU SoCs (e.g., Armada-3700)
+
+Required properties:
+- compatible: "marvell,armada-3700-uart"
+- reg: offset and length of the register set for the device.
+- interrupts: device interrupt
+
+Example:
+ serial@12000 {
+ compatible = "marvell,armada-3700-uart";
+ reg = <0x12000 0x400>;
+ interrupts = <43>;
+ };
- queue-pools : child node classifying the queue ranges into pools.
Queue ranges are grouped into 3 type of pools:
- qpend : pool of qpend(interruptible) queues
- - general-purpose : pool of general queues, primarly used
+ - general-purpose : pool of general queues, primarily used
as free descriptor queues or the
transmit DMA queues.
- accumulator : pool of queues on PDSP accumulator channel
-- qrange : number of queues to use per queue range, specified as
<"base queue #" "# of queues">.
-- interrupts : Optional property to specify the interrupt mapping
- for interruptible queues. The driver additionaly sets
+ for interruptible queues. The driver additionally sets
the interrupt affinity hint based on the cpu mask.
-- qalloc-by-id : Optional property to specify that the queues in this
range can only be allocated by queue id.
latency : time to delay the interrupt, specified
in microseconds.
-- multi-queue : Optional property to specify that the channel has to
- monitor upto 32 queues starting at the base queue #.
+ monitor up to 32 queues starting at the base queue #.
- descriptor-regions : child node describing the memory regions for keystone
navigator packet DMA descriptors. The memory for
descriptors will be allocated by the driver.
Example:
-hda@0,70030000 {
+hda@70030000 {
compatible = "nvidia,tegra124-hda", "nvidia,tegra30-hda";
reg = <0x0 0x70030000 0x0 0x10000>;
interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
compatible = "mmio-sram";
reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */
- #adress-cells = <1>;
+ #address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x5c000000 0x40000>;
--- /dev/null
+Tegra124 SOCTHERM thermal management system
+
+The SOCTHERM IP block contains thermal sensors, support for polled
+or interrupt-based thermal monitoring, CPU and GPU throttling based
+on temperature trip points, and handling external overcurrent
+notifications. It is also used to manage emergency shutdown in an
+overheating situation.
+
+Required properties :
+- compatible : For Tegra124, must contain "nvidia,tegra124-soctherm".
+ For Tegra132, must contain "nvidia,tegra132-soctherm".
+ For Tegra210, must contain "nvidia,tegra210-soctherm".
+- reg : Should contain 1 entry:
+ - SOCTHERM register set
+- interrupts : Defines the interrupt used by SOCTHERM
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - tsensor
+ - soctherm
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - soctherm
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description
+ of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a
+ list of valid values when referring to thermal sensors.
+
+
+Example :
+
+ soctherm@700e2000 {
+ compatible = "nvidia,tegra124-soctherm";
+ reg = <0x0 0x700e2000 0x0 0x1000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+ <&tegra_car TEGRA124_CLK_SOC_THERM>;
+ clock-names = "tsensor", "soctherm";
+ resets = <&tegra_car 78>;
+ reset-names = "soctherm";
+
+ #thermal-sensor-cells = <1>;
+ };
+
+Example: referring to thermal sensors :
+
+ thermal-zones {
+ cpu {
+ polling-delay-passive = <1000>;
+ polling-delay = <1000>;
+
+ thermal-sensors =
+ <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
+ };
+ };
+++ /dev/null
-Tegra124 SOCTHERM thermal management system
-
-The SOCTHERM IP block contains thermal sensors, support for polled
-or interrupt-based thermal monitoring, CPU and GPU throttling based
-on temperature trip points, and handling external overcurrent
-notifications. It is also used to manage emergency shutdown in an
-overheating situation.
-
-Required properties :
-- compatible : For Tegra124, must contain "nvidia,tegra124-soctherm".
- For Tegra132, must contain "nvidia,tegra132-soctherm".
- For Tegra210, must contain "nvidia,tegra210-soctherm".
-- reg : Should contain 1 entry:
- - SOCTHERM register set
-- interrupts : Defines the interrupt used by SOCTHERM
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- - tsensor
- - soctherm
-- resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
- - soctherm
-- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description
- of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a
- list of valid values when referring to thermal sensors.
-
-
-Example :
-
- soctherm@0,700e2000 {
- compatible = "nvidia,tegra124-soctherm";
- reg = <0x0 0x700e2000 0x0 0x1000>;
- interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
- <&tegra_car TEGRA124_CLK_SOC_THERM>;
- clock-names = "tsensor", "soctherm";
- resets = <&tegra_car 78>;
- reset-names = "soctherm";
-
- #thermal-sensor-cells = <1>;
- };
-
-Example: referring to thermal sensors :
-
- thermal-zones {
- cpu {
- polling-delay-passive = <1000>;
- polling-delay = <1000>;
-
- thermal-sensors =
- <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
- };
- };
+++ /dev/null
-* Marvell UART : Non standard UART used in some of Marvell EBU SoCs (e.g., Armada-3700)
-
-Required properties:
-- compatible: "marvell,armada-3700-uart"
-- reg: offset and length of the register set for the device.
-- interrupts: device interrupt
-
-Example:
- serial@12000 {
- compatible = "marvell,armada-3700-uart";
- reg = <0x12000 0x400>;
- interrupts = <43>;
- };
arasan Arasan Chip Systems
arm ARM Ltd.
armadeus ARMadeus Systems SARL
+arrow Arrow Electronics
artesyn Artesyn Embedded Technologies Inc.
asahi-kasei Asahi Kasei Corp.
aspeed ASPEED Technology Inc.
compulab CompuLab Ltd.
cortina Cortina Systems, Inc.
cosmic Cosmic Circuits
+creative Creative Technology Ltd
crystalfontz Crystalfontz America, Inc.
cubietech Cubietech, Ltd.
cypress Cypress Semiconductor Corporation
edt Emerging Display Technologies
eeti eGalax_eMPIA Technology Inc
elan Elan Microelectronic Corp.
+embest Shenzhen Embest Technology Co., Ltd.
emmicro EM Microelectronic
energymicro Silicon Laboratories (formerly Energy Micro AS)
epcos EPCOS AG
ifi Ingenieurburo Fur Ic-Technologie (I/F/I)
iom Iomega Corporation
img Imagination Technologies Ltd.
+inforce Inforce Computing
ingenic Ingenic Semiconductor
innolux Innolux Corporation
intel Intel Corporation
devm_kvasprintf()
devm_kzalloc()
+MFD
+ devm_mfd_add_devices()
+
PCI
pcim_enable_device() : after success, all PCI ops become managed
pcim_pin_device() : keep PCI device enabled after release
USB wire. That hardware framebuffer is able to drive the VGA, DVI, or HDMI
monitor with no CPU involvement until a pixel has to change.
-The CPU or other local resource does all the rendering; optinally compares the
+The CPU or other local resource does all the rendering; optionally compares the
result with a local shadow of the remote hardware framebuffer to identify
the minimal set of pixels that have changed; and compresses and sends those
pixels line-by-line via USB bulk transfers.
At that point, a /dev/fb? interface will be present for user-mode applications
to open and begin writing to the framebuffer of the DisplayLink device using
standard fbdev calls. Note that if mmap() is used, by default the user mode
-application must send down damage notifcations to trigger repaints of the
+application must send down damage notifications to trigger repaints of the
changed regions. Alternatively, udlfb can be recompiled with experimental
defio support enabled, to support a page-fault based detection mechanism
-that can work without explicit notifcation.
+that can work without explicit notification.
The most common client of udlfb is xf86-video-displaylink or a modified
xf86-video-fbdev X server. These servers have no real DisplayLink specific
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
| s390: | TODO |
| score: | TODO |
| sh: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
| s390: | TODO |
| score: | TODO |
| sh: | TODO |
crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
range=start-[end]
-Please note, on arm, the offset is required.
- crashkernel=<range1>:<size1>[,<range2>:<size2>,...]@offset
- range=start-[end]
-
- 'start' is inclusive and 'end' is exclusive.
-
For example:
crashkernel=512M-2G:64M,2G-:128M
on the memory consumption of the kdump system. In general this is not
dependent on the memory size of the production system.
- On arm, use "crashkernel=Y@X". Note that the start address of the kernel
- will be aligned to 128MiB (0x08000000), so if the start address is not then
- any space below the alignment point may be overwritten by the dump-capture kernel,
- which means it is possible that the vmcore is not that precise as expected.
+ On arm, the use of "crashkernel=Y@X" is no longer necessary; the
+ kernel will automatically locate the crash kernel image within the
+ first 512MB of RAM if X is not given.
Load the Dump-capture Kernel
[KNL,SH] Allow user to override the default size for
per-device physically contiguous DMA buffers.
+ memhp_default_state=online/offline
+ [KNL] Set the initial state for the memory hotplug
+ onlining policy. If not specified, the default value is
+ set according to the
+ CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
+ option.
+ See Documentation/memory-hotplug.txt.
+
memmap=exactmap [KNL,X86] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
for broken drivers that don't call it.
skip_isa_align [X86] do not align io start addr, so can
handle more pci cards
- firmware [ARM] Do not re-enumerate the bus but instead
- just use the configuration from the
- bootloader. This is currently used on
- IXP2000 systems where the bus has to be
- configured a certain way for adjunct CPUs.
noearly [X86] Don't do any early type 1 scanning.
This might help on some broken boards which
machine check when some devices' config space
% cat /sys/devices/system/memory/auto_online_blocks
-The default is "offline" which means the newly added memory is not in a
-ready-to-use state and you have to "online" the newly added memory blocks
-manually. Automatic onlining can be requested by writing "online" to
-"auto_online_blocks" file:
+The default depends on the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
+option. If it is disabled the default is "offline" which means the newly added
+memory is not in a ready-to-use state and you have to "online" the newly added
+memory blocks manually. Automatic onlining can be requested by writing "online"
+to "auto_online_blocks" file:
% echo online > /sys/devices/system/memory/auto_online_blocks
The IBM POWER-based pSeries and iSeries computers include PCI bus
controller chips that have extended capabilities for detecting and
reporting a large variety of PCI bus error conditions. These features
-go under the name of "EEH", for "Extended Error Handling". The EEH
+go under the name of "EEH", for "Enhanced Error Handling". The EEH
hardware features allow PCI bus errors to be cleared and a PCI
card to be "rebooted", without also having to reboot the operating
system.
- panic_on_oom
- percpu_pagelist_fraction
- stat_interval
+- stat_refresh
- swappiness
- user_reserve_kbytes
- vfs_cache_pressure
==============================================================
+stat_refresh
+
+Any read or write (by root only) flushes all the per-cpu vm statistics
+into their global totals, for more accurate reports when testing
+e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
+
+As a side-effect, it also checks for negative totals (elsewhere reported
+as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
+(At time of writing, a few stats are known sometimes to be found negative,
+with no ill effects: errors and warnings on these stats are suppressed.)
+
+==============================================================
+
swappiness
This control is used to define how aggressive the kernel will swap
Refcounting on THP is mostly consistent with refcounting on other compound
pages:
- - get_page()/put_page() and GUP operate in head page's ->_count.
+ - get_page()/put_page() and GUP operate in head page's ->_refcount.
- - ->_count in tail pages is always zero: get_page_unless_zero() never
+ - ->_refcount in tail pages is always zero: get_page_unless_zero() never
succeed on tail pages.
- map/unmap of the pages with PTE entry increment/decrement ->_mapcount
sum of mapcount of all sub-pages plus one (split_huge_page caller must
have reference for head page).
-split_huge_page uses migration entries to stabilize page->_count and
+split_huge_page uses migration entries to stabilize page->_refcount and
page->_mapcount.
We safe against physical memory scanners too: the only legitimate way
scanner can get reference to a page is get_page_unless_zero().
-All tail pages has zero ->_count until atomic_add(). It prevent scanner
+All tail pages has zero ->_refcount until atomic_add(). It prevent scanner
from geting reference to tail page up to the point. After the atomic_add()
-we don't care about ->_count value. We already known how many references
+we don't care about ->_refcount value. We already known how many references
with should uncharge from head page.
For head page get_page_unless_zero() will succeed and we don't mind. It's
S: Supported
F: drivers/tty/serial/atmel_serial.c
+ATMEL AT91 SAMA5D2-Compatible Shutdown Controller
+M: Nicolas Ferre <nicolas.ferre@atmel.com>
+S: Supported
+F: drivers/power/reset/at91-sama5d2_shdwc.c
+
ATMEL SAMA5D2 ADC DRIVER
M: Ludovic Desroches <ludovic.desroches@atmel.com>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/powerpc/
F: arch/powerpc/
+F: drivers/char/tpm/tpm_ibmvtpm*
+F: drivers/crypto/nx/
+F: drivers/crypto/vmx/
+F: drivers/net/ethernet/ibm/ibmveth.*
+F: drivers/net/ethernet/ibm/ibmvnic.*
+F: drivers/pci/hotplug/rpa*
+F: drivers/scsi/ibmvscsi/
+N: opal
+N: /pmac
+N: powermac
+N: powernv
+N: [^a-z0-9]ps3
+N: pseries
LINUX FOR POWER MACINTOSH
M: Benjamin Herrenschmidt <benh@kernel.crashing.org>
F: kernel/workqueue.c
F: Documentation/workqueue.txt
+X-POWERS MULTIFUNCTION PMIC DEVICE DRIVERS
+M: Chen-Yu Tsai <wens@csie.org>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+N: axp[128]
+
X.25 NETWORK LAYER
M: Andrew Hendry <andrew.hendry@gmail.com>
L: linux-x25@vger.kernel.org
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
-#define has_transparent_hugepage() 1
-
/* Generic variants assume pgtable_t is struct page *, hence need for these */
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
$(call if_changed,objcopy)
@$(kecho) ' Kernel: $@ is ready'
-PHONY += initrd
+PHONY += initrd install zinstall uinstall
initrd:
@test "$(INITRD_PHYS)" != "" || \
(echo This machine does not support INITRD; exit -1)
static inline void dma_mark_clean(void *addr, size_t size) { }
-extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
-
/**
* arm_dma_alloc - allocate consistent memory for DMA
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
#define ioremap ioremap
#define ioremap_nocache ioremap
+/*
+ * Do not use ioremap_cache for mapping memory. Use memremap instead.
+ */
void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size);
#define ioremap_cache ioremap_cache
+/*
+ * Do not use ioremap_cached in new code. Provided for the benefit of
+ * the pxa2xx-flash MTD driver only.
+ */
+void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size);
+
void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
#define ioremap_wc ioremap_wc
#define ioremap_wt ioremap_wc
void iounmap(volatile void __iomem *iomem_cookie);
#define iounmap iounmap
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size);
+#define arch_memremap_wb arch_memremap_wb
+
/*
* io{read,write}{16,32}be() macros
*/
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT)
-extern unsigned long (*arch_virt_to_idmap)(unsigned long x);
+extern long long arch_phys_to_idmap_offset;
/*
- * These are for systems that have a hardware interconnect supported alias of
- * physical memory for idmap purposes. Most cases should leave these
+ * These are for systems that have a hardware interconnect supported alias
+ * of physical memory for idmap purposes. Most cases should leave these
* untouched. Note: this can only return addresses less than 4GiB.
*/
+static inline bool arm_has_idmap_alias(void)
+{
+ return IS_ENABLED(CONFIG_MMU) && arch_phys_to_idmap_offset != 0;
+}
+
+#define IDMAP_INVALID_ADDR ((u32)~0)
+
+static inline unsigned long phys_to_idmap(phys_addr_t addr)
+{
+ if (IS_ENABLED(CONFIG_MMU) && arch_phys_to_idmap_offset) {
+ addr += arch_phys_to_idmap_offset;
+ if (addr > (u32)~0)
+ addr = IDMAP_INVALID_ADDR;
+ }
+ return addr;
+}
+
+static inline phys_addr_t idmap_to_phys(unsigned long idmap)
+{
+ phys_addr_t addr = idmap;
+
+ if (IS_ENABLED(CONFIG_MMU) && arch_phys_to_idmap_offset)
+ addr -= arch_phys_to_idmap_offset;
+
+ return addr;
+}
+
static inline unsigned long __virt_to_idmap(unsigned long x)
{
- if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap)
- return arch_virt_to_idmap(x);
- else
- return __virt_to_phys(x);
+ return phys_to_idmap(__virt_to_phys(x));
}
#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x))
flush_pmd_entry(pmdp);
}
-static inline int has_transparent_hugepage(void)
-{
- return 1;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_PGTABLE_3LEVEL_H */
if (!strcmp(str, "debug")) {
debug_pci = 1;
return NULL;
- } else if (!strcmp(str, "firmware")) {
- pci_add_flags(PCI_PROBE_ONLY);
- return NULL;
}
return str;
}
{
local_irq_disable();
smp_send_stop();
-
- local_irq_disable();
while (1);
}
/* Whoops - the platform was unable to reboot. Tell the user! */
printk("Reboot failed -- System halted\n");
- local_irq_disable();
while (1);
}
late_initcall(init_machine_late);
#ifdef CONFIG_KEXEC
+/*
+ * The crash region must be aligned to 128MB to avoid
+ * zImage relocating below the reserved region.
+ */
+#define CRASH_ALIGN (128 << 20)
+
static inline unsigned long long get_total_mem(void)
{
unsigned long total;
if (ret)
return;
+ if (crash_base <= 0) {
+ unsigned long long crash_max = idmap_to_phys((u32)~0);
+ crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max,
+ crash_size, CRASH_ALIGN);
+ if (!crash_base) {
+ pr_err("crashkernel reservation failed - No suitable area found.\n");
+ return;
+ }
+ } else {
+ unsigned long long start;
+
+ start = memblock_find_in_range(crash_base,
+ crash_base + crash_size,
+ crash_size, SECTION_SIZE);
+ if (start != crash_base) {
+ pr_err("crashkernel reservation failed - memory is in use.\n");
+ return;
+ }
+ }
+
ret = memblock_reserve(crash_base, crash_size);
if (ret < 0) {
pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n",
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
-static unsigned long keystone_virt_to_idmap(unsigned long x)
-{
- return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
-}
-
static long long __init keystone_pv_fixup(void)
{
long long offset;
offset = KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START;
/* Populate the arch idmap hook */
- arch_virt_to_idmap = keystone_virt_to_idmap;
+ arch_phys_to_idmap_offset = -offset;
return offset;
}
aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
}
- /* r3p0 or later has power control register */
- if (rev >= L310_CACHE_ID_RTL_R3P0)
- l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN |
- L310_STNDBY_MODE_EN;
-
/*
* Always enable non-secure access to the lockdown registers -
* we write to them as part of the L2C enable sequence so they
u32 filter[2] = { 0, 0 };
u32 assoc;
u32 prefetch;
+ u32 power;
u32 val;
int ret;
}
l2x0_saved_regs.prefetch_ctrl = prefetch;
+
+ power = l2x0_saved_regs.pwr_ctrl |
+ L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN;
+
+ ret = of_property_read_u32(np, "arm,dynamic-clock-gating", &val);
+ if (!ret) {
+ if (!val)
+ power &= ~L310_DYNAMIC_CLK_GATING_EN;
+ } else if (ret != -EINVAL) {
+ pr_err("L2C-310 OF dynamic-clock-gating property value is missing or invalid\n");
+ }
+ ret = of_property_read_u32(np, "arm,standby-mode", &val);
+ if (!ret) {
+ if (!val)
+ power &= ~L310_STNDBY_MODE_EN;
+ } else if (ret != -EINVAL) {
+ pr_err("L2C-310 OF standby-mode property value is missing or invalid\n");
+ }
+
+ l2x0_saved_regs.pwr_ctrl = power;
}
static const struct l2c_init_data of_l2c310_data __initconst = {
void __iomem *ctrl_base;
void __iomem *rev_base;
void __iomem *op_base;
+ void __iomem *way_ctrl_base;
u32 way_present_mask;
u32 way_locked_mask;
u32 nsets;
struct uniphier_cache_data *data,
u32 way_mask)
{
+ unsigned int cpu;
+
data->way_locked_mask = way_mask & data->way_present_mask;
- writel_relaxed(~data->way_locked_mask & data->way_present_mask,
- data->ctrl_base + UNIPHIER_SSCLPDAWCR);
+ for_each_possible_cpu(cpu)
+ writel_relaxed(~data->way_locked_mask & data->way_present_mask,
+ data->way_ctrl_base + 4 * cpu);
}
static void uniphier_cache_maint_range(unsigned long start, unsigned long end,
goto err;
}
+ data->way_ctrl_base = data->ctrl_base + 0xc00;
+
if (*cache_level == 2) {
u32 revision = readl(data->rev_base + UNIPHIER_SSCID);
/*
*/
if (revision <= 0x16)
data->range_op_max_size = (u32)1 << 22;
+
+ /*
+ * Unfortunatly, the offset address of active way control base
+ * varies from SoC to SoC.
+ */
+ switch (revision) {
+ case 0x11: /* sLD3 */
+ data->way_ctrl_base = data->ctrl_base + 0x870;
+ break;
+ case 0x12: /* LD4 */
+ case 0x16: /* sld8 */
+ data->way_ctrl_base = data->ctrl_base + 0x840;
+ break;
+ default:
+ break;
+ }
}
data->range_op_max_size -= data->line_size;
.sync_single_for_device = arm_dma_sync_single_for_device,
.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
.sync_sg_for_device = arm_dma_sync_sg_for_device,
- .set_dma_mask = arm_dma_set_mask,
};
EXPORT_SYMBOL(arm_dma_ops);
.get_sgtable = arm_dma_get_sgtable,
.map_page = arm_coherent_dma_map_page,
.map_sg = arm_dma_map_sg,
- .set_dma_mask = arm_dma_set_mask,
};
EXPORT_SYMBOL(arm_coherent_dma_ops);
}
EXPORT_SYMBOL(dma_supported);
-int arm_dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
- *dev->dma_mask = dma_mask;
-
- return 0;
-}
-
#define PREALLOC_DMA_DEBUG_ENTRIES 4096
static int __init dma_debug_do_init(void)
.unmap_sg = arm_iommu_unmap_sg,
.sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu,
.sync_sg_for_device = arm_iommu_sync_sg_for_device,
-
- .set_dma_mask = arm_dma_set_mask,
};
struct dma_map_ops iommu_coherent_ops = {
.map_sg = arm_coherent_iommu_map_sg,
.unmap_sg = arm_coherent_iommu_unmap_sg,
-
- .set_dma_mask = arm_dma_set_mask,
};
/**
* page tables.
*/
pgd_t *idmap_pgd;
-unsigned long (*arch_virt_to_idmap)(unsigned long x);
+long long arch_phys_to_idmap_offset;
#ifdef CONFIG_ARM_LPAE
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
}
/*
- * Don't allow RAM to be mapped - this causes problems with ARMv6+
+ * Don't allow RAM to be mapped with mismatched attributes - this
+ * causes problems with ARMv6+
*/
- if (WARN_ON(pfn_valid(pfn)))
+ if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW))
return NULL;
area = get_vm_area_caller(size, VM_IOREMAP, caller);
EXPORT_SYMBOL(ioremap);
void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+ __alias(ioremap_cached);
+
+void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
{
return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
+EXPORT_SYMBOL(ioremap_cached);
void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
{
__builtin_return_address(0));
}
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+{
+ return (__force void *)arch_ioremap_caller(phys_addr, size,
+ MT_MEMORY_RW,
+ __builtin_return_address(0));
+}
+
void __iounmap(volatile void __iomem *io_addr)
{
void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
EXPORT_SYMBOL(ioremap);
void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+ __alias(ioremap_cached);
+
+void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
{
return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
+EXPORT_SYMBOL(ioremap_cached);
void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
{
}
EXPORT_SYMBOL(ioremap_wc);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+{
+ return (void *)phys_addr;
+}
+
void __iounmap(volatile void __iomem *addr)
{
}
# Copyright (C) 2001 Russell King
#
-include/generated/mach-types.h: $(src)/gen-mach-types $(src)/mach-types
- @$(kecho) ' Generating $@'
- @mkdir -p $(dir $@)
- $(Q)$(AWK) -f $^ > $@ || { rm -f $@; /bin/false; }
+quiet_cmd_gen_mach = GEN $@
+ cmd_gen_mach = mkdir -p $(dir $@) && \
+ $(AWK) -f $(filter-out $(PHONY),$^) > $@ || \
+ { rm -f $@; /bin/false; }
+
+include/generated/mach-types.h: $(src)/gen-mach-types $(src)/mach-types FORCE
+ $(call if_changed,gen_mach)
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
-static inline int has_transparent_hugepage(void)
-{
- return 1;
-}
-
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
+ hugetlb_bad_size();
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
return 0;
}
if (ps == (1 << HPAGE_SHIFT)) {
hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
} else {
+ hugetlb_bad_size();
pr_err("hugepagesz: Unsupported page size %lu M\n",
ps >> 20);
return 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define has_transparent_hugepage has_transparent_hugepage
extern int has_transparent_hugepage(void);
static inline int pmd_trans_huge(pmd_t pmd)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int __init has_transparent_hugepage(void)
+int has_transparent_hugepage(void)
{
- unsigned int mask;
- unsigned long flags;
-
- local_irq_save(flags);
- write_c0_pagemask(PM_HUGE_MASK);
- back_to_back_c0_hazard();
- mask = read_c0_pagemask();
- write_c0_pagemask(PM_DEFAULT_MASK);
+ static unsigned int mask = -1;
- local_irq_restore(flags);
+ if (mask == -1) { /* first call comes during __init */
+ unsigned long flags;
+ local_irq_save(flags);
+ write_c0_pagemask(PM_HUGE_MASK);
+ back_to_back_c0_hazard();
+ mask = read_c0_pagemask();
+ write_c0_pagemask(PM_DEFAULT_MASK);
+ local_irq_restore(flags);
+ }
return mask == PM_HUGE_MASK;
}
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
- range 9 64 if PPC64 && PPC_64K_PAGES
+ range 8 9 if PPC64 && PPC_64K_PAGES
default "9" if PPC64 && PPC_64K_PAGES
- range 13 64 if PPC64 && !PPC_64K_PAGES
+ range 9 13 if PPC64 && !PPC_64K_PAGES
default "13" if PPC64 && !PPC_64K_PAGES
range 9 64 if PPC32 && PPC_16K_PAGES
default "9" if PPC32 && PPC_16K_PAGES
config FSL_LBC
bool "Freescale Local Bus support"
- depends on FSL_SOC
help
Enables reporting of errors from the Freescale local bus
controller. Also contains some common code used by
depends on !PPC_DISABLE_WERROR
default y
-config STRICT_MM_TYPECHECKS
- bool "Do extra type checking on mm types"
- default n
- help
- This option turns on extra type checking for some mm related types.
-
- If you don't know what this means, say N.
-
config PRINT_STACK_DEPTH
int "Stack depth to print" if DEBUG_KERNEL
default 64
$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
-$(obj)/cuImage.%: vmlinux $(obj)/fsl/%.dtb $(wrapperbits)
- $(call if_changed,wrap,cuboot-$*,,$(obj)/fsl/$*.dtb)
-
$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
$(obj)/%.dtb: $(src)/dts/%.dts FORCE
$(call if_changed_dep,dtc)
+$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
+ $(call if_changed_dep,dtc)
+
# If there isn't a platform selected then just strip the vmlinux.
ifeq (,$(image-y))
image-y := vmlinux.strip
0x0 0x00400000>;
};
};
+
+ pci1: pcie@fef09000 {
+ status = "disabled";
+ };
};
/include/ "mpc8641si-post.dtsi"
model = "GEF_SBC310";
compatible = "gef,sbc310";
- aliases {
- pci1 = &pci1;
- };
-
memory {
device_type = "memory";
reg = <0x0 0x40000000>; // set by uboot
};
pci1: pcie@fef09000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #size-cells = <2>;
- #address-cells = <3>;
reg = <0xfef09000 0x1000>;
- bus-range = <0x0 0xff>;
ranges = <0x02000000 0x0 0xc0000000 0xc0000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xfe400000 0x0 0x00400000>;
- clock-frequency = <100000000>;
- interrupts = <0x19 0x2 0 0>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x4 0x2
- 0x0000 0x0 0x0 0x2 &mpic 0x5 0x2
- 0x0000 0x0 0x0 0x3 &mpic 0x6 0x2
- 0x0000 0x0 0x0 0x4 &mpic 0x7 0x2
- >;
pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
ranges = <0x02000000 0x0 0xc0000000
0x02000000 0x0 0xc0000000
0x0 0x20000000
0x0 0x00400000>;
};
};
+
+ pci1: pcie@fef09000 {
+ status = "disabled";
+ };
};
/include/ "mpc8641si-post.dtsi"
model = "MPC8641HPCN";
compatible = "fsl,mpc8641hpcn";
- aliases {
- pci1 = &pci1;
- };
-
memory {
device_type = "memory";
reg = <0x00000000 0x40000000>; // 1G at 0x0
};
pci1: pcie@ffe09000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #size-cells = <2>;
- #address-cells = <3>;
reg = <0xffe09000 0x1000>;
- bus-range = <0 0xff>;
ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xffc10000 0x0 0x00010000>;
- clock-frequency = <100000000>;
- interrupts = <25 2 0 0>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
+
pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
ranges = <0x02000000 0x0 0xa0000000
0x02000000 0x0 0xa0000000
0x0 0x20000000
#address-cells = <2>;
#size-cells = <2>;
- aliases {
- pci1 = &pci1;
- };
-
memory {
device_type = "memory";
reg = <0x0 0x00000000 0x0 0x40000000>; // 1G at 0x0
};
pci1: pcie@fffe09000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #size-cells = <2>;
- #address-cells = <3>;
reg = <0x0f 0xffe09000 0x0 0x1000>;
- bus-range = <0x0 0xff>;
ranges = <0x02000000 0x0 0xe0000000 0x0c 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0x0f 0xffc10000 0x0 0x00010000>;
- clock-frequency = <100000000>;
- interrupts = <25 2 0 0>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
+
pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
ranges = <0x02000000 0x0 0xe0000000
0x02000000 0x0 0xe0000000
0x0 0x20000000
bus-range = <0x0 0xff>;
clock-frequency = <100000000>;
interrupts = <24 2 0 0>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1
- >;
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <24 2 0 0>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+ >;
+ };
+};
+
+&pci1 {
+ compatible = "fsl,mpc8641-pcie";
+ device_type = "pci";
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0x0 0xff>;
+ clock-frequency = <100000000>;
+ interrupts = <25 2 0 0>;
pcie@0 {
reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
#size-cells = <2>;
#address-cells = <3>;
device_type = "pci";
+ interrupts = <25 2 0 0>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
};
};
serial0 = &serial0;
serial1 = &serial1;
pci0 = &pci0;
+ pci1 = &pci1;
};
cpus {
model = "SBC8641D";
compatible = "wind,sbc8641";
- aliases {
- pci1 = &pci1;
- };
-
memory {
device_type = "memory";
reg = <0x00000000 0x20000000>; // 512M at 0x0
};
pci1: pcie@f8009000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #size-cells = <2>;
- #address-cells = <3>;
reg = <0xf8009000 0x1000>;
- bus-range = <0 0xff>;
ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
- clock-frequency = <100000000>;
- interrupts = <25 2 0 0>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
ranges = <0x02000000 0x0 0xa0000000
0x02000000 0x0 0xa0000000
0x0 0x20000000
};
rcpm: global-utilities@e2000 {
- compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.0";
+ compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.1";
reg = <0xe2000 0x1000>;
};
};
rcpm: global-utilities@e2000 {
- compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.0";
+ compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.1";
reg = <0xe2000 0x1000>;
};
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q512a", "jedec,spi-nor";
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <10000000>; /* input clock */
};
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q512a", "jedec,spi-nor";
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <10000000>; /* input clock */
};
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES 1
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */
-#ifndef _ASM_POWERPC_MMU_HASH32_H_
-#define _ASM_POWERPC_MMU_HASH32_H_
+#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
/*
* 32-bit hash table MMU support
*/
#define mmu_virtual_psize MMU_PAGE_4K
#define mmu_linear_psize MMU_PAGE_256M
-#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+
+#include <linux/threads.h>
+
+/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
+#define MAX_PGTABLE_INDEX_SIZE 0
+
+extern void __bad_pte(pmd_t *pmd);
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) do { } while (0)
+#define __pmd_free_tlb(tlb,x,a) do { } while (0)
+/* #define pgd_populate(mm, pmd, pte) BUG() */
+
+#ifndef CONFIG_BOOKE
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#else
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ *pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#endif
+
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pgtable_page_dtor(ptepage);
+ __free_page(ptepage);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+ BUG_ON(index_size); /* 32-bit doesn't use this */
+ free_page((unsigned long)table);
+}
+
+#define check_pgt_cache() do { } while (0)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+ void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+ void *table, int shift)
+{
+ pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_page_dtor(table);
+ pgtable_free_tlb(tlb, page_address(table), 0);
+}
+#endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
* for each page table entry. The PMD and PGD level use a 32b record for
* each entry by assuming that each entry is page aligned.
*/
-#define PTE_INDEX_SIZE 9
-#define PMD_INDEX_SIZE 7
-#define PUD_INDEX_SIZE 9
-#define PGD_INDEX_SIZE 9
+#define H_PTE_INDEX_SIZE 9
+#define H_PMD_INDEX_SIZE 7
+#define H_PUD_INDEX_SIZE 9
+#define H_PGD_INDEX_SIZE 9
#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif /* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
-#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
+#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
+#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
+#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
/* With 4k base page size, hugepage PTEs go at the PMD level */
#define MIN_HUGEPTE_SHIFT PMD_SHIFT
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS 0
-/* Bits to mask out from a PUD to get to the PMD page */
-#define PUD_MASKED_BITS 0
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS 0
-
/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
- _PAGE_F_SECOND | _PAGE_F_GIX)
-
-/* shift to put page number into pte */
-#define PTE_RPN_SHIFT (12)
-#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */
-
-#define _PAGE_4K_PFN 0
-#ifndef __ASSEMBLY__
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
+ H_PAGE_F_SECOND | H_PAGE_F_GIX)
+/*
+ * Not supported by 4k linux page size
+ */
+#define H_PAGE_4K_PFN 0x0
+#define H_PAGE_THP_HUGE 0x0
+#define H_PAGE_COMBO 0x0
+#define H_PTE_FRAG_NR 0
+#define H_PTE_FRAG_SIZE_SHIFT 0
/*
* On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
*/
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
#ifdef CONFIG_HUGETLB_PAGE
-/*
- * For 4k page size, we support explicit hugepage via hugepd
- */
-static inline int pmd_huge(pmd_t pmd)
+static inline int hash__hugepd_ok(hugepd_t hpd)
+{
+ /*
+ * if it is not a pte and have hugepd shift mask
+ * set, then it is a hugepd directory pointer
+ */
+ if (!(hpd.pd & _PAGE_PTE) &&
+ ((hpd.pd & HUGEPD_SHIFT_MASK) != 0))
+ return true;
+ return false;
+}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+ BUG();
+ return NULL;
+}
+
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
+ BUG();
return 0;
}
-static inline int pud_huge(pud_t pud)
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+ int index)
{
+ BUG();
return 0;
}
-static inline int pgd_huge(pgd_t pgd)
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+ unsigned int index, unsigned int hidx)
+{
+ BUG();
+}
+
+static inline int hash__pmd_trans_huge(pmd_t pmd)
{
return 0;
}
-#define pgd_huge pgd_huge
-static inline int hugepd_ok(hugepd_t hpd)
+static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
- /*
- * if it is not a pte and have hugepd shift mask
- * set, then it is a hugepd directory pointer
- */
- if (!(hpd.pd & _PAGE_PTE) &&
- ((hpd.pd & HUGEPD_SHIFT_MASK) != 0))
- return true;
- return false;
+ BUG();
+ return 0;
}
-#define is_hugepd(hpd) (hugepd_ok(hpd))
+
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
+{
+ BUG();
+ return pmd;
+}
+
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
#endif
#endif /* !__ASSEMBLY__ */
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#define PTE_INDEX_SIZE 8
-#define PMD_INDEX_SIZE 5
-#define PUD_INDEX_SIZE 5
-#define PGD_INDEX_SIZE 12
-
-#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
-#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+#define H_PTE_INDEX_SIZE 8
+#define H_PMD_INDEX_SIZE 5
+#define H_PUD_INDEX_SIZE 5
+#define H_PGD_INDEX_SIZE 12
/* With 4k base page size, hugepage PTEs go at the PMD level */
#define MIN_HUGEPTE_SHIFT PAGE_SHIFT
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
+#define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */
+#define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
/*
- * Used to track subpage group valid if _PAGE_COMBO is set
- * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
*/
-#define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND)
+#define H_PAGE_THP_HUGE H_PAGE_4K_PFN
-/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
- _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
-
-/* Shift to put page number into pte.
- *
- * That gives us a max RPN of 41 bits, which means a max of 57 bits
- * of addressable physical space, or 53 bits for the special 4k PFNs.
+/*
+ * Used to track subpage group valid if H_PAGE_COMBO is set
+ * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND
*/
-#define PTE_RPN_SHIFT (16)
-#define PTE_RPN_SIZE (41)
+#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND)
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \
+ H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
/*
* we support 16 fragments per PTE page of 64K size.
*/
-#define PTE_FRAG_NR 16
+#define H_PTE_FRAG_NR 16
/*
* We use a 2K PTE page fragment and another 2K for storing
* real_pte_t hash index
*/
-#define PTE_FRAG_SIZE_SHIFT 12
+#define H_PTE_FRAG_SIZE_SHIFT 12
#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
-/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS 0xc0000000000000ffUL
-/* Bits to mask out from a PUD to get to the PMD page */
-#define PUD_MASKED_BITS 0xc0000000000000ffUL
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS 0xc0000000000000ffUL
-
#ifndef __ASSEMBLY__
+#include <asm/errno.h>
/*
* With 64K pages on hash table, we have a special PTE format that
rpte.pte = pte;
rpte.hidx = 0;
- if (pte_val(pte) & _PAGE_COMBO) {
+ if (pte_val(pte) & H_PAGE_COMBO) {
/*
- * Make sure we order the hidx load against the _PAGE_COMBO
+ * Make sure we order the hidx load against the H_PAGE_COMBO
* check. The store side ordering is done in __hash_page_4K
*/
smp_rmb();
static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
{
- if ((pte_val(rpte.pte) & _PAGE_COMBO))
+ if ((pte_val(rpte.pte) & H_PAGE_COMBO))
return (rpte.hidx >> (index<<2)) & 0xf;
- return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf;
+ return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf;
}
#define __rpte_to_pte(r) ((r).pte)
#define pte_iterate_hashed_end() } while(0); } } while(0)
#define pte_pagesize_index(mm, addr, pte) \
- (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
-
-#define remap_4k_pfn(vma, addr, pfn, prot) \
- (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \
- remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \
- __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
-
-#define PTE_TABLE_SIZE PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE))
-#else
-#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
-#endif
-#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-
-#ifdef CONFIG_HUGETLB_PAGE
-/*
- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
- *
- * Defined in such a way that we can optimize away code block at build time
- * if CONFIG_HUGETLB_PAGE=n.
- */
-static inline int pmd_huge(pmd_t pmd)
-{
- /*
- * leaf pte for huge page
- */
- return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
-static inline int pud_huge(pud_t pud)
-{
- /*
- * leaf pte for huge page
- */
- return !!(pud_val(pud) & _PAGE_PTE);
-}
+ (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
-static inline int pgd_huge(pgd_t pgd)
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t);
+static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, pgprot_t prot)
{
- /*
- * leaf pte for huge page
- */
- return !!(pgd_val(pgd) & _PAGE_PTE);
+ if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) {
+ WARN(1, "remap_4k_pfn called with wrong pfn value\n");
+ return -EINVAL;
+ }
+ return remap_pfn_range(vma, addr, pfn, PAGE_SIZE,
+ __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN));
}
-#define pgd_huge pgd_huge
-#ifdef CONFIG_DEBUG_VM
-extern int hugepd_ok(hugepd_t hpd);
-#define is_hugepd(hpd) (hugepd_ok(hpd))
+#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
-/*
- * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
- * need to setup hugepage directory for them. Our pte and page directory format
- * enable us to have this enabled.
- */
-static inline int hugepd_ok(hugepd_t hpd)
-{
- return 0;
-}
-#define is_hugepd(pdep) 0
-#endif /* CONFIG_DEBUG_VM */
-
-#endif /* CONFIG_HUGETLB_PAGE */
+#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#endif
+#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
- unsigned long addr,
- pmd_t *pmdp,
- unsigned long clr,
- unsigned long set);
static inline char *get_hpte_slot_array(pmd_t *pmdp)
{
/*
* that for explicit huge pages.
*
*/
-static inline int pmd_trans_huge(pmd_t pmd)
+static inline int hash__pmd_trans_huge(pmd_t pmd)
{
- return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
- (_PAGE_PTE | _PAGE_THP_HUGE));
+ return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
+ (_PAGE_PTE | H_PAGE_THP_HUGE));
}
-static inline int pmd_large(pmd_t pmd)
+static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
- return !!(pmd_val(pmd) & _PAGE_PTE);
+ return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
}
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
{
- return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
- return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
-{
- unsigned long old;
-
- if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
- return 0;
- old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
- return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
-
- if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
- return;
-
- pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+ return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
}
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* __ASSEMBLY__ */
* We could create separate kernel read-only if we used the 3 PP bits
* combinations that newer processors provide but we currently don't.
*/
-#define _PAGE_BIT_SWAP_TYPE 0
-
-#define _PAGE_EXEC 0x00001 /* execute permission */
-#define _PAGE_RW 0x00002 /* read & write access allowed */
-#define _PAGE_READ 0x00004 /* read access allowed */
-#define _PAGE_USER 0x00008 /* page may be accessed by userspace */
-#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */
-/* M (memory coherence) is always set in the HPTE, so we don't need it here */
-#define _PAGE_COHERENT 0x0
-#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */
-#define _PAGE_DIRTY 0x00080 /* C: page changed */
-#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
-#define _PAGE_SPECIAL 0x00400 /* software: special page */
-#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
-
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */
-#else
-#define _PAGE_SOFT_DIRTY 0x000
-#endif
-
-#define _PAGE_F_GIX_SHIFT 57
-#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */
-#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
-#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */
-#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
-#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
-
+#define H_PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
+#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS
+#define H_PAGE_F_GIX_SHIFT 57
+#define H_PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */
+#define H_PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
/*
* Size of EA range mapped by our pagetables.
*/
-#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
- PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
-#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
+ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
+/*
+ * only with hash we need to use the second half of pmd page table
+ * to store pointer to deposited pgtable_t
+ */
+#define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1)
#else
-#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
* Define the address range of the kernel non-linear virtual area
*/
-#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
+#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
/*
* The vmalloc space starts at the beginning of that region, and
* occupies half of it on hash CPUs and a quarter of it on Book3E
* (we keep a quarter for the virtual memmap)
*/
-#define VMALLOC_START KERN_VIRT_START
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1)
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
/*
* Region IDs
#define REGION_MASK (0xfUL << REGION_SHIFT)
#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
+#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
#define USER_REGION_ID (0UL)
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
+#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
-/* No separate kernel read-only */
-#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
-#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
-
-/* Strong Access Ordering */
-#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
-
-/* No page size encoding in the linux PTE */
-#define _PAGE_PSIZE 0
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
#define _PTEIDX_GROUP_IX 0x7
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES 1
-#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
-/*
- * The mask convered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
-/*
- * _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
- _PAGE_WRITETHRU | _PAGE_4K_PFN | \
- _PAGE_USER | _PAGE_ACCESSED | \
- _PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY)
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE __pgprot(_PAGE_BASE)
-#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
- _PAGE_EXEC)
-#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
- defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
-#define PAGE_AGP (PAGE_KERNEL_NC)
-
-#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
-#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
+#define H_PMD_BAD_BITS (PTE_TABLE_SIZE-1)
+#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
-#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS)
-#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS)
-
-#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS)
-#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
-
-/* Pointers in the page table tree are physical addresses */
-#define __pgtable_ptr_val(ptr) __pa(ptr)
-
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
-#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
-#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
+#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
+static inline int hash__pgd_bad(pgd_t pgd)
+{
+ return (pgd_val(pgd) == 0);
+}
extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge);
extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
/* Atomic PTE updates */
-static inline unsigned long pte_update(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, unsigned long clr,
- unsigned long set,
- int huge)
+static inline unsigned long hash__pte_update(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set,
+ int huge)
{
- unsigned long old, tmp;
+ __be64 old_be, tmp_be;
+ unsigned long old;
__asm__ __volatile__(
"1: ldarx %0,0,%3 # pte_update\n\
- andi. %1,%0,%6\n\
+ and. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%7\n\
stdcx. %1,0,%3 \n\
bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*ptep)
- : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+ : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+ : "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
+ "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
: "cc" );
/* huge pages use the old page table lock */
if (!huge)
assert_pte_locked(mm, addr);
- if (old & _PAGE_HASHPTE)
+ old = be64_to_cpu(old_be);
+ if (old & H_PAGE_HASHPTE)
hpte_need_flush(mm, addr, ptep, old, huge);
return old;
}
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- unsigned long old;
-
- if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
- return 0;
- old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
- return (old & _PAGE_ACCESSED) != 0;
-}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
-({ \
- int __r; \
- __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
- __r; \
-})
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
-
- if ((pte_val(*ptep) & _PAGE_RW) == 0)
- return;
-
- pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- if ((pte_val(*ptep) & _PAGE_RW) == 0)
- return;
-
- pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep) \
-({ \
- int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
- __ptep); \
- __young; \
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
- return __pte(old);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t * ptep)
-{
- pte_update(mm, addr, ptep, ~0UL, 0, 0);
-}
-
-
/* Set the dirty and/or accessed bits atomically in a linux PTE, this
* function doesn't need to flush the hash entry
*/
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void hash__ptep_set_access_flags(pte_t *ptep, pte_t entry)
{
- unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
- _PAGE_SOFT_DIRTY);
+ __be64 old, tmp, val, mask;
- unsigned long old, tmp;
+ mask = cpu_to_be64(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE |
+ _PAGE_EXEC | _PAGE_SOFT_DIRTY);
+
+ val = pte_raw(entry) & mask;
__asm__ __volatile__(
"1: ldarx %0,0,%4\n\
- andi. %1,%0,%6\n\
+ and. %1,%0,%6\n\
bne- 1b \n\
or %0,%3,%0\n\
stdcx. %0,0,%4\n\
bne- 1b"
:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
- :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+ :"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY))
:"cc");
}
-static inline int pgd_bad(pgd_t pgd)
-{
- return (pgd_val(pgd) == 0);
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{
- return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
-}
-
-
-/* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
-static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
-static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
-static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
-static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-static inline bool pte_soft_dirty(pte_t pte)
-{
- return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
-}
-static inline pte_t pte_mksoft_dirty(pte_t pte)
-{
- return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_clear_soft_dirty(pte_t pte)
-{
- return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
-}
-#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
-static inline int pte_protnone(pte_t pte)
-{
- return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-static inline int pte_present(pte_t pte)
-{
- return !!(pte_val(pte) & _PAGE_PRESENT);
-}
-
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
-{
- return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
- pgprot_val(pgprot));
-}
-
-static inline unsigned long pte_pfn(pte_t pte)
-{
- return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
-}
-
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
-{
- return __pte(pte_val(pte) & ~_PAGE_RW);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)
-{
- return __pte(pte_val(pte) & ~_PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkold(pte_t pte)
-{
- return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkwrite(pte_t pte)
-{
- return __pte(pte_val(pte) | _PAGE_RW);
-}
-
-static inline pte_t pte_mkdirty(pte_t pte)
+static inline int hash__pte_same(pte_t pte_a, pte_t pte_b)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ return (((pte_raw(pte_a) ^ pte_raw(pte_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
}
-static inline pte_t pte_mkyoung(pte_t pte)
+static inline int hash__pte_none(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkspecial(pte_t pte)
-{
- return __pte(pte_val(pte) | _PAGE_SPECIAL);
-}
-
-static inline pte_t pte_mkhuge(pte_t pte)
-{
- return pte;
-}
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+ return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
}
/* This low level function performs the actual PTE insertion
* an horrible mess that I'm not going to try to clean up now but
* I'm keeping it in one place rather than spread around
*/
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, int percpu)
+static inline void hash__set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
{
/*
* Anything else just stores the PTE normally. That covers all 64-bit
*ptep = pte;
}
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
- _PAGE_WRITETHRU)
-
-#define pgprot_noncached pgprot_noncached
-static inline pgprot_t pgprot_noncached(pgprot_t prot)
-{
- return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
- _PAGE_NO_CACHE | _PAGE_GUARDED);
-}
-
-#define pgprot_noncached_wc pgprot_noncached_wc
-static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
-{
- return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
- _PAGE_NO_CACHE);
-}
-
-#define pgprot_cached pgprot_cached
-static inline pgprot_t pgprot_cached(pgprot_t prot)
-{
- return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
- _PAGE_COHERENT);
-}
-
-#define pgprot_cached_wthru pgprot_cached_wthru
-static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
-{
- return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
- _PAGE_COHERENT | _PAGE_WRITETHRU);
-}
-
-#define pgprot_cached_noncoherent pgprot_cached_noncoherent
-static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
-{
- return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
-}
-
-#define pgprot_writecombine pgprot_writecombine
-static inline pgprot_t pgprot_writecombine(pgprot_t prot)
-{
- return pgprot_noncached_wc(prot);
-}
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long old_pmd);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+extern int hash__map_kernel_page(unsigned long ea, unsigned long pa,
+ unsigned long flags);
+extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+extern void hash__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
+/*
+ * For radix we want generic code to handle hugetlb. But then if we want
+ * both hash and radix to be enabled together we need to workaround the
+ * limitations.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern unsigned long
+radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+#endif
-#ifndef _ASM_POWERPC_MMU_HASH64_H_
-#define _ASM_POWERPC_MMU_HASH64_H_
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
/*
* PowerPC64 memory management structures
*
#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002)
#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
+/*
+ * ISA 3.0 have a different HPTE format.
+ */
+#define HPTE_R_3_0_SSIZE_SHIFT 58
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */
#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */
+#define POWER9_TLB_SETS_RADIX 128 /* # sets in POWER9 TLB Radix mode */
#ifndef __ASSEMBLY__
extern unsigned long htab_size_bytes;
extern unsigned long htab_hash_mask;
-/*
- * Page size definition
- *
- * shift : is the "PAGE_SHIFT" value for that page size
- * sllp : is a bit mask with the value of SLB L || LP to be or'ed
- * directly to a slbmte "vsid" value
- * penc : is the HPTE encoding mask for the "LP" field:
- *
- */
-struct mmu_psize_def
-{
- unsigned int shift; /* number of bits */
- int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
- unsigned int tlbiel; /* tlbiel supported for that page size */
- unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
- unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
-};
-extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
static inline int shift_to_mmu_psize(unsigned int shift)
{
/*
* The current system page and segment sizes
*/
-extern int mmu_linear_psize;
-extern int mmu_virtual_psize;
-extern int mmu_vmalloc_psize;
-extern int mmu_vmemmap_psize;
-extern int mmu_io_psize;
extern int mmu_kernel_ssize;
extern int mmu_highuser_ssize;
extern u16 mmu_slb_size;
*/
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT;
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
}
* aligned for the requested page size
*/
static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
- int actual_psize)
+ int actual_psize, int ssize)
{
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
+
/* A 4K page needs no special encoding */
if (actual_psize == MMU_PAGE_4K)
return pa & HPTE_R_RPN;
add rt,rt,rx
/* 4 bits per slice and we have one slice per 1TB */
-#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41)
+#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
#ifndef __ASSEMBLY__
static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
-typedef unsigned long mm_context_id_t;
-struct spinlock;
-
-typedef struct {
- mm_context_id_t id;
- u16 user_psize; /* page size index */
-
-#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
- unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
- struct spinlock *cop_lockp; /* guard acop and cop_pid */
- unsigned long acop; /* mask of enabled coprocessor types */
- unsigned int cop_pid; /* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
-#ifdef CONFIG_PPC_64K_PAGES
- /* for 4K PTE fragment support */
- void *pte_frag;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
- struct list_head iommu_group_mem_list;
-#endif
-} mm_context_t;
-
-
#if 0
/*
* The code below is equivalent to this function for arguments
/*
* Bad address. We return VSID 0 for that
*/
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
+ if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
return 0;
if (ssize == MMU_SEGSIZE_256M)
#endif /* __ASSEMBLY__ */
-#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_H_
+
+#ifndef __ASSEMBLY__
+/*
+ * Page size definition
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ * sllp : is a bit mask with the value of SLB L || LP to be or'ed
+ * directly to a slbmte "vsid" value
+ * penc : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def {
+ unsigned int shift; /* number of bits */
+ int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
+ unsigned int tlbiel; /* tlbiel supported for that page size */
+ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ union {
+ unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
+ unsigned long ap; /* Ap encoding used by PowerISA 3.0 */
+ };
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX)
+
+#endif /* __ASSEMBLY__ */
+
+/* 64-bit classic hash table MMU */
+#include <asm/book3s/64/mmu-hash.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * ISA 3.0 partiton and process table entry format
+ */
+struct prtb_entry {
+ __be64 prtb0;
+ __be64 prtb1;
+};
+extern struct prtb_entry *process_tb;
+
+struct patb_entry {
+ __be64 patb0;
+ __be64 patb1;
+};
+extern struct patb_entry *partition_tb;
+
+#define PATB_HR (1UL << 63)
+#define PATB_GR (1UL << 63)
+#define RPDB_MASK 0x0ffffffffffff00fUL
+#define RPDB_SHIFT (1UL << 8)
+/*
+ * Limit process table to PAGE_SIZE table. This
+ * also limit the max pid we can support.
+ * MAX_USER_CONTEXT * 16 bytes of space.
+ */
+#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
+/*
+ * Power9 currently only support 64K partition table size.
+ */
+#define PATB_SIZE_SHIFT 16
+
+typedef unsigned long mm_context_id_t;
+struct spinlock;
+
+typedef struct {
+ mm_context_id_t id;
+ u16 user_psize; /* page size index */
+
+#ifdef CONFIG_PPC_MM_SLICES
+ u64 low_slices_psize; /* SLB page size encodings */
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+ u16 sllp; /* SLB page size encoding */
+#endif
+ unsigned long vdso_base;
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+ struct spinlock *cop_lockp; /* guard acop and cop_pid */
+ unsigned long acop; /* mask of enabled coprocessor types */
+ unsigned int cop_pid; /* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+ /* for 4K PTE fragment support */
+ void *pte_frag;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ struct list_head iommu_group_mem_list;
+#endif
+} mm_context_t;
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_linear_psize;
+extern int mmu_virtual_psize;
+extern int mmu_vmalloc_psize;
+extern int mmu_vmemmap_psize;
+extern int mmu_io_psize;
+
+/* MMU initialization */
+extern void radix_init_native(void);
+extern void hash__early_init_mmu(void);
+extern void radix__early_init_mmu(void);
+static inline void early_init_mmu(void)
+{
+ if (radix_enabled())
+ return radix__early_init_mmu();
+ return hash__early_init_mmu();
+}
+extern void hash__early_init_mmu_secondary(void);
+extern void radix__early_init_mmu_secondary(void);
+static inline void early_init_mmu_secondary(void)
+{
+ if (radix_enabled())
+ return radix__early_init_mmu_secondary();
+ return hash__early_init_mmu_secondary();
+}
+
+extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size);
+extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size);
+static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ if (radix_enabled())
+ return radix__setup_initial_memory_limit(first_memblock_base,
+ first_memblock_size);
+ return hash__setup_initial_memory_limit(first_memblock_base,
+ first_memblock_size);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+ struct vmemmap_backing *list;
+ unsigned long phys;
+ unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({ \
+ BUG_ON(!(shift)); \
+ pgtable_cache[(shift) - 1]; \
+ })
+
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+
+extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern void pte_fragment_free(unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+
+static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ return (pgd_t *)__get_free_page(PGALLOC_GFP);
+#else
+ struct page *page;
+ page = alloc_pages(PGALLOC_GFP, 4);
+ if (!page)
+ return NULL;
+ return (pgd_t *) page_address(page);
+#endif
+}
+
+static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ free_page((unsigned long)pgd);
+#else
+ free_pages((unsigned long)pgd, 4);
+#endif
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__pgd_alloc(mm);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (radix_enabled())
+ return radix__pgd_free(mm, pgd);
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS);
+}
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+{
+ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long address)
+{
+ return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t pte_page)
+{
+ pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
+}
+
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
+#ifdef CONFIG_PPC_4K_PAGES
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = pte_alloc_one_kernel(mm, address);
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ return pte;
+}
+#else /* if CONFIG_PPC_64K_PAGES */
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pte_t *)pte_fragment_alloc(mm, address, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+}
+#endif
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
+
+#define check_pgt_cache() do { } while (0)
+
+#endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+/*
+ * hash 4k can't share hugetlb and also doesn't support THP
+ */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int pmd_huge(pmd_t pmd)
+{
+ /*
+ * leaf pte for huge page
+ */
+ if (radix_enabled())
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+ return 0;
+}
+
+static inline int pud_huge(pud_t pud)
+{
+ /*
+ * leaf pte for huge page
+ */
+ if (radix_enabled())
+ return !!(pud_val(pud) & _PAGE_PTE);
+ return 0;
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+ /*
+ * leaf pte for huge page
+ */
+ if (radix_enabled())
+ return !!(pgd_val(pgd) & _PAGE_PTE);
+ return 0;
+}
+#define pgd_huge pgd_huge
+/*
+ * With radix , we have hugepage ptes in the pud and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+ if (radix_enabled())
+ return 0;
+ return hash__hugepd_ok(hpd);
+}
+#define is_hugepd(hpd) (hugepd_ok(hpd))
+#endif /* CONFIG_HUGETLB_PAGE */
+#endif /* __ASSEMBLY__ */
+
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
+ */
+static inline int pmd_huge(pmd_t pmd)
+{
+ /*
+ * leaf pte for huge page
+ */
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline int pud_huge(pud_t pud)
+{
+ /*
+ * leaf pte for huge page
+ */
+ return !!(pud_val(pud) & _PAGE_PTE);
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+ /*
+ * leaf pte for huge page
+ */
+ return !!(pgd_val(pgd) & _PAGE_PTE);
+}
+#define pgd_huge pgd_huge
+
+#ifdef CONFIG_DEBUG_VM
+extern int hugepd_ok(hugepd_t hpd);
+#define is_hugepd(hpd) (hugepd_ok(hpd))
+#else
+/*
+ * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+ return 0;
+}
+#define is_hugepd(pdep) 0
+#endif /* CONFIG_DEBUG_VM */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, pgprot_t prot)
+{
+ if (radix_enabled())
+ BUG();
+ return hash__remap_4k_pfn(vma, addr, pfn, prot);
+}
+#endif /* __ASSEMBLY__ */
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+
+/*
+ * Common bits between hash and Radix page table
+ */
+#define _PAGE_BIT_SWAP_TYPE 0
+
+#define _PAGE_EXEC 0x00001 /* execute permission */
+#define _PAGE_WRITE 0x00002 /* write access allowed */
+#define _PAGE_READ 0x00004 /* read access allowed */
+#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
+#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */
+#define _PAGE_SAO 0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */
+#define _PAGE_DIRTY 0x00080 /* C: page changed */
+#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
/*
- * This file contains the functions and defines necessary to modify and use
- * the ppc64 hashed page table.
+ * Software bits
*/
+#define _RPAGE_SW0 0x2000000000000000UL
+#define _RPAGE_SW1 0x00800
+#define _RPAGE_SW2 0x00400
+#define _RPAGE_SW3 0x00200
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
+#else
+#define _PAGE_SOFT_DIRTY 0x00000
+#endif
+#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+
+
+#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
+/*
+ * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
+ * Instead of fixing all of them, add an alternate define which
+ * maps CI pte mapping.
+ */
+#define _PAGE_NO_CACHE _PAGE_TOLERANT
+/*
+ * We support 57 bit real address in pte. Clear everything above 57, and
+ * every thing below PAGE_SHIFT;
+ */
+#define PTE_RPN_MASK (((1UL << 57) - 1) & (PAGE_MASK))
+/*
+ * set of bits not changed in pmd_modify. Even though we have hash specific bits
+ * in here, on radix we expect them to be zero.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
+ _PAGE_SOFT_DIRTY)
+/*
+ * user access blocked by key
+ */
+#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ _PAGE_RW | _PAGE_EXEC)
+/*
+ * No page size encoding in the linux PTE
+ */
+#define _PAGE_PSIZE 0
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
+ _PAGE_SOFT_DIRTY)
+/*
+ * Mask of bits returned by pte_pgprot()
+ */
+#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
+ H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
+ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
+ _PAGE_SOFT_DIRTY)
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now (we could make write-only
+ * pages on BookE but we don't bother for now). Execute permission control is
+ * possible on platforms that define _PAGE_EXEC
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
+#define __P101 PAGE_READONLY_X
+#define __P110 PAGE_COPY_X
+#define __P111 PAGE_COPY_X
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
+#define __S101 PAGE_READONLY_X
+#define __S110 PAGE_SHARED_X
+#define __S111 PAGE_SHARED_X
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+ _PAGE_TOLERANT)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+ _PAGE_NON_IDEMPOTENT)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
+ defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
+#define PAGE_AGP (PAGE_KERNEL_NC)
+
+#ifndef __ASSEMBLY__
+/*
+ * page table defines
+ */
+extern unsigned long __pte_index_size;
+extern unsigned long __pmd_index_size;
+extern unsigned long __pud_index_size;
+extern unsigned long __pgd_index_size;
+extern unsigned long __pmd_cache_index;
+#define PTE_INDEX_SIZE __pte_index_size
+#define PMD_INDEX_SIZE __pmd_index_size
+#define PUD_INDEX_SIZE __pud_index_size
+#define PGD_INDEX_SIZE __pgd_index_size
+#define PMD_CACHE_INDEX __pmd_cache_index
+/*
+ * Because of use of pte fragments and THP, size of page table
+ * are not always derived out of index size above.
+ */
+extern unsigned long __pte_table_size;
+extern unsigned long __pmd_table_size;
+extern unsigned long __pud_table_size;
+extern unsigned long __pgd_table_size;
+#define PTE_TABLE_SIZE __pte_table_size
+#define PMD_TABLE_SIZE __pmd_table_size
+#define PUD_TABLE_SIZE __pud_table_size
+#define PGD_TABLE_SIZE __pgd_table_size
+
+extern unsigned long __pmd_val_bits;
+extern unsigned long __pud_val_bits;
+extern unsigned long __pgd_val_bits;
+#define PMD_VAL_BITS __pmd_val_bits
+#define PUD_VAL_BITS __pud_val_bits
+#define PGD_VAL_BITS __pgd_val_bits
+
+extern unsigned long __pte_frag_nr;
+#define PTE_FRAG_NR __pte_frag_nr
+extern unsigned long __pte_frag_size_shift;
+#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+/*
+ * Pgtable size used by swapper, init in asm code
+ */
+#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0xc0000000000000ffUL
+
+extern unsigned long __vmalloc_start;
+extern unsigned long __vmalloc_end;
+#define VMALLOC_START __vmalloc_start
+#define VMALLOC_END __vmalloc_end
+
+extern unsigned long __kernel_virt_start;
+extern unsigned long __kernel_virt_size;
+#define KERN_VIRT_START __kernel_virt_start
+#define KERN_VIRT_SIZE __kernel_virt_size
+extern struct page *vmemmap;
+extern unsigned long ioremap_bot;
+#endif /* __ASSEMBLY__ */
#include <asm/book3s/64/hash.h>
-#include <asm/barrier.h>
+#include <asm/book3s/64/radix.h>
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/pgtable-64k.h>
+#else
+#include <asm/book3s/64/pgtable-4k.h>
+#endif
+
+#include <asm/barrier.h>
/*
* The second half of the kernel virtual space is used for IO mappings,
* it's itself carved into the PIO region (ISA and PHB IO space) and
#define IOREMAP_BASE (PHB_IO_END)
#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
-#define vmemmap ((struct page *)VMEMMAP_BASE)
-
/* Advertise special mapping type for AGP */
#define HAVE_PAGE_AGP
#define __real_pte(e,p) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
-#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
+#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
do { \
#endif /* __real_pte */
+static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set, int huge)
+{
+ if (radix_enabled())
+ return radix__pte_update(mm, addr, ptep, clr, set, huge);
+ return hash__pte_update(mm, addr, ptep, clr, set, huge);
+}
+/*
+ * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
+ * function for both hash and radix.
+ */
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+
+ if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+ return (old & _PAGE_ACCESSED) != 0;
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+({ \
+ int __r; \
+ __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+ __r; \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+
+ if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
+ return;
+
+ pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ if ((pte_val(*ptep) & _PAGE_WRITE) == 0)
+ return;
+
+ pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+ return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t * ptep)
+{
+ pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_WRITE);}
+static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
+static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
+}
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
+ (_PAGE_PRESENT | _PAGE_PRIVILEGED);
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_PRESENT);
+}
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+ return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
+ pgprot_val(pgprot));
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ /*
+ * write implies read, hence set both
+ */
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ /* FIXME!! check whether this need to be a conditional */
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline bool pte_user(pte_t pte)
+{
+ return !(pte_val(pte) & _PAGE_PRIVILEGED);
+}
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+ /* \
+ * Don't have overlapping bits with _PAGE_HPTEFLAGS \
+ * We filter HPTEFLAGS on set_pte. \
+ */ \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
+ } while (0)
+/*
+ * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
+ & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ ((type) << _PAGE_BIT_SWAP_TYPE) \
+ | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+#else
+#define _PAGE_SWP_SOFT_DIRTY 0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+ /*
+ * This check for _PAGE_RWX and _PAGE_PRESENT bits
+ */
+ if (access & ~ptev)
+ return false;
+ /*
+ * This check for access to privilege space
+ */
+ if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
+ return false;
+
+ return true;
+}
+/*
+ * Generic functions with hash/radix callbacks
+ */
+
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+ if (radix_enabled())
+ return radix__ptep_set_access_flags(ptep, entry);
+ return hash__ptep_set_access_flags(ptep, entry);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+ if (radix_enabled())
+ return radix__pte_same(pte_a, pte_b);
+ return hash__pte_same(pte_a, pte_b);
+}
+
+static inline int pte_none(pte_t pte)
+{
+ if (radix_enabled())
+ return radix__pte_none(pte);
+ return hash__pte_none(pte);
+}
+
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ if (radix_enabled())
+ return radix__set_pte_at(mm, addr, ptep, pte, percpu);
+ return hash__set_pte_at(mm, addr, ptep, pte, percpu);
+}
+
+#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_NON_IDEMPOTENT);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_TOLERANT);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+ return pgprot_noncached_wc(prot);
+}
+/*
+ * check a pte mapping have cache inhibited property
+ */
+static inline bool pte_ci(pte_t pte)
+{
+ unsigned long pte_v = pte_val(pte);
+
+ if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
+ ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+ return true;
+ return false;
+}
+
static inline void pmd_set(pmd_t *pmdp, unsigned long val)
{
*pmdp = __pmd(val);
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_present(pmd) (!pmd_none(pmd))
+static inline int pmd_bad(pmd_t pmd)
+{
+ if (radix_enabled())
+ return radix__pmd_bad(pmd);
+ return hash__pmd_bad(pmd);
+}
+
static inline void pud_set(pud_t *pudp, unsigned long val)
{
*pudp = __pud(val);
return __pud(pte_val(pte));
}
#define pud_write(pud) pte_write(pud_pte(pud))
+
+static inline int pud_bad(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_bad(pud);
+ return hash__pud_bad(pud);
+}
+
+
#define pgd_write(pgd) pte_write(pgd_pte(pgd))
static inline void pgd_set(pgd_t *pgdp, unsigned long val)
{
return __pgd(pte_val(pte));
}
+static inline int pgd_bad(pgd_t pgd)
+{
+ if (radix_enabled())
+ return radix__pgd_bad(pgd);
+ return hash__pgd_bad(pgd);
+}
+
extern struct page *pgd_page(pgd_t pgd);
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr) __pa(ptr)
+
+#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
+#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
+#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
+#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+
/*
* Find an entry in a page-table-directory. We combine the address region
* (the high order N bits) and the pgd portion of the address.
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-/* Encode and de-code a swap entry */
-#define MAX_SWAPFILES_CHECK() do { \
- BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
- /* \
- * Don't have overlapping bits with _PAGE_HPTEFLAGS \
- * We filter HPTEFLAGS on set_pte. \
- */ \
- BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
- BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
- } while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
-#define SWP_TYPE_BITS 5
-#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
- & ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT)
-#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << _PAGE_BIT_SWAP_TYPE) \
- | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
-/*
- * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
- * swap type and offset we get from swap and convert that to pte to find a
- * matching pte in linux page table.
- * Clear bits not found in swap entries here.
- */
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
-#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
-
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
-#else
-#define _PAGE_SWP_SOFT_DIRTY 0UL
-#endif /* CONFIG_MEM_SOFT_DIRTY */
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
+void pgtable_cache_init(void);
-#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+static inline int map_kernel_page(unsigned long ea, unsigned long pa,
+ unsigned long flags)
{
- return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+ if (radix_enabled()) {
+#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
+ unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
+ WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
+#endif
+ return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+ }
+ return hash__map_kernel_page(ea, pa, flags);
}
-static inline bool pte_swp_soft_dirty(pte_t pte)
+
+static inline int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
{
- return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+ if (radix_enabled())
+ return radix__vmemmap_create_mapping(start, page_size, phys);
+ return hash__vmemmap_create_mapping(start, page_size, phys);
}
-static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size)
{
- return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+ if (radix_enabled())
+ return radix__vmemmap_remove_mapping(start, page_size);
+ return hash__vmemmap_remove_mapping(start, page_size);
}
-#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
-void pgtable_cache_init(void);
-
+#endif
struct page *realmode_pfn_to_page(unsigned long pfn);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd);
-extern int has_transparent_hugepage(void);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
{
return (pte_t *)pmd;
}
-
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+extern int hash__has_transparent_hugepage(void);
+static inline int has_transparent_hugepage(void)
+{
+ if (radix_enabled())
+ return radix__has_transparent_hugepage();
+ return hash__has_transparent_hugepage();
+}
+#define has_transparent_hugepage has_transparent_hugepage
+
+static inline unsigned long
+pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set)
+{
+ if (radix_enabled())
+ return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
+ return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+/*
+ * For radix we should always find H_PAGE_HASHPTE zero. Hence
+ * the below will work for radix too
+ */
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old;
+
+ if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+ return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+
+ if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0)
+ return;
+
+ pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ if (radix_enabled())
+ return radix__pmd_trans_huge(pmd);
+ return hash__pmd_trans_huge(pmd);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ if (radix_enabled())
+ return radix__pmd_same(pmd_a, pmd_b);
+ return hash__pmd_same(pmd_a, pmd_b);
+}
+
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
- return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE));
+ if (radix_enabled())
+ return radix__pmd_mkhuge(pmd);
+ return hash__pmd_mkhuge(pmd);
}
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
-extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp);
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
+ return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
+}
-extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pmdp_collapse_flush(vma, address, pmdp);
+ return hash__pmdp_collapse_flush(vma, address, pmdp);
+}
#define pmdp_collapse_flush pmdp_collapse_flush
#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
+ pmd_t *pmdp, pgtable_t pgtable)
+{
+ if (radix_enabled())
+ return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+ return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+}
+
#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
+ pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pgtable_trans_huge_withdraw(mm, pmdp);
+ return hash__pgtable_trans_huge_withdraw(mm, pmdp);
+}
#define __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
-extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
+static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pmdp_huge_split_prepare(vma, address, pmdp);
+ return hash__pmdp_huge_split_prepare(vma, address, pmdp);
+}
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
struct spinlock *old_pmd_ptl)
{
+ if (radix_enabled())
+ return false;
/*
* Archs like ppc64 use pgtable to store per pmd
* specific information. So when we switch the pmd,
*/
return true;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
--- /dev/null
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_4K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_4K_H
+
+/*
+ * For 4K page size supported index is 13/9/9/9
+ */
+#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */
+#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
+#define RADIX_PUD_INDEX_SIZE 9
+#define RADIX_PGD_INDEX_SIZE 13
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
--- /dev/null
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_64K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_64K_H
+
+/*
+ * For 64K page size supported index is 13/9/9/5
+ */
+#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */
+#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
+#define RADIX_PUD_INDEX_SIZE 9
+#define RADIX_PGD_INDEX_SIZE 13
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
--- /dev/null
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_H
+#define _ASM_POWERPC_PGTABLE_RADIX_H
+
+#ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
+#endif
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/radix-64k.h>
+#else
+#include <asm/book3s/64/radix-4k.h>
+#endif
+
+/* An empty PTE can still have a R or C writeback */
+#define RADIX_PTE_NONE_MASK (_PAGE_DIRTY | _PAGE_ACCESSED)
+
+/* Bits to set in a RPMD/RPUD/RPGD */
+#define RADIX_PMD_VAL_BITS (0x8000000000000000UL | RADIX_PTE_INDEX_SIZE)
+#define RADIX_PUD_VAL_BITS (0x8000000000000000UL | RADIX_PMD_INDEX_SIZE)
+#define RADIX_PGD_VAL_BITS (0x8000000000000000UL | RADIX_PUD_INDEX_SIZE)
+
+/* Don't have anything in the reserved bits and leaf bits */
+#define RADIX_PMD_BAD_BITS 0x60000000000000e0UL
+#define RADIX_PUD_BAD_BITS 0x60000000000000e0UL
+#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define RADIX_PGTABLE_EADDR_SIZE (RADIX_PTE_INDEX_SIZE + RADIX_PMD_INDEX_SIZE + \
+ RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define RADIX_PGTABLE_RANGE (ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE)
+
+/*
+ * We support 52 bit address space, Use top bit for kernel
+ * virtual mapping. Also make sure kernel fit in the top
+ * quadrant.
+ *
+ * +------------------+
+ * +------------------+ Kernel virtual map (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * 0b11......+------------------+ Kernel linear map (0xc....)
+ * | |
+ * | 2 quadrant |
+ * | |
+ * 0b10......+------------------+
+ * | |
+ * | 1 quadrant |
+ * | |
+ * 0b01......+------------------+
+ * | |
+ * | 0 quadrant |
+ * | |
+ * 0b00......+------------------+
+ *
+ *
+ * 3rd quadrant expanded:
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel IO map end (0xc010000000000000)
+ * | |
+ * | |
+ * | 1/2 of virtual map |
+ * | |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 1/4 of virtual map |
+ * | |
+ * +------------------------------+ Kernel vmemap start
+ * | |
+ * | 1/4 of virtual map |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
+ */
+
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
+#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies a quarter of it on radix config.
+ * (we keep a quarter for the virtual memmap)
+ */
+#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
+#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
+#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs.
+ */
+#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
+
+#ifndef __ASSEMBLY__
+#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
+#define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
+#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
+#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+
+static inline unsigned long radix__pte_update(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set,
+ int huge)
+{
+ pte_t pte;
+ unsigned long old_pte, new_pte;
+
+ do {
+ pte = READ_ONCE(*ptep);
+ old_pte = pte_val(pte);
+ new_pte = (old_pte | set) & ~clr;
+
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /* We already do a sync in cmpxchg, is ptesync needed ?*/
+ asm volatile("ptesync" : : : "memory");
+ /* huge pages use the old page table lock */
+ if (!huge)
+ assert_pte_locked(mm, addr);
+
+ return old_pte;
+}
+
+/*
+ * Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to invalidate tlb.
+ */
+static inline void radix__ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+ pte_t pte;
+ unsigned long old_pte, new_pte;
+ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+ _PAGE_RW | _PAGE_EXEC);
+ do {
+ pte = READ_ONCE(*ptep);
+ old_pte = pte_val(pte);
+ new_pte = old_pte | set;
+
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /* We already do a sync in cmpxchg, is ptesync needed ?*/
+ asm volatile("ptesync" : : : "memory");
+}
+
+static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
+{
+ return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
+}
+
+static inline int radix__pte_none(pte_t pte)
+{
+ return (pte_val(pte) & ~RADIX_PTE_NONE_MASK) == 0;
+}
+
+static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ *ptep = pte;
+ asm volatile("ptesync" : : : "memory");
+}
+
+static inline int radix__pmd_bad(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & RADIX_PMD_BAD_BITS);
+}
+
+static inline int radix__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ return ((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) == 0);
+}
+
+static inline int radix__pud_bad(pud_t pud)
+{
+ return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
+}
+
+
+static inline int radix__pgd_bad(pgd_t pgd)
+{
+ return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline int radix__pmd_trans_huge(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
+static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ /* Nothing to do for radix. */
+ return;
+}
+
+extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set);
+extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+extern int radix__has_transparent_hugepage(void);
+#endif
+
+extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+extern void radix__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
+
+extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int psz);
+#endif /* __ASSEMBLY__ */
+#endif
#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
-#define MMU_NO_CONTEXT 0
-
/*
* TLB flushing for 64-bit hash-MMU CPUs
*/
static inline void arch_enter_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch;
+ if (radix_enabled())
+ return;
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
static inline void arch_leave_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch;
+
+ if (radix_enabled())
+ return;
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->index)
__flush_tlb_pending(batch);
extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
pmd_t *pmdp, unsigned int psize, int ssize,
unsigned long flags);
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
+static inline void hash__local_flush_tlb_mm(struct mm_struct *mm)
{
}
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void hash__flush_tlb_mm(struct mm_struct *mm)
{
}
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
+static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
{
}
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
+static inline void hash__flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
{
}
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
+static inline void hash__flush_tlb_page_nohash(struct vm_area_struct *vma,
+ unsigned long vmaddr)
{
}
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+static inline void hash__flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
{
}
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
+static inline void hash__flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
{
}
+
+struct mmu_gather;
+extern void hash__tlb_flush(struct mmu_gather *tlb);
/* Private function for use by PCI IO mapping code */
extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
unsigned long end);
--- /dev/null
+#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
+#define _ASM_POWERPC_TLBFLUSH_RADIX_H
+
+struct vm_area_struct;
+struct mm_struct;
+struct mmu_gather;
+
+static inline int mmu_get_ap(int psize)
+{
+ return mmu_psize_defs[psize].ap;
+}
+
+extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long ap, int nid);
+extern void radix__tlb_flush(struct mmu_gather *tlb);
+#ifdef CONFIG_SMP
+extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long ap, int nid);
+#else
+#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
+#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
+#define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i)
+#endif
+
+#endif
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+
+#define MMU_NO_CONTEXT ~0UL
+
+
+#include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush-radix.h>
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ return radix__flush_tlb_range(vma, start, end);
+ return hash__flush_tlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ if (radix_enabled())
+ return radix__flush_tlb_kernel_range(start, end);
+ return hash__flush_tlb_kernel_range(start, end);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__local_flush_tlb_mm(mm);
+ return hash__local_flush_tlb_mm(mm);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__local_flush_tlb_page(vma, vmaddr);
+ return hash__local_flush_tlb_page(vma, vmaddr);
+}
+
+static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_tlb_page(vma, vmaddr);
+ return hash__flush_tlb_page_nohash(vma, vmaddr);
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ if (radix_enabled())
+ return radix__tlb_flush(tlb);
+ return hash__tlb_flush(tlb);
+}
+
+#ifdef CONFIG_SMP
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__flush_tlb_mm(mm);
+ return hash__flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_tlb_page(vma, vmaddr);
+ return hash__flush_tlb_page(vma, vmaddr);
+}
+#else
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
--- /dev/null
+#ifndef _ASM_POWERPC_BOOK3S_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+ unsigned long address)
+{
+
+}
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgalloc.h>
+#else
+#include <asm/book3s/32/pgalloc.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_PGALLOC_H */
extern struct kmem_cache *hugepte_cache;
#ifdef CONFIG_PPC_BOOK3S_64
+
+#include <asm/book3s/64/hugetlb-radix.h>
/*
* This should work for other subarchs too. But right now we use the
* new format only for 64bit book3s
{
return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_hugetlb_page(vma, vmaddr);
+}
+static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
+}
#else
static inline pte_t *hugepd_page(hugepd_t hpd)
return ptel;
}
-static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
{
- unsigned int wimg = ptel & HPTE_R_WIMG;
+ unsigned int wimg = hptel & HPTE_R_WIMG;
/* Handle SAO */
if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
cpu_has_feature(CPU_FTR_ARCH_206))
wimg = HPTE_R_M;
- if (!io_type)
+ if (!is_ci)
return wimg == HPTE_R_M;
-
- return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+ /*
+ * if host is mapped cache inhibited, make sure hptel also have
+ * cache inhibited.
+ */
+ if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
+ return false;
+ return !!(wimg & HPTE_R_I);
}
/*
*/
old_pte = READ_ONCE(*ptep);
/*
- * wait until _PAGE_BUSY is clear then set it atomically
+ * wait until H_PAGE_BUSY is clear then set it atomically
*/
- if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) {
+ if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
cpu_relax();
continue;
}
if (writing && pte_write(old_pte))
new_pte = pte_mkdirty(new_pte);
- if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep,
- pte_val(old_pte),
- pte_val(new_pte))) {
+ if (pte_xchg(ptep, old_pte, new_pte))
break;
- }
}
return new_pte;
}
-
-/* Return HPTE cache control bits corresponding to Linux pte bits */
-static inline unsigned long hpte_cache_bits(unsigned long pte_val)
-{
-#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
- return pte_val & (HPTE_R_W | HPTE_R_I);
-#else
- return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
- ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
-#endif
-}
-
static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
{
if (key)
#ifdef CONFIG_ARCH_RANDOM
int (*get_random_seed)(unsigned long *v);
#endif
+ int (*update_partition_table)(u64);
};
extern void e500_idle(void);
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+/*
+ * Radix page table available
+ */
+#define MMU_FTR_RADIX ASM_CONST(0x80000000)
+
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
DECLARE_PER_CPU(int, next_tlbcam_idx);
#endif
+enum {
+ MMU_FTRS_POSSIBLE = MMU_FTR_HPTE_TABLE | MMU_FTR_TYPE_8xx |
+ MMU_FTR_TYPE_40x | MMU_FTR_TYPE_44x | MMU_FTR_TYPE_FSL_E |
+ MMU_FTR_TYPE_47x | MMU_FTR_USE_HIGH_BATS | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_USE_TLBILX |
+ MMU_FTR_LOCK_BCAST_INVAL | MMU_FTR_NEED_DTLB_SW_LRU |
+ MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS |
+ MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
+ MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
+ MMU_FTR_1T_SEGMENT |
+#ifdef CONFIG_PPC_RADIX_MMU
+ MMU_FTR_RADIX |
+#endif
+ 0,
+};
+
static inline int mmu_has_feature(unsigned long feature)
{
- return (cur_cpu_spec->mmu_features & feature);
+ return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
}
static inline void mmu_clear_feature(unsigned long feature)
extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
-/* MMU initialization */
-extern void early_init_mmu(void);
-extern void early_init_mmu_secondary(void);
-
-extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size);
-
#ifdef CONFIG_PPC64
/* This is our real memory area size on ppc64 server, on embedded, we
* make it match the size our of bolted TLB area
#define MMU_PAGE_COUNT 15
-#if defined(CONFIG_PPC_STD_MMU_64)
-/* 64-bit classic hash table MMU */
-#include <asm/book3s/64/mmu-hash.h>
-#elif defined(CONFIG_PPC_STD_MMU_32)
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/mmu.h>
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#ifndef __ASSEMBLY__
+/* MMU initialization */
+extern void early_init_mmu(void);
+extern void early_init_mmu_secondary(void);
+extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size);
+#endif /* __ASSEMBLY__ */
+#endif
+
+#if defined(CONFIG_PPC_STD_MMU_32)
/* 32-bit classic hash table MMU */
#include <asm/book3s/32/mmu-hash.h>
#elif defined(CONFIG_40x)
# include <asm/mmu-8xx.h>
#endif
+#ifndef radix_enabled
+#define radix_enabled() (0)
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
-
-extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64
+extern void radix__switch_mmu_context(struct mm_struct *prev,
+ struct mm_struct *next);
+static inline void switch_mmu_context(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (radix_enabled())
+ return radix__switch_mmu_context(prev, next);
+ return switch_slb(tsk, next);
+}
+
extern int __init_new_context(void);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
#else
+extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
#endif /* CONFIG_ALTIVEC */
-
- /* The actual HW switching method differs between the various
- * sub architectures.
+ /*
+ * The actual HW switching method differs between the various
+ * sub architectures. Out of line for now
*/
-#ifdef CONFIG_PPC_STD_MMU_64
- switch_slb(tsk, next);
-#else
- /* Out of line for now */
- switch_mmu_context(prev, next);
-#endif
-
+ switch_mmu_context(prev, next, tsk);
}
#define deactivate_mm(tsk,mm) do { } while (0)
--- /dev/null
+#ifndef _ASM_POWERPC_PGALLOC_32_H
+#define _ASM_POWERPC_PGALLOC_32_H
+
+#include <linux/threads.h>
+
+/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
+#define MAX_PGTABLE_INDEX_SIZE 0
+
+extern void __bad_pte(pmd_t *pmd);
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) do { } while (0)
+#define __pmd_free_tlb(tlb,x,a) do { } while (0)
+/* #define pgd_populate(mm, pmd, pte) BUG() */
+
+#ifndef CONFIG_BOOKE
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#else
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ *pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#endif
+
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pgtable_page_dtor(ptepage);
+ __free_page(ptepage);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+ BUG_ON(index_size); /* 32-bit doesn't use this */
+ free_page((unsigned long)table);
+}
+
+#define check_pgt_cache() do { } while (0)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+ void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+ void *table, int shift)
+{
+ pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_page_dtor(table);
+ pgtable_free_tlb(tlb, page_address(table), 0);
+}
+#endif /* _ASM_POWERPC_PGALLOC_32_H */
--- /dev/null
+#ifndef _ASM_POWERPC_PGALLOC_64_H
+#define _ASM_POWERPC_PGALLOC_64_H
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+ struct vmemmap_backing *list;
+ unsigned long phys;
+ unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({ \
+ BUG_ON(!(shift)); \
+ pgtable_cache[(shift) - 1]; \
+ })
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, (unsigned long)pmd);
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, (unsigned long)pte);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t pte_page)
+{
+ pmd_set(pmd, (unsigned long)page_address(pte_page));
+}
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = pte_alloc_one_kernel(mm, address);
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ return page;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pgtable_page_dtor(ptepage);
+ __free_page(ptepage);
+}
+
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, page_address(table), 0);
+}
+
+#else /* if CONFIG_PPC_64K_PAGES */
+
+extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern void pte_fragment_free(unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+
+#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, (unsigned long)pte);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t pte_page)
+{
+ pmd_set(pmd, (unsigned long)pte_page);
+}
+
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pte_t *)pte_fragment_alloc(mm, address, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_fre((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr) \
+ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
+#ifndef CONFIG_PPC_64K_PAGES
+#define __pud_free_tlb(tlb, pud, addr) \
+ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#define check_pgt_cache() do { } while (0)
+
+#endif /* _ASM_POWERPC_PGALLOC_64_H */
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
-/* Pointers in the page table tree are virtual addresses */
-#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr))
-
#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
+extern int map_kernel_page(unsigned long ea, unsigned long pa,
+ unsigned long flags);
+extern int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+extern void vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
--- /dev/null
+#ifndef _ASM_POWERPC_NOHASH_PGALLOC_H
+#define _ASM_POWERPC_NOHASH_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#ifdef CONFIG_PPC64
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else
+/* 44x etc which is BOOKE not BOOK3E */
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+ unsigned long address)
+{
+
+}
+#endif /* !CONFIG_PPC_BOOK3E */
+
+#ifdef CONFIG_PPC64
+#include <asm/nohash/64/pgalloc.h>
+#else
+#include <asm/nohash/32/pgalloc.h>
+#endif
+#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
};
enum opal_msg_type {
- OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc,
+ OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc,
* additional params function-specific
*/
- OPAL_MSG_MEM_ERR,
- OPAL_MSG_EPOW,
- OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */
- OPAL_MSG_HMI_EVT,
- OPAL_MSG_DPO,
- OPAL_MSG_PRD,
- OPAL_MSG_OCC,
+ OPAL_MSG_MEM_ERR = 1,
+ OPAL_MSG_EPOW = 2,
+ OPAL_MSG_SHUTDOWN = 3, /* params[0] = 1 reboot, 0 shutdown */
+ OPAL_MSG_HMI_EVT = 4,
+ OPAL_MSG_DPO = 5,
+ OPAL_MSG_PRD = 6,
+ OPAL_MSG_OCC = 7,
OPAL_MSG_TYPE_MAX,
};
#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/pgtable-be-types.h>
+#else
#include <asm/pgtable-types.h>
+#endif
typedef struct { signed long pd; } hugepd_t;
#endif
struct vm_area_struct;
-
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For BOOK3s 64 with 4k and 64K linux page size
+ * we want to use pointers, because the page table
+ * actually store pfn
+ */
+typedef pte_t *pgtable_t;
+#else
#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
typedef pte_t *pgtable_t;
#else
typedef struct page *pgtable_t;
#endif
+#endif
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long len, unsigned int psize);
-#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT)
-
#endif /* __ASSEMBLY__ */
#else
#define slice_init()
#define slice_set_range_psize(mm, start, len, psize) \
slice_set_user_psize((mm), (psize))
-#define slice_mm_new_context(mm) 1
#endif /* CONFIG_PPC_MM_SLICES */
#ifdef CONFIG_HUGETLB_PAGE
* PCI controller operations
*/
struct pci_controller_ops {
- void (*dma_dev_setup)(struct pci_dev *dev);
+ void (*dma_dev_setup)(struct pci_dev *pdev);
void (*dma_bus_setup)(struct pci_bus *bus);
- int (*probe_mode)(struct pci_bus *);
+ int (*probe_mode)(struct pci_bus *bus);
/* Called when pci_enable_device() is called. Returns true to
* allow assignment/enabling of the device. */
- bool (*enable_device_hook)(struct pci_dev *);
+ bool (*enable_device_hook)(struct pci_dev *pdev);
- void (*disable_device)(struct pci_dev *);
+ void (*disable_device)(struct pci_dev *pdev);
- void (*release_device)(struct pci_dev *);
+ void (*release_device)(struct pci_dev *pdev);
/* Called during PCI resource reassignment */
- resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type);
- void (*reset_secondary_bus)(struct pci_dev *dev);
+ resource_size_t (*window_alignment)(struct pci_bus *bus,
+ unsigned long type);
+ void (*reset_secondary_bus)(struct pci_dev *pdev);
#ifdef CONFIG_PCI_MSI
- int (*setup_msi_irqs)(struct pci_dev *dev,
+ int (*setup_msi_irqs)(struct pci_dev *pdev,
int nvec, int type);
- void (*teardown_msi_irqs)(struct pci_dev *dev);
+ void (*teardown_msi_irqs)(struct pci_dev *pdev);
#endif
- int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask);
- u64 (*dma_get_required_mask)(struct pci_dev *dev);
+ int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask);
+ u64 (*dma_get_required_mask)(struct pci_dev *pdev);
- void (*shutdown)(struct pci_controller *);
+ void (*shutdown)(struct pci_controller *hose);
};
/*
#ifdef CONFIG_EEH
struct eeh_dev *edev; /* eeh device */
#endif
-#define IODA_INVALID_PE (-1)
+#define IODA_INVALID_PE 0xFFFFFFFF
#ifdef CONFIG_PPC_POWERNV
- int pe_number;
+ unsigned int pe_number;
int vf_index; /* VF index in the PF */
#ifdef CONFIG_PCI_IOV
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
u16 num_vfs; /* number of VFs enabled*/
- int *pe_num_map; /* PE# for the first VF PE or array */
+ unsigned int *pe_num_map; /* PE# for the first VF PE or array */
bool m64_single_mode; /* Use M64 BAR in Single Mode */
#define IODA_INVALID_M64 (-1)
int (*m64_map)[PCI_SRIOV_NUM_BARS];
extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
extern void remove_dev_pci_data(struct pci_dev *pdev);
-extern void *update_dn_pci_info(struct device_node *dn, void *data);
+extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+ struct device_node *dn);
+extern void pci_remove_device_node_info(struct device_node *dn);
static inline int pci_device_from_OF_node(struct device_node *np,
u8 *bus, u8 *devfn)
#endif
/** Find the bus corresponding to the indicated device node */
-extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
+extern struct pci_bus *pci_find_bus_by_node(struct device_node *dn);
/** Remove all of the PCI devices under this bus */
-extern void pcibios_remove_pci_devices(struct pci_bus *bus);
+extern void pci_hp_remove_devices(struct pci_bus *bus);
/** Discover new pci devices under this bus, and add them */
-extern void pcibios_add_pci_devices(struct pci_bus *bus);
+extern void pci_hp_add_devices(struct pci_bus *bus);
extern void isa_bridge_find_early(struct pci_controller *hose);
+++ /dev/null
-#ifndef _ASM_POWERPC_PGALLOC_32_H
-#define _ASM_POWERPC_PGALLOC_32_H
-
-#include <linux/threads.h>
-
-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
-#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb,x,a) do { } while (0)
-/* #define pgd_populate(mm, pmd, pte) BUG() */
-
-#ifndef CONFIG_BOOKE
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
- pte_t *pte)
-{
- *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pte_page)
-{
- *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) pmd_page(pmd)
-#else
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
- pte_t *pte)
-{
- *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pte_page)
-{
- *pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) pmd_page(pmd)
-#endif
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- BUG_ON(index_size); /* 32-bit doesn't use this */
- free_page((unsigned long)table);
-}
-
-#define check_pgt_cache() do { } while (0)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_page_dtor(table);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-#endif /* _ASM_POWERPC_PGALLOC_32_H */
+++ /dev/null
-#ifndef _ASM_POWERPC_PGALLOC_64_H
-#define _ASM_POWERPC_PGALLOC_64_H
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/percpu.h>
-
-struct vmemmap_backing {
- struct vmemmap_backing *list;
- unsigned long phys;
- unsigned long virt_addr;
-};
-extern struct vmemmap_backing *vmemmap_list;
-
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) ({ \
- BUG_ON(!(shift)); \
- pgtable_cache[(shift) - 1]; \
- })
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
-#ifndef CONFIG_PPC_64K_PAGES
-
-#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD))
-
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
- GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
-}
-
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
- pud_set(pud, __pgtable_ptr_val(pmd));
-}
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
- pte_t *pte)
-{
- pmd_set(pmd, __pgtable_ptr_val(pte));
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- pgtable_t pte_page)
-{
- pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
-}
-
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- struct page *page;
- pte_t *pte;
-
- pte = pte_alloc_one_kernel(mm, address);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size)
- free_page((unsigned long)table);
- else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
-}
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_page_dtor(table);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-
-#else /* if CONFIG_PPC_64K_PAGES */
-
-extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
-extern void page_table_free(struct mm_struct *, unsigned long *, int);
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
-extern void __tlb_remove_table(void *_table);
-#endif
-
-#ifndef __PAGETABLE_PUD_FOLDED
-/* book3s 64 is 4 level page table */
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
-{
- pgd_set(pgd, __pgtable_ptr_val(pud));
-}
-
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
- GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
-}
-#endif
-
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
- pud_set(pud, __pgtable_ptr_val(pmd));
-}
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
- pte_t *pte)
-{
- pmd_set(pmd, __pgtable_ptr_val(pte));
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- pgtable_t pte_page)
-{
- pmd_set(pmd, __pgtable_ptr_val(pte_page));
-}
-
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)page_table_alloc(mm, address, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- return (pgtable_t)page_table_alloc(mm, address, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- page_table_free(mm, (unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- page_table_free(mm, (unsigned long *)ptepage, 0);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
-
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
- GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
- kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
-}
-
-#define __pmd_free_tlb(tlb, pmd, addr) \
- pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef __PAGETABLE_PUD_FOLDED
-#define __pud_free_tlb(tlb, pud, addr) \
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-
-#endif /* __PAGETABLE_PUD_FOLDED */
-
-#define check_pgt_cache() do { } while (0)
-
-#endif /* _ASM_POWERPC_PGALLOC_64_H */
#ifndef _ASM_POWERPC_PGALLOC_H
#define _ASM_POWERPC_PGALLOC_H
-#ifdef __KERNEL__
#include <linux/mm.h>
-#ifdef CONFIG_PPC_BOOK3E
-extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
-#else /* CONFIG_PPC_BOOK3E */
-static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
- unsigned long address)
-{
-}
-#endif /* !CONFIG_PPC_BOOK3E */
-
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-#ifdef CONFIG_PPC64
-#include <asm/pgalloc-64.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgalloc.h>
#else
-#include <asm/pgalloc-32.h>
+#include <asm/nohash/pgalloc.h>
#endif
-#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_PGALLOC_H */
--- /dev/null
+#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
+
+#include <asm/cmpxchg.h>
+
+/* PTE level */
+typedef struct { __be64 pte; } pte_t;
+#define __pte(x) ((pte_t) { cpu_to_be64(x) })
+static inline unsigned long pte_val(pte_t x)
+{
+ return be64_to_cpu(x.pte);
+}
+
+static inline __be64 pte_raw(pte_t x)
+{
+ return x.pte;
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { __be64 pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { cpu_to_be64(x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+ return be64_to_cpu(x.pmd);
+}
+
+static inline __be64 pmd_raw(pmd_t x)
+{
+ return x.pmd;
+}
+
+/*
+ * 64 bit hash always use 4 level table. Everybody else use 4 level
+ * only for 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef struct { __be64 pud; } pud_t;
+#define __pud(x) ((pud_t) { cpu_to_be64(x) })
+static inline unsigned long pud_val(pud_t x)
+{
+ return be64_to_cpu(x.pud);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { __be64 pgd; } pgd_t;
+#define __pgd(x) ((pgd_t) { cpu_to_be64(x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ return be64_to_cpu(x.pgd);
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+ unsigned long *p = (unsigned long *)ptep;
+ __be64 prev;
+
+ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
+ (__force unsigned long)pte_raw(new));
+
+ return pte_raw(old) == prev;
+}
+
+static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new)
+{
+ unsigned long *p = (unsigned long *)pmdp;
+ __be64 prev;
+
+ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pmd_raw(old),
+ (__force unsigned long)pmd_raw(new));
+
+ return pmd_raw(old) == prev;
+}
+
+#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
#define _ASM_POWERPC_PGTABLE_TYPES_H
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-/* These are used to make use of C type-checking. */
-
/* PTE level */
typedef struct { pte_basic_t pte; } pte_t;
#define __pte(x) ((pte_t) { (x) })
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) })
-#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef pte_basic_t pte_t;
-#define __pte(x) (x)
-static inline pte_basic_t pte_val(pte_t pte)
-{
- return pte;
-}
-
-#ifdef CONFIG_PPC64
-typedef unsigned long pmd_t;
-#define __pmd(x) (x)
-static inline unsigned long pmd_val(pmd_t pmd)
-{
- return pmd;
-}
-
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
-typedef unsigned long pud_t;
-#define __pud(x) (x)
-static inline unsigned long pud_val(pud_t pud)
-{
- return pud;
-}
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-typedef unsigned long pgd_t;
-#define __pgd(x) (x)
-static inline unsigned long pgd_val(pgd_t pgd)
-{
- return pgd;
-}
-
-typedef unsigned long pgprot_t;
-#define pgprot_val(x) (x)
-#define __pgprot(x) (x)
-
-#endif /* CONFIG_STRICT_MM_TYPECHECKS */
/*
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
#else
typedef struct { pte_t pte; } real_pte_t;
#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64
+#include <asm/cmpxchg.h>
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+ unsigned long *p = (unsigned long *)ptep;
+
+ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
+}
+#endif
+
#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
-#define has_transparent_hugepage() 0
#endif
pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *shift);
/* sorted alphabetically */
#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_CLRBHRB 0x7c00035c
+#define PPC_INST_CP_ABORT 0x7c00068c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
#endif
/* Deal with instructions that older assemblers aren't aware of */
+#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT)
#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
struct device_node;
struct pci_dn;
-typedef void *(*traverse_func)(struct device_node *me, void *data);
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
- void *data);
+void *pci_traverse_device_nodes(struct device_node *start,
+ void *(*fn)(struct device_node *, void *),
+ void *data);
void *traverse_pci_dn(struct pci_dn *root,
void *(*fn)(struct pci_dn *, void *),
void *data);
li r4,1024; \
mtctr r4; \
lis r4,KERNELBASE@h; \
+ .machine push; \
+ .machine "power4"; \
0: tlbie r4; \
+ .machine pop; \
addi r4,r4,0x1000; \
bdnz 0b
#endif
*/
#ifndef __ASSEMBLY__
extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
+
+/*
+ * Don't just check for any non zero bits in __PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+static inline bool pte_user(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+}
#endif /* __ASSEMBLY__ */
/* Location of the PFN in the PTE. Most 32-bit platforms use the same
/* Make modules code happy. We don't set RO yet */
#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-#define pte_user(val) ((val & _PAGE_USER) == _PAGE_USER)
-
/* Advertise special mapping type for AGP */
#define PAGE_AGP (PAGE_KERNEL_NC)
#define HAVE_PAGE_AGP
/* Advertise support for _PAGE_SPECIAL */
#define __HAVE_ARCH_PTE_SPECIAL
+#ifndef _PAGE_READ
+/* if not defined, we should not find _PAGE_WRITE too */
+#define _PAGE_READ 0
+#define _PAGE_WRITE _PAGE_RW
+#endif
+
+#ifndef H_PAGE_4K_PFN
+#define H_PAGE_4K_PFN 0
+#endif
#define LPCR_LPES_SH 2
#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
+#define LPCR_UPRT 0x00400000 /* Use Process Table (ISA 3) */
#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
#define SPRN_PIR 0x3FF /* Processor Identification Register */
#endif
#define SPRN_TIR 0x1BE /* Thread Identification Register */
+#define SPRN_PTCR 0x1D0 /* Partition table control Register */
#define SPRN_PSPB 0x09F /* Problem State Priority Boost reg */
#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */
#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */
#define PVR_970GX 0x0045
#define PVR_POWER7p 0x004A
#define PVR_POWER8E 0x004B
+#define PVR_POWER8NVL 0x004C
#define PVR_POWER8 0x004D
#define PVR_BE 0x0070
#define PVR_PA6T 0x0090
#elif defined(CONFIG_PPC_STD_MMU_32)
+#define MMU_NO_CONTEXT (0)
/*
* TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
*/
}
#elif defined(CONFIG_PPC_STD_MMU_64)
-#include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush.h>
#else
#error Unsupported MMU type
#endif
--- /dev/null
+#ifndef _UAPI_ASM_POWERPC_PERF_REGS_H
+#define _UAPI_ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+ PERF_REG_POWERPC_R0,
+ PERF_REG_POWERPC_R1,
+ PERF_REG_POWERPC_R2,
+ PERF_REG_POWERPC_R3,
+ PERF_REG_POWERPC_R4,
+ PERF_REG_POWERPC_R5,
+ PERF_REG_POWERPC_R6,
+ PERF_REG_POWERPC_R7,
+ PERF_REG_POWERPC_R8,
+ PERF_REG_POWERPC_R9,
+ PERF_REG_POWERPC_R10,
+ PERF_REG_POWERPC_R11,
+ PERF_REG_POWERPC_R12,
+ PERF_REG_POWERPC_R13,
+ PERF_REG_POWERPC_R14,
+ PERF_REG_POWERPC_R15,
+ PERF_REG_POWERPC_R16,
+ PERF_REG_POWERPC_R17,
+ PERF_REG_POWERPC_R18,
+ PERF_REG_POWERPC_R19,
+ PERF_REG_POWERPC_R20,
+ PERF_REG_POWERPC_R21,
+ PERF_REG_POWERPC_R22,
+ PERF_REG_POWERPC_R23,
+ PERF_REG_POWERPC_R24,
+ PERF_REG_POWERPC_R25,
+ PERF_REG_POWERPC_R26,
+ PERF_REG_POWERPC_R27,
+ PERF_REG_POWERPC_R28,
+ PERF_REG_POWERPC_R29,
+ PERF_REG_POWERPC_R30,
+ PERF_REG_POWERPC_R31,
+ PERF_REG_POWERPC_NIP,
+ PERF_REG_POWERPC_MSR,
+ PERF_REG_POWERPC_ORIG_R3,
+ PERF_REG_POWERPC_CTR,
+ PERF_REG_POWERPC_LINK,
+ PERF_REG_POWERPC_XER,
+ PERF_REG_POWERPC_CCR,
+ PERF_REG_POWERPC_SOFTE,
+ PERF_REG_POWERPC_TRAP,
+ PERF_REG_POWERPC_DAR,
+ PERF_REG_POWERPC_DSISR,
+ PERF_REG_POWERPC_MAX,
+};
+#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
+#ifdef MAX_PGD_TABLE_SIZE
+ DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE);
+#else
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
+#endif
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
offset = ((unsigned long) dispDeviceBase) - base;
size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ dispDeviceRect[0];
- vbase = __ioremap(base, size, _PAGE_NO_CACHE);
+ vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
if (vbase == 0)
return;
logicalDisplayBase = vbase + offset;
extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
-extern void __restore_cpu_a2(void);
extern void __flush_tlb_power7(unsigned int action);
extern void __flush_tlb_power8(unsigned int action);
extern void __flush_tlb_power9(unsigned int action);
/** Overview:
- * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * EEH, or "Enhanced Error Handling" is a PCI bridge technology for
* dealing with PCI bus errors that can't be dealt with within the
* usual PCI framework, except by check-stopping the CPU. Systems
* that are designed for high-availability/reliability cannot afford
struct pci_controller *phb;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- if (!edev || !eeh_enabled())
+ if (!edev)
return;
if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
id->subdevice != pdev->subsystem_device)
continue;
- goto reset;
+ return eeh_pe_reset_and_recover(pe);
}
}
return eeh_unfreeze_pe(pe, true);
-
-reset:
- return eeh_pe_reset_and_recover(pe);
}
/**
if (!edev)
return NULL;
+ /*
+ * We cannot access the config space on some adapters.
+ * Otherwise, it will cause fenced PHB. We don't save
+ * the content in their config space and will restore
+ * from the initial config space saved when the EEH
+ * device is created.
+ */
+ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
+ return NULL;
+
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
if (!edev)
return NULL;
+ /*
+ * The content in the config space isn't saved because
+ * the blocked config space on some adapters. We have
+ * to restore the initial saved config space when the
+ * EEH device is created.
+ */
+ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
+ if (list_is_last(&edev->list, &edev->pe->edevs))
+ eeh_pe_restore_bars(edev->pe);
+
+ return NULL;
+ }
+
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
int eeh_pe_reset_and_recover(struct eeh_pe *pe)
{
- int result, ret;
+ int ret;
/* Bail if the PE is being recovered */
if (pe->state & EEH_PE_RECOVERING)
/* Save states */
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
- /* Report error */
- eeh_pe_dev_traverse(pe, eeh_report_error, &result);
-
/* Issue reset */
ret = eeh_reset_pe(pe);
if (ret) {
return ret;
}
- /* Notify completion of reset */
- eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
-
/* Restore device state */
eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
- /* Resume */
- eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
-
/* Clear recovery mode */
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
* We don't remove the corresponding PE instances because
* we need the information afterwords. The attached EEH
* devices are expected to be attached soon when calling
- * into pcibios_add_pci_devices().
+ * into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
} else {
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
- pcibios_remove_pci_devices(bus);
+ pci_hp_remove_devices(bus);
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
- pcibios_add_pci_devices(bus);
+ pci_hp_add_devices(bus);
} else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
- pcibios_add_pci_devices(frozen_bus);
+ pci_hp_add_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove();
- pcibios_remove_pci_devices(frozen_bus);
+ pci_hp_remove_devices(frozen_bus);
pci_unlock_rescan_remove();
}
}
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
- pcibios_remove_pci_devices(bus);
+ pci_hp_remove_devices(bus);
}
pci_unlock_rescan_remove();
}
static DEFINE_SPINLOCK(eeh_eventlist_lock);
static struct semaphore eeh_eventlist_sem;
-LIST_HEAD(eeh_eventlist);
+static LIST_HEAD(eeh_eventlist);
/**
* eeh_event_handler - Dispatch EEH events.
} else {
if (edev->pe_config_addr &&
(edev->pe_config_addr == pe->addr))
- return pe;
+ return pe;
}
/* Try BDF address */
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/tm.h>
+#include <asm/ppc-opcode.h>
/*
* System calls.
ldarx r6,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
+BEGIN_FTR_SECTION
+/*
+ * A cp_abort (copy paste abort) here ensures that when context switching, a
+ * copy from one process can't leak into the paste of another.
+ */
+ PPC_CP_ABORT
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
#ifdef CONFIG_PPC_BOOK3S
/* Cancel all explict user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_STD_MMU_64
+BEGIN_MMU_FTR_SECTION
+ b 2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
BEGIN_FTR_SECTION
clrrdi r6,r8,28 /* get its ESID */
clrrdi r9,r1,28 /* get current sp ESID */
slbmte r7,r0
isync
2:
-#endif /* !CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_STD_MMU_64 */
CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
- b machine_check_pSeries_early
+ b machine_check_powernv_early
FTR_SECTION_ELSE
b machine_check_pSeries_0
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
b slb_miss_realmode
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
b slb_miss_realmode
.align 7
/* moved from 0x200 */
-machine_check_pSeries_early:
+machine_check_powernv_early:
BEGIN_FTR_SECTION
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
#endif /* CONFIG_PPC_PSERIES */
-#ifdef __DISABLED__
-/*
- * This is used for when the SLB miss handler has to go virtual,
- * which doesn't happen for now anymore but will once we re-implement
- * dynamic VSIDs for shared page tables
- */
-slb_miss_user_pseries:
- std r10,PACA_EXGEN+EX_R10(r13)
- std r11,PACA_EXGEN+EX_R11(r13)
- std r12,PACA_EXGEN+EX_R12(r13)
- GET_SCRATCH0(r10)
- ld r11,PACA_EXSLB+EX_R9(r13)
- ld r12,PACA_EXSLB+EX_R3(r13)
- std r10,PACA_EXGEN+EX_R13(r13)
- std r11,PACA_EXGEN+EX_R9(r13)
- std r12,PACA_EXGEN+EX_R3(r13)
- clrrdi r12,r13,32
- mfmsr r10
- mfspr r11,SRR0 /* save SRR0 */
- ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI
- mtspr SRR0,r12
- mfspr r12,SRR1 /* and SRR1 */
- mtspr SRR1,r10
- rfid
- b . /* prevent spec. execution */
-#endif /* __DISABLED__ */
-
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
kvmppc_skip_interrupt:
/*
#endif
/*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above. Because the prologs assemble the
- * addresses of these handlers using the LOAD_HANDLER macro,
- * which uses an ori instruction, these handlers must be in
- * the first 64k of the kernel image.
+ * Ensure that any handlers that get invoked from the exception prologs
+ * above are below the first 64KB (0x10000) of the kernel image because
+ * the prologs assemble the addresses of these handlers using the
+ * LOAD_HANDLER macro, which uses an ori instruction.
*/
/*** Common interrupt handlers ***/
#endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
- /* Other future vectors */
- .align 7
- .globl __end_interrupts
-__end_interrupts:
-
.align 7
system_call_entry:
b system_call_common
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
li r5,0x300
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
b do_hash_page /* Try to handle as hpte fault */
+MMU_FTR_SECTION_ELSE
+ b handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
.align 7
.globl h_data_storage_common
ld r3,_NIP(r1)
andis. r4,r12,0x5820
li r5,0x400
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
b do_hash_page /* Try to handle as hpte fault */
-
- STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
-
-/*
- * Here is the common SLB miss user that is used when going to virtual
- * mode for SLB misses, that is currently not used
- */
-#ifdef __DISABLED__
- .align 7
- .globl slb_miss_user_common
-slb_miss_user_common:
- mflr r10
- std r3,PACA_EXGEN+EX_DAR(r13)
- stw r9,PACA_EXGEN+EX_CCR(r13)
- std r10,PACA_EXGEN+EX_LR(r13)
- std r11,PACA_EXGEN+EX_SRR0(r13)
- bl slb_allocate_user
-
- ld r10,PACA_EXGEN+EX_LR(r13)
- ld r3,PACA_EXGEN+EX_R3(r13)
- lwz r9,PACA_EXGEN+EX_CCR(r13)
- ld r11,PACA_EXGEN+EX_SRR0(r13)
- mtlr r10
- beq- slb_miss_fault
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_user_slb
- mfmsr r10
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
-.machine pop
-
- clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
- mtmsrd r10,1
-
- mtspr SRR0,r11
- mtspr SRR1,r12
-
- ld r9,PACA_EXGEN+EX_R9(r13)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- rfid
- b .
-
-slb_miss_fault:
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
- ld r4,PACA_EXGEN+EX_DAR(r13)
- li r5,0
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
+MMU_FTR_SECTION_ELSE
b handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
-unrecov_user_slb:
- EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-#endif /* __DISABLED__ */
-
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
/*
* Machine check is different because we use a different
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
- .align 7
- .globl __end_handlers
-__end_handlers:
-
/* Equivalents to the above handlers for relocation-on interrupt vectors */
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+ /*
+ * The __end_interrupts marker must be past the out-of-line (OOL)
+ * handlers, so that they are copied to real address 0x100 when running
+ * a relocatable kernel. This ensures they can be reached from the short
+ * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
+ * directly, without using LOAD_HANDLER().
+ */
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+#ifdef CONFIG_PPC_STD_MMU_64
+BEGIN_MMU_FTR_SECTION
bl slb_allocate_realmode
-
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
+#endif
/* All done -- return from exception. */
ld r10,PACA_EXSLB+EX_LR(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
-
+BEGIN_MMU_FTR_SECTION
+ b 2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- 2f
*/
.align 7
do_hash_page:
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
-
+#ifdef CONFIG_PPC_STD_MMU_64
andis. r0,r4,0xa410 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
andis. r0,r4,DSISR_DABRMATCH@h
/* Error */
blt- 13f
+#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
12: b ret_from_except_lite
+#ifdef CONFIG_PPC_STD_MMU_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
ld r4,_DAR(r1)
bl low_hash_fault
b ret_from_except
+#endif
/*
* We come here as a result of a DSI at a point where we don't want
return sys_call_table[nr*2];
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
+
+#if defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2)
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+ if (str[0] == '.' && search[0] != '.')
+ return str + 1;
+ else
+ return str;
+}
+#endif /* defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) */
* This stuff goes at the beginning of the bss, which is page-aligned.
*/
.section ".bss"
+/*
+ * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
+ * We will need to find a better way to fix this
+ */
+ .align 16
- .align PAGE_SHIFT
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
.globl empty_zero_page
empty_zero_page:
.space PAGE_SIZE
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
return len+1;
}
-struct device_attribute ibmebus_bus_device_attrs[] = {
+static struct device_attribute ibmebus_bus_device_attrs[] = {
__ATTR_RO(devspec),
__ATTR_RO(name),
__ATTR_RO(modalias),
size = 0x10000;
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, _PAGE_NO_CACHE|_PAGE_GUARDED);
+ size, pgprot_val(pgprot_noncached(__pgprot(0))));
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+ 0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
}
static void __init export_crashk_values(struct device_node *node)
{
- struct property *prop;
-
/* There might be existing crash kernel properties, but we can't
* be sure what's in them, so remove them. */
- prop = of_find_property(node, "linux,crashkernel-base", NULL);
- if (prop)
- of_remove_property(node, prop);
-
- prop = of_find_property(node, "linux,crashkernel-size", NULL);
- if (prop)
- of_remove_property(node, prop);
+ of_remove_property(node, of_find_property(node,
+ "linux,crashkernel-base", NULL));
+ of_remove_property(node, of_find_property(node,
+ "linux,crashkernel-size", NULL));
if (crashk_res.start != 0) {
crashk_base = cpu_to_be_ulong(crashk_res.start),
static int __init kexec_setup(void)
{
struct device_node *node;
- struct property *prop;
node = of_find_node_by_path("/chosen");
if (!node)
return -ENOENT;
/* remove any stale properties so ours can be found */
- prop = of_find_property(node, kernel_end_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
+ of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
/* information needed by userspace when using default_machine_kexec */
kernel_end = cpu_to_be_ulong(__pa(_end));
* end of the blocked region (begin >= high). Use the
* boolean identity !(a || b) === (!a && !b).
*/
+#ifdef CONFIG_PPC_STD_MMU_64
if (htab_address) {
low = __pa(htab_address);
high = low + htab_size_bytes;
return -ETXTBSY;
}
}
+#endif /* CONFIG_PPC_STD_MMU_64 */
/* We also should not overwrite the tce tables */
for_each_node_by_type(node, "pci") {
/* NOTREACHED */
}
-#ifndef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_STD_MMU_64
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
static int __init export_htab_values(void)
{
struct device_node *node;
- struct property *prop;
/* On machines with no htab htab_address is NULL */
if (!htab_address)
return -ENODEV;
/* remove any stale propertys so ours can be found */
- prop = of_find_property(node, htab_base_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
- prop = of_find_property(node, htab_size_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
+ of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
+ of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
htab_base = cpu_to_be64(__pa(htab_address));
of_add_property(node, &htab_base_prop);
return 0;
}
late_initcall(export_htab_values);
-#endif /* !CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_STD_MMU_64 */
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-struct irq_work mce_event_process_work = {
+static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
void __flush_tlb_power9(unsigned int action)
{
+ if (radix_enabled())
+ flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
+
flush_tlb_206(POWER9_TLB_SETS_HASH, action);
}
/* flush SLBs and reload */
+#ifdef CONFIG_PPC_STD_MMU_64
static void flush_and_reload_slb(void)
{
struct slb_shadow *slb;
asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
}
}
+#endif
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
* reset the error bits whenever we handle them so that at the end
* we can check whether we handled all of them or not.
* */
+#ifdef CONFIG_PPC_STD_MMU_64
if (dsisr & slb_error_bits) {
flush_and_reload_slb();
/* reset error bits */
/* reset error bits */
dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
}
+#endif
/* Any other errors we don't understand? */
if (dsisr & 0xffffffffUL)
handled = 0;
switch (P7_SRR1_MC_IFETCH(srr1)) {
case 0:
break;
+#ifdef CONFIG_PPC_STD_MMU_64
case P7_SRR1_MC_IFETCH_SLB_PARITY:
case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
/* flush and reload SLBs for SLB errors. */
handled = 1;
}
break;
+#endif
default:
break;
}
handled = mce_handle_common_ierror(srr1);
+#ifdef CONFIG_PPC_STD_MMU_64
if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
flush_and_reload_slb();
handled = 1;
}
+#endif
return handled;
}
handled = mce_handle_common_ierror(srr1);
+#ifdef CONFIG_PPC_STD_MMU_64
if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
flush_and_reload_slb();
handled = 1;
}
+#endif
return handled;
}
mr r4,r10
blr
-_GLOBAL(abs)
- srawi r4,r3,31
- xor r3,r3,r4
- sub r3,r3,r4
- blr
-
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
* parsing code.
*/
-#include <linux/module.h>
-
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fs.h>
return rc;
}
-
-static void __exit nvram_cleanup(void)
-{
- misc_deregister( &nvram_dev );
-}
-
-module_init(nvram_init);
-module_exit(nvram_cleanup);
-MODULE_LICENSE("GPL");
+device_initcall(nvram_init);
#include <asm/firmware.h>
#include <asm/eeh.h>
+static struct pci_bus *find_bus_among_children(struct pci_bus *bus,
+ struct device_node *dn)
+{
+ struct pci_bus *child = NULL;
+ struct pci_bus *tmp;
+
+ if (pci_bus_to_OF_node(bus) == dn)
+ return bus;
+
+ list_for_each_entry(tmp, &bus->children, node) {
+ child = find_bus_among_children(tmp, dn);
+ if (child)
+ break;
+ }
+
+ return child;
+}
+
+struct pci_bus *pci_find_bus_by_node(struct device_node *dn)
+{
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ if (!pdn || !pdn->phb || !pdn->phb->bus)
+ return NULL;
+
+ return find_bus_among_children(pdn->phb->bus, dn);
+}
+EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
+
/**
* pcibios_release_device - release PCI device
* @dev: PCI device
}
/**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * pci_hp_remove_devices - remove all devices under this bus
* @bus: the indicated PCI bus
*
* Remove all of the PCI devices under this bus both from the
* linux pci device tree, and from the powerpc EEH address cache.
*/
-void pcibios_remove_pci_devices(struct pci_bus *bus)
+void pci_hp_remove_devices(struct pci_bus *bus)
{
struct pci_dev *dev, *tmp;
struct pci_bus *child_bus;
/* First go down child busses */
list_for_each_entry(child_bus, &bus->children, node)
- pcibios_remove_pci_devices(child_bus);
+ pci_hp_remove_devices(child_bus);
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
pci_stop_and_remove_bus_device(dev);
}
}
-
-EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
+EXPORT_SYMBOL_GPL(pci_hp_remove_devices);
/**
- * pcibios_add_pci_devices - adds new pci devices to bus
+ * pci_hp_add_devices - adds new pci devices to bus
* @bus: the indicated PCI bus
*
* This routine will find and fixup new pci devices under
* is how this routine differs from other, similar pcibios
* routines.)
*/
-void pcibios_add_pci_devices(struct pci_bus * bus)
+void pci_hp_add_devices(struct pci_bus *bus)
{
int slotno, mode, pass, max;
struct pci_dev *dev;
if (mode == PCI_PROBE_DEVTREE) {
/* use ofdt-based probe */
of_rescan_bus(dn, bus);
- } else if (mode == PCI_PROBE_NORMAL) {
+ } else if (mode == PCI_PROBE_NORMAL &&
+ dn->child && PCI_DN(dn->child)) {
/*
* Use legacy probe. In the partial hotplug case, we
* probably have grandchildren devices unplugged. So
}
pcibios_finish_adding_to_bus(bus);
}
-EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
+EXPORT_SYMBOL_GPL(pci_hp_add_devices);
* ISA drivers use hard coded offsets. If no ISA bus exists nothing
* is mapped on the first 64K of IO space
*/
-unsigned long pci_io_base = ISA_IO_BASE;
+unsigned long pci_io_base;
EXPORT_SYMBOL(pci_io_base);
static int __init pcibios_init(void)
printk(KERN_INFO "PCI: Probing PCI hardware\n");
+ pci_io_base = ISA_IO_BASE;
/* For now, override phys_mem_access_prot. If we need it,g
* later, we may move that initialization to each ppc_md
*/
/* Establish the mapping */
if (__ioremap_at(phys_page, area->addr, size_page,
- _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+ pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
return -ENOMEM;
/* Fixup hose IO resource */
#endif /* CONFIG_PCI_IOV */
}
-/*
- * Traverse_func that inits the PCI fields of the device node.
- * NOTE: this *must* be done before read/write config to the device.
- */
-void *update_dn_pci_info(struct device_node *dn, void *data)
+struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+ struct device_node *dn)
{
- struct pci_controller *phb = data;
const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
const __be32 *regs;
struct device_node *parent;
return NULL;
dn->data = pdn;
pdn->node = dn;
- pdn->phb = phb;
+ pdn->phb = hose;
#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
#endif
if (pdn->parent)
list_add_tail(&pdn->list, &pdn->parent->child_list);
- return NULL;
+ return pdn;
}
+EXPORT_SYMBOL_GPL(pci_add_device_node_info);
+
+void pci_remove_device_node_info(struct device_node *dn)
+{
+ struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+ if (edev)
+ edev->pdn = NULL;
+#endif
+
+ if (!pdn)
+ return;
+
+ WARN_ON(!list_empty(&pdn->child_list));
+ list_del(&pdn->list);
+ if (pdn->parent)
+ of_node_put(pdn->parent->node);
+
+ dn->data = NULL;
+ kfree(pdn);
+}
+EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
/*
* Traverse a device tree stopping each PCI device in the tree.
* one of these nodes we also assume its siblings are non-pci for
* performance.
*/
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
- void *data)
+void *pci_traverse_device_nodes(struct device_node *start,
+ void *(*fn)(struct device_node *, void *),
+ void *data)
{
struct device_node *dn, *nextdn;
void *ret;
if (classp)
class = of_read_number(classp, 1);
- if (pre && ((ret = pre(dn, data)) != NULL))
- return ret;
+ if (fn) {
+ ret = fn(dn, data);
+ if (ret)
+ return ret;
+ }
/* If we are a PCI bridge, go down */
if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
}
return NULL;
}
+EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);
static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
struct pci_dn *pdn)
return NULL;
}
+static void *add_pdn(struct device_node *dn, void *data)
+{
+ struct pci_controller *hose = data;
+ struct pci_dn *pdn;
+
+ pdn = pci_add_device_node_info(hose, dn);
+ if (!pdn)
+ return ERR_PTR(-ENOMEM);
+
+ return NULL;
+}
+
/**
* pci_devs_phb_init_dynamic - setup pci devices under this PHB
* phb: pci-to-host bridge (top-level bridge connecting to cpu)
struct pci_dn *pdn;
/* PHB nodes themselves must not match */
- update_dn_pci_info(dn, phb);
- pdn = dn->data;
+ pdn = pci_add_device_node_info(phb, dn);
if (pdn) {
pdn->devfn = pdn->busno = -1;
pdn->vendor_id = pdn->device_id = pdn->class_code = 0;
}
/* Update dn->phb ptrs for new phb and children devices */
- traverse_pci_devices(dn, update_dn_pci_info, phb);
+ pci_traverse_device_nodes(dn, add_pdn, phb);
}
/**
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
+#include <linux/elf-randomize.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/firmware.h>
#endif
#include <asm/code-patching.h>
+#include <asm/exec.h>
#include <asm/livepatch.h>
#include <linux/kprobes.h>
}
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
__flush_tlb_pending(batch);
batch->active = 0;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread.debug);
last = _switch(old_thread, new_thread);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (current_thread_info()->task->thread.regs)
restore_math(current_thread_info()->task->thread.regs);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_STD_MMU_64 */
return last;
}
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ if (radix_enabled())
+ return;
+
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
<< SLB_VSID_SHIFT_1T;
* the heap, we can put it above 1TB so it is backed by a 1TB
* segment. Otherwise the heap will be in the bottom 1TB
* which always uses 256MB segments and this may result in a
- * performance penalty.
+ * performance penalty. We don't need to worry about radix. For
+ * radix, mmu_highuser_ssize remains unchanged from 256MB.
*/
if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/cpu.h>
#include <asm/prom.h>
#include <asm/rtas.h>
*/
{CPU_FTR_TM_COMP, 0, 0,
PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
+ {0, MMU_FTR_RADIX, 0, 0, 40, 0, 0},
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
}
static void rtas_event_scan(struct work_struct *w);
-DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
+static DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
/*
* Delay should be at least one second since some machines have problems if
machine_shutdown();
if (ppc_md.restart)
ppc_md.restart(cmd);
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
machine_shutdown();
if (pm_power_off)
pm_power_off();
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
machine_shutdown();
if (ppc_md.halt)
ppc_md.halt();
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
void restore_processor_state(void)
{
#ifdef CONFIG_PPC32
- switch_mmu_context(current->active_mm, current->active_mm);
+ switch_mmu_context(current->active_mm, current->active_mm, NULL);
#endif
}
#include <linux/delay.h>
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
+#include <linux/suspend.h>
#include <asm/trace.h>
#include <asm/io.h>
* @curr: bytes currently allocated
* @high: high water mark for IO data usage
*/
-struct vio_cmo {
+static struct vio_cmo {
spinlock_t lock;
struct delayed_work balance_q;
struct list_head device_list;
return dma_iommu_ops.get_required_mask(dev);
}
-struct dma_map_ops vio_dma_mapping_ops = {
+static struct dma_map_ops vio_dma_mapping_ops = {
.alloc = vio_dma_iommu_alloc_coherent,
.free = vio_dma_iommu_free_coherent,
.mmap = dma_direct_mmap_coherent,
struct revmap_entry *rev;
struct page *page, *pages[1];
long index, ret, npages;
- unsigned long is_io;
+ bool is_ci;
unsigned int writing, write_ok;
struct vm_area_struct *vma;
unsigned long rcbits;
smp_rmb();
ret = -EFAULT;
- is_io = 0;
+ is_ci = false;
pfn = 0;
page = NULL;
pte_size = PAGE_SIZE;
pfn = vma->vm_pgoff +
((hva - vma->vm_start) >> PAGE_SHIFT);
pte_size = psize;
- is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
up_read(¤t->mm->mmap_sem);
goto out_put;
/* Check WIMG vs. the actual page we're accessing */
- if (!hpte_cache_flags_ok(r, is_io)) {
- if (is_io)
+ if (!hpte_cache_flags_ok(r, is_ci)) {
+ if (is_ci)
goto out_put;
-
/*
* Allow guest to map emulated device memory as
* uncacheable, but actually make it cacheable.
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
!cpu_has_feature(CPU_FTR_ARCH_206))
return -EIO;
+ /*
+ * Disable KVM for Power9, untill the required bits merged.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EIO;
+
return 0;
}
unsigned long g_ptel;
struct kvm_memory_slot *memslot;
unsigned hpage_shift;
- unsigned long is_io;
+ bool is_ci;
unsigned long *rmap;
pte_t *ptep;
unsigned int writing;
gfn = gpa >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
pa = 0;
- is_io = ~0ul;
+ is_ci = false;
rmap = NULL;
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
/* Emulated MMIO - mark this with key=31 */
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
+ is_ci = pte_ci(pte);
pa = pte_pfn(pte) << PAGE_SHIFT;
pa |= hva & (host_pte_size - 1);
pa |= gpa & ~PAGE_MASK;
else
pteh |= HPTE_V_ABSENT;
- /* Check WIMG */
- if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
- if (is_io)
+ /*If we had host pte mapping then Check WIMG */
+ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
+ if (is_ci)
return H_PARAMETER;
/*
* Allow guest to map emulated device memory as
static int kvmppc_core_check_processor_compat_pr(void)
{
- /* we are always compatible */
+ /*
+ * Disable KVM for Power9 untill the required bits merged.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EIO;
return 0;
}
bdnz 40b
65: blr
-_GLOBAL(generic_memcpy)
+generic_memcpy:
srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
}
}
#endif
+ break; /* illegal instruction */
case 31:
switch ((instr >> 1) & 0x3ff) {
case 4:
__get_user_asmx(val, op.ea, err, "lwarx");
break;
+#ifdef __powerpc64__
case 8:
__get_user_asmx(val, op.ea, err, "ldarx");
break;
+#endif
default:
return 0;
}
case 4:
__put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
break;
+#ifdef __powerpc64__
case 8:
__put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
break;
+#endif
default:
return 0;
}
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+
+/*
+ * Sparse (as at v0.5.0) gets very, very confused by this file.
+ * Make it a bit simpler for it.
+ */
+#if !defined(__CHECKER__)
#include <altivec.h>
+#else
+#define vec_xor(a, b) a ^ b
+#define vector __attribute__((vector_size(16)))
+#endif
#include <linux/preempt.h>
#include <linux/export.h>
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
-obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y)
-obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o
-obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o \
- mmu_context_hash$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
+obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
+obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
+obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o
ifeq ($(CONFIG_PPC_STD_MMU_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
obj-y += hugetlbpage.o
ifeq ($(CONFIG_HUGETLB_PAGE),y)
obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
TLBCAM[index].MAS7 = (u64)phys >> 32;
/* Below is unlikely -- only for large user pages or similar */
- if (pte_user(flags)) {
+ if (pte_user(__pte(flags))) {
TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
}
old_pte = pte_val(pte);
/* If PTE busy, retry the access */
- if (unlikely(old_pte & _PAGE_BUSY))
+ if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
- if (unlikely(access & ~old_pte))
+ if (unlikely(!check_pte_access(access, old_pte)))
return 1;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access. Since this is 4K insert of 64K page size
- * also add _PAGE_COMBO
+ * also add H_PAGE_COMBO
*/
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
- } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
- old_pte, new_pte));
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
/*
* PP bits. _PAGE_USER is already PP bit 0x2, so we only
* need to add in 0x1 if it's a read-only user page
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
vpn = hpt_vpn(ea, vsid, ssize);
- if (unlikely(old_pte & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/*
* There MIGHT be an HPTE for this pte
*/
hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & _PAGE_F_SECOND)
+ if (old_pte & H_PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+ slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
hash = hpt_hash(vpn, shift, ssize);
MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
return -1;
}
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
- new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+ new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
}
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
unsigned long g_idx;
unsigned long ptev = pte_val(rpte.pte);
- g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT;
+ g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
index = index >> 2;
if (g_idx & (0x1 << index))
return true;
{
unsigned long g_idx;
- if (!(ptev & _PAGE_COMBO))
+ if (!(ptev & H_PAGE_COMBO))
return ptev;
index = index >> 2;
g_idx = 0x1 << index;
- return ptev | (g_idx << _PAGE_F_GIX_SHIFT);
+ return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
}
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
old_pte = pte_val(pte);
/* If PTE busy, retry the access */
- if (unlikely(old_pte & _PAGE_BUSY))
+ if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
- if (unlikely(access & ~old_pte))
+ if (unlikely(!check_pte_access(access, old_pte)))
return 1;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access. Since this is 4K insert of 64K page size
- * also add _PAGE_COMBO
+ * also add H_PAGE_COMBO
*/
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
- if (access & _PAGE_RW)
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
+ if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
- } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
- old_pte, new_pte));
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
/*
* Handle the subpage protection bits
*/
/*
*None of the sub 4k page is hashed
*/
- if (!(old_pte & _PAGE_HASHPTE))
+ if (!(old_pte & H_PAGE_HASHPTE))
goto htab_insert_hpte;
/*
* Check if the pte was already inserted into the hash table
* as a 64k HW page, and invalidate the 64k HPTE if so.
*/
- if (!(old_pte & _PAGE_COMBO)) {
+ if (!(old_pte & H_PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
/*
* clear the old slot details from the old and new pte.
* On hash insert failure we use old pte value and we don't
* want slot information there if we have a insert failure.
*/
- old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
- new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+ old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
+ new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
if (ret == -1)
goto htab_insert_hpte;
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
htab_insert_hpte:
/*
- * handle _PAGE_4K_PFN case
+ * handle H_PAGE_4K_PFN case
*/
- if (old_pte & _PAGE_4K_PFN) {
+ if (old_pte & H_PAGE_4K_PFN) {
/*
* All the sub 4k page have the same
* physical address.
}
/*
* Insert slot number & secondary bit in PTE second half,
- * clear _PAGE_BUSY and set appropriate HPTE slot bit
- * Since we have _PAGE_BUSY set on ptep, we can be sure
+ * clear H_PAGE_BUSY and set appropriate HPTE slot bit
+ * Since we have H_PAGE_BUSY set on ptep, we can be sure
* nobody is undating hidx.
*/
hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
rpte.hidx &= ~(0xfUL << (subpg_index << 2));
*hidxp = rpte.hidx | (slot << (subpg_index << 2));
new_pte = mark_subptegroup_valid(new_pte, subpg_index);
- new_pte |= _PAGE_HASHPTE;
+ new_pte |= H_PAGE_HASHPTE;
/*
* check __real_pte for details on matching smp_rmb()
*/
smp_wmb();
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
unsigned long vsid, pte_t *ptep, unsigned long trap,
unsigned long flags, int ssize)
{
-
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
old_pte = pte_val(pte);
/* If PTE busy, retry the access */
- if (unlikely(old_pte & _PAGE_BUSY))
+ if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
- if (unlikely(access & ~old_pte))
+ if (unlikely(!check_pte_access(access, old_pte)))
return 1;
/*
* Check if PTE has the cache-inhibit bit set
* If so, bail out and refault as a 4k page
*/
if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
- unlikely(old_pte & _PAGE_NO_CACHE))
+ unlikely(pte_ci(pte)))
return 0;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access.
*/
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
- } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
- old_pte, new_pte));
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
vpn = hpt_vpn(ea, vsid, ssize);
- if (unlikely(old_pte & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/*
* There MIGHT be an HPTE for this pte
*/
hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & _PAGE_F_SECOND)
+ if (old_pte & H_PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+ slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
MMU_PAGE_64K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
hash = hpt_hash(vpn, shift, ssize);
MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
return -1;
}
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
- new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+ new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
}
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
local_irq_restore(flags);
}
+static int native_update_partition_table(u64 patb1)
+{
+ partition_tb->patb1 = cpu_to_be64(patb1);
+ return 0;
+}
+
void __init hpte_init_native(void)
{
ppc_md.hpte_invalidate = native_hpte_invalidate;
ppc_md.hpte_clear_all = native_hpte_clear;
ppc_md.flush_hash_range = native_flush_hash_range;
ppc_md.hugepage_invalidate = native_hugepage_invalidate;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ ppc_md.update_partition_table = native_update_partition_table;
}
if ((pteflags & _PAGE_EXEC) == 0)
rflags |= HPTE_R_N;
/*
- * PP bits:
+ * PPP bits:
* Linux uses slb key 0 for kernel and 1 for user.
- * kernel areas are mapped with PP=00
- * and there is no kernel RO (_PAGE_KERNEL_RO).
- * User area is mapped with PP=0x2 for read/write
- * or PP=0x3 for read-only (including writeable but clean pages).
+ * kernel RW areas are mapped with PPP=0b000
+ * User area is mapped with PPP=0b010 for read/write
+ * or PPP=0b011 for read-only (including writeable but clean pages).
*/
- if (pteflags & _PAGE_USER) {
- rflags |= 0x2;
- if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
+ if (pteflags & _PAGE_PRIVILEGED) {
+ /*
+ * Kernel read only mapped with ppp bits 0b110
+ */
+ if (!(pteflags & _PAGE_WRITE))
+ rflags |= (HPTE_R_PP0 | 0x2);
+ } else {
+ if (pteflags & _PAGE_RWX)
+ rflags |= 0x2;
+ if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
rflags |= 0x1;
}
/*
/*
* Add in WIG bits
*/
- if (pteflags & _PAGE_WRITETHRU)
- rflags |= HPTE_R_W;
- if (pteflags & _PAGE_NO_CACHE)
+
+ if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
rflags |= HPTE_R_I;
- if (pteflags & _PAGE_GUARDED)
- rflags |= HPTE_R_G;
+ if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
+ rflags |= (HPTE_R_I | HPTE_R_G);
+ if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+ rflags |= (HPTE_R_I | HPTE_R_W);
return rflags;
}
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+static void __init hash_init_partition_table(phys_addr_t hash_table,
+ unsigned long pteg_count)
+{
+ unsigned long ps_field;
+ unsigned long htab_size;
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+ /*
+ * slb llp encoding for the page size used in VPM real mode.
+ * We can ignore that for lpid 0
+ */
+ ps_field = 0;
+ htab_size = __ilog2(pteg_count) - 11;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+ partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
+ /*
+ * FIXME!! This should be done via update_partition table
+ * For now UPRT is 0 for us.
+ */
+ partition_tb->patb1 = 0;
+ DBG("Partition table %p\n", partition_tb);
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+
+}
+
static void __init htab_initialize(void)
{
unsigned long table;
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
- /* Set SDR1 */
- mtspr(SPRN_SDR1, _SDR1);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ /* Set SDR1 */
+ mtspr(SPRN_SDR1, _SDR1);
+ else
+ hash_init_partition_table(table, pteg_count);
}
prot = pgprot_val(PAGE_KERNEL);
#undef KB
#undef MB
-void __init early_init_mmu(void)
+void __init hash__early_init_mmu(void)
{
+ /*
+ * initialize page table size
+ */
+ __pte_frag_nr = H_PTE_FRAG_NR;
+ __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+
+ __pte_index_size = H_PTE_INDEX_SIZE;
+ __pmd_index_size = H_PMD_INDEX_SIZE;
+ __pud_index_size = H_PUD_INDEX_SIZE;
+ __pgd_index_size = H_PGD_INDEX_SIZE;
+ __pmd_cache_index = H_PMD_CACHE_INDEX;
+ __pte_table_size = H_PTE_TABLE_SIZE;
+ __pmd_table_size = H_PMD_TABLE_SIZE;
+ __pud_table_size = H_PUD_TABLE_SIZE;
+ __pgd_table_size = H_PGD_TABLE_SIZE;
+ /*
+ * 4k use hugepd format, so for hash set then to
+ * zero
+ */
+ __pmd_val_bits = 0;
+ __pud_val_bits = 0;
+ __pgd_val_bits = 0;
+
+ __kernel_virt_start = H_KERN_VIRT_START;
+ __kernel_virt_size = H_KERN_VIRT_SIZE;
+ __vmalloc_start = H_VMALLOC_START;
+ __vmalloc_end = H_VMALLOC_END;
+ vmemmap = (struct page *)H_VMEMMAP_BASE;
+ ioremap_bot = IOREMAP_BASE;
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
}
#ifdef CONFIG_SMP
-void early_init_mmu_secondary(void)
+void hash__early_init_mmu_secondary(void)
{
/* Initialize hash table for that CPU */
- if (!firmware_has_feature(FW_FEATURE_LPAR))
- mtspr(SPRN_SDR1, _SDR1);
-
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ mtspr(SPRN_SDR1, _SDR1);
+ else
+ mtspr(SPRN_PTCR,
+ __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ }
/* Initialize SLB */
slb_initialize();
}
* Userspace sets the subpage permissions using the subpage_prot system call.
*
* Result is 0: full permissions, _PAGE_RW: read-only,
- * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ * _PAGE_RWX: no access.
*/
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
/* extract 2-bit bitfield for this 4k subpage */
spp >>= 30 - 2 * ((ea >> 12) & 0xf);
- /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
- spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+ /*
+ * 0 -> full premission
+ * 1 -> Read only
+ * 2 -> no access.
+ * We return the flag that need to be cleared.
+ */
+ spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
return spp;
}
/* Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
- if (access & ~pte_val(*ptep)) {
+ if (!check_pte_access(access, pte_val(*ptep))) {
DBG_LOW(" no access !\n");
rc = 1;
goto bail;
#endif
/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
- /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
- if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
+ /* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */
+ if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
/* If this PTE is non-cacheable and we have restrictions on
* using non cacheable large pages, then we switch to 4k
*/
- if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+ if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
if (user_region) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
unsigned long dsisr)
{
- unsigned long access = _PAGE_PRESENT;
+ unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
flags |= HPTE_NOHPTE_UPDATE;
if (dsisr & DSISR_ISSTORE)
- access |= _PAGE_RW;
+ access |= _PAGE_WRITE;
/*
- * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
- * accessing a userspace segment (even from the kernel). We assume
- * kernel addresses always have the high bit set.
+ * We set _PAGE_PRIVILEGED only when
+ * kernel mode access kernel space.
+ *
+ * _PAGE_PRIVILEGED is NOT set
+ * 1) when kernel mode access user space
+ * 2) user space access kernel space.
*/
+ access |= _PAGE_PRIVILEGED;
if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
- access |= _PAGE_USER;
+ access &= ~_PAGE_PRIVILEGED;
if (trap == 0x400)
access |= _PAGE_EXEC;
return hash_page_mm(mm, ea, access, trap, flags);
}
+#ifdef CONFIG_PPC_MM_SLICES
+static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
+{
+ int psize = get_slice_psize(mm, ea);
+
+ /* We only prefault standard pages for now */
+ if (unlikely(psize != mm->context.user_psize))
+ return false;
+
+ /*
+ * Don't prefault if subpage protection is enabled for the EA.
+ */
+ if (unlikely((psize == MMU_PAGE_4K) && subpage_protection(mm, ea)))
+ return false;
+
+ return true;
+}
+#else
+static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
+{
+ return true;
+}
+#endif
+
void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
{
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
-#ifdef CONFIG_PPC_MM_SLICES
- /* We only prefault standard pages for now */
- if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
+ if (!should_hash_preload(mm, ea))
return;
-#endif
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
" trap=%lx\n", mm, mm->pgd, ea, access, trap);
WARN_ON(hugepage_shift);
#ifdef CONFIG_PPC_64K_PAGES
- /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
+ /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on
* a 64K kernel), then we don't preload, hash_page() will take
* care of it once we actually try to access the page.
* That way we don't have to duplicate all of the logic for segment
* page size demotion here
*/
- if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+ if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
goto out_exit;
#endif /* CONFIG_PPC_64K_PAGES */
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
/* We don't currently support the first MEMBLOCK not mapping 0
old_pmd = pmd_val(pmd);
/* If PMD busy, retry the access */
- if (unlikely(old_pmd & _PAGE_BUSY))
+ if (unlikely(old_pmd & H_PAGE_BUSY))
return 0;
/* If PMD permissions don't match, take page fault */
- if (unlikely(access & ~old_pmd))
+ if (unlikely(!check_pte_access(access, old_pmd)))
return 1;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access
*/
- new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
+ new_pmd = old_pmd | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
new_pmd |= _PAGE_DIRTY;
- } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
- old_pmd, new_pmd));
+ } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
+
rflags = htab_convert_pte_flags(new_pmd);
#if 0
* base page size. This is because demote_segment won't flush
* hash page table entries.
*/
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
+ if ((old_pmd & H_PAGE_HASHPTE) && !(old_pmd & H_PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags);
/*
hash = hpt_hash(vpn, shift, ssize);
/* insert new entry */
pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
- new_pmd |= _PAGE_HASHPTE;
+ new_pmd |= H_PAGE_HASHPTE;
repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
mark_hpte_slot_valid(hpte_slot_array, index, slot);
}
/*
- * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
+ * Mark the pte with H_PAGE_COMBO, if we are trying to hash it with
* base page size 4k.
*/
if (psize == MMU_PAGE_4K)
- new_pmd |= _PAGE_COMBO;
+ new_pmd |= H_PAGE_COMBO;
/*
* The hpte valid is stored in the pgtable whose address is in the
* second half of the PMD. Order this against clearing of the busy bit in
* huge pmd.
*/
smp_wmb();
- *pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
+ *pmdp = __pmd(new_pmd & ~H_PAGE_BUSY);
return 0;
}
do {
old_pte = pte_val(*ptep);
/* If PTE busy, retry the access */
- if (unlikely(old_pte & _PAGE_BUSY))
+ if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
- if (unlikely(access & ~old_pte))
+ if (unlikely(!check_pte_access(access, old_pte)))
return 1;
+
/* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access */
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
- } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
- old_pte, new_pte));
+ } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
rflags = htab_convert_pte_flags(new_pte);
sz = ((1UL) << shift);
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(old_pte & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & _PAGE_F_SECOND)
+ if (old_pte & H_PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+ slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
unsigned long hash = hpt_hash(vpn, shift, ssize);
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
/* clear HPTE slot informations in new PTE */
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
mmu_psize, ssize);
return -1;
}
- new_pte |= (slot << _PAGE_F_GIX_SHIFT) &
- (_PAGE_F_SECOND | _PAGE_F_GIX);
+ new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
}
/*
* No need to use ldarx/stdcx here
*/
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
--- /dev/null
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/mman.h>
+
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ unsigned long ap, shift;
+ struct hstate *hstate = hstate_file(vma->vm_file);
+
+ shift = huge_page_shift(hstate);
+ if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+ ap = mmu_get_ap(MMU_PAGE_2M);
+ else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+ ap = mmu_get_ap(MMU_PAGE_1G);
+ else {
+ WARN(1, "Wrong huge page shift\n");
+ return ;
+ }
+ radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+}
+
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ unsigned long ap, shift;
+ struct hstate *hstate = hstate_file(vma->vm_file);
+
+ shift = huge_page_shift(hstate);
+ if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+ ap = mmu_get_ap(MMU_PAGE_2M);
+ else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+ ap = mmu_get_ap(MMU_PAGE_1G);
+ else {
+ WARN(1, "Wrong huge page shift\n");
+ return ;
+ }
+ radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+}
+
+/*
+ * A vairant of hugetlb_get_unmapped_area doing topdown search
+ * FIXME!! should we do as x86 does or non hugetlb area does ?
+ * ie, use topdown or not based on mmap_is_legacy check ?
+ */
+unsigned long
+radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+ /*
+ * We are always doing an topdown search here. Slice code
+ * does that too.
+ */
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+ if (radix_enabled())
+ return radix__hugetlb_get_unmapped_area(file, addr, len,
+ pgoff, flags);
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
{
#ifdef CONFIG_PPC_MM_SLICES
unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
- return 1UL << mmu_psize_to_shift(psize);
-#else
+ /* With radix we don't use slice, so derive it from vma*/
+ if (!radix_enabled())
+ return 1UL << mmu_psize_to_shift(psize);
+#endif
if (!is_vm_hugetlb_page(vma))
return PAGE_SIZE;
return huge_page_size(hstate_vma(vma));
-#endif
}
static inline bool is_power_of_4(unsigned long x)
size = memparse(str, &str);
- if (add_huge_page_size(size) != 0)
- printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
+ if (add_huge_page_size(size) != 0) {
+ hugetlb_bad_size();
+ pr_err("Invalid huge page size specified(%llu)\n", size);
+ }
return 1;
}
{
int psize;
- if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
else if (mmu_psize_defs[MMU_PAGE_1M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
+
return 0;
}
end = pte_end;
pte = READ_ONCE(*ptep);
- mask = _PAGE_PRESENT | _PAGE_USER;
+ mask = _PAGE_PRESENT | _PAGE_READ;
if (write)
- mask |= _PAGE_RW;
+ mask |= _PAGE_WRITE;
if ((pte_val(pte) & mask) != mask)
return 0;
#include "mmu_decl.h"
#ifdef CONFIG_PPC_STD_MMU_64
-#if PGTABLE_RANGE > USER_VSID_RANGE
+#if H_PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
-#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
#warning TASK_SIZE is smaller than it needs to be.
#endif
#endif /* CONFIG_PPC_STD_MMU_64 */
return 0;
}
-/* On hash-based CPUs, the vmemmap is bolted in the hash table.
- *
- * On Book3E CPUs, the vmemmap is currently mapped in the top half of
- * the vmalloc space using normal page tables, though the size of
- * pages encoded in the PTEs can be different
- */
-
-#ifdef CONFIG_PPC_BOOK3E
-static int __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
-{
- /* Create a PTE encoding without page size */
- unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
- _PAGE_KERNEL_RW;
-
- /* PTEs only contain page size encodings up to 32M */
- BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
-
- /* Encode the size in the PTE */
- flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
-
- /* For each PTE for that area, map things. Note that we don't
- * increment phys because all PTEs are of the large size and
- * thus must have the low bits clear
- */
- for (i = 0; i < page_size; i += PAGE_SIZE)
- BUG_ON(map_kernel_page(start + i, phys, flags));
-
- return 0;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
- unsigned long page_size)
-{
-}
-#endif
-#else /* CONFIG_PPC_BOOK3E */
-static int __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
-{
- int rc = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize, mmu_kernel_ssize);
- if (rc < 0) {
- int rc2 = htab_remove_mapping(start, start + page_size,
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(rc2 && (rc2 != -ENOENT));
- }
- return rc;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
- unsigned long page_size)
-{
- int rc = htab_remove_mapping(start, start + page_size,
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON((rc < 0) && (rc != -ENOENT));
- WARN_ON(rc == -ENOENT);
-}
-#endif
-
-#endif /* CONFIG_PPC_BOOK3E */
-
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;
static int num_left;
EXPORT_SYMBOL(kmap_pte);
pgprot_t kmap_prot;
EXPORT_SYMBOL(kmap_prot);
+#define TOP_ZONE ZONE_HIGHMEM
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
vaddr), vaddr), vaddr);
}
+#else
+#define TOP_ZONE ZONE_NORMAL
#endif
int page_is_ram(unsigned long pfn)
*/
int dma_pfn_limit_to_zone(u64 pfn_limit)
{
- enum zone_type top_zone = ZONE_NORMAL;
int i;
-#ifdef CONFIG_HIGHMEM
- top_zone = ZONE_HIGHMEM;
-#endif
-
- for (i = top_zone; i >= 0; i--) {
+ for (i = TOP_ZONE; i >= 0; i--) {
if (max_zone_pfns[i] <= pfn_limit)
return i;
}
{
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
- enum zone_type top_zone;
#ifdef CONFIG_PPC32
unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
(long int)((top_of_ram - total_ram) >> 20));
#ifdef CONFIG_HIGHMEM
- top_zone = ZONE_HIGHMEM;
limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
-#else
- top_zone = ZONE_NORMAL;
#endif
-
- limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
+ limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT);
zone_limits_final = true;
free_area_init_nodes(max_zone_pfns);
* We don't need to worry about _PAGE_PRESENT here because we are
* called with either mm->page_table_lock held or ptl lock held
*/
- unsigned long access = 0, trap;
+ unsigned long access, trap;
+
+ if (radix_enabled())
+ return;
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(*ptep) || address >= TASK_SIZE)
*
* We also avoid filling the hash if not coming from a fault
*/
- if (current->thread.regs == NULL)
- return;
- trap = TRAP(current->thread.regs);
- if (trap == 0x400)
- access |= _PAGE_EXEC;
- else if (trap != 0x300)
+
+ trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+ switch (trap) {
+ case 0x300:
+ access = 0UL;
+ break;
+ case 0x400:
+ access = _PAGE_EXEC;
+ break;
+ default:
return;
+ }
+
hash_preload(vma->vm_mm, address, access, trap);
#endif /* CONFIG_PPC_STD_MMU */
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/sched.h>
+#include <linux/elf-randomize.h>
+#include <linux/security.h>
+#include <linux/mman.h>
/*
* Top of mmap area (just below the process stack).
return PAGE_ALIGN(TASK_SIZE - gap - rnd);
}
+#ifdef CONFIG_PPC_RADIX_MMU
+/*
+ * Same function as generic code used only for radix, because we don't need to overload
+ * the generic one. But we will have to duplicate, because hash select
+ * HAVE_ARCH_UNMAPPED_AREA
+ */
+static unsigned long
+radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct vm_unmapped_area_info info;
+
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ return addr;
+
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long
+radix__arch_get_unmapped_area_topdown(struct file *filp,
+ const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ struct vm_unmapped_area_info info;
+
+ /* requested length too big for entire address space */
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ return addr;
+
+ /* requesting a specific address */
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = mm->mmap_base;
+ info.align_mask = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
+ unsigned long random_factor)
+{
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = radix__arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor);
+ mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
+ }
+}
+#else
+/* dummy */
+extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
+ unsigned long random_factor);
+#endif
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
+ if (radix_enabled())
+ return radix__arch_pick_mmap_layout(mm, random_factor);
/*
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
--- /dev/null
+/*
+ * MMU context allocation for 64-bit kernels.
+ *
+ * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#include "icswx.h"
+
+static DEFINE_SPINLOCK(mmu_context_lock);
+static DEFINE_IDA(mmu_context_ida);
+
+int __init_new_context(void)
+{
+ int index;
+ int err;
+
+again:
+ if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock(&mmu_context_lock);
+ err = ida_get_new_above(&mmu_context_ida, 1, &index);
+ spin_unlock(&mmu_context_lock);
+
+ if (err == -EAGAIN)
+ goto again;
+ else if (err)
+ return err;
+
+ if (index > MAX_USER_CONTEXT) {
+ spin_lock(&mmu_context_lock);
+ ida_remove(&mmu_context_ida, index);
+ spin_unlock(&mmu_context_lock);
+ return -ENOMEM;
+ }
+
+ return index;
+}
+EXPORT_SYMBOL_GPL(__init_new_context);
+static int radix__init_new_context(struct mm_struct *mm, int index)
+{
+ unsigned long rts_field;
+
+ /*
+ * set the process table entry,
+ */
+ rts_field = 3ull << PPC_BITLSHIFT(2);
+ process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
+ return 0;
+}
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ int index;
+
+ index = __init_new_context();
+ if (index < 0)
+ return index;
+
+ if (radix_enabled()) {
+ radix__init_new_context(mm, index);
+ } else {
+
+ /* The old code would re-promote on fork, we don't do that
+ * when using slices as it could cause problem promoting slices
+ * that have been forced down to 4K
+ *
+ * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we
+ * properly initialize context slice details for newly allocated
+ * mm's (which will have id == 0) and don't alter context slice
+ * inherited via fork (which will have id != 0).
+ *
+ * We should not be calling init_new_context() on init_mm. Hence a
+ * check against 0 is ok.
+ */
+ if (mm->context.id == 0)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+ subpage_prot_init_new_context(mm);
+ }
+ mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+ mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+ if (!mm->context.cop_lockp) {
+ __destroy_context(index);
+ subpage_prot_free(mm);
+ mm->context.id = MMU_NO_CONTEXT;
+ return -ENOMEM;
+ }
+ spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
+
+#ifdef CONFIG_PPC_64K_PAGES
+ mm->context.pte_frag = NULL;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(&mm->context);
+#endif
+ return 0;
+}
+
+void __destroy_context(int context_id)
+{
+ spin_lock(&mmu_context_lock);
+ ida_remove(&mmu_context_ida, context_id);
+ spin_unlock(&mmu_context_lock);
+}
+EXPORT_SYMBOL_GPL(__destroy_context);
+
+#ifdef CONFIG_PPC_64K_PAGES
+static void destroy_pagetable_page(struct mm_struct *mm)
+{
+ int count;
+ void *pte_frag;
+ struct page *page;
+
+ pte_frag = mm->context.pte_frag;
+ if (!pte_frag)
+ return;
+
+ page = virt_to_page(pte_frag);
+ /* drop all the pending references */
+ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
+ /* We allow PTE_FRAG_NR fragments from a PTE page */
+ if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+#else
+static inline void destroy_pagetable_page(struct mm_struct *mm)
+{
+ return;
+}
+#endif
+
+
+void destroy_context(struct mm_struct *mm)
+{
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_cleanup(&mm->context);
+#endif
+
+#ifdef CONFIG_PPC_ICSWX
+ drop_cop(mm->context.acop, mm);
+ kfree(mm->context.cop_lockp);
+ mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
+
+ if (radix_enabled())
+ process_tb[mm->context.id].prtb1 = 0;
+ else
+ subpage_prot_free(mm);
+ destroy_pagetable_page(mm);
+ __destroy_context(mm->context.id);
+ mm->context.id = MMU_NO_CONTEXT;
+}
+
+#ifdef CONFIG_PPC_RADIX_MMU
+void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+{
+ mtspr(SPRN_PID, next->context.id);
+ asm volatile("isync": : :"memory");
+}
+#endif
+++ /dev/null
-/*
- * MMU context allocation for 64-bit kernels.
- *
- * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/slab.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
-
-#include "icswx.h"
-
-static DEFINE_SPINLOCK(mmu_context_lock);
-static DEFINE_IDA(mmu_context_ida);
-
-int __init_new_context(void)
-{
- int index;
- int err;
-
-again:
- if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
- return -ENOMEM;
-
- spin_lock(&mmu_context_lock);
- err = ida_get_new_above(&mmu_context_ida, 1, &index);
- spin_unlock(&mmu_context_lock);
-
- if (err == -EAGAIN)
- goto again;
- else if (err)
- return err;
-
- if (index > MAX_USER_CONTEXT) {
- spin_lock(&mmu_context_lock);
- ida_remove(&mmu_context_ida, index);
- spin_unlock(&mmu_context_lock);
- return -ENOMEM;
- }
-
- return index;
-}
-EXPORT_SYMBOL_GPL(__init_new_context);
-
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- int index;
-
- index = __init_new_context();
- if (index < 0)
- return index;
-
- /* The old code would re-promote on fork, we don't do that
- * when using slices as it could cause problem promoting slices
- * that have been forced down to 4K
- */
- if (slice_mm_new_context(mm))
- slice_set_user_psize(mm, mmu_virtual_psize);
- subpage_prot_init_new_context(mm);
- mm->context.id = index;
-#ifdef CONFIG_PPC_ICSWX
- mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
- if (!mm->context.cop_lockp) {
- __destroy_context(index);
- subpage_prot_free(mm);
- mm->context.id = MMU_NO_CONTEXT;
- return -ENOMEM;
- }
- spin_lock_init(mm->context.cop_lockp);
-#endif /* CONFIG_PPC_ICSWX */
-
-#ifdef CONFIG_PPC_64K_PAGES
- mm->context.pte_frag = NULL;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&mm->context);
-#endif
- return 0;
-}
-
-void __destroy_context(int context_id)
-{
- spin_lock(&mmu_context_lock);
- ida_remove(&mmu_context_ida, context_id);
- spin_unlock(&mmu_context_lock);
-}
-EXPORT_SYMBOL_GPL(__destroy_context);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static void destroy_pagetable_page(struct mm_struct *mm)
-{
- int count;
- void *pte_frag;
- struct page *page;
-
- pte_frag = mm->context.pte_frag;
- if (!pte_frag)
- return;
-
- page = virt_to_page(pte_frag);
- /* drop all the pending references */
- count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
- /* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-}
-
-#else
-static inline void destroy_pagetable_page(struct mm_struct *mm)
-{
- return;
-}
-#endif
-
-
-void destroy_context(struct mm_struct *mm)
-{
-#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_cleanup(&mm->context);
-#endif
-
-#ifdef CONFIG_PPC_ICSWX
- drop_cop(mm->context.acop, mm);
- kfree(mm->context.cop_lockp);
- mm->context.cop_lockp = NULL;
-#endif /* CONFIG_PPC_ICSWX */
-
- destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
- subpage_prot_free(mm);
- mm->context.id = MMU_NO_CONTEXT;
-}
static void context_check_map(void) { }
#endif
-void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
{
unsigned int i, id, cpu = smp_processor_id();
unsigned long *map;
mm->context.active = 0;
#ifdef CONFIG_PPC_MM_SLICES
- if (slice_mm_new_context(mm))
- slice_set_user_psize(mm, mmu_virtual_psize);
+ slice_set_user_psize(mm, mmu_virtual_psize);
#endif
return 0;
#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags);
-#endif /* CONFIG_PPC64 */
-
extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern phys_addr_t __initial_memory_limit_addr;
--- /dev/null
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+
+#include "mmu_decl.h"
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ /* Create a PTE encoding without page size */
+ unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+ _PAGE_KERNEL_RW;
+
+ /* PTEs only contain page size encodings up to 32M */
+ BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+ /* Encode the size in the PTE */
+ flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+ /* For each PTE for that area, map things. Note that we don't
+ * increment phys because all PTEs are of the large size and
+ * thus must have the low bits clear
+ */
+ for (i = 0; i < page_size; i += PAGE_SIZE)
+ BUG_ON(map_kernel_page(start + i, phys, flags));
+
+ return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size)
+{
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+ void *pt;
+
+ pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
+ memset(pt, 0, size);
+
+ return pt;
+}
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
+ if (slab_is_available()) {
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+ __pgprot(flags)));
+ } else {
+ pgdp = pgd_offset_k(ea);
+#ifndef __PAGETABLE_PUD_FOLDED
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ BUG_ON(pudp == NULL);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+#endif /* !__PAGETABLE_PUD_FOLDED */
+ pudp = pud_offset(pgdp, ea);
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+ BUG_ON(pmdp == NULL);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE);
+ BUG_ON(ptep == NULL);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+ __pgprot(flags)));
+ }
+
+ smp_wmb();
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+#include <trace/events/thp.h>
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp, pmd_t entry, int dirty)
+{
+ int changed;
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pmd_trans_huge(*pmdp));
+ assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+ changed = !pmd_same(*(pmdp), entry);
+ if (changed) {
+ __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+ /*
+ * Since we are not supporting SW TLB systems, we don't
+ * have any thing similar to flush_tlb_page_nohash()
+ */
+ }
+ return changed;
+}
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+ assert_spin_locked(&mm->page_table_lock);
+ WARN_ON(!pmd_trans_huge(pmd));
+#endif
+ trace_hugepage_set_pmd(addr, pmd_val(pmd));
+ return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ /*
+ * This ensures that generic code that rely on IRQ disabling
+ * to prevent a parallel THP split work as expected.
+ */
+ kick_all_cpus_sync();
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+ return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+ unsigned long pmdv;
+
+ pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+ return pmd_set_protbits(__pmd(pmdv), pgprot);
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+ return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ unsigned long pmdv;
+
+ pmdv = pmd_val(pmd);
+ pmdv &= _HPAGE_CHG_MASK;
+ return pmd_set_protbits(__pmd(pmdv), newprot);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ return;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
--- /dev/null
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ */
+int __meminit hash__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ int rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void hash__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size)
+{
+ int rc = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON((rc < 0) && (rc != -ENOENT));
+ WARN_ON(rc == -ENOENT);
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
+ if (slab_is_available()) {
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+ __pgprot(flags)));
+ } else {
+ /*
+ * If the mm subsystem is not fully up, we cannot create a
+ * linux page table entry for this mapping. Simply bolt an
+ * entry in the hardware page table.
+ *
+ */
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_io_psize, mmu_kernel_ssize)) {
+ printk(KERN_ERR "Failed to do bolted mapping IO "
+ "memory at %016lx !\n", pa);
+ return -ENOMEM;
+ }
+ }
+
+ smp_wmb();
+ return 0;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set)
+{
+ __be64 old_be, tmp;
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pmd_trans_huge(*pmdp));
+ assert_spin_locked(&mm->page_table_lock);
+#endif
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ and. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ or %1,%1,%7\n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
+ "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
+ : "cc" );
+
+ old = be64_to_cpu(old_be);
+
+ trace_hugepage_update(addr, old, clr, set);
+ if (old & H_PAGE_HASHPTE)
+ hpte_do_hugepage_flush(mm, addr, pmdp, old);
+ return old;
+}
+
+pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_t pmd;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(pmd_trans_huge(*pmdp));
+
+ pmd = *pmdp;
+ pmd_clear(pmdp);
+ /*
+ * Wait for all pending hash_page to finish. This is needed
+ * in case of subpage collapse. When we collapse normal pages
+ * to hugepage, we first clear the pmd, then invalidate all
+ * the PTE entries. The assumption here is that any low level
+ * page fault will see a none pmd and take the slow path that
+ * will wait on mmap_sem. But we could very well be in a
+ * hash_page with local ptep pointer value. Such a hash page
+ * can result in adding new HPTE entries for normal subpages.
+ * That means we could be modifying the page content as we
+ * copy them to a huge page. So wait for parallel hash_page
+ * to finish before invalidating HPTE entries. We can do this
+ * by sending an IPI to all the cpus and executing a dummy
+ * function there.
+ */
+ kick_all_cpus_sync();
+ /*
+ * Now invalidate the hpte entries in the range
+ * covered by pmd. This make sure we take a
+ * fault and will find the pmd as none, which will
+ * result in a major fault which takes mmap_sem and
+ * hence wait for collapse to complete. Without this
+ * the __collapse_huge_page_copy can result in copying
+ * the old content.
+ */
+ flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+ return pmd;
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ pgtable_t *pgtable_slot;
+ assert_spin_locked(&mm->page_table_lock);
+ /*
+ * we store the pgtable in the second half of PMD
+ */
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ *pgtable_slot = pgtable;
+ /*
+ * expose the deposited pgtable to other cpus.
+ * before we set the hugepage PTE at pmd level
+ * hash fault code looks at the deposted pgtable
+ * to store hash index values.
+ */
+ smp_wmb();
+}
+
+pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_t pgtable;
+ pgtable_t *pgtable_slot;
+
+ assert_spin_locked(&mm->page_table_lock);
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ pgtable = *pgtable_slot;
+ /*
+ * Once we withdraw, mark the entry NULL.
+ */
+ *pgtable_slot = NULL;
+ /*
+ * We store HPTE information in the deposited PTE fragment.
+ * zero out the content on withdraw.
+ */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ return pgtable;
+}
+
+void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+ /*
+ * We can't mark the pmd none here, because that will cause a race
+ * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
+ * we spilt, but at the same time we wan't rest of the ppc64 code
+ * not to insert hash pte on this, because we will be modifying
+ * the deposited pgtable in the caller of this function. Hence
+ * clear the _PAGE_USER so that we move the fault handling to
+ * higher level function and that will serialize against ptl.
+ * We need to flush existing hash pte entries here even though,
+ * the translation is still valid, because we will withdraw
+ * pgtable_t after this.
+ */
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * neesd to be flushed.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long old_pmd)
+{
+ int ssize;
+ unsigned int psize;
+ unsigned long vsid;
+ unsigned long flags = 0;
+ const struct cpumask *tmp;
+
+ /* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
+ psize = get_slice_psize(mm, addr);
+ BUG_ON(psize == MMU_PAGE_16M);
+#endif
+ if (old_pmd & H_PAGE_COMBO)
+ psize = MMU_PAGE_4K;
+ else
+ psize = MMU_PAGE_64K;
+
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ tmp = cpumask_of(smp_processor_id());
+ if (cpumask_equal(mm_cpumask(mm), tmp))
+ flags |= HPTE_LOCAL_UPDATE;
+
+ return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
+}
+
+pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old_pmd;
+ pgtable_t pgtable;
+ unsigned long old;
+ pgtable_t *pgtable_slot;
+
+ old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+ old_pmd = __pmd(old);
+ /*
+ * We have pmd == none and we are holding page_table_lock.
+ * So we can safely go and clear the pgtable hash
+ * index info.
+ */
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ pgtable = *pgtable_slot;
+ /*
+ * Let's zero out old valid and hash index details
+ * hash fault look at them.
+ */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ /*
+ * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since format of pte_t is different from
+ * pmd_t we want to prevent transit from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * find_linux_pte_or_hugepage to finish.
+ */
+ kick_all_cpus_sync();
+ return old_pmd;
+}
+
+int hash__has_transparent_hugepage(void)
+{
+
+ if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+ return 0;
+ /*
+ * We support THP only if PMD_SIZE is 16MB.
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
+ return 0;
+ /*
+ * We need to make sure that we support 16MB hugepage in a segement
+ * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
+ * of 64K.
+ */
+ /*
+ * If we have 64K HPTE, we will be using that by default
+ */
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
+ return 0;
+ /*
+ * Ok we only have 4K HPTE
+ */
+ if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
+ return 0;
+
+ return 1;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
--- /dev/null
+/*
+ * Page table handling routines for radix page table.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+
+#include <trace/events/thp.h>
+
+static int native_update_partition_table(u64 patb1)
+{
+ partition_tb->patb1 = cpu_to_be64(patb1);
+ return 0;
+}
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+ void *pt;
+
+ pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+ memset(pt, 0, size);
+
+ return pt;
+}
+
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ /*
+ * Make sure task size is correct as per the max adddr
+ */
+ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+ if (slab_is_available()) {
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+ } else {
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ BUG_ON(pudp == NULL);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+ BUG_ON(pmdp == NULL);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE);
+ BUG_ON(ptep == NULL);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+ }
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+ smp_wmb();
+ return 0;
+}
+
+static void __init radix_init_pgtable(void)
+{
+ int loop_count;
+ u64 base, end, start_addr;
+ unsigned long rts_field;
+ struct memblock_region *reg;
+ unsigned long linear_page_size;
+
+ /* We don't support slb for radix */
+ mmu_slb_size = 0;
+ /*
+ * Create the linear mapping, using standard page size for now
+ */
+ loop_count = 0;
+ for_each_memblock(memory, reg) {
+
+ start_addr = reg->base;
+
+redo:
+ if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift)
+ linear_page_size = PUD_SIZE;
+ else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift)
+ linear_page_size = PMD_SIZE;
+ else
+ linear_page_size = PAGE_SIZE;
+
+ base = _ALIGN_UP(start_addr, linear_page_size);
+ end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size);
+
+ pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n",
+ (unsigned long)base, (unsigned long)end,
+ linear_page_size);
+
+ while (base < end) {
+ radix__map_kernel_page((unsigned long)__va(base),
+ base, PAGE_KERNEL_X,
+ linear_page_size);
+ base += linear_page_size;
+ }
+ /*
+ * map the rest using lower page size
+ */
+ if (end < reg->base + reg->size) {
+ start_addr = end;
+ loop_count++;
+ goto redo;
+ }
+ }
+ /*
+ * Allocate Partition table and process table for the
+ * host.
+ */
+ BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large.");
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+ /*
+ * Fill in the process table.
+ * we support 52 bits, hence 52-28 = 24, 11000
+ */
+ rts_field = 3ull << PPC_BITLSHIFT(2);
+ process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
+ /*
+ * Fill in the partition table. We are suppose to use effective address
+ * of process table here. But our linear mapping also enable us to use
+ * physical address here.
+ */
+ ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR);
+ pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+}
+
+static void __init radix_init_partition_table(void)
+{
+ unsigned long rts_field;
+ /*
+ * we support 52 bits, hence 52-28 = 24, 11000
+ */
+ rts_field = 3ull << PPC_BITLSHIFT(2);
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
+ partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
+ partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
+ RADIX_PGD_INDEX_SIZE | PATB_HR);
+ printk("Partition table %p\n", partition_tb);
+
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void __init radix_init_native(void)
+{
+ ppc_md.update_partition_table = native_update_partition_table;
+}
+
+static int __init get_idx_from_shift(unsigned int shift)
+{
+ int idx = -1;
+
+ switch (shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x15:
+ idx = MMU_PAGE_2M;
+ break;
+ case 0x1e:
+ idx = MMU_PAGE_1G;
+ break;
+ }
+ return idx;
+}
+
+static int __init radix_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ int size = 0;
+ int shift, idx;
+ unsigned int ap;
+ const __be32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
+ if (!prop)
+ return 0;
+
+ pr_info("Page sizes from device-tree:\n");
+ for (; size >= 4; size -= 4, ++prop) {
+
+ struct mmu_psize_def *def;
+
+ /* top 3 bit is AP encoding */
+ shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
+ ap = be32_to_cpu(prop[0]) >> 29;
+ pr_info("Page size sift = %d AP=0x%x\n", shift, ap);
+
+ idx = get_idx_from_shift(shift);
+ if (idx < 0)
+ continue;
+
+ def = &mmu_psize_defs[idx];
+ def->shift = shift;
+ def->ap = ap;
+ }
+
+ /* needed ? */
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+ return 1;
+}
+
+static void __init radix_init_page_sizes(void)
+{
+ int rc;
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found */
+ goto found;
+ /*
+ * let's assume we have page 4k and 64k support
+ */
+ mmu_psize_defs[MMU_PAGE_4K].shift = 12;
+ mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+
+ mmu_psize_defs[MMU_PAGE_64K].shift = 16;
+ mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+found:
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ if (mmu_psize_defs[MMU_PAGE_2M].shift) {
+ /*
+ * map vmemmap using 2M if available
+ */
+ mmu_vmemmap_psize = MMU_PAGE_2M;
+ }
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+ return;
+}
+
+void __init radix__early_init_mmu(void)
+{
+ unsigned long lpcr;
+ /*
+ * setup LPCR UPRT based on mmu_features
+ */
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ /* PAGE_SIZE mappings */
+ mmu_virtual_psize = MMU_PAGE_64K;
+#else
+ mmu_virtual_psize = MMU_PAGE_4K;
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* vmemmap mapping */
+ mmu_vmemmap_psize = mmu_virtual_psize;
+#endif
+ /*
+ * initialize page table size
+ */
+ __pte_index_size = RADIX_PTE_INDEX_SIZE;
+ __pmd_index_size = RADIX_PMD_INDEX_SIZE;
+ __pud_index_size = RADIX_PUD_INDEX_SIZE;
+ __pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
+ __pte_table_size = RADIX_PTE_TABLE_SIZE;
+ __pmd_table_size = RADIX_PMD_TABLE_SIZE;
+ __pud_table_size = RADIX_PUD_TABLE_SIZE;
+ __pgd_table_size = RADIX_PGD_TABLE_SIZE;
+
+ __pmd_val_bits = RADIX_PMD_VAL_BITS;
+ __pud_val_bits = RADIX_PUD_VAL_BITS;
+ __pgd_val_bits = RADIX_PGD_VAL_BITS;
+
+ __kernel_virt_start = RADIX_KERN_VIRT_START;
+ __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
+ __vmalloc_start = RADIX_VMALLOC_START;
+ __vmalloc_end = RADIX_VMALLOC_END;
+ vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
+ ioremap_bot = IOREMAP_BASE;
+ /*
+ * For now radix also use the same frag size
+ */
+ __pte_frag_nr = H_PTE_FRAG_NR;
+ __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+
+ radix_init_page_sizes();
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ radix_init_partition_table();
+
+ radix_init_pgtable();
+}
+
+void radix__early_init_mmu_secondary(void)
+{
+ unsigned long lpcr;
+ /*
+ * setup LPCR UPRT based on mmu_features
+ */
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+ /*
+ * update partition table control register, 64 K size.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ mtspr(SPRN_PTCR,
+ __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ /* We don't currently support the first MEMBLOCK not mapping 0
+ * physical on those processors
+ */
+ BUG_ON(first_memblock_base != 0);
+ /*
+ * We limit the allocation that depend on ppc64_rma_size
+ * to first_memblock_size. We also clamp it to 1GB to
+ * avoid some funky things such as RTAS bugs.
+ *
+ * On radix config we really don't have a limitation
+ * on real mode access. But keeping it as above works
+ * well enough.
+ */
+ ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+ /*
+ * Finally limit subsequent allocations. We really don't want
+ * to limit the memblock allocations to rma_size. FIXME!! should
+ * we even limit at all ?
+ */
+ memblock_set_current_limit(first_memblock_base + first_memblock_size);
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit radix__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ /* Create a PTE encoding */
+ unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+
+ BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
+ return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+{
+ /* FIXME!! intel does more. We should free page tables mapping vmemmap ? */
+}
+#endif
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set)
+{
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ assert_spin_locked(&mm->page_table_lock);
+#endif
+
+ old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
+ trace_hugepage_update(addr, old, clr, set);
+
+ return old;
+}
+
+pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+
+{
+ pmd_t pmd;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ /*
+ * khugepaged calls this for normal pmd
+ */
+ pmd = *pmdp;
+ pmd_clear(pmdp);
+ /*FIXME!! Verify whether we need this kick below */
+ kick_all_cpus_sync();
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ return pmd;
+}
+
+/*
+ * For us pgtable_t is pte_t *. Inorder to save the deposisted
+ * page table, we consider the allocated page table as a list
+ * head. On withdraw we need to make sure we zero out the used
+ * list_head memory area.
+ */
+void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pte_t *ptep;
+ pgtable_t pgtable;
+ struct list_head *lh;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ *ptep = __pte(0);
+ ptep++;
+ *ptep = __pte(0);
+ return pgtable;
+}
+
+
+pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old_pmd;
+ unsigned long old;
+
+ old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+ old_pmd = __pmd(old);
+ /*
+ * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since format of pte_t is different from
+ * pmd_t we want to prevent transit from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * find_linux_pte_or_hugepage to finish.
+ */
+ kick_all_cpus_sync();
+ return old_pmd;
+}
+
+int radix__has_transparent_hugepage(void)
+{
+ /* For radix 2M at PMD level means thp */
+ if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+ return 1;
+ return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/* We only try to do i/d cache coherency on stuff that looks like
* reasonably "normal" PTEs. We currently require a PTE to be present
- * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
+ * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
* on userspace PTEs
*/
static inline int pte_looks_normal(pte_t pte)
{
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+ if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+ if (pte_ci(pte))
+ return 0;
+ if (pte_user(pte))
+ return 1;
+ }
+ return 0;
+#else
return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
- (_PAGE_PRESENT | _PAGE_USER);
+ (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_USER);
+#endif
}
static struct page *maybe_pte_to_page(pte_t pte)
static pte_t set_pte_filter(pte_t pte)
{
+ if (radix_enabled())
+ return pte;
+
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
* _PAGE_PRESENT, but we can be sure that it is not in hpte.
* Hence we can use set_pte_at for them.
*/
- VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
- (_PAGE_PRESENT | _PAGE_USER));
+ VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+
/*
* Add the pte bit when tryint set a pte
*/
#include "mmu_decl.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/thp.h>
-
-/* Some sanity checking */
-#if TASK_SIZE_USER64 > PGTABLE_RANGE
-#error TASK_SIZE_USER64 exceeds pagetable range
-#endif
-
#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
-unsigned long ioremap_bot = IOREMAP_BASE;
-
-#ifdef CONFIG_PPC_MMU_NOHASH
-static __ref void *early_alloc_pgtable(unsigned long size)
-{
- void *pt;
-
- pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
- memset(pt, 0, size);
-
- return pt;
-}
-#endif /* CONFIG_PPC_MMU_NOHASH */
-
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * map_kernel_page currently only called by __ioremap
- * map_kernel_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
+ * partition table and process table for ISA 3.0
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
- } else {
-#ifdef CONFIG_PPC_MMU_NOHASH
- pgdp = pgd_offset_k(ea);
-#ifdef PUD_TABLE_SIZE
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
-#endif /* PUD_TABLE_SIZE */
- pudp = pud_offset(pgdp, ea);
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
-#else /* CONFIG_PPC_MMU_NOHASH */
- /*
- * If the mm subsystem is not fully up, we cannot create a
- * linux page table entry for this mapping. Simply bolt an
- * entry in the hardware page table.
- *
- */
- if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
- mmu_io_psize, mmu_kernel_ssize)) {
- printk(KERN_ERR "Failed to do bolted mapping IO "
- "memory at %016lx !\n", pa);
- return -ENOMEM;
- }
-#endif /* !CONFIG_PPC_MMU_NOHASH */
- }
-
- smp_wmb();
- return 0;
-}
-
+struct prtb_entry *process_tb;
+struct patb_entry *partition_tb;
+/*
+ * page table size
+ */
+unsigned long __pte_index_size;
+EXPORT_SYMBOL(__pte_index_size);
+unsigned long __pmd_index_size;
+EXPORT_SYMBOL(__pmd_index_size);
+unsigned long __pud_index_size;
+EXPORT_SYMBOL(__pud_index_size);
+unsigned long __pgd_index_size;
+EXPORT_SYMBOL(__pgd_index_size);
+unsigned long __pmd_cache_index;
+EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pte_table_size;
+EXPORT_SYMBOL(__pte_table_size);
+unsigned long __pmd_table_size;
+EXPORT_SYMBOL(__pmd_table_size);
+unsigned long __pud_table_size;
+EXPORT_SYMBOL(__pud_table_size);
+unsigned long __pgd_table_size;
+EXPORT_SYMBOL(__pgd_table_size);
+unsigned long __pmd_val_bits;
+EXPORT_SYMBOL(__pmd_val_bits);
+unsigned long __pud_val_bits;
+EXPORT_SYMBOL(__pud_val_bits);
+unsigned long __pgd_val_bits;
+EXPORT_SYMBOL(__pgd_val_bits);
+unsigned long __kernel_virt_start;
+EXPORT_SYMBOL(__kernel_virt_start);
+unsigned long __kernel_virt_size;
+EXPORT_SYMBOL(__kernel_virt_size);
+unsigned long __vmalloc_start;
+EXPORT_SYMBOL(__vmalloc_start);
+unsigned long __vmalloc_end;
+EXPORT_SYMBOL(__vmalloc_end);
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+unsigned long __pte_frag_nr;
+EXPORT_SYMBOL(__pte_frag_nr);
+unsigned long __pte_frag_size_shift;
+EXPORT_SYMBOL(__pte_frag_size_shift);
+unsigned long ioremap_bot;
+#else /* !CONFIG_PPC_BOOK3S_64 */
+unsigned long ioremap_bot = IOREMAP_BASE;
+#endif
/**
* __ioremap_at - Low level function to establish the page tables
if ((flags & _PAGE_PRESENT) == 0)
flags |= pgprot_val(PAGE_KERNEL);
- /* Non-cacheable page cannot be coherent */
- if (flags & _PAGE_NO_CACHE)
- flags &= ~_PAGE_COHERENT;
-
/* We don't support the 4K PFN hack with ioremap */
- if (flags & _PAGE_4K_PFN)
+ if (flags & H_PAGE_4K_PFN)
return NULL;
WARN_ON(pa & ~PAGE_MASK);
void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
- unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
+ unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
void *caller = __builtin_return_address(0);
if (ppc_md.ioremap)
void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
- unsigned long flags = _PAGE_NO_CACHE;
+ unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
void *caller = __builtin_return_address(0);
if (ppc_md.ioremap)
void *caller = __builtin_return_address(0);
/* writeable implies dirty for kernel addresses */
- if (flags & _PAGE_RW)
+ if (flags & _PAGE_WRITE)
flags |= _PAGE_DIRTY;
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- flags &= ~(_PAGE_USER | _PAGE_EXEC);
+ /* we don't want to let _PAGE_EXEC leak out */
+ flags &= ~_PAGE_EXEC;
+ /*
+ * Force kernel mapping.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64)
+ flags |= _PAGE_PRIVILEGED;
+#else
+ flags &= ~_PAGE_USER;
+#endif
+
#ifdef _PAGE_BAP_SR
/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
return (pte_t *)ret;
}
-pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
pte_t *pte;
return __alloc_for_cache(mm, kernel);
}
+#endif /* CONFIG_PPC_64K_PAGES */
-void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
+void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
if (put_page_testzero(page)) {
}
#ifdef CONFIG_SMP
-static void page_table_free_rcu(void *table)
-{
- struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-}
-
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
unsigned long pgf = (unsigned long)table;
if (!shift)
/* PTE page needs special handling */
- page_table_free_rcu(table);
+ pte_fragment_free(table, 0);
else {
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(shift), table);
{
if (!shift) {
/* PTE page needs special handling */
- struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
+ pte_fragment_free(table, 0);
} else {
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(shift), table);
}
}
#endif
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-/*
- * This is called when relaxing access to a hugepage. It's also called in the page
- * fault path when we don't hit any of the major fault cases, ie, a minor
- * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
- * handled those two for us, we additionally deal with missing execute
- * permission here on some processors
- */
-int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp, pmd_t entry, int dirty)
-{
- int changed;
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
- assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
- changed = !pmd_same(*(pmdp), entry);
- if (changed) {
- __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
- /*
- * Since we are not supporting SW TLB systems, we don't
- * have any thing similar to flush_tlb_page_nohash()
- */
- }
- return changed;
-}
-
-unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long clr,
- unsigned long set)
-{
-
- unsigned long old, tmp;
-
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
- assert_spin_locked(&mm->page_table_lock);
-#endif
-
-#ifdef PTE_ATOMIC_UPDATES
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- andi. %1,%0,%6\n\
- bne- 1b \n\
- andc %1,%0,%4 \n\
- or %1,%1,%7\n\
- stdcx. %1,0,%3 \n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
- : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
- : "cc" );
-#else
- old = pmd_val(*pmdp);
- *pmdp = __pmd((old & ~clr) | set);
-#endif
- trace_hugepage_update(addr, old, clr, set);
- if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(mm, addr, pmdp, old);
- return old;
-}
-
-pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- pmd_t pmd;
-
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(pmd_trans_huge(*pmdp));
-
- pmd = *pmdp;
- pmd_clear(pmdp);
- /*
- * Wait for all pending hash_page to finish. This is needed
- * in case of subpage collapse. When we collapse normal pages
- * to hugepage, we first clear the pmd, then invalidate all
- * the PTE entries. The assumption here is that any low level
- * page fault will see a none pmd and take the slow path that
- * will wait on mmap_sem. But we could very well be in a
- * hash_page with local ptep pointer value. Such a hash page
- * can result in adding new HPTE entries for normal subpages.
- * That means we could be modifying the page content as we
- * copy them to a huge page. So wait for parallel hash_page
- * to finish before invalidating HPTE entries. We can do this
- * by sending an IPI to all the cpus and executing a dummy
- * function there.
- */
- kick_all_cpus_sync();
- /*
- * Now invalidate the hpte entries in the range
- * covered by pmd. This make sure we take a
- * fault and will find the pmd as none, which will
- * result in a major fault which takes mmap_sem and
- * hence wait for collapse to complete. Without this
- * the __collapse_huge_page_copy can result in copying
- * the old content.
- */
- flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
- return pmd;
-}
-
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We want to put the pgtable in pmd and use pgtable for tracking
- * the base page size hptes
- */
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- pgtable_t *pgtable_slot;
- assert_spin_locked(&mm->page_table_lock);
- /*
- * we store the pgtable in the second half of PMD
- */
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- *pgtable_slot = pgtable;
- /*
- * expose the deposited pgtable to other cpus.
- * before we set the hugepage PTE at pmd level
- * hash fault code looks at the deposted pgtable
- * to store hash index values.
- */
- smp_wmb();
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- pgtable_t pgtable;
- pgtable_t *pgtable_slot;
-
- assert_spin_locked(&mm->page_table_lock);
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- pgtable = *pgtable_slot;
- /*
- * Once we withdraw, mark the entry NULL.
- */
- *pgtable_slot = NULL;
- /*
- * We store HPTE information in the deposited PTE fragment.
- * zero out the content on withdraw.
- */
- memset(pgtable, 0, PTE_FRAG_SIZE);
- return pgtable;
-}
-
-void pmdp_huge_split_prepare(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
-
- /*
- * We can't mark the pmd none here, because that will cause a race
- * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
- * we spilt, but at the same time we wan't rest of the ppc64 code
- * not to insert hash pte on this, because we will be modifying
- * the deposited pgtable in the caller of this function. Hence
- * clear the _PAGE_USER so that we move the fault handling to
- * higher level function and that will serialize against ptl.
- * We need to flush existing hash pte entries here even though,
- * the translation is still valid, because we will withdraw
- * pgtable_t after this.
- */
- pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
-}
-
-
-/*
- * set a new huge pmd. We should not be called for updating
- * an existing pmd entry. That should go via pmd_hugepage_update.
- */
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
-#ifdef CONFIG_DEBUG_VM
- WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
- (_PAGE_PRESENT | _PAGE_USER));
- assert_spin_locked(&mm->page_table_lock);
- WARN_ON(!pmd_trans_huge(pmd));
-#endif
- trace_hugepage_set_pmd(addr, pmd_val(pmd));
- return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
-}
-
-/*
- * We use this to invalidate a pmdp entry before switching from a
- * hugepte to regular pmd entry.
- */
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
- /*
- * This ensures that generic code that rely on IRQ disabling
- * to prevent a parallel THP split work as expected.
- */
- kick_all_cpus_sync();
-}
-
-/*
- * A linux hugepage PMD was changed and the corresponding hash table entries
- * neesd to be flushed.
- */
-void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long old_pmd)
-{
- int ssize;
- unsigned int psize;
- unsigned long vsid;
- unsigned long flags = 0;
- const struct cpumask *tmp;
-
- /* get the base page size,vsid and segment size */
-#ifdef CONFIG_DEBUG_VM
- psize = get_slice_psize(mm, addr);
- BUG_ON(psize == MMU_PAGE_16M);
-#endif
- if (old_pmd & _PAGE_COMBO)
- psize = MMU_PAGE_4K;
- else
- psize = MMU_PAGE_64K;
-
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
- tmp = cpumask_of(smp_processor_id());
- if (cpumask_equal(mm_cpumask(mm), tmp))
- flags |= HPTE_LOCAL_UPDATE;
-
- return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
-}
-
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
-{
- return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
-}
-
-pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
- unsigned long pmdv;
-
- pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK;
- return pmd_set_protbits(__pmd(pmdv), pgprot);
-}
-
-pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
-{
- return pfn_pmd(page_to_pfn(page), pgprot);
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
- unsigned long pmdv;
-
- pmdv = pmd_val(pmd);
- pmdv &= _HPAGE_CHG_MASK;
- return pmd_set_protbits(__pmd(pmdv), newprot);
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a HUGE PMD entry in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux HUGE PMD entry.
- */
-void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd)
-{
- return;
-}
-
-pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
-{
- pmd_t old_pmd;
- pgtable_t pgtable;
- unsigned long old;
- pgtable_t *pgtable_slot;
-
- old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
- old_pmd = __pmd(old);
- /*
- * We have pmd == none and we are holding page_table_lock.
- * So we can safely go and clear the pgtable hash
- * index info.
- */
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- pgtable = *pgtable_slot;
- /*
- * Let's zero out old valid and hash index details
- * hash fault look at them.
- */
- memset(pgtable, 0, PTE_FRAG_SIZE);
- /*
- * Serialize against find_linux_pte_or_hugepte which does lock-less
- * lookup in page tables with local interrupts disabled. For huge pages
- * it casts pmd_t to pte_t. Since format of pte_t is different from
- * pmd_t we want to prevent transit from pmd pointing to page table
- * to pmd pointing to huge page (and back) while interrupts are disabled.
- * We clear pmd to possibly replace it with page table pointer in
- * different code paths. So make sure we wait for the parallel
- * find_linux_pte_or_hugepage to finish.
- */
- kick_all_cpus_sync();
- return old_pmd;
-}
-
-int has_transparent_hugepage(void)
-{
-
- BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER,
- "hugepages can't be allocated by the buddy allocator");
-
- BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2,
- "We need more than 2 pages to do deferred thp split");
-
- if (!mmu_has_feature(MMU_FTR_16M_PAGE))
- return 0;
- /*
- * We support THP only if PMD_SIZE is 16MB.
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
- return 0;
- /*
- * We need to make sure that we support 16MB hugepage in a segement
- * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
- * of 64K.
- */
- /*
- * If we have 64K HPTE, we will be using that by default
- */
- if (mmu_psize_defs[MMU_PAGE_64K].shift &&
- (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
- return 0;
- /*
- * Ok we only have 4K HPTE
- */
- if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
- return 0;
-
- return 1;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
};
extern void slb_allocate_realmode(unsigned long ea);
-extern void slb_allocate_user(unsigned long ea);
static void slb_allocate(unsigned long ea)
{
* check for bad kernel/user address
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
*/
- rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
+ rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
bne- 8f
srdi r9,r3,60 /* get region */
* can be demoted from 64K -> 4K dynamically on some machines
*/
clrldi r11,r10,48
- cmpldi r11,(VMALLOC_SIZE >> 28) - 1
+ cmpldi r11,(H_VMALLOC_SIZE >> 28) - 1
bgt 5f
lhz r11,PACAVMALLOCSLLP(r13)
b 6f
li r11,SLB_VSID_USER /* flags don't much matter */
b slb_finish_load
-#ifdef __DISABLED__
-
-/* void slb_allocate_user(unsigned long ea);
- *
- * Create an SLB entry for the given EA (user or kernel).
- * r3 = faulting address, r13 = PACA
- * r9, r10, r11 are clobbered by this function
- * No other registers are examined or changed.
- *
- * It is called with translation enabled in order to be able to walk the
- * page tables. This is not currently used.
- */
-_GLOBAL(slb_allocate_user)
- /* r3 = faulting address */
- srdi r10,r3,28 /* get esid */
-
- crset 4*cr7+lt /* set "user" flag for later */
-
- /* check if we fit in the range covered by the pagetables*/
- srdi. r9,r3,PGTABLE_EADDR_SIZE
- crnot 4*cr0+eq,4*cr0+eq
- beqlr
-
- /* now we need to get to the page tables in order to get the page
- * size encoding from the PMD. In the future, we'll be able to deal
- * with 1T segments too by getting the encoding from the PGD instead
- */
- ld r9,PACAPGDIR(r13)
- cmpldi cr0,r9,0
- beqlr
- rlwinm r11,r10,8,25,28
- ldx r9,r9,r11 /* get pgd_t */
- cmpldi cr0,r9,0
- beqlr
- rlwinm r11,r10,3,17,28
- ldx r9,r9,r11 /* get pmd_t */
- cmpldi cr0,r9,0
- beqlr
-
- /* build vsid flags */
- andi. r11,r9,SLB_VSID_LLP
- ori r11,r11,SLB_VSID_USER
-
- /* get context to calculate proto-VSID */
- ld r9,PACACONTEXTID(r13)
- /* fall through slb_finish_load */
-
-#endif /* __DISABLED__ */
-
-
/*
* Finish loading of an SLB entry and return
*
#include <asm/hugetlb.h>
/* some sanity checks */
-#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error PGTABLE_RANGE exceeds slice_mask high_slices size
+#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
+#error H_PGTABLE_RANGE exceeds slice_mask high_slices size
#endif
static DEFINE_SPINLOCK(slice_convert_lock);
/* Sanity checks */
BUG_ON(mm->task_size == 0);
+ VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
unsigned char *hpsizes;
int index, mask_index;
+ /*
+ * Radix doesn't use slice, but can get enabled along with MMU_SLICE
+ */
+ if (radix_enabled()) {
+#ifdef CONFIG_PPC_64K_PAGES
+ return MMU_PAGE_64K;
+#else
+ return MMU_PAGE_4K;
+#endif
+ }
if (addr < SLICE_LOW_TOP) {
u64 lpsizes;
lpsizes = mm->context.low_slices_psize;
slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+ VM_BUG_ON(radix_enabled());
spin_lock_irqsave(&slice_convert_lock, flags);
old_psize = mm->context.user_psize;
{
struct slice_mask mask = slice_range_to_mask(start, len);
+ VM_BUG_ON(radix_enabled());
slice_convert(mm, mask, psize);
}
struct slice_mask mask, available;
unsigned int psize = mm->context.user_psize;
+ if (radix_enabled())
+ return 0;
+
mask = slice_range_to_mask(addr, len);
available = slice_mask_for_size(mm, psize);
#ifdef CONFIG_PPC_64K_PAGES
--- /dev/null
+/*
+ * TLB flush routines for radix kernels.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/memblock.h>
+
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+
+static inline void __tlbiel_pid(unsigned long pid, int set)
+{
+ unsigned long rb,rs,ric,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rb |= set << PPC_BITLSHIFT(51);
+ rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+ ric = 2; /* invalidate all the caches */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
+ "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("ptesync": : :"memory");
+}
+
+/*
+ * We use 128 set in radix mode and 256 set in hpt mode.
+ */
+static inline void _tlbiel_pid(unsigned long pid)
+{
+ int set;
+
+ for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
+ __tlbiel_pid(pid, set);
+ }
+ return;
+}
+
+static inline void _tlbie_pid(unsigned long pid)
+{
+ unsigned long rb,rs,ric,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+ ric = 2; /* invalidate all the caches */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
+ "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ unsigned long rb,rs,ric,prs,r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+ ric = 0; /* no cluster flush yet */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
+ "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ unsigned long rb,rs,ric,prs,r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+ ric = 0; /* no cluster flush yet */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
+ "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Base TLB flushing operations:
+ *
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * - local_* variants of page and mm only apply to the current
+ * processor
+ */
+void radix__local_flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbiel_pid(pid);
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_mm);
+
+void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long ap, int nid)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm ? mm->context.id : 0;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbiel_va(vmaddr, pid, ap);
+ preempt_enable();
+}
+
+void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ /* need the return fix for nohash.c */
+ if (vma && is_vm_hugetlb_page(vma))
+ return __local_flush_hugetlb_page(vma, vmaddr);
+#endif
+ radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_get_ap(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_page);
+
+#ifdef CONFIG_SMP
+static int mm_is_core_local(struct mm_struct *mm)
+{
+ return cpumask_subset(mm_cpumask(mm),
+ topology_sibling_cpumask(smp_processor_id()));
+}
+
+void radix__flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+
+ if (!mm_is_core_local(mm)) {
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_pid(pid);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ } else
+ _tlbiel_pid(pid);
+no_context:
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__flush_tlb_mm);
+
+void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long ap, int nid)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm ? mm->context.id : 0;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto bail;
+ if (!mm_is_core_local(mm)) {
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_va(vmaddr, pid, ap);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ } else
+ _tlbiel_va(vmaddr, pid, ap);
+bail:
+ preempt_enable();
+}
+
+void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (vma && is_vm_hugetlb_page(vma))
+ return flush_hugetlb_page(vma, vmaddr);
+#endif
+ radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_get_ap(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(radix__flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_pid(0);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+}
+EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+
+/*
+ * Currently, for range flushing, we just do a full mm flush. Because
+ * we use this in code path where we don' track the page size.
+ */
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+ struct mm_struct *mm = vma->vm_mm;
+ radix__flush_tlb_mm(mm);
+}
+EXPORT_SYMBOL(radix__flush_tlb_range);
+
+
+void radix__tlb_flush(struct mmu_gather *tlb)
+{
+ struct mm_struct *mm = tlb->mm;
+ radix__flush_tlb_mm(mm);
+}
batch->index = 0;
}
-void tlb_flush(struct mmu_gather *tlb)
+void hash__tlb_flush(struct mmu_gather *tlb)
{
struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
pte = pte_val(*ptep);
if (is_thp)
trace_hugepage_invalidate(start, pte);
- if (!(pte & _PAGE_HASHPTE))
+ if (!(pte & H_PAGE_HASHPTE))
continue;
if (unlikely(is_thp))
hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
start_pte = pte_offset_map(pmd, addr);
for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
unsigned long pteval = pte_val(*pte);
- if (pteval & _PAGE_HASHPTE)
+ if (pteval & H_PAGE_HASHPTE)
hpte_need_flush(mm, addr, pte, pteval, 0);
addr += PAGE_SIZE;
}
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-obj-$(CONFIG_PERF_EVENTS) += callchain.o
+obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
offset = addr & ((1UL << shift) - 1);
pte = READ_ONCE(*ptep);
- if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+ if (!pte_present(pte) || !pte_user(pte))
goto err_out;
pfn = pte_pfn(pte);
if (!page_is_ram(pfn))
--- /dev/null
+/*
+ * Copyright 2016 Anju T, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+#include <asm/perf_regs.h>
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R1, gpr[1]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R2, gpr[2]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R3, gpr[3]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R4, gpr[4]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R5, gpr[5]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R6, gpr[6]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R7, gpr[7]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R8, gpr[8]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R9, gpr[9]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R10, gpr[10]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R11, gpr[11]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R12, gpr[12]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R13, gpr[13]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R14, gpr[14]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R15, gpr[15]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R16, gpr[16]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R17, gpr[17]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R18, gpr[18]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R19, gpr[19]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R20, gpr[20]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R21, gpr[21]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R22, gpr[22]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R23, gpr[23]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R24, gpr[24]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R25, gpr[25]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R26, gpr[26]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R27, gpr[27]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R28, gpr[28]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R29, gpr[29]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R30, gpr[30]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R31, gpr[31]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_LINK, link),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+#ifdef CONFIG_PPC64
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+#else
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, mq),
+#endif
+ PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+ return 0;
+
+ return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+#ifdef CONFIG_PPC64
+ if (!test_tsk_thread_flag(task, TIF_32BIT))
+ return PERF_SAMPLE_REGS_ABI_64;
+ else
+#endif
+ return PERF_SAMPLE_REGS_ABI_32;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs,
+ struct pt_regs *regs_user_copy)
+{
+ regs_user->regs = task_pt_regs(current);
+ regs_user->abi = perf_reg_abi(current);
+}
EVENT(PM_DTLB_MISS, 0x300fc)
/* ITLB Reloaded */
EVENT(PM_ITLB_MISS, 0x400fc)
+/* Run_Instructions */
+EVENT(PM_RUN_INST_CMPL, 0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa)
+/* Run_cycles */
+EVENT(PM_RUN_CYC, 0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT, 0x200f4)
+/* Marked store completed */
+EVENT(PM_MRK_ST_CMPL, 0x10134)
+/* Alternate event code for Marked store completed */
+EVENT(PM_MRK_ST_CMPL_ALT, 0x301e2)
+/* Marked two path branch */
+EVENT(PM_BR_MRK_2PATH, 0x10138)
+/* Alternate event code for PM_BR_MRK_2PATH */
+EVENT(PM_BR_MRK_2PATH_ALT, 0x40138)
+/* L3 castouts in Mepf state */
+EVENT(PM_L3_CO_MEPF, 0x18082)
+/* Alternate event code for PM_L3_CO_MEPF */
+EVENT(PM_L3_CO_MEPF_ALT, 0x3e05e)
+/* Data cache was reloaded from a location other than L2 due to a marked load */
+EVENT(PM_MRK_DATA_FROM_L2MISS, 0x1d14e)
+/* Alternate event code for PM_MRK_DATA_FROM_L2MISS */
+EVENT(PM_MRK_DATA_FROM_L2MISS_ALT, 0x401e8)
+/* Alternate event code for PM_CMPLU_STALL */
+EVENT(PM_CMPLU_STALL_ALT, 0x1e054)
+/* Two path branch */
+EVENT(PM_BR_2PATH, 0x20036)
+/* Alternate event code for PM_BR_2PATH */
+EVENT(PM_BR_2PATH_ALT, 0x40036)
+/* # PPC Dispatched */
+EVENT(PM_INST_DISP, 0x200f2)
+/* Alternate event code for PM_INST_DISP */
+EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Marked filter Match */
+EVENT(PM_MRK_FILT_MATCH, 0x2013c)
+/* Alternate event code for PM_MRK_FILT_MATCH */
+EVENT(PM_MRK_FILT_MATCH_ALT, 0x3012e)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT, 0x400f0)
/* Ignore Linux defined bits when checking event below */
base_event = event & ~EVENT_LINUX_MASK;
- if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4)
+ if (pmc >= 5 && base_event != PM_RUN_INST_CMPL &&
+ base_event != PM_RUN_CYC)
return -1;
mask |= CNST_PMC_MASK(pmc);
/* Table of alternatives, sorted by column 0 */
static const unsigned int event_alternatives[][MAX_ALT] = {
- { 0x10134, 0x301e2 }, /* PM_MRK_ST_CMPL */
- { 0x10138, 0x40138 }, /* PM_BR_MRK_2PATH */
- { 0x18082, 0x3e05e }, /* PM_L3_CO_MEPF */
- { 0x1d14e, 0x401e8 }, /* PM_MRK_DATA_FROM_L2MISS */
- { 0x1e054, 0x4000a }, /* PM_CMPLU_STALL */
- { 0x20036, 0x40036 }, /* PM_BR_2PATH */
- { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
- { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
- { 0x2013c, 0x3012e }, /* PM_MRK_FILT_MATCH */
- { 0x3e054, 0x400f0 }, /* PM_LD_MISS_L1 */
- { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
+ { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT },
+ { PM_BR_MRK_2PATH, PM_BR_MRK_2PATH_ALT },
+ { PM_L3_CO_MEPF, PM_L3_CO_MEPF_ALT },
+ { PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L2MISS_ALT },
+ { PM_CMPLU_STALL_ALT, PM_CMPLU_STALL },
+ { PM_BR_2PATH, PM_BR_2PATH_ALT },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_MRK_FILT_MATCH, PM_MRK_FILT_MATCH_ALT },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
};
/*
j = num_alt;
for (i = 0; i < num_alt; ++i) {
switch (alt[i]) {
- case 0x1e: /* PM_CYC */
- alt[j++] = 0x600f4; /* PM_RUN_CYC */
+ case PM_CYC:
+ alt[j++] = PM_RUN_CYC;
break;
- case 0x600f4: /* PM_RUN_CYC */
- alt[j++] = 0x1e;
+ case PM_RUN_CYC:
+ alt[j++] = PM_CYC;
break;
- case 0x2: /* PM_PPC_CMPL */
- alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
+ case PM_INST_CMPL:
+ alt[j++] = PM_RUN_INST_CMPL;
break;
- case 0x500fa: /* PM_RUN_INST_CMPL */
- alt[j++] = 0x2; /* PM_PPC_CMPL */
+ case PM_RUN_INST_CMPL:
+ alt[j++] = PM_INST_CMPL;
break;
}
}
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
def_bool y
depends on PPC_STD_MMU && PPC64
+config PPC_RADIX_MMU
+ bool "Radix MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+ is only implemented by IBM Power9 CPUs, if you don't have one of them
+ you can probably disable this.
+
config PPC_MMU_NOHASH
def_bool y
depends on !PPC_STD_MMU
#include <linux/interrupt.h>
#include <linux/list.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/wait.h>
(REGION_ID(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
- ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+ ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
spin_lock(&spu->register_lock);
if (!ret) {
out:
return ret;
}
-module_init(init_spu_base);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+device_initcall(init_spu_base);
/* we must not hold the lock when entering copro_handle_mm_fault */
spu_release(ctx);
- access = (_PAGE_PRESENT | _PAGE_USER);
- access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ access = (_PAGE_PRESENT | _PAGE_READ);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
local_irq_save(flags);
ret = hash_page(ea, access, 0x300, dsisr);
local_irq_restore(flags);
* and P7IOC separately. So we should regard
* PE#0 as valid for PHB3 and P7IOC.
*/
- if (phb->ioda.reserved_pe != 0)
+ if (phb->ioda.reserved_pe_idx != 0)
eeh_add_flag(EEH_VALID_PE_ZERO);
break;
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
struct pci_controller *hose = pe->phb;
+ struct pnv_phb *phb;
struct pci_bus *bus;
- int ret;
+ int64_t rc;
/*
* For PHB reset, we always have complete reset. For those PEs whose
* reset. The side effect is that EEH core has to clear the frozen
* state explicitly after BAR restore.
*/
- if (pe->type & EEH_PE_PHB) {
- ret = pnv_eeh_phb_reset(hose, option);
- } else {
- struct pnv_phb *phb;
- s64 rc;
+ if (pe->type & EEH_PE_PHB)
+ return pnv_eeh_phb_reset(hose, option);
- /*
- * The frozen PE might be caused by PAPR error injection
- * registers, which are expected to be cleared after hitting
- * frozen PE as stated in the hardware spec. Unfortunately,
- * that's not true on P7IOC. So we have to clear it manually
- * to avoid recursive EEH errors during recovery.
- */
- phb = hose->private_data;
- if (phb->model == PNV_PHB_MODEL_P7IOC &&
- (option == EEH_RESET_HOT ||
- option == EEH_RESET_FUNDAMENTAL)) {
- rc = opal_pci_reset(phb->opal_id,
- OPAL_RESET_PHB_ERROR,
- OPAL_ASSERT_RESET);
- if (rc != OPAL_SUCCESS) {
- pr_warn("%s: Failure %lld clearing "
- "error injection registers\n",
- __func__, rc);
- return -EIO;
- }
+ /*
+ * The frozen PE might be caused by PAPR error injection
+ * registers, which are expected to be cleared after hitting
+ * frozen PE as stated in the hardware spec. Unfortunately,
+ * that's not true on P7IOC. So we have to clear it manually
+ * to avoid recursive EEH errors during recovery.
+ */
+ phb = hose->private_data;
+ if (phb->model == PNV_PHB_MODEL_P7IOC &&
+ (option == EEH_RESET_HOT ||
+ option == EEH_RESET_FUNDAMENTAL)) {
+ rc = opal_pci_reset(phb->opal_id,
+ OPAL_RESET_PHB_ERROR,
+ OPAL_ASSERT_RESET);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clearing error injection registers\n",
+ __func__, rc);
+ return -EIO;
}
-
- bus = eeh_pe_bus_get(pe);
- if (pe->type & EEH_PE_VF)
- ret = pnv_eeh_reset_vf_pe(pe, option);
- else if (pci_is_root_bus(bus) ||
- pci_is_root_bus(bus->parent))
- ret = pnv_eeh_root_reset(hose, option);
- else
- ret = pnv_eeh_bridge_reset(bus->self, option);
}
- return ret;
+ bus = eeh_pe_bus_get(pe);
+ if (pe->type & EEH_PE_VF)
+ return pnv_eeh_reset_vf_pe(pe, option);
+
+ if (pci_is_root_bus(bus) ||
+ pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
+
+ return pnv_eeh_bridge_reset(bus->self, option);
}
/**
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/pnv-pci.h>
* Other types of TCE cache invalidation are not functional in the
* hardware.
*/
-#define TCE_KILL_INVAL_ALL PPC_BIT(0)
-
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
return PCI_DN(dn)->pcidev;
struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
- if (npe->flags & PNV_IODA_PE_PEER) {
- pe = npe->peers[0];
- pdev = pe->pdev;
- } else {
- pdev = pnv_pci_get_gpu_dev(npe->pdev);
- if (!pdev)
- return NULL;
+ pdev = pnv_pci_get_gpu_dev(npe->pdev);
+ if (!pdev)
+ return NULL;
- pdn = pci_get_pdn(pdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return NULL;
+ pdn = pci_get_pdn(pdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return NULL;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- pe = &phb->ioda.pe_array[pdn->pe_number];
- }
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ pe = &phb->ioda.pe_array[pdn->pe_number];
if (gpdev)
*gpdev = pdev;
return pe;
}
-void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+ pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
+ start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
+
+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
+ npe->pe_number,
+ npe->pe_number,
+ tbl->it_indirect_levels + 1,
+ __pa(tbl->it_base),
+ size << 3,
+ IOMMU_PAGE_SIZE(tbl));
+ if (rc) {
+ pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
+ return rc;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
+ /* Add the table to the list so its TCE cache will get invalidated */
+ pnv_pci_link_table_and_group(phb->hose->node, num,
+ tbl, &npe->table_group);
- mb(); /* Ensure previous TCE table stores are visible */
- __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
- phb->ioda.tce_inval_reg);
+ return 0;
}
-void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm)
+long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
- /* We can only invalidate the whole cache on NPU */
- unsigned long val = TCE_KILL_INVAL_ALL;
-
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq(cpu_to_be64(val),
- (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
- else
- __raw_writeq(cpu_to_be64(val),
- phb->ioda.tce_inval_reg);
-}
-
-void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
-{
- struct pnv_ioda_pe *gpe;
- struct pci_dev *gpdev;
- int i, avail = -1;
-
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
- return;
-
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return;
-
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- /* Nothing to do if the PE is already connected. */
- if (gpe->peers[i] == npe)
- return;
+ pe_info(npe, "Removing DMA window\n");
- if (!gpe->peers[i])
- avail = i;
+ rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+ npe->pe_number,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (rc) {
+ pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
+ return rc;
}
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(avail < 0))
- return;
-
- gpe->peers[avail] = npe;
- gpe->flags |= PNV_IODA_PE_PEER;
+ pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
+ &npe->table_group);
- /*
- * We assume that the NPU devices only have a single peer PE
- * (the GPU PCIe device PE).
- */
- npe->peers[0] = gpe;
- npe->flags |= PNV_IODA_PE_PEER;
+ return 0;
}
/*
- * For the NPU we want to point the TCE table at the same table as the
- * real PCI device.
+ * Enables 32 bit DMA on NPU.
*/
-static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
{
- struct pnv_phb *phb = npe->phb;
struct pci_dev *gpdev;
struct pnv_ioda_pe *gpe;
- void *addr;
- unsigned int size;
int64_t rc;
/*
if (!gpe)
return;
- addr = (void *)gpe->table_group.tables[0]->it_base;
- size = gpe->table_group.tables[0]->it_size << 3;
- rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
- npe->pe_number, 1, __pa(addr),
- size, 0x1000);
- if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
- __func__, rc, phb->hose->global_number, npe->pe_number);
+ rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
/*
* We don't initialise npu_pe->tce32_table as we always use
}
/*
- * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * Enables bypass mode on the NPU. The NPU only supports one
* window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
-int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
{
struct pnv_phb *phb = npe->phb;
int64_t rc = 0;
+ phys_addr_t top = memblock_end_of_DRAM();
if (phb->type != PNV_PHB_NPU || !npe->pdev)
return -EINVAL;
- if (enable) {
- /* Enable the bypass window */
- phys_addr_t top = memblock_end_of_DRAM();
-
- npe->tce_bypass_base = 0;
- top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
- npe->pe_number);
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- npe->tce_bypass_base, top);
- } else {
- /*
- * Disable the bypass window by replacing it with the
- * TCE32 window.
- */
- pnv_npu_disable_bypass(npe);
- }
+ rc = pnv_npu_unset_window(npe, 0);
+ if (rc != OPAL_SUCCESS)
+ return rc;
+
+ /* Enable the bypass window */
+
+ top = roundup_pow_of_two(top);
+ dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+ npe->pe_number);
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, top);
+
+ if (rc == OPAL_SUCCESS)
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
return rc;
}
-int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
{
- struct pci_controller *hose = pci_bus_to_host(npdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(npdev);
- struct pnv_ioda_pe *npe, *gpe;
- struct pci_dev *gpdev;
- uint64_t top;
- bool bypass = false;
+ int i;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *npe;
+ struct pci_dev *npdev;
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENXIO;
+ for (i = 0; ; ++i) {
+ npdev = pnv_pci_get_npu_dev(gpdev, i);
- /* We only do bypass if it's enabled on the linked device */
- npe = &phb->ioda.pe_array[pdn->pe_number];
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return -ENODEV;
+ if (!npdev)
+ break;
+
+ pdn = pci_get_pdn(npdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return;
+
+ phb = pci_bus_to_host(npdev->bus)->private_data;
+
+ /* We only do bypass if it's enabled on the linked device */
+ npe = &phb->ioda.pe_array[pdn->pe_number];
+
+ if (bypass) {
+ dev_info(&npdev->dev,
+ "Using 64-bit DMA iommu bypass\n");
+ pnv_npu_dma_set_bypass(npe);
+ } else {
+ dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ pnv_npu_dma_set_32(npe);
+ }
+ }
+}
- if (gpe->tce_bypass_enabled) {
- top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+/* Switch ownership from platform code to external user (e.g. VFIO) */
+void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+
+ /*
+ * Note: NPU has just a single TVE in the hardware which means that
+ * while used by the kernel, it can have either 32bit window or
+ * DMA bypass but never both. So we deconfigure 32bit window only
+ * if it was enabled at the moment of ownership change.
+ */
+ if (npe->table_group.tables[0]) {
+ pnv_npu_unset_window(npe, 0);
+ return;
}
- if (bypass)
- dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
- else
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ /* Disable bypass */
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, 0);
+ if (rc) {
+ pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
+ return;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+}
- pnv_npu_dma_set_bypass(npe, bypass);
- *npdev->dev.dma_mask = dma_mask;
+struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ struct pci_bus *pbus = phb->hose->bus;
+ struct pci_dev *npdev, *gpdev = NULL, *gptmp;
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- return 0;
+ if (!gpe || !gpdev)
+ return NULL;
+
+ list_for_each_entry(npdev, &pbus->devices, bus_list) {
+ gptmp = pnv_pci_get_gpu_dev(npdev);
+
+ if (gptmp != gpdev)
+ continue;
+
+ pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
+ iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+ }
+
+ return gpe;
}
static void print_checkstop_reason(const char *level,
struct OpalHMIEvent *hmi_evt)
{
- switch (hmi_evt->u.xstop_error.xstop_type) {
+ uint8_t type = hmi_evt->u.xstop_error.xstop_type;
+ switch (type) {
case CHECKSTOP_TYPE_CORE:
print_core_checkstop_reason(level, hmi_evt);
break;
case CHECKSTOP_TYPE_NX:
print_nx_checkstop_reason(level, hmi_evt);
break;
- case CHECKSTOP_TYPE_UNKNOWN:
- printk("%s Unknown Malfunction Alert.\n", level);
+ default:
+ printk("%s Unknown Malfunction Alert of type %d\n",
+ level, type);
break;
}
}
#include "powernv.h"
#include "pci.h"
-/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
+#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
+#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
-static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
struct va_format vaf;
va_end(args);
}
-#define pe_err(pe, fmt, ...) \
- pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
-#define pe_warn(pe, fmt, ...) \
- pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
-#define pe_info(pe, fmt, ...) \
- pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
-
static bool pnv_iommu_bypass_disabled __read_mostly;
static int __init iommu_setup(char *str)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
}
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+ phb->ioda.pe_array[pe_no].phb = phb;
+ phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+ return &phb->ioda.pe_array[pe_no];
+}
+
static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
- if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+ if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
pr_warn("%s: Invalid PE %d on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
return;
pr_debug("%s: PE %d was reserved on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
- phb->ioda.pe_array[pe_no].phb = phb;
- phb->ioda.pe_array[pe_no].pe_number = pe_no;
+ pnv_ioda_init_pe(phb, pe_no);
}
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
do {
pe = find_next_zero_bit(phb->ioda.pe_alloc,
- phb->ioda.total_pe, 0);
- if (pe >= phb->ioda.total_pe)
- return IODA_INVALID_PE;
+ phb->ioda.total_pe_num, 0);
+ if (pe >= phb->ioda.total_pe_num)
+ return NULL;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
- phb->ioda.pe_array[pe].phb = phb;
- phb->ioda.pe_array[pe].pe_number = pe;
- return pe;
+ return pnv_ioda_init_pe(phb, pe);
}
-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
- WARN_ON(phb->ioda.pe_array[pe].pdev);
+ struct pnv_phb *phb = pe->phb;
+
+ WARN_ON(pe->pdev);
- memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
- clear_bit(pe, phb->ioda.pe_alloc);
+ memset(pe, 0, sizeof(struct pnv_ioda_pe));
+ clear_bit(pe->pe_number, phb->ioda.pe_alloc);
}
/* The default M64 BAR is shared by all PEs */
* expected to be 0 or last one of PE capabicity.
*/
r = &phb->hose->mem_resources[1];
- if (phb->ioda.reserved_pe == 0)
+ if (phb->ioda.reserved_pe_idx == 0)
r->start += phb->ioda.m64_segsize;
- else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= phb->ioda.m64_segsize;
else
pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
- phb->ioda.reserved_pe);
+ phb->ioda.reserved_pe_idx);
return 0;
return -EIO;
}
-static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
unsigned long *pe_bitmap)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
}
}
-static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
- unsigned long *pe_bitmap,
- bool all)
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+ struct resource *r;
+ int index;
+
+ /*
+ * There are 16 M64 BARs, each of which has 8 segments. So
+ * there are as many M64 segments as the maximum number of
+ * PEs, which is 128.
+ */
+ for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
+ unsigned long base, segsz = phb->ioda.m64_segsize;
+ int64_t rc;
+
+ base = phb->ioda.m64_base +
+ index * PNV_IODA1_M64_SEGS * segsz;
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, base, 0,
+ PNV_IODA1_M64_SEGS * segsz);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index,
+ OPAL_ENABLE_M64_SPLIT);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+ }
+
+ /*
+ * Exclude the segment used by the reserved PE, which
+ * is expected to be 0 or last supported PE#.
+ */
+ r = &phb->hose->mem_resources[1];
+ if (phb->ioda.reserved_pe_idx == 0)
+ r->start += phb->ioda.m64_segsize;
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+ r->end -= phb->ioda.m64_segsize;
+ else
+ WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
+ phb->ioda.reserved_pe_idx, phb->hose->global_number);
+
+ return 0;
+
+fail:
+ for ( ; index >= 0; index--)
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
+
+ return -EIO;
+}
+
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+ unsigned long *pe_bitmap,
+ bool all)
{
struct pci_dev *pdev;
list_for_each_entry(pdev, &bus->devices, bus_list) {
- pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+ pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
if (all && pdev->subordinate)
- pnv_ioda2_reserve_m64_pe(pdev->subordinate,
- pe_bitmap, all);
+ pnv_ioda_reserve_m64_pe(pdev->subordinate,
+ pe_bitmap, all);
}
}
-static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
- return IODA_INVALID_PE;
+ return NULL;
/* Allocate bitmap */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__);
- return IODA_INVALID_PE;
+ return NULL;
}
/* Figure out reserved PE numbers by the PE */
- pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
+ pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
/*
* the current bus might not own M64 window and that's all
* contributed by its child buses. For the case, we needn't
* pick M64 dependent PE#.
*/
- if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc);
- return IODA_INVALID_PE;
+ return NULL;
}
/*
*/
master_pe = NULL;
i = -1;
- while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
- phb->ioda.total_pe) {
+ while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
+ phb->ioda.total_pe_num) {
pe = &phb->ioda.pe_array[i];
+ phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
if (!master_pe) {
pe->flags |= PNV_IODA_PE_MASTER;
INIT_LIST_HEAD(&pe->slaves);
pe->master = master_pe;
list_add_tail(&pe->list, &master_pe->slaves);
}
+
+ /*
+ * P7IOC supports M64DT, which helps mapping M64 segment
+ * to one particular PE#. However, PHB3 has fixed mapping
+ * between M64 segment and PE#. In order to have same logic
+ * for P7IOC and PHB3, we enforce fixed mapping between M64
+ * segment and PE# on P7IOC.
+ */
+ if (phb->type == PNV_PHB_IODA1) {
+ int64_t rc;
+
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M64_WINDOW_TYPE,
+ pe->pe_number / PNV_IODA1_M64_SEGS,
+ pe->pe_number % PNV_IODA1_M64_SEGS);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n",
+ __func__, rc, phb->hose->global_number,
+ pe->pe_number);
+ }
}
kfree(pe_alloc);
- return master_pe->pe_number;
+ return master_pe;
}
static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
const u32 *r;
u64 pci_addr;
- /* FIXME: Support M64 for P7IOC */
- if (phb->type != PNV_PHB_IODA2) {
+ if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
pr_info(" Not support M64 window\n");
return;
}
hose->mem_offset[1] = res->start - pci_addr;
phb->ioda.m64_size = resource_size(res);
- phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+ phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
phb->ioda.m64_base = pci_addr;
pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n",
/* Use last M64 BAR to cover M64 window */
phb->ioda.m64_bar_idx = 15;
- phb->init_m64 = pnv_ioda2_init_m64;
- phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
- phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+ if (phb->type == PNV_PHB_IODA1)
+ phb->init_m64 = pnv_ioda1_init_m64;
+ else
+ phb->init_m64 = pnv_ioda2_init_m64;
+ phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
+ phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
s64 rc;
/* Sanity check on PE number */
- if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+ if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
return OPAL_EEH_STOPPED_PERM_UNAVAIL;
/*
return 0;
}
-static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
-{
- struct pnv_ioda_pe *lpe;
-
- list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
- if (lpe->dma_weight < pe->dma_weight) {
- list_add_tail(&pe->dma_link, &lpe->dma_link);
- return;
- }
- }
- list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
-}
-
-static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
-{
- /* This is quite simplistic. The "base" weight of a device
- * is 10. 0 means no DMA is to be accounted for it.
- */
-
- /* If it's a bridge, no DMA */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
- return 0;
-
- /* Reduce the weight of slow USB controllers */
- if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_EHCI)
- return 3;
-
- /* Increase the weight of RAID (includes Obsidian) */
- if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
- return 15;
-
- /* Default */
- return 10;
-}
-
#ifdef CONFIG_PCI_IOV
static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
{
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(dev);
struct pnv_ioda_pe *pe;
- int pe_num;
if (!pdn) {
pr_err("%s: Device tree node not associated properly\n",
if (pdn->pe_number != IODA_INVALID_PE)
return NULL;
- pe_num = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
pr_warning("%s: Not enough PE# available, disabling device\n",
pci_name(dev));
return NULL;
*
* At some point we want to remove the PDN completely anyways
*/
- pe = &phb->ioda.pe_array[pe_num];
pci_dev_get(dev);
pdn->pcidev = dev;
- pdn->pe_number = pe_num;
+ pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
pci_dev_put(dev);
return NULL;
}
- /* Assign a DMA weight to the device */
- pe->dma_weight = pnv_ioda_dma_weight(dev);
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ /* Put PE to the list */
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
return pe;
}
}
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
- pe->dma_weight += pnv_ioda_dma_weight(dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
}
* subordinate PCI devices and buses. The second type of PE is normally
* orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
*/
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
- struct pnv_ioda_pe *pe;
- int pe_num = IODA_INVALID_PE;
+ struct pnv_ioda_pe *pe = NULL;
/* Check if PE is determined by M64 */
if (phb->pick_m64_pe)
- pe_num = phb->pick_m64_pe(bus, all);
+ pe = phb->pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
- if (pe_num == IODA_INVALID_PE)
- pe_num = pnv_ioda_alloc_pe(phb);
+ if (!pe)
+ pe = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ if (!pe) {
pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
- return;
+ return NULL;
}
- pe = &phb->ioda.pe_array[pe_num];
pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe->pbus = bus;
pe->pdev = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = bus->busn_res.start << 8;
- pe->dma_weight = 0;
if (all)
pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
- bus->busn_res.start, bus->busn_res.end, pe_num);
+ bus->busn_res.start, bus->busn_res.end, pe->pe_number);
else
pe_info(pe, "Secondary bus %d associated with PE#%d\n",
- bus->busn_res.start, pe_num);
+ bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pbus = NULL;
- return;
+ return NULL;
}
/* Associate it with all child devices */
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
- /* Account for one DMA PE if at least one DMA capable device exist
- * below the bridge
- */
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ return pe;
}
static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
* same GPU get assigned the same PE.
*/
gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+ for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
pe = &phb->ioda.pe_array[pe_num];
if (!pe->pdev)
continue;
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
- pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
phb->ioda.pe_rmap[rid] = pe->pe_number;
/* Map the PE to this link */
pnv_ioda_deconfigure_pe(phb, pe);
- pnv_ioda_free_pe(phb, pe->pe_number);
+ pnv_ioda_free_pe(pe);
}
}
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
struct pci_sriov *iov;
u16 num_vfs, i;
/* Release PE numbers */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
pe->flags = PNV_IODA_PE_VF;
pe->pbus = NULL;
pe->parent_dev = pdev;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
pci_iov_virtfn_devfn(pdev, vf_index);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pdev = NULL;
continue;
}
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
int ret;
u16 i;
/* Calculate available PE for required VFs */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
- if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
ret = -EBUSY;
goto m64_failed;
}
+
+ pdn->pe_num_map[i] = pe->pe_number;
}
} else {
mutex_lock(&phb->ioda.pe_alloc_mutex);
*pdn->pe_num_map = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
+ phb->ioda.pe_alloc, phb->ioda.total_pe_num,
0, num_vfs, 0);
- if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
mutex_unlock(&phb->ioda.pe_alloc_mutex);
dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
kfree(pdn->pe_num_map);
m64_failed:
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
- struct pci_dev *linked_npu_dev;
- int i;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;;
*pdev->dev.dma_mask = dma_mask;
/* Update peer npu devices */
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- if (!pe->peers[i])
- continue;
-
- linked_npu_dev = pe->peers[i]->pdev;
- if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
- dma_set_mask(&linked_npu_dev->dev, dma_mask);
- }
+ pnv_npu_try_dma_set_bypass(pdev, bypass);
return 0;
}
.get = pnv_tce_get,
};
-static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+#define TCE_KILL_INVAL_ALL PPC_BIT(0)
+#define TCE_KILL_INVAL_PE PPC_BIT(1)
+#define TCE_KILL_INVAL_TCE PPC_BIT(2)
+
+void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
+{
+ const unsigned long val = TCE_KILL_INVAL_ALL;
+
+ mb(); /* Ensure previous TCE table stores are visible */
+ if (rm)
+ __raw_rm_writeq(cpu_to_be64(val),
+ (__be64 __iomem *)
+ phb->ioda.tce_inval_reg_phys);
+ else
+ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
- unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+ unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
struct pnv_phb *phb = pe->phb;
- struct pnv_ioda_pe *npe;
- int i;
if (!phb->ioda.tce_inval_reg)
return;
mb(); /* Ensure above stores are visible */
__raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate_entire(npe);
- }
}
static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
- start = 0x2ull << 60;
+ start = TCE_KILL_INVAL_TCE;
start |= (pe_number & 0xFF);
end = start;
struct iommu_table_group_link *tgl;
list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
- struct pnv_ioda_pe *npe;
struct pnv_ioda_pe *pe = container_of(tgl->table_group,
struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
pe->phb->ioda.tce_inval_reg;
- int i;
+ if (pe->phb->type == PNV_PHB_NPU) {
+ /*
+ * The NVLink hardware does not support TCE kill
+ * per TCE entry so we have to invalidate
+ * the entire cache for it.
+ */
+ pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm);
+ continue;
+ }
pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
invalidate, tbl->it_page_shift,
index, npages);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- /* Invalidate PEs using the same TCE table */
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate(npe, tbl, index,
- npages, rm);
- }
}
}
.free = pnv_ioda2_table_free,
};
-static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe, unsigned int base,
- unsigned int segs)
+static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
+{
+ unsigned int *weight = (unsigned int *)data;
+
+ /* This is quite simplistic. The "base" weight of a device
+ * is 10. 0 means no DMA is to be accounted for it.
+ */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return 0;
+
+ if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+ *weight += 3;
+ else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
+ *weight += 15;
+ else
+ *weight += 10;
+
+ return 0;
+}
+
+static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
+{
+ unsigned int weight = 0;
+
+ /* SRIOV VF has same DMA32 weight as its PF */
+#ifdef CONFIG_PCI_IOV
+ if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
+ pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
+ return weight;
+ }
+#endif
+
+ if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
+ pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight);
+ } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
+ struct pci_dev *pdev;
+
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list)
+ pnv_pci_ioda_dev_dma_weight(pdev, &weight);
+ } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
+ pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight);
+ }
+
+ return weight;
+}
+
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
{
struct page *tce_mem = NULL;
struct iommu_table *tbl;
- unsigned int i;
+ unsigned int weight, total_weight = 0;
+ unsigned int tce32_segsz, base, segs, avail, i;
int64_t rc;
void *addr;
/* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
+ return;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
+ pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
+ &total_weight);
+ segs = (weight * phb->ioda.dma32_count) / total_weight;
+ if (!segs)
+ segs = 1;
+
+ /*
+ * Allocate contiguous DMA32 segments. We begin with the expected
+ * number of segments. With one more attempt, the number of DMA32
+ * segments to be allocated is decreased by one until one segment
+ * is allocated successfully.
+ */
+ do {
+ for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
+ for (avail = 0, i = base; i < base + segs; i++) {
+ if (phb->ioda.dma32_segmap[i] ==
+ IODA_INVALID_PE)
+ avail++;
+ }
+
+ if (avail == segs)
+ goto found;
+ }
+ } while (--segs);
+
+ if (!segs) {
+ pe_warn(pe, "No available DMA32 segments\n");
return;
+ }
+found:
tbl = pnv_pci_table_alloc(phb->hose->node);
iommu_register_group(&pe->table_group, phb->hose->global_number,
pe->pe_number);
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* Grab a 32-bit TCE table */
- pe->tce32_seg = base;
+ pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
+ weight, total_weight, base, segs);
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
- (base << 28), ((base + segs) << 28) - 1);
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
/* XXX Currently, we allocate one big contiguous table for the
* TCEs. We only really need one chunk per 256M of TCE space
* (ie per segment) but that's an optimization for later, it
* requires some added smarts with our get/put_tce implementation
+ *
+ * Each TCE page is 4KB in size and each TCE entry occupies 8
+ * bytes
*/
+ tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(TCE32_TABLE_SIZE * segs));
+ get_order(tce32_segsz * segs));
if (!tce_mem) {
pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
goto fail;
}
addr = page_address(tce_mem);
- memset(addr, 0, TCE32_TABLE_SIZE * segs);
+ memset(addr, 0, tce32_segsz * segs);
/* Configure HW */
for (i = 0; i < segs; i++) {
rc = opal_pci_map_pe_dma_window(phb->opal_id,
pe->pe_number,
base + i, 1,
- __pa(addr) + TCE32_TABLE_SIZE * i,
- TCE32_TABLE_SIZE, 0x1000);
+ __pa(addr) + tce32_segsz * i,
+ tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
pe_err(pe, " Failed to configure 32-bit TCE table,"
" err %ld\n", rc);
}
}
+ /* Setup DMA32 segment mapping */
+ for (i = base; i < base + segs; i++)
+ phb->ioda.dma32_segmap[i] = pe->pe_number;
+
/* Setup linux iommu table */
- pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
- base << 28, IOMMU_PAGE_SHIFT_4K);
+ pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
if (phb->ioda.tce_inval_reg)
return;
fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
if (tce_mem)
- __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ __free_pages(tce_mem, get_order(tce32_segsz * segs));
if (tbl) {
pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
iommu_free_table(tbl, "pnv");
pnv_pci_link_table_and_group(phb->hose->node, num,
tbl, &pe->table_group);
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
return 0;
}
if (ret)
pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
else
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
.take_ownership = pnv_ioda2_take_ownership,
.release_ownership = pnv_ioda2_release_ownership,
};
+
+static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe **ptmppe = opaque;
+ struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ return 0;
+
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ if (phb->type != PNV_PHB_NPU)
+ return 0;
+
+ *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
+
+ return 1;
+}
+
+/*
+ * This returns PE of associated NPU.
+ * This assumes that NPU is in the same IOMMU group with GPU and there is
+ * no other PEs.
+ */
+static struct pnv_ioda_pe *gpe_table_group_to_npe(
+ struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *npe = NULL;
+ int ret = iommu_group_for_each_dev(table_group->group, &npe,
+ gpe_table_group_to_npe_cb);
+
+ BUG_ON(!ret || !npe);
+
+ return npe;
+}
+
+static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
+
+ if (ret)
+ return ret;
+
+ ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
+ if (ret)
+ pnv_pci_ioda2_unset_window(table_group, num);
+
+ return ret;
+}
+
+static long pnv_pci_ioda2_npu_unset_window(
+ struct iommu_table_group *table_group,
+ int num)
+{
+ long ret = pnv_pci_ioda2_unset_window(table_group, num);
+
+ if (ret)
+ return ret;
+
+ return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+}
+
+static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
+{
+ /*
+ * Detach NPU first as pnv_ioda2_take_ownership() will destroy
+ * the iommu_table if 32bit DMA is enabled.
+ */
+ pnv_npu_take_ownership(gpe_table_group_to_npe(table_group));
+ pnv_ioda2_take_ownership(table_group);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table,
+ .set_window = pnv_pci_ioda2_npu_set_window,
+ .unset_window = pnv_pci_ioda2_npu_unset_window,
+ .take_ownership = pnv_ioda2_npu_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+
+static void pnv_pci_ioda_setup_iommu_api(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe, *gpe;
+
+ /*
+ * Now we have all PHBs discovered, time to add NPU devices to
+ * the corresponding IOMMU groups.
+ */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->type != PNV_PHB_NPU)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ gpe = pnv_pci_npu_setup_iommu(pe);
+ if (gpe)
+ gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ }
+ }
+}
+#else /* !CONFIG_IOMMU_API */
+static void pnv_pci_ioda_setup_iommu_api(void) { };
#endif
static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
{
int64_t rc;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
- return;
-
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
pe->pe_number);
/* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
phb->ioda.m32_pci_base);
#endif
rc = pnv_pci_ioda2_setup_default_config(pe);
- if (rc) {
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
+ if (rc)
return;
- }
if (pe->flags & PNV_IODA_PE_DEV)
iommu_add_device(&pe->pdev->dev);
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
struct pci_controller *hose = phb->hose;
- unsigned int residual, remaining, segs, tw, base;
struct pnv_ioda_pe *pe;
+ unsigned int weight;
/* If we have more PE# than segments available, hand out one
* per PE until we run out and let the rest fail. If not,
* then we assign at least one segment per PE, plus more based
* on the amount of devices under that PE
*/
- if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
- residual = 0;
- else
- residual = phb->ioda.tce32_count -
- phb->ioda.dma_pe_count;
-
- pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
- hose->global_number, phb->ioda.tce32_count);
- pr_info("PCI: %d PE# for a total weight of %d\n",
- phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
+ hose->global_number, phb->ioda.dma32_count);
pnv_pci_ioda_setup_opal_tce_kill(phb);
- /* Walk our PE list and configure their DMA segments, hand them
- * out one base segment plus any residual segments based on
- * weight
- */
- remaining = phb->ioda.tce32_count;
- tw = phb->ioda.dma_weight;
- base = 0;
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- if (!pe->dma_weight)
- continue;
- if (!remaining) {
- pe_warn(pe, "No DMA32 resources available\n");
+ /* Walk our PE list and configure their DMA segments */
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
continue;
- }
- segs = 1;
- if (residual) {
- segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
- if (segs > remaining)
- segs = remaining;
- }
/*
* For IODA2 compliant PHB3, we needn't care about the weight.
* the specific PE.
*/
if (phb->type == PNV_PHB_IODA1) {
- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
- pe->dma_weight, segs);
- pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+ pnv_pci_ioda1_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_IODA2) {
pe_info(pe, "Assign DMA32 space\n");
- segs = 0;
pnv_pci_ioda2_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_NPU) {
/*
* as the PHB3 TVT.
*/
}
-
- remaining -= segs;
- base += segs;
}
}
pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- mul = phb->ioda.total_pe;
+ mul = phb->ioda.total_pe_num;
total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
}
#endif /* CONFIG_PCI_IOV */
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+ struct resource *res)
+{
+ struct pnv_phb *phb = pe->phb;
+ struct pci_bus_region region;
+ int index;
+ int64_t rc;
+
+ if (!res || !res->flags || res->start > res->end)
+ return;
+
+ if (res->flags & IORESOURCE_IO) {
+ region.start = res->start - phb->ioda.io_pci_base;
+ region.end = res->end - phb->ioda.io_pci_base;
+ index = region.start / phb->ioda.io_segsize;
+
+ while (index < phb->ioda.total_pe_num &&
+ region.start <= region.end) {
+ phb->ioda.io_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.io_segsize;
+ index++;
+ }
+ } else if ((res->flags & IORESOURCE_MEM) &&
+ !pnv_pci_is_mem_pref_64(res->flags)) {
+ region.start = res->start -
+ phb->hose->mem_offset[0] -
+ phb->ioda.m32_pci_base;
+ region.end = res->end -
+ phb->hose->mem_offset[0] -
+ phb->ioda.m32_pci_base;
+ index = region.start / phb->ioda.m32_segsize;
+
+ while (index < phb->ioda.total_pe_num &&
+ region.start <= region.end) {
+ phb->ioda.m32_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.m32_segsize;
+ index++;
+ }
+ }
+}
+
/*
* This function is supposed to be called on basis of PE from top
* to bottom style. So the the I/O or MMIO segment assigned to
* parent PE could be overrided by its child PEs if necessary.
*/
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
- struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
{
- struct pnv_phb *phb = hose->private_data;
- struct pci_bus_region region;
- struct resource *res;
- int i, index;
- int rc;
+ struct pci_dev *pdev;
+ int i;
/*
* NOTE: We only care PCI bus based PE for now. For PCI
*/
BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
- pci_bus_for_each_resource(pe->pbus, res, i) {
- if (!res || !res->flags ||
- res->start > res->end)
- continue;
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+ pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
- if (res->flags & IORESOURCE_IO) {
- region.start = res->start - phb->ioda.io_pci_base;
- region.end = res->end - phb->ioda.io_pci_base;
- index = region.start / phb->ioda.io_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.io_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping IO "
- "segment #%d to PE#%d\n",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.io_segsize;
- index++;
- }
- } else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
- region.start = res->start -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- region.end = res->end -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- index = region.start / phb->ioda.m32_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.m32_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping M32 "
- "segment#%d to PE#%d",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.m32_segsize;
- index++;
- }
- }
+ /*
+ * If the PE contains all subordinate PCI buses, the
+ * windows of the child bridges should be mapped to
+ * the PE as well.
+ */
+ if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+ continue;
+ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+ pnv_ioda_setup_pe_res(pe,
+ &pdev->resource[PCI_BRIDGE_RESOURCES + i]);
}
}
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- pnv_ioda_setup_pe_seg(hose, pe);
+ pnv_ioda_setup_pe_seg(pe);
}
}
}
phb = hose->private_data;
phb->initialized = 1;
}
+
+ pnv_pci_ioda_setup_iommu_api();
}
static void pnv_pci_ioda_create_dbgfs(void)
#endif /* CONFIG_DEBUG_FS */
}
-static void pnv_npu_ioda_fixup(void)
-{
- bool enable_bypass;
- struct pci_controller *hose, *tmp;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- enable_bypass = dma_get_mask(&pe->pdev->dev) ==
- DMA_BIT_MASK(64);
- pnv_npu_init_dma_pe(pe);
- pnv_npu_dma_set_bypass(pe, enable_bypass);
- }
- }
-}
-
static void pnv_pci_ioda_fixup(void)
{
pnv_pci_ioda_setup_PEs();
eeh_init();
eeh_addr_cache_build();
#endif
-
- /* Link NPU IODA tables to their PCI devices. */
- pnv_npu_ioda_fixup();
}
/*
return true;
}
-static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
- u32 devfn)
-{
- return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
-}
-
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
}
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_pci_ioda_dma_set_mask,
+ .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
+static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+ dev_err_once(&npdev->dev,
+ "%s operation unsupported for NVLink devices\n",
+ __func__);
+ return -EPERM;
+}
+
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_npu_dma_set_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
{
struct pci_controller *hose;
struct pnv_phb *phb;
- unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+ unsigned long size, m64map_off, m32map_off, pemap_off;
+ unsigned long iomap_off = 0, dma32map_off = 0;
const __be64 *prop64;
const __be32 *prop32;
int len;
+ unsigned int segno;
u64 phb_id;
void *aux;
long rc;
pr_err(" Failed to map registers !\n");
/* Initialize more IODA stuff */
- phb->ioda.total_pe = 1;
+ phb->ioda.total_pe_num = 1;
prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
if (prop32)
- phb->ioda.total_pe = be32_to_cpup(prop32);
+ phb->ioda.total_pe_num = be32_to_cpup(prop32);
prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
if (prop32)
- phb->ioda.reserved_pe = be32_to_cpup(prop32);
+ phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
/* Parse 64-bit MMIO range */
pnv_ioda_parse_m64_window(phb);
/* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
- phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
+ phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
phb->ioda.io_size = hose->pci_io_size;
- phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
+ phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
+ /* Calculate how many 32-bit TCE segments we have */
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
+
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+ sizeof(unsigned long));
+ m64map_off = size;
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
m32map_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
if (phb->type == PNV_PHB_IODA1) {
iomap_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
+ dma32map_off = size;
+ size += phb->ioda.dma32_count *
+ sizeof(phb->ioda.dma32_segmap[0]);
}
pemap_off = size;
- size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
+ size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
aux = memblock_virt_alloc(size, 0);
phb->ioda.pe_alloc = aux;
+ phb->ioda.m64_segmap = aux + m64map_off;
phb->ioda.m32_segmap = aux + m32map_off;
- if (phb->type == PNV_PHB_IODA1)
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+ phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+ phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+ }
+ if (phb->type == PNV_PHB_IODA1) {
phb->ioda.io_segmap = aux + iomap_off;
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+ phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
+
+ phb->ioda.dma32_segmap = aux + dma32map_off;
+ for (segno = 0; segno < phb->ioda.dma32_count; segno++)
+ phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
+ }
phb->ioda.pe_array = aux + pemap_off;
- set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
+ set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
- INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
mutex_init(&phb->ioda.pe_list_mutex);
/* Calculate how many 32-bit TCE segments we have */
- phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
#endif
pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
- phb->ioda.total_pe, phb->ioda.reserved_pe,
+ phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
phb->ioda.m32_size, phb->ioda.m32_segsize);
if (phb->ioda.m64_size)
pr_info(" M64: 0x%lx [segment=0x%lx]\n",
phb->freeze_pe = pnv_ioda_freeze_pe;
phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
- /* Setup RID -> PE mapping function */
- phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
-
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU)
+ if (phb->type == PNV_PHB_NPU) {
hose->controller_ops = pnv_npu_ioda_controller_ops;
- else
+ } else {
+ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
+ }
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
/* Delay in usec */
#define PCI_RESET_DELAY_US 3000000
-#define cfg_dbg(fmt...) do { } while(0)
-//#define cfg_dbg(fmt...) printk(fmt)
-
#ifdef CONFIG_PCI_MSI
int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct pnv_phb *phb = pdn->phb->private_data;
u8 fstate;
__be16 pcierr;
- int pe_no;
+ unsigned int pe_no;
s64 rc;
/*
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe_idx;
}
/*
}
}
- cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
- (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+ pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+ (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
/* Clear the frozen state if applicable */
if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- __func__, pdn->busno, pdn->devfn, where, size, *val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, *val);
return PCIBIOS_SUCCESSFUL;
}
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- pdn->busno, pdn->devfn, where, size, val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, val);
switch (size) {
case 1:
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
-#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
unsigned long flags;
struct pnv_phb *phb;
-#define PNV_IODA_MAX_PEER_PES 8
- struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES];
-
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
/* PE number */
unsigned int pe_number;
- /* "Weight" assigned to the PE for the sake of DMA resource
- * allocations
- */
- unsigned int dma_weight;
-
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
- int tce32_seg;
- int tce32_segcount;
struct iommu_table_group table_group;
/* 64-bit TCE bypass region */
struct list_head slaves;
/* Link in list of PE#s */
- struct list_head dma_link;
struct list_head list;
};
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
- u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pci_bus *bus,
unsigned long *pe_bitmap, bool all);
- int (*pick_m64_pe)(struct pci_bus *bus, bool all);
+ struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
struct {
/* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
+ unsigned int total_pe_num;
+ unsigned int reserved_pe_idx;
/* 32-bit MMIO window */
unsigned int m32_size;
unsigned int io_segsize;
unsigned int io_pci_base;
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
+ /* PE allocation */
struct mutex pe_alloc_mutex;
+ unsigned long *pe_alloc;
+ struct pnv_ioda_pe *pe_array;
/* M32 & IO segment maps */
+ unsigned int *m64_segmap;
unsigned int *m32_segmap;
unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
+
+ /* DMA32 segment maps - IODA1 only */
+ unsigned int dma32_count;
+ unsigned int *dma32_segmap;
/* IRQ chip */
int irq_chip_init;
*/
unsigned char pe_rmap[0x10000];
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
/* TCE cache invalidate registers (physical and
* remapped)
*/
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...);
+#define pe_err(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
/* Nvlink functions */
-extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
-extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm);
-extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
-extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
-extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
-extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
+extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
+extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
+extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl);
+extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
+extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
+extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
#endif /* __POWERNV_PCI_H */
if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
return 0;
- hpte_init_native();
+ if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled())
+ radix_init_native();
+ else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64))
+ hpte_init_native();
if (firmware_has_feature(FW_FEATURE_OPAL))
pnv_setup_machdep_opal();
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
static int __init setup_areas(struct spu *spu)
{
struct table {char* name; unsigned long addr; unsigned long size;};
- static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3;
+ unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
sizeof(struct spe_shadow),
}
spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
- LS_SIZE, _PAGE_NO_CACHE);
+ LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
if (!spu->local_store) {
pr_debug("%s:%d: ioremap local_store failed\n",
return new_prop;
}
+static void dlpar_update_drconf_property(struct device_node *dn,
+ struct property *prop)
+{
+ struct of_drconf_cell *lmbs;
+ u32 num_lmbs, *p;
+ int i;
+
+ /* Convert the property back to BE */
+ p = prop->value;
+ num_lmbs = *p;
+ *p = cpu_to_be32(*p);
+ p++;
+
+ lmbs = (struct of_drconf_cell *)p;
+ for (i = 0; i < num_lmbs; i++) {
+ lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
+ lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+ lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
+ }
+
+ rtas_hp_event = true;
+ of_update_property(dn, prop);
+ rtas_hp_event = false;
+}
+
+static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ struct device_node *dn;
+ struct property *prop;
+ struct of_drconf_cell *lmbs;
+ u32 *p, num_lmbs;
+ int i;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return -ENODEV;
+
+ prop = dlpar_clone_drconf_property(dn);
+ if (!prop) {
+ of_node_put(dn);
+ return -ENODEV;
+ }
+
+ p = prop->value;
+ num_lmbs = *p++;
+ lmbs = (struct of_drconf_cell *)p;
+
+ for (i = 0; i < num_lmbs; i++) {
+ if (lmbs[i].drc_index == lmb->drc_index) {
+ lmbs[i].flags = lmb->flags;
+ lmbs[i].aa_index = lmb->aa_index;
+
+ dlpar_update_drconf_property(dn, prop);
+ break;
+ }
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+
+static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+{
+ struct device_node *parent, *lmb_node, *dr_node;
+ const u32 *lmb_assoc;
+ const u32 *assoc_arrays;
+ u32 aa_index;
+ int aa_arrays, aa_array_entries, aa_array_sz;
+ int i;
+
+ parent = of_find_node_by_path("/");
+ if (!parent)
+ return -ENODEV;
+
+ lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+ parent);
+ of_node_put(parent);
+ if (!lmb_node)
+ return -EINVAL;
+
+ lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+ if (!lmb_assoc) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dr_node) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ assoc_arrays = of_get_property(dr_node,
+ "ibm,associativity-lookup-arrays",
+ NULL);
+ of_node_put(dr_node);
+ if (!assoc_arrays) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ /* The ibm,associativity-lookup-arrays property is defined to be
+ * a 32-bit value specifying the number of associativity arrays
+ * followed by a 32-bitvalue specifying the number of entries per
+ * array, followed by the associativity arrays.
+ */
+ aa_arrays = be32_to_cpu(assoc_arrays[0]);
+ aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+ aa_array_sz = aa_array_entries * sizeof(u32);
+
+ aa_index = -1;
+ for (i = 0; i < aa_arrays; i++) {
+ int indx = (i * aa_array_entries) + 2;
+
+ if (memcmp(&assoc_arrays[indx], &lmb_assoc[1], aa_array_sz))
+ continue;
+
+ aa_index = i;
+ break;
+ }
+
+ dlpar_free_cc_nodes(lmb_node);
+ return aa_index;
+}
+
+static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ int aa_index;
+
+ lmb->flags |= DRCONF_MEM_ASSIGNED;
+
+ aa_index = lookup_lmb_associativity_index(lmb);
+ if (aa_index < 0) {
+ pr_err("Couldn't find associativity index for drc index %x\n",
+ lmb->drc_index);
+ return aa_index;
+ }
+
+ lmb->aa_index = aa_index;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
+static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+ lmb->aa_index = 0xffffffff;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
{
unsigned long section_nr;
memblock_remove(lmb->base_addr, block_sz);
dlpar_release_drc(lmb->drc_index);
+ dlpar_remove_device_tree_lmb(lmb);
- lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
- if (lmb->flags & DRCONF_MEM_ASSIGNED)
- return -EINVAL;
-
block_sz = memory_block_size_bytes();
- rc = dlpar_acquire_drc(lmb->drc_index);
- if (rc)
- return rc;
-
/* Find the node id for this address */
nid = memory_add_physaddr_to_nid(lmb->base_addr);
/* Add the memory */
rc = add_memory(nid, lmb->base_addr, block_sz);
- if (rc) {
- dlpar_release_drc(lmb->drc_index);
+ if (rc)
return rc;
- }
/* Register this block of memory */
rc = memblock_add(lmb->base_addr, block_sz);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
mem_block = lmb_to_memblock(lmb);
if (!mem_block) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return -EINVAL;
}
put_device(&mem_block->dev);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
return 0;
}
+static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+{
+ int rc;
+
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ return -EINVAL;
+
+ rc = dlpar_acquire_drc(lmb->drc_index);
+ if (rc)
+ return rc;
+
+ rc = dlpar_add_device_tree_lmb(lmb);
+ if (rc) {
+ pr_err("Couldn't update device tree for drc index %x\n",
+ lmb->drc_index);
+ dlpar_release_drc(lmb->drc_index);
+ return rc;
+ }
+
+ rc = dlpar_add_lmb_memory(lmb);
+ if (rc) {
+ dlpar_remove_device_tree_lmb(lmb);
+ dlpar_release_drc(lmb->drc_index);
+ }
+
+ return rc;
+}
+
static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
{
struct of_drconf_cell *lmbs;
return rc;
}
-static void dlpar_update_drconf_property(struct device_node *dn,
- struct property *prop)
-{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- /* Convert the property back to BE */
- p = prop->value;
- num_lmbs = *p;
- *p = cpu_to_be32(*p);
- p++;
-
- lmbs = (struct of_drconf_cell *)p;
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
- lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
- lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
- }
-
- rtas_hp_event = true;
- of_update_property(dn, prop);
- rtas_hp_event = false;
-}
-
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
struct device_node *dn;
break;
}
- if (rc)
- dlpar_free_drconf_property(prop);
- else
- dlpar_update_drconf_property(dn, prop);
+ dlpar_free_drconf_property(prop);
dlpar_memory_out:
of_node_put(dn);
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_query_response *query)
{
- struct eeh_dev *edev;
+ struct device_node *dn;
+ struct pci_dn *pdn;
u32 cfg_addr;
u64 buid;
int ret;
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- edev = pci_dev_to_eeh_dev(dev);
- cfg_addr = edev->config_addr;
- if (edev->pe_config_addr)
- cfg_addr = edev->pe_config_addr;
- buid = edev->phb->buid;
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 8) | pdn->devfn;
ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
struct ddw_create_response *create, int page_shift,
int window_shift)
{
- struct eeh_dev *edev;
+ struct device_node *dn;
+ struct pci_dn *pdn;
u32 cfg_addr;
u64 buid;
int ret;
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- edev = pci_dev_to_eeh_dev(dev);
- cfg_addr = edev->config_addr;
- if (edev->pe_config_addr)
- cfg_addr = edev->pe_config_addr;
- buid = edev->phb->buid;
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 8) | pdn->devfn;
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
"%lx failed with %ld\n", cpu, hwcpu, addr, ret);
return;
}
+
+#ifdef CONFIG_PPC_STD_MMU_64
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
*/
- addr = __pa(paca[cpu].slb_shadow_ptr);
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ addr = __pa(paca[cpu].slb_shadow_ptr);
ret = register_slb_shadow(hwcpu, addr);
if (ret)
pr_err("WARNING: SLB shadow buffer registration for "
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
+#endif /* CONFIG_PPC_STD_MMU_64 */
/*
* Register dispatch trace log, if one has been allocated.
}
}
+#ifdef CONFIG_PPC_STD_MMU_64
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
/* Exact = 0 */
flags = 0;
- /* Make pHyp happy */
- if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
- hpte_r &= ~HPTE_R_M;
-
if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
flags |= H_COALESCE_CAND;
void arch_free_page(struct page *page, int order)
{
+ if (radix_enabled())
+ return;
if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
return;
}
EXPORT_SYMBOL(arch_free_page);
-#endif
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_TRACEPOINTS
#ifdef HAVE_JUMP_LABEL
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
+#ifdef CONFIG_PPC_STD_MMU_64
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
-
+#endif
parse_em_data(m);
return 0;
break;
case 0x80000000:
- prop = of_find_property(dn, prop_name, NULL);
- of_remove_property(dn, prop);
+ of_remove_property(dn, of_find_property(dn,
+ prop_name, NULL));
prop = NULL;
break;
memset(&counts, 0, sizeof(struct msi_counts));
/* Work out how many devices we have below this PE */
- traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
+ pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts);
if (counts.num_devices == 0) {
pr_err("rtas_msi: found 0 devices under PE for %s\n",
/* else, we have some more calculating to do */
counts.requestor = pci_device_to_OF_node(dev);
counts.request = request;
- traverse_pci_devices(pe_dn, count_spare_msis, &counts);
+ pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts);
/* If the quota isn't an integer multiple of the total, we can
* use the remainder as spare MSIs for anyone that wants them. */
#include "pseries.h"
-static struct pci_bus *
-find_bus_among_children(struct pci_bus *bus,
- struct device_node *dn)
-{
- struct pci_bus *child = NULL;
- struct pci_bus *tmp;
- struct device_node *busdn;
-
- busdn = pci_bus_to_OF_node(bus);
- if (busdn == dn)
- return bus;
-
- list_for_each_entry(tmp, &bus->children, node) {
- child = find_bus_among_children(tmp, dn);
- if (child)
- break;
- };
- return child;
-}
-
-struct pci_bus *
-pcibios_find_pci_bus(struct device_node *dn)
-{
- struct pci_dn *pdn = dn->data;
-
- if (!pdn || !pdn->phb || !pdn->phb->bus)
- return NULL;
-
- return find_bus_among_children(pdn->phb->bus, dn);
-}
-EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
-
struct pci_controller *init_phb_dynamic(struct device_node *dn)
{
struct pci_controller *phb;
{
struct device_node *np;
char *tmp;
- struct property *prop;
buf = parse_node(buf, bufsize, &np);
if (!np)
if (strlen(buf) == 0)
return -EINVAL;
- prop = of_find_property(np, buf, NULL);
-
- return of_remove_property(np, prop);
+ return of_remove_property(np, of_find_property(np, buf, NULL));
}
static int do_update_property(char *buf, size_t bufsize)
for_each_node_by_name(np, "interrupt-controller") {
typep = of_get_property(np, "compatible", NULL);
+ if (!typep)
+ continue;
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
ppc_md.init_IRQ = pseries_mpic_init_IRQ;
pdn = parent ? PCI_DN(parent) : NULL;
if (pdn) {
/* Create pdn and EEH device */
- update_dn_pci_info(np, pdn->phb);
+ pci_add_device_node_info(pdn->phb, np);
eeh_dev_init(PCI_DN(np), pdn->phb);
}
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/machdep.h>
+#include <asm/mpc85xx.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
#include <sysdev/fsl_soc.h>
u8 hdr_type, progif;
struct device_node *dev;
struct ccsr_pci __iomem *pci;
+ u16 temp;
+ u32 svr = mfspr(SPRN_SVR);
dev = pdev->dev.of_node;
PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS;
if (fsl_pcie_check_link(hose))
hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+ } else {
+ /*
+ * Set PBFR(PCI Bus Function Register)[10] = 1 to
+ * disable the combining of crossing cacheline
+ * boundary requests into one burst transaction.
+ * PCI-X operation is not affected.
+ * Fix erratum PCI 5 on MPC8548
+ */
+#define PCI_BUS_FUNCTION 0x44
+#define PCI_BUS_FUNCTION_MDS 0x400 /* Master disable streaming */
+ if (((SVR_SOC_VER(svr) == SVR_8543) ||
+ (SVR_SOC_VER(svr) == SVR_8545) ||
+ (SVR_SOC_VER(svr) == SVR_8547) ||
+ (SVR_SOC_VER(svr) == SVR_8548)) &&
+ !early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX)) {
+ early_read_config_word(hose, 0, 0,
+ PCI_BUS_FUNCTION, &temp);
+ temp |= PCI_BUS_FUNCTION_MDS;
+ early_write_config_word(hose, 0, 0,
+ PCI_BUS_FUNCTION, temp);
+ }
}
printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. "
static int mpic_init_sys(void)
{
+ int rc;
+
register_syscore_ops(&mpic_syscore_ops);
- subsys_system_register(&mpic_subsys, NULL);
+ rc = subsys_system_register(&mpic_subsys, NULL);
+ if (rc) {
+ unregister_syscore_ops(&mpic_syscore_ops);
+ pr_err("mpic: Failed to register subsystem!\n");
+ return rc;
+ }
return 0;
}
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y += xmon.o nonstdio.o
+obj-y += xmon.o nonstdio.o spr_access.o
ifdef CONFIG_XMON_DISASSEMBLY
obj-y += ppc-dis.o ppc-opc.o
--- /dev/null
+#include <asm/ppc_asm.h>
+
+/* unsigned long xmon_mfspr(sprn, default_value) */
+_GLOBAL(xmon_mfspr)
+ ld r5, .Lmfspr_table@got(r2)
+ b xmon_mxspr
+
+/* void xmon_mtspr(sprn, new_value) */
+_GLOBAL(xmon_mtspr)
+ ld r5, .Lmtspr_table@got(r2)
+ b xmon_mxspr
+
+/*
+ * r3 = sprn
+ * r4 = default or new value
+ * r5 = table base
+ */
+xmon_mxspr:
+ /*
+ * To index into the table of mxsprs we need:
+ * i = (sprn & 0x3ff) * 8
+ * or using rwlinm:
+ * i = (sprn << 3) & (0x3ff << 3)
+ */
+ rlwinm r3, r3, 3, 0x3ff << 3
+ add r5, r5, r3
+ mtctr r5
+ mr r3, r4 /* put default_value in r3 for mfspr */
+ bctr
+
+.Lmfspr_table:
+ spr = 0
+ .rept 1024
+ mfspr r3, spr
+ blr
+ spr = spr + 1
+ .endr
+
+.Lmtspr_table:
+ spr = 0
+ .rept 1024
+ mtspr spr, r4
+ blr
+ spr = spr + 1
+ .endr
static long bus_error_jmp[JMP_BUF_LEN];
static int catch_memory_errors;
+static int catch_spr_faults;
static long *xmon_fault_jmp[NR_CPUS];
/* Breakpoint stuff */
static void flush_input(void);
static int inchar(void);
static void take_input(char *);
-static unsigned long read_spr(int);
+static int read_spr(int, unsigned long *);
static void write_spr(int, unsigned long);
static void super_regs(void);
static void remove_bpts(void);
sdi # disassemble spu local store for spu # (in hex)\n"
#endif
" S print special registers\n\
+ Sa print all SPRs\n\
+ Sr # read SPR #\n\
+ Sw #v write v to SPR #\n\
t print backtrace\n\
x exit monitor and recover\n\
X exit monitor and don't recover\n"
#ifdef CONFIG_SMP
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
+ /*
+ * We catch SPR read/write faults here because the 0x700, 0xf60
+ * etc. handlers don't call debugger_fault_handler().
+ */
+ if (catch_spr_faults)
+ longjmp(bus_error_jmp, 1);
get_output_lock();
excprint(regs);
printf("cpu 0x%x: Exception %lx %s in xmon, "
catch_memory_errors = 0;
}
-static unsigned long
-read_spr(int n)
+extern unsigned long xmon_mfspr(int spr, unsigned long default_value);
+extern void xmon_mtspr(int spr, unsigned long value);
+
+static int
+read_spr(int n, unsigned long *vp)
{
- unsigned int instrs[2];
- unsigned long (*code)(void);
unsigned long ret = -1UL;
-#ifdef CONFIG_PPC64
- unsigned long opd[3];
-
- opd[0] = (unsigned long)instrs;
- opd[1] = 0;
- opd[2] = 0;
- code = (unsigned long (*)(void)) opd;
-#else
- code = (unsigned long (*)(void)) instrs;
-#endif
-
- /* mfspr r3,n; blr */
- instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
- instrs[1] = 0x4e800020;
- store_inst(instrs);
- store_inst(instrs+1);
+ int ok = 0;
if (setjmp(bus_error_jmp) == 0) {
- catch_memory_errors = 1;
+ catch_spr_faults = 1;
sync();
- ret = code();
+ ret = xmon_mfspr(n, *vp);
sync();
- /* wait a little while to see if we get a machine check */
- __delay(200);
- n = size;
+ *vp = ret;
+ ok = 1;
}
+ catch_spr_faults = 0;
- return ret;
+ return ok;
}
static void
write_spr(int n, unsigned long val)
{
- unsigned int instrs[2];
- unsigned long (*code)(unsigned long);
-#ifdef CONFIG_PPC64
- unsigned long opd[3];
-
- opd[0] = (unsigned long)instrs;
- opd[1] = 0;
- opd[2] = 0;
- code = (unsigned long (*)(unsigned long)) opd;
-#else
- code = (unsigned long (*)(unsigned long)) instrs;
-#endif
-
- instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
- instrs[1] = 0x4e800020;
- store_inst(instrs);
- store_inst(instrs+1);
-
if (setjmp(bus_error_jmp) == 0) {
- catch_memory_errors = 1;
+ catch_spr_faults = 1;
sync();
- code(val);
+ xmon_mtspr(n, val);
sync();
- /* wait a little while to see if we get a machine check */
- __delay(200);
- n = size;
+ } else {
+ printf("SPR 0x%03x (%4d) Faulted during write\n", n, n);
}
+ catch_spr_faults = 0;
}
static unsigned long regno;
extern char exc_prolog;
extern char dec_exc;
+static void dump_one_spr(int spr, bool show_unimplemented)
+{
+ unsigned long val;
+
+ val = 0xdeadbeef;
+ if (!read_spr(spr, &val)) {
+ printf("SPR 0x%03x (%4d) Faulted during read\n", spr, spr);
+ return;
+ }
+
+ if (val == 0xdeadbeef) {
+ /* Looks like read was a nop, confirm */
+ val = 0x0badcafe;
+ if (!read_spr(spr, &val)) {
+ printf("SPR 0x%03x (%4d) Faulted during read\n", spr, spr);
+ return;
+ }
+
+ if (val == 0x0badcafe) {
+ if (show_unimplemented)
+ printf("SPR 0x%03x (%4d) Unimplemented\n", spr, spr);
+ return;
+ }
+ }
+
+ printf("SPR 0x%03x (%4d) = 0x%lx\n", spr, spr, val);
+}
+
static void super_regs(void)
{
int cmd;
- unsigned long val;
+ int spr;
cmd = skipbl();
- if (cmd == '\n') {
+
+ switch (cmd) {
+ case '\n': {
unsigned long sp, toc;
asm("mr %0,1" : "=r" (sp) :);
asm("mr %0,2" : "=r" (toc) :);
mfspr(SPRN_DEC), mfspr(SPRN_SPRG2));
printf("sp = "REG" sprg3= "REG"\n", sp, mfspr(SPRN_SPRG3));
printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR));
-
return;
}
-
- scanhex(®no);
- switch (cmd) {
- case 'w':
- val = read_spr(regno);
+ case 'w': {
+ unsigned long val;
+ scanhex(®no);
+ val = 0;
+ read_spr(regno, &val);
scanhex(&val);
write_spr(regno, val);
- /* fall through */
+ dump_one_spr(regno, true);
+ break;
+ }
case 'r':
- printf("spr %lx = %lx\n", regno, read_spr(regno));
+ scanhex(®no);
+ dump_one_spr(regno, true);
+ break;
+ case 'a':
+ /* dump ALL SPRs */
+ for (spr = 1; spr < 1024; ++spr)
+ dump_one_spr(spr, false);
break;
}
+
scannl();
}
printf("%s", after);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_STD_MMU_64
void dump_segments(void)
{
int i;
return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
}
+#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
return MACHINE_HAS_HPAGE ? 1 : 0;
return pte_val(pte) & _PAGE_PMD_HUGE;
}
-#define has_transparent_hugepage() 1
-
static inline pmd_t pmd_mkold(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define has_transparent_hugepage() 1
#define pmd_trans_huge pmd_huge_page
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
cpumask_set_cpu(best_cpu, &node_2_cpu_mask[node]);
cpu_2_node[best_cpu] = node;
cpumask_clear_cpu(best_cpu, &unbound_cpus);
- node = next_node(node, default_nodes);
- if (node == MAX_NUMNODES)
- node = first_node(default_nodes);
+ node = next_node_in(node, default_nodes);
}
/* Print out node assignments and set defaults for disabled cpus */
static __init int setup_hugepagesz(char *opt)
{
+ int rc;
+
if (!saw_hugepagesz) {
saw_hugepagesz = true;
memset(huge_shift, 0, sizeof(huge_shift));
}
- return __setup_hugepagesz(memparse(opt, NULL));
+ rc = __setup_hugepagesz(memparse(opt, NULL));
+ if (rc)
+ hugetlb_bad_size();
+ return rc;
}
__setup("hugepagesz=", setup_hugepagesz);
* Hacky direct set to avoid unnecessary
* lock take/release for EVERY page here.
*/
- p->_count.counter = 0;
+ p->_refcount.counter = 0;
p->_mapcount.counter = -1;
}
init_page_count(page);
#
ENTRY(sha1_x8_avx2)
- push RSP_SAVE
+ # save callee-saved clobbered registers to comply with C function ABI
+ push %r12
+ push %r13
+ push %r14
+ push %r15
#save rsp
mov %rsp, RSP_SAVE
## Postamble
mov RSP_SAVE, %rsp
- pop RSP_SAVE
+
+ # restore callee-saved clobbered registers
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
ret
ENDPROC(sha1_x8_avx2)
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
+#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
return boot_cpu_has(X86_FEATURE_PSE);
} else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
+ hugetlb_bad_size();
printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
ps >> 20);
return 0;
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
+ rr = next_node_in(rr, node_online_map);
}
}
kfree(d);
}
-/**
- * amba_device_add - add a previously allocated AMBA device structure
- * @dev: AMBA device allocated by amba_device_alloc
- * @parent: resource parent for this devices resources
- *
- * Claim the resource, and read the device cell ID if not already
- * initialized. Register the AMBA device with the Linux device
- * manager.
- */
-int amba_device_add(struct amba_device *dev, struct resource *parent)
+static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
{
u32 size;
void __iomem *tmp;
goto err_release;
}
+ ret = dev_pm_domain_attach(&dev->dev, true);
+ if (ret == -EPROBE_DEFER) {
+ iounmap(tmp);
+ goto err_release;
+ }
+
ret = amba_get_enable_pclk(dev);
if (ret == 0) {
u32 pid, cid;
}
iounmap(tmp);
+ dev_pm_domain_detach(&dev->dev, true);
if (ret)
goto err_release;
err_out:
return ret;
}
+
+/*
+ * Registration of AMBA device require reading its pid and cid registers.
+ * To do this, the device must be turned on (if it is a part of power domain)
+ * and have clocks enabled. However in some cases those resources might not be
+ * yet available. Returning EPROBE_DEFER is not a solution in such case,
+ * because callers don't handle this special error code. Instead such devices
+ * are added to the special list and their registration is retried from
+ * periodic worker, until all resources are available and registration succeeds.
+ */
+struct deferred_device {
+ struct amba_device *dev;
+ struct resource *parent;
+ struct list_head node;
+};
+
+static LIST_HEAD(deferred_devices);
+static DEFINE_MUTEX(deferred_devices_lock);
+
+static void amba_deferred_retry_func(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(deferred_retry_work, amba_deferred_retry_func);
+
+#define DEFERRED_DEVICE_TIMEOUT (msecs_to_jiffies(5 * 1000))
+
+static void amba_deferred_retry_func(struct work_struct *dummy)
+{
+ struct deferred_device *ddev, *tmp;
+
+ mutex_lock(&deferred_devices_lock);
+
+ list_for_each_entry_safe(ddev, tmp, &deferred_devices, node) {
+ int ret = amba_device_try_add(ddev->dev, ddev->parent);
+
+ if (ret == -EPROBE_DEFER)
+ continue;
+
+ list_del_init(&ddev->node);
+ kfree(ddev);
+ }
+
+ if (!list_empty(&deferred_devices))
+ schedule_delayed_work(&deferred_retry_work,
+ DEFERRED_DEVICE_TIMEOUT);
+
+ mutex_unlock(&deferred_devices_lock);
+}
+
+/**
+ * amba_device_add - add a previously allocated AMBA device structure
+ * @dev: AMBA device allocated by amba_device_alloc
+ * @parent: resource parent for this devices resources
+ *
+ * Claim the resource, and read the device cell ID if not already
+ * initialized. Register the AMBA device with the Linux device
+ * manager.
+ */
+int amba_device_add(struct amba_device *dev, struct resource *parent)
+{
+ int ret = amba_device_try_add(dev, parent);
+
+ if (ret == -EPROBE_DEFER) {
+ struct deferred_device *ddev;
+
+ ddev = kmalloc(sizeof(*ddev), GFP_KERNEL);
+ if (!ddev)
+ return -ENOMEM;
+
+ ddev->dev = dev;
+ ddev->parent = parent;
+ ret = 0;
+
+ mutex_lock(&deferred_devices_lock);
+
+ if (list_empty(&deferred_devices))
+ schedule_delayed_work(&deferred_retry_work,
+ DEFERRED_DEVICE_TIMEOUT);
+ list_add_tail(&ddev->node, &deferred_devices);
+
+ mutex_unlock(&deferred_devices_lock);
+ }
+ return ret;
+}
EXPORT_SYMBOL_GPL(amba_device_add);
static struct amba_device *
* discussion.
*
* We cannot use get_page in the workaround, because it insists on a
- * positive page count as a precondition. So we use _count directly.
+ * positive page count as a precondition. So we use _refcount directly.
*/
static void
bio_pageinc(struct bio *bio)
_set_L3CR(save_l3cr);
/* Restore userland MMU context */
- switch_mmu_context(NULL, current->active_mm);
+ switch_mmu_context(NULL, current->active_mm, NULL);
#ifdef DEBUG_FREQ
printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
if (!overlay_data || kfree_table_add(kft, overlay_data))
return NULL;
- of_fdt_unflatten_tree(overlay_data, &overlay);
+ of_fdt_unflatten_tree(overlay_data, NULL, &overlay);
if (!overlay) {
pr_warn("%s: Unfattening overlay tree failed\n", __func__);
return NULL;
config OMAP_SSI
tristate "OMAP SSI hardware driver"
- depends on HSI && OF && (ARCH_OMAP3 || (ARM && COMPILE_TEST))
+ depends on HSI && OF && ARM && COMMON_CLK
+ depends on ARCH_OMAP3 || COMPILE_TEST
---help---
SSI is a legacy version of HSI. It is usually used to connect
an application engine with a cellular modem.
If you say Y here, you will enable the OMAP SSI hardware driver.
If unsure, say N.
-
-config OMAP_SSI_PORT
- tristate
- default m if OMAP_SSI=m
- default y if OMAP_SSI=y
# Makefile for HSI controllers drivers
#
-obj-$(CONFIG_OMAP_SSI) += omap_ssi.o
-obj-$(CONFIG_OMAP_SSI_PORT) += omap_ssi_port.o
+omap_ssi-objs += omap_ssi_core.o omap_ssi_port.o
+obj-$(CONFIG_OMAP_SSI) += omap_ssi.o
+++ /dev/null
-/* OMAP SSI driver.
- *
- * Copyright (C) 2010 Nokia Corporation. All rights reserved.
- * Copyright (C) 2014 Sebastian Reichel <sre@kernel.org>
- *
- * Contact: Carlos Chinea <carlos.chinea@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#include <linux/compiler.h>
-#include <linux/err.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <linux/gpio.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/scatterlist.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/debugfs.h>
-#include <linux/pm_runtime.h>
-#include <linux/of_platform.h>
-#include <linux/hsi/hsi.h>
-#include <linux/idr.h>
-
-#include "omap_ssi_regs.h"
-#include "omap_ssi.h"
-
-/* For automatically allocated device IDs */
-static DEFINE_IDA(platform_omap_ssi_ida);
-
-#ifdef CONFIG_DEBUG_FS
-static int ssi_debug_show(struct seq_file *m, void *p __maybe_unused)
-{
- struct hsi_controller *ssi = m->private;
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- void __iomem *sys = omap_ssi->sys;
-
- pm_runtime_get_sync(ssi->device.parent);
- seq_printf(m, "REVISION\t: 0x%08x\n", readl(sys + SSI_REVISION_REG));
- seq_printf(m, "SYSCONFIG\t: 0x%08x\n", readl(sys + SSI_SYSCONFIG_REG));
- seq_printf(m, "SYSSTATUS\t: 0x%08x\n", readl(sys + SSI_SYSSTATUS_REG));
- pm_runtime_put_sync(ssi->device.parent);
-
- return 0;
-}
-
-static int ssi_debug_gdd_show(struct seq_file *m, void *p __maybe_unused)
-{
- struct hsi_controller *ssi = m->private;
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- void __iomem *gdd = omap_ssi->gdd;
- void __iomem *sys = omap_ssi->sys;
- int lch;
-
- pm_runtime_get_sync(ssi->device.parent);
-
- seq_printf(m, "GDD_MPU_STATUS\t: 0x%08x\n",
- readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG));
- seq_printf(m, "GDD_MPU_ENABLE\t: 0x%08x\n\n",
- readl(sys + SSI_GDD_MPU_IRQ_ENABLE_REG));
- seq_printf(m, "HW_ID\t\t: 0x%08x\n",
- readl(gdd + SSI_GDD_HW_ID_REG));
- seq_printf(m, "PPORT_ID\t: 0x%08x\n",
- readl(gdd + SSI_GDD_PPORT_ID_REG));
- seq_printf(m, "MPORT_ID\t: 0x%08x\n",
- readl(gdd + SSI_GDD_MPORT_ID_REG));
- seq_printf(m, "TEST\t\t: 0x%08x\n",
- readl(gdd + SSI_GDD_TEST_REG));
- seq_printf(m, "GCR\t\t: 0x%08x\n",
- readl(gdd + SSI_GDD_GCR_REG));
-
- for (lch = 0; lch < SSI_MAX_GDD_LCH; lch++) {
- seq_printf(m, "\nGDD LCH %d\n=========\n", lch);
- seq_printf(m, "CSDP\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CSDP_REG(lch)));
- seq_printf(m, "CCR\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CCR_REG(lch)));
- seq_printf(m, "CICR\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CICR_REG(lch)));
- seq_printf(m, "CSR\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CSR_REG(lch)));
- seq_printf(m, "CSSA\t\t: 0x%08x\n",
- readl(gdd + SSI_GDD_CSSA_REG(lch)));
- seq_printf(m, "CDSA\t\t: 0x%08x\n",
- readl(gdd + SSI_GDD_CDSA_REG(lch)));
- seq_printf(m, "CEN\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CEN_REG(lch)));
- seq_printf(m, "CSAC\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CSAC_REG(lch)));
- seq_printf(m, "CDAC\t\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CDAC_REG(lch)));
- seq_printf(m, "CLNK_CTRL\t: 0x%04x\n",
- readw(gdd + SSI_GDD_CLNK_CTRL_REG(lch)));
- }
-
- pm_runtime_put_sync(ssi->device.parent);
-
- return 0;
-}
-
-static int ssi_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ssi_debug_show, inode->i_private);
-}
-
-static int ssi_gdd_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ssi_debug_gdd_show, inode->i_private);
-}
-
-static const struct file_operations ssi_regs_fops = {
- .open = ssi_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations ssi_gdd_regs_fops = {
- .open = ssi_gdd_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int __init ssi_debug_add_ctrl(struct hsi_controller *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- struct dentry *dir;
-
- /* SSI controller */
- omap_ssi->dir = debugfs_create_dir(dev_name(&ssi->device), NULL);
- if (!omap_ssi->dir)
- return -ENOMEM;
-
- debugfs_create_file("regs", S_IRUGO, omap_ssi->dir, ssi,
- &ssi_regs_fops);
- /* SSI GDD (DMA) */
- dir = debugfs_create_dir("gdd", omap_ssi->dir);
- if (!dir)
- goto rback;
- debugfs_create_file("regs", S_IRUGO, dir, ssi, &ssi_gdd_regs_fops);
-
- return 0;
-rback:
- debugfs_remove_recursive(omap_ssi->dir);
-
- return -ENOMEM;
-}
-
-static void ssi_debug_remove_ctrl(struct hsi_controller *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
-
- debugfs_remove_recursive(omap_ssi->dir);
-}
-#endif /* CONFIG_DEBUG_FS */
-
-/*
- * FIXME: Horrible HACK needed until we remove the useless wakeline test
- * in the CMT. To be removed !!!!
- */
-void ssi_waketest(struct hsi_client *cl, unsigned int enable)
-{
- struct hsi_port *port = hsi_get_port(cl);
- struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
- struct hsi_controller *ssi = to_hsi_controller(port->device.parent);
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
-
- omap_port->wktest = !!enable;
- if (omap_port->wktest) {
- pm_runtime_get_sync(ssi->device.parent);
- writel_relaxed(SSI_WAKE(0),
- omap_ssi->sys + SSI_SET_WAKE_REG(port->num));
- } else {
- writel_relaxed(SSI_WAKE(0),
- omap_ssi->sys + SSI_CLEAR_WAKE_REG(port->num));
- pm_runtime_put_sync(ssi->device.parent);
- }
-}
-EXPORT_SYMBOL_GPL(ssi_waketest);
-
-static void ssi_gdd_complete(struct hsi_controller *ssi, unsigned int lch)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- struct hsi_msg *msg = omap_ssi->gdd_trn[lch].msg;
- struct hsi_port *port = to_hsi_port(msg->cl->device.parent);
- struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
- unsigned int dir;
- u32 csr;
- u32 val;
-
- spin_lock(&omap_ssi->lock);
-
- val = readl(omap_ssi->sys + SSI_GDD_MPU_IRQ_ENABLE_REG);
- val &= ~SSI_GDD_LCH(lch);
- writel_relaxed(val, omap_ssi->sys + SSI_GDD_MPU_IRQ_ENABLE_REG);
-
- if (msg->ttype == HSI_MSG_READ) {
- dir = DMA_FROM_DEVICE;
- val = SSI_DATAAVAILABLE(msg->channel);
- pm_runtime_put_sync(ssi->device.parent);
- } else {
- dir = DMA_TO_DEVICE;
- val = SSI_DATAACCEPT(msg->channel);
- /* Keep clocks reference for write pio event */
- }
- dma_unmap_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents, dir);
- csr = readw(omap_ssi->gdd + SSI_GDD_CSR_REG(lch));
- omap_ssi->gdd_trn[lch].msg = NULL; /* release GDD lch */
- dev_dbg(&port->device, "DMA completed ch %d ttype %d\n",
- msg->channel, msg->ttype);
- spin_unlock(&omap_ssi->lock);
- if (csr & SSI_CSR_TOUR) { /* Timeout error */
- msg->status = HSI_STATUS_ERROR;
- msg->actual_len = 0;
- spin_lock(&omap_port->lock);
- list_del(&msg->link); /* Dequeue msg */
- spin_unlock(&omap_port->lock);
- msg->complete(msg);
- return;
- }
- spin_lock(&omap_port->lock);
- val |= readl(omap_ssi->sys + SSI_MPU_ENABLE_REG(port->num, 0));
- writel_relaxed(val, omap_ssi->sys + SSI_MPU_ENABLE_REG(port->num, 0));
- spin_unlock(&omap_port->lock);
-
- msg->status = HSI_STATUS_COMPLETED;
- msg->actual_len = sg_dma_len(msg->sgt.sgl);
-}
-
-static void ssi_gdd_tasklet(unsigned long dev)
-{
- struct hsi_controller *ssi = (struct hsi_controller *)dev;
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- void __iomem *sys = omap_ssi->sys;
- unsigned int lch;
- u32 status_reg;
-
- pm_runtime_get_sync(ssi->device.parent);
-
- status_reg = readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG);
- for (lch = 0; lch < SSI_MAX_GDD_LCH; lch++) {
- if (status_reg & SSI_GDD_LCH(lch))
- ssi_gdd_complete(ssi, lch);
- }
- writel_relaxed(status_reg, sys + SSI_GDD_MPU_IRQ_STATUS_REG);
- status_reg = readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG);
-
- pm_runtime_put_sync(ssi->device.parent);
-
- if (status_reg)
- tasklet_hi_schedule(&omap_ssi->gdd_tasklet);
- else
- enable_irq(omap_ssi->gdd_irq);
-
-}
-
-static irqreturn_t ssi_gdd_isr(int irq, void *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
-
- tasklet_hi_schedule(&omap_ssi->gdd_tasklet);
- disable_irq_nosync(irq);
-
- return IRQ_HANDLED;
-}
-
-static unsigned long ssi_get_clk_rate(struct hsi_controller *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- unsigned long rate = clk_get_rate(omap_ssi->fck);
- return rate;
-}
-
-static int __init ssi_get_iomem(struct platform_device *pd,
- const char *name, void __iomem **pbase, dma_addr_t *phy)
-{
- struct resource *mem;
- void __iomem *base;
- struct hsi_controller *ssi = platform_get_drvdata(pd);
-
- mem = platform_get_resource_byname(pd, IORESOURCE_MEM, name);
- base = devm_ioremap_resource(&ssi->device, mem);
- if (IS_ERR(base))
- return PTR_ERR(base);
-
- *pbase = base;
-
- if (phy)
- *phy = mem->start;
-
- return 0;
-}
-
-static int __init ssi_add_controller(struct hsi_controller *ssi,
- struct platform_device *pd)
-{
- struct omap_ssi_controller *omap_ssi;
- int err;
-
- omap_ssi = devm_kzalloc(&ssi->device, sizeof(*omap_ssi), GFP_KERNEL);
- if (!omap_ssi) {
- dev_err(&pd->dev, "not enough memory for omap ssi\n");
- return -ENOMEM;
- }
-
- err = ida_simple_get(&platform_omap_ssi_ida, 0, 0, GFP_KERNEL);
- if (err < 0)
- goto out_err;
- ssi->id = err;
-
- ssi->owner = THIS_MODULE;
- ssi->device.parent = &pd->dev;
- dev_set_name(&ssi->device, "ssi%d", ssi->id);
- hsi_controller_set_drvdata(ssi, omap_ssi);
- omap_ssi->dev = &ssi->device;
- err = ssi_get_iomem(pd, "sys", &omap_ssi->sys, NULL);
- if (err < 0)
- goto out_err;
- err = ssi_get_iomem(pd, "gdd", &omap_ssi->gdd, NULL);
- if (err < 0)
- goto out_err;
- err = platform_get_irq_byname(pd, "gdd_mpu");
- if (err < 0) {
- dev_err(&pd->dev, "GDD IRQ resource missing\n");
- goto out_err;
- }
- omap_ssi->gdd_irq = err;
- tasklet_init(&omap_ssi->gdd_tasklet, ssi_gdd_tasklet,
- (unsigned long)ssi);
- err = devm_request_irq(&ssi->device, omap_ssi->gdd_irq, ssi_gdd_isr,
- 0, "gdd_mpu", ssi);
- if (err < 0) {
- dev_err(&ssi->device, "Request GDD IRQ %d failed (%d)",
- omap_ssi->gdd_irq, err);
- goto out_err;
- }
-
- omap_ssi->port = devm_kzalloc(&ssi->device,
- sizeof(struct omap_ssi_port *) * ssi->num_ports, GFP_KERNEL);
- if (!omap_ssi->port) {
- err = -ENOMEM;
- goto out_err;
- }
-
- omap_ssi->fck = devm_clk_get(&ssi->device, "ssi_ssr_fck");
- if (IS_ERR(omap_ssi->fck)) {
- dev_err(&pd->dev, "Could not acquire clock \"ssi_ssr_fck\": %li\n",
- PTR_ERR(omap_ssi->fck));
- err = -ENODEV;
- goto out_err;
- }
-
- /* TODO: find register, which can be used to detect context loss */
- omap_ssi->get_loss = NULL;
-
- omap_ssi->max_speed = UINT_MAX;
- spin_lock_init(&omap_ssi->lock);
- err = hsi_register_controller(ssi);
-
- if (err < 0)
- goto out_err;
-
- return 0;
-
-out_err:
- ida_simple_remove(&platform_omap_ssi_ida, ssi->id);
- return err;
-}
-
-static int __init ssi_hw_init(struct hsi_controller *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- unsigned int i;
- u32 val;
- int err;
-
- err = pm_runtime_get_sync(ssi->device.parent);
- if (err < 0) {
- dev_err(&ssi->device, "runtime PM failed %d\n", err);
- return err;
- }
- /* Reseting SSI controller */
- writel_relaxed(SSI_SOFTRESET, omap_ssi->sys + SSI_SYSCONFIG_REG);
- val = readl(omap_ssi->sys + SSI_SYSSTATUS_REG);
- for (i = 0; ((i < 20) && !(val & SSI_RESETDONE)); i++) {
- msleep(20);
- val = readl(omap_ssi->sys + SSI_SYSSTATUS_REG);
- }
- if (!(val & SSI_RESETDONE)) {
- dev_err(&ssi->device, "SSI HW reset failed\n");
- pm_runtime_put_sync(ssi->device.parent);
- return -EIO;
- }
- /* Reseting GDD */
- writel_relaxed(SSI_SWRESET, omap_ssi->gdd + SSI_GDD_GRST_REG);
- /* Get FCK rate in KHz */
- omap_ssi->fck_rate = DIV_ROUND_CLOSEST(ssi_get_clk_rate(ssi), 1000);
- dev_dbg(&ssi->device, "SSI fck rate %lu KHz\n", omap_ssi->fck_rate);
- /* Set default PM settings */
- val = SSI_AUTOIDLE | SSI_SIDLEMODE_SMART | SSI_MIDLEMODE_SMART;
- writel_relaxed(val, omap_ssi->sys + SSI_SYSCONFIG_REG);
- omap_ssi->sysconfig = val;
- writel_relaxed(SSI_CLK_AUTOGATING_ON, omap_ssi->sys + SSI_GDD_GCR_REG);
- omap_ssi->gdd_gcr = SSI_CLK_AUTOGATING_ON;
- pm_runtime_put_sync(ssi->device.parent);
-
- return 0;
-}
-
-static void ssi_remove_controller(struct hsi_controller *ssi)
-{
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- int id = ssi->id;
- tasklet_kill(&omap_ssi->gdd_tasklet);
- hsi_unregister_controller(ssi);
- ida_simple_remove(&platform_omap_ssi_ida, id);
-}
-
-static inline int ssi_of_get_available_ports_count(const struct device_node *np)
-{
- struct device_node *child;
- int num = 0;
-
- for_each_available_child_of_node(np, child)
- if (of_device_is_compatible(child, "ti,omap3-ssi-port"))
- num++;
-
- return num;
-}
-
-static int ssi_remove_ports(struct device *dev, void *c)
-{
- struct platform_device *pdev = to_platform_device(dev);
-
- of_device_unregister(pdev);
-
- return 0;
-}
-
-static int __init ssi_probe(struct platform_device *pd)
-{
- struct platform_device *childpdev;
- struct device_node *np = pd->dev.of_node;
- struct device_node *child;
- struct hsi_controller *ssi;
- int err;
- int num_ports;
-
- if (!np) {
- dev_err(&pd->dev, "missing device tree data\n");
- return -EINVAL;
- }
-
- num_ports = ssi_of_get_available_ports_count(np);
-
- ssi = hsi_alloc_controller(num_ports, GFP_KERNEL);
- if (!ssi) {
- dev_err(&pd->dev, "No memory for controller\n");
- return -ENOMEM;
- }
-
- platform_set_drvdata(pd, ssi);
-
- err = ssi_add_controller(ssi, pd);
- if (err < 0)
- goto out1;
-
- pm_runtime_irq_safe(&pd->dev);
- pm_runtime_enable(&pd->dev);
-
- err = ssi_hw_init(ssi);
- if (err < 0)
- goto out2;
-#ifdef CONFIG_DEBUG_FS
- err = ssi_debug_add_ctrl(ssi);
- if (err < 0)
- goto out2;
-#endif
-
- for_each_available_child_of_node(np, child) {
- if (!of_device_is_compatible(child, "ti,omap3-ssi-port"))
- continue;
-
- childpdev = of_platform_device_create(child, NULL, &pd->dev);
- if (!childpdev) {
- err = -ENODEV;
- dev_err(&pd->dev, "failed to create ssi controller port\n");
- goto out3;
- }
- }
-
- dev_info(&pd->dev, "ssi controller %d initialized (%d ports)!\n",
- ssi->id, num_ports);
- return err;
-out3:
- device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
-out2:
- ssi_remove_controller(ssi);
-out1:
- platform_set_drvdata(pd, NULL);
- pm_runtime_disable(&pd->dev);
-
- return err;
-}
-
-static int __exit ssi_remove(struct platform_device *pd)
-{
- struct hsi_controller *ssi = platform_get_drvdata(pd);
-
-#ifdef CONFIG_DEBUG_FS
- ssi_debug_remove_ctrl(ssi);
-#endif
- ssi_remove_controller(ssi);
- platform_set_drvdata(pd, NULL);
-
- pm_runtime_disable(&pd->dev);
-
- /* cleanup of of_platform_populate() call */
- device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
-
- return 0;
-}
-
-#ifdef CONFIG_PM
-static int omap_ssi_runtime_suspend(struct device *dev)
-{
- struct hsi_controller *ssi = dev_get_drvdata(dev);
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
-
- dev_dbg(dev, "runtime suspend!\n");
-
- if (omap_ssi->get_loss)
- omap_ssi->loss_count =
- omap_ssi->get_loss(ssi->device.parent);
-
- return 0;
-}
-
-static int omap_ssi_runtime_resume(struct device *dev)
-{
- struct hsi_controller *ssi = dev_get_drvdata(dev);
- struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
-
- dev_dbg(dev, "runtime resume!\n");
-
- if ((omap_ssi->get_loss) && (omap_ssi->loss_count ==
- omap_ssi->get_loss(ssi->device.parent)))
- return 0;
-
- writel_relaxed(omap_ssi->gdd_gcr, omap_ssi->gdd + SSI_GDD_GCR_REG);
-
- return 0;
-}
-
-static const struct dev_pm_ops omap_ssi_pm_ops = {
- SET_RUNTIME_PM_OPS(omap_ssi_runtime_suspend, omap_ssi_runtime_resume,
- NULL)
-};
-
-#define DEV_PM_OPS (&omap_ssi_pm_ops)
-#else
-#define DEV_PM_OPS NULL
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id omap_ssi_of_match[] = {
- { .compatible = "ti,omap3-ssi", },
- {},
-};
-MODULE_DEVICE_TABLE(of, omap_ssi_of_match);
-#else
-#define omap_ssi_of_match NULL
-#endif
-
-static struct platform_driver ssi_pdriver = {
- .remove = __exit_p(ssi_remove),
- .driver = {
- .name = "omap_ssi",
- .pm = DEV_PM_OPS,
- .of_match_table = omap_ssi_of_match,
- },
-};
-
-module_platform_driver_probe(ssi_pdriver, ssi_probe);
-
-MODULE_ALIAS("platform:omap_ssi");
-MODULE_AUTHOR("Carlos Chinea <carlos.chinea@nokia.com>");
-MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
-MODULE_DESCRIPTION("Synchronous Serial Interface Driver");
-MODULE_LICENSE("GPL v2");
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/hsi/hsi.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <linux/io.h>
struct list_head brkqueue;
unsigned int irq;
int wake_irq;
- int wake_gpio;
+ struct gpio_desc *wake_gpio;
struct tasklet_struct pio_tasklet;
struct tasklet_struct wake_tasklet;
bool wktest:1; /* FIXME: HACK to be removed */
* @gdd_tasklet: bottom half for DMA transfers
* @gdd_trn: Array of GDD transaction data for ongoing GDD transfers
* @lock: lock to serialize access to GDD
+ * @fck_nb: DVFS notfifier block
+ * @fck_rate: clock rate
* @loss_count: To follow if we need to restore context or not
* @max_speed: Maximum TX speed (Kb/s) set by the clients.
* @sysconfig: SSI controller saved context
struct tasklet_struct gdd_tasklet;
struct gdd_trn gdd_trn[SSI_MAX_GDD_LCH];
spinlock_t lock;
+ struct notifier_block fck_nb;
unsigned long fck_rate;
u32 loss_count;
u32 max_speed;
#endif
};
+void omap_ssi_port_update_fclk(struct hsi_controller *ssi,
+ struct omap_ssi_port *omap_port);
+
+extern struct platform_driver ssi_port_pdriver;
+
#endif /* __LINUX_HSI_OMAP_SSI_H__ */
--- /dev/null
+/* OMAP SSI driver.
+ *
+ * Copyright (C) 2010 Nokia Corporation. All rights reserved.
+ * Copyright (C) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ * Contact: Carlos Chinea <carlos.chinea@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pm_runtime.h>
+#include <linux/of_platform.h>
+#include <linux/hsi/hsi.h>
+#include <linux/idr.h>
+
+#include "omap_ssi_regs.h"
+#include "omap_ssi.h"
+
+/* For automatically allocated device IDs */
+static DEFINE_IDA(platform_omap_ssi_ida);
+
+#ifdef CONFIG_DEBUG_FS
+static int ssi_debug_show(struct seq_file *m, void *p __maybe_unused)
+{
+ struct hsi_controller *ssi = m->private;
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ void __iomem *sys = omap_ssi->sys;
+
+ pm_runtime_get_sync(ssi->device.parent);
+ seq_printf(m, "REVISION\t: 0x%08x\n", readl(sys + SSI_REVISION_REG));
+ seq_printf(m, "SYSCONFIG\t: 0x%08x\n", readl(sys + SSI_SYSCONFIG_REG));
+ seq_printf(m, "SYSSTATUS\t: 0x%08x\n", readl(sys + SSI_SYSSTATUS_REG));
+ pm_runtime_put_sync(ssi->device.parent);
+
+ return 0;
+}
+
+static int ssi_debug_gdd_show(struct seq_file *m, void *p __maybe_unused)
+{
+ struct hsi_controller *ssi = m->private;
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ void __iomem *gdd = omap_ssi->gdd;
+ void __iomem *sys = omap_ssi->sys;
+ int lch;
+
+ pm_runtime_get_sync(ssi->device.parent);
+
+ seq_printf(m, "GDD_MPU_STATUS\t: 0x%08x\n",
+ readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG));
+ seq_printf(m, "GDD_MPU_ENABLE\t: 0x%08x\n\n",
+ readl(sys + SSI_GDD_MPU_IRQ_ENABLE_REG));
+ seq_printf(m, "HW_ID\t\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_HW_ID_REG));
+ seq_printf(m, "PPORT_ID\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_PPORT_ID_REG));
+ seq_printf(m, "MPORT_ID\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_MPORT_ID_REG));
+ seq_printf(m, "TEST\t\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_TEST_REG));
+ seq_printf(m, "GCR\t\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_GCR_REG));
+
+ for (lch = 0; lch < SSI_MAX_GDD_LCH; lch++) {
+ seq_printf(m, "\nGDD LCH %d\n=========\n", lch);
+ seq_printf(m, "CSDP\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CSDP_REG(lch)));
+ seq_printf(m, "CCR\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CCR_REG(lch)));
+ seq_printf(m, "CICR\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CICR_REG(lch)));
+ seq_printf(m, "CSR\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CSR_REG(lch)));
+ seq_printf(m, "CSSA\t\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_CSSA_REG(lch)));
+ seq_printf(m, "CDSA\t\t: 0x%08x\n",
+ readl(gdd + SSI_GDD_CDSA_REG(lch)));
+ seq_printf(m, "CEN\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CEN_REG(lch)));
+ seq_printf(m, "CSAC\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CSAC_REG(lch)));
+ seq_printf(m, "CDAC\t\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CDAC_REG(lch)));
+ seq_printf(m, "CLNK_CTRL\t: 0x%04x\n",
+ readw(gdd + SSI_GDD_CLNK_CTRL_REG(lch)));
+ }
+
+ pm_runtime_put_sync(ssi->device.parent);
+
+ return 0;
+}
+
+static int ssi_regs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ssi_debug_show, inode->i_private);
+}
+
+static int ssi_gdd_regs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ssi_debug_gdd_show, inode->i_private);
+}
+
+static const struct file_operations ssi_regs_fops = {
+ .open = ssi_regs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations ssi_gdd_regs_fops = {
+ .open = ssi_gdd_regs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int ssi_debug_add_ctrl(struct hsi_controller *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ struct dentry *dir;
+
+ /* SSI controller */
+ omap_ssi->dir = debugfs_create_dir(dev_name(&ssi->device), NULL);
+ if (!omap_ssi->dir)
+ return -ENOMEM;
+
+ debugfs_create_file("regs", S_IRUGO, omap_ssi->dir, ssi,
+ &ssi_regs_fops);
+ /* SSI GDD (DMA) */
+ dir = debugfs_create_dir("gdd", omap_ssi->dir);
+ if (!dir)
+ goto rback;
+ debugfs_create_file("regs", S_IRUGO, dir, ssi, &ssi_gdd_regs_fops);
+
+ return 0;
+rback:
+ debugfs_remove_recursive(omap_ssi->dir);
+
+ return -ENOMEM;
+}
+
+static void ssi_debug_remove_ctrl(struct hsi_controller *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+
+ debugfs_remove_recursive(omap_ssi->dir);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * FIXME: Horrible HACK needed until we remove the useless wakeline test
+ * in the CMT. To be removed !!!!
+ */
+void ssi_waketest(struct hsi_client *cl, unsigned int enable)
+{
+ struct hsi_port *port = hsi_get_port(cl);
+ struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
+ struct hsi_controller *ssi = to_hsi_controller(port->device.parent);
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+
+ omap_port->wktest = !!enable;
+ if (omap_port->wktest) {
+ pm_runtime_get_sync(ssi->device.parent);
+ writel_relaxed(SSI_WAKE(0),
+ omap_ssi->sys + SSI_SET_WAKE_REG(port->num));
+ } else {
+ writel_relaxed(SSI_WAKE(0),
+ omap_ssi->sys + SSI_CLEAR_WAKE_REG(port->num));
+ pm_runtime_put_sync(ssi->device.parent);
+ }
+}
+EXPORT_SYMBOL_GPL(ssi_waketest);
+
+static void ssi_gdd_complete(struct hsi_controller *ssi, unsigned int lch)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ struct hsi_msg *msg = omap_ssi->gdd_trn[lch].msg;
+ struct hsi_port *port = to_hsi_port(msg->cl->device.parent);
+ struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
+ unsigned int dir;
+ u32 csr;
+ u32 val;
+
+ spin_lock(&omap_ssi->lock);
+
+ val = readl(omap_ssi->sys + SSI_GDD_MPU_IRQ_ENABLE_REG);
+ val &= ~SSI_GDD_LCH(lch);
+ writel_relaxed(val, omap_ssi->sys + SSI_GDD_MPU_IRQ_ENABLE_REG);
+
+ if (msg->ttype == HSI_MSG_READ) {
+ dir = DMA_FROM_DEVICE;
+ val = SSI_DATAAVAILABLE(msg->channel);
+ pm_runtime_put_sync(ssi->device.parent);
+ } else {
+ dir = DMA_TO_DEVICE;
+ val = SSI_DATAACCEPT(msg->channel);
+ /* Keep clocks reference for write pio event */
+ }
+ dma_unmap_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents, dir);
+ csr = readw(omap_ssi->gdd + SSI_GDD_CSR_REG(lch));
+ omap_ssi->gdd_trn[lch].msg = NULL; /* release GDD lch */
+ dev_dbg(&port->device, "DMA completed ch %d ttype %d\n",
+ msg->channel, msg->ttype);
+ spin_unlock(&omap_ssi->lock);
+ if (csr & SSI_CSR_TOUR) { /* Timeout error */
+ msg->status = HSI_STATUS_ERROR;
+ msg->actual_len = 0;
+ spin_lock(&omap_port->lock);
+ list_del(&msg->link); /* Dequeue msg */
+ spin_unlock(&omap_port->lock);
+ msg->complete(msg);
+ return;
+ }
+ spin_lock(&omap_port->lock);
+ val |= readl(omap_ssi->sys + SSI_MPU_ENABLE_REG(port->num, 0));
+ writel_relaxed(val, omap_ssi->sys + SSI_MPU_ENABLE_REG(port->num, 0));
+ spin_unlock(&omap_port->lock);
+
+ msg->status = HSI_STATUS_COMPLETED;
+ msg->actual_len = sg_dma_len(msg->sgt.sgl);
+}
+
+static void ssi_gdd_tasklet(unsigned long dev)
+{
+ struct hsi_controller *ssi = (struct hsi_controller *)dev;
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ void __iomem *sys = omap_ssi->sys;
+ unsigned int lch;
+ u32 status_reg;
+
+ pm_runtime_get_sync(ssi->device.parent);
+
+ status_reg = readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG);
+ for (lch = 0; lch < SSI_MAX_GDD_LCH; lch++) {
+ if (status_reg & SSI_GDD_LCH(lch))
+ ssi_gdd_complete(ssi, lch);
+ }
+ writel_relaxed(status_reg, sys + SSI_GDD_MPU_IRQ_STATUS_REG);
+ status_reg = readl(sys + SSI_GDD_MPU_IRQ_STATUS_REG);
+
+ pm_runtime_put_sync(ssi->device.parent);
+
+ if (status_reg)
+ tasklet_hi_schedule(&omap_ssi->gdd_tasklet);
+ else
+ enable_irq(omap_ssi->gdd_irq);
+
+}
+
+static irqreturn_t ssi_gdd_isr(int irq, void *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+
+ tasklet_hi_schedule(&omap_ssi->gdd_tasklet);
+ disable_irq_nosync(irq);
+
+ return IRQ_HANDLED;
+}
+
+static unsigned long ssi_get_clk_rate(struct hsi_controller *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ unsigned long rate = clk_get_rate(omap_ssi->fck);
+ return rate;
+}
+
+static int ssi_clk_event(struct notifier_block *nb, unsigned long event,
+ void *data)
+{
+ struct omap_ssi_controller *omap_ssi = container_of(nb,
+ struct omap_ssi_controller, fck_nb);
+ struct hsi_controller *ssi = to_hsi_controller(omap_ssi->dev);
+ struct clk_notifier_data *clk_data = data;
+ struct omap_ssi_port *omap_port;
+ int i;
+
+ switch (event) {
+ case PRE_RATE_CHANGE:
+ dev_dbg(&ssi->device, "pre rate change\n");
+
+ for (i = 0; i < ssi->num_ports; i++) {
+ omap_port = omap_ssi->port[i];
+
+ if (!omap_port)
+ continue;
+
+ /* Workaround for SWBREAK + CAwake down race in CMT */
+ tasklet_disable(&omap_port->wake_tasklet);
+
+ /* stop all ssi communication */
+ pinctrl_pm_select_idle_state(omap_port->pdev);
+ udelay(1); /* wait for racing frames */
+ }
+
+ break;
+ case ABORT_RATE_CHANGE:
+ dev_dbg(&ssi->device, "abort rate change\n");
+ /* Fall through */
+ case POST_RATE_CHANGE:
+ dev_dbg(&ssi->device, "post rate change (%lu -> %lu)\n",
+ clk_data->old_rate, clk_data->new_rate);
+ omap_ssi->fck_rate = DIV_ROUND_CLOSEST(clk_data->new_rate, 1000); /* KHz */
+
+ for (i = 0; i < ssi->num_ports; i++) {
+ omap_port = omap_ssi->port[i];
+
+ if (!omap_port)
+ continue;
+
+ omap_ssi_port_update_fclk(ssi, omap_port);
+
+ /* resume ssi communication */
+ pinctrl_pm_select_default_state(omap_port->pdev);
+ tasklet_enable(&omap_port->wake_tasklet);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int ssi_get_iomem(struct platform_device *pd,
+ const char *name, void __iomem **pbase, dma_addr_t *phy)
+{
+ struct resource *mem;
+ void __iomem *base;
+ struct hsi_controller *ssi = platform_get_drvdata(pd);
+
+ mem = platform_get_resource_byname(pd, IORESOURCE_MEM, name);
+ base = devm_ioremap_resource(&ssi->device, mem);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ *pbase = base;
+
+ if (phy)
+ *phy = mem->start;
+
+ return 0;
+}
+
+static int ssi_add_controller(struct hsi_controller *ssi,
+ struct platform_device *pd)
+{
+ struct omap_ssi_controller *omap_ssi;
+ int err;
+
+ omap_ssi = devm_kzalloc(&ssi->device, sizeof(*omap_ssi), GFP_KERNEL);
+ if (!omap_ssi) {
+ dev_err(&pd->dev, "not enough memory for omap ssi\n");
+ return -ENOMEM;
+ }
+
+ err = ida_simple_get(&platform_omap_ssi_ida, 0, 0, GFP_KERNEL);
+ if (err < 0)
+ goto out_err;
+ ssi->id = err;
+
+ ssi->owner = THIS_MODULE;
+ ssi->device.parent = &pd->dev;
+ dev_set_name(&ssi->device, "ssi%d", ssi->id);
+ hsi_controller_set_drvdata(ssi, omap_ssi);
+ omap_ssi->dev = &ssi->device;
+ err = ssi_get_iomem(pd, "sys", &omap_ssi->sys, NULL);
+ if (err < 0)
+ goto out_err;
+ err = ssi_get_iomem(pd, "gdd", &omap_ssi->gdd, NULL);
+ if (err < 0)
+ goto out_err;
+ err = platform_get_irq_byname(pd, "gdd_mpu");
+ if (err < 0) {
+ dev_err(&pd->dev, "GDD IRQ resource missing\n");
+ goto out_err;
+ }
+ omap_ssi->gdd_irq = err;
+ tasklet_init(&omap_ssi->gdd_tasklet, ssi_gdd_tasklet,
+ (unsigned long)ssi);
+ err = devm_request_irq(&ssi->device, omap_ssi->gdd_irq, ssi_gdd_isr,
+ 0, "gdd_mpu", ssi);
+ if (err < 0) {
+ dev_err(&ssi->device, "Request GDD IRQ %d failed (%d)",
+ omap_ssi->gdd_irq, err);
+ goto out_err;
+ }
+
+ omap_ssi->port = devm_kzalloc(&ssi->device,
+ sizeof(struct omap_ssi_port *) * ssi->num_ports, GFP_KERNEL);
+ if (!omap_ssi->port) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ omap_ssi->fck = devm_clk_get(&ssi->device, "ssi_ssr_fck");
+ if (IS_ERR(omap_ssi->fck)) {
+ dev_err(&pd->dev, "Could not acquire clock \"ssi_ssr_fck\": %li\n",
+ PTR_ERR(omap_ssi->fck));
+ err = -ENODEV;
+ goto out_err;
+ }
+
+ omap_ssi->fck_nb.notifier_call = ssi_clk_event;
+ omap_ssi->fck_nb.priority = INT_MAX;
+ clk_notifier_register(omap_ssi->fck, &omap_ssi->fck_nb);
+
+ /* TODO: find register, which can be used to detect context loss */
+ omap_ssi->get_loss = NULL;
+
+ omap_ssi->max_speed = UINT_MAX;
+ spin_lock_init(&omap_ssi->lock);
+ err = hsi_register_controller(ssi);
+
+ if (err < 0)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ ida_simple_remove(&platform_omap_ssi_ida, ssi->id);
+ return err;
+}
+
+static int ssi_hw_init(struct hsi_controller *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ unsigned int i;
+ u32 val;
+ int err;
+
+ err = pm_runtime_get_sync(ssi->device.parent);
+ if (err < 0) {
+ dev_err(&ssi->device, "runtime PM failed %d\n", err);
+ return err;
+ }
+ /* Reseting SSI controller */
+ writel_relaxed(SSI_SOFTRESET, omap_ssi->sys + SSI_SYSCONFIG_REG);
+ val = readl(omap_ssi->sys + SSI_SYSSTATUS_REG);
+ for (i = 0; ((i < 20) && !(val & SSI_RESETDONE)); i++) {
+ msleep(20);
+ val = readl(omap_ssi->sys + SSI_SYSSTATUS_REG);
+ }
+ if (!(val & SSI_RESETDONE)) {
+ dev_err(&ssi->device, "SSI HW reset failed\n");
+ pm_runtime_put_sync(ssi->device.parent);
+ return -EIO;
+ }
+ /* Reseting GDD */
+ writel_relaxed(SSI_SWRESET, omap_ssi->gdd + SSI_GDD_GRST_REG);
+ /* Get FCK rate in KHz */
+ omap_ssi->fck_rate = DIV_ROUND_CLOSEST(ssi_get_clk_rate(ssi), 1000);
+ dev_dbg(&ssi->device, "SSI fck rate %lu KHz\n", omap_ssi->fck_rate);
+ /* Set default PM settings */
+ val = SSI_AUTOIDLE | SSI_SIDLEMODE_SMART | SSI_MIDLEMODE_SMART;
+ writel_relaxed(val, omap_ssi->sys + SSI_SYSCONFIG_REG);
+ omap_ssi->sysconfig = val;
+ writel_relaxed(SSI_CLK_AUTOGATING_ON, omap_ssi->sys + SSI_GDD_GCR_REG);
+ omap_ssi->gdd_gcr = SSI_CLK_AUTOGATING_ON;
+ pm_runtime_put_sync(ssi->device.parent);
+
+ return 0;
+}
+
+static void ssi_remove_controller(struct hsi_controller *ssi)
+{
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+ int id = ssi->id;
+ tasklet_kill(&omap_ssi->gdd_tasklet);
+ hsi_unregister_controller(ssi);
+ clk_notifier_unregister(omap_ssi->fck, &omap_ssi->fck_nb);
+ ida_simple_remove(&platform_omap_ssi_ida, id);
+}
+
+static inline int ssi_of_get_available_ports_count(const struct device_node *np)
+{
+ struct device_node *child;
+ int num = 0;
+
+ for_each_available_child_of_node(np, child)
+ if (of_device_is_compatible(child, "ti,omap3-ssi-port"))
+ num++;
+
+ return num;
+}
+
+static int ssi_remove_ports(struct device *dev, void *c)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+
+ if (!dev->of_node)
+ return 0;
+
+ of_node_clear_flag(dev->of_node, OF_POPULATED);
+ of_device_unregister(pdev);
+
+ return 0;
+}
+
+static int ssi_probe(struct platform_device *pd)
+{
+ struct platform_device *childpdev;
+ struct device_node *np = pd->dev.of_node;
+ struct device_node *child;
+ struct hsi_controller *ssi;
+ int err;
+ int num_ports;
+
+ if (!np) {
+ dev_err(&pd->dev, "missing device tree data\n");
+ return -EINVAL;
+ }
+
+ num_ports = ssi_of_get_available_ports_count(np);
+
+ ssi = hsi_alloc_controller(num_ports, GFP_KERNEL);
+ if (!ssi) {
+ dev_err(&pd->dev, "No memory for controller\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pd, ssi);
+
+ err = ssi_add_controller(ssi, pd);
+ if (err < 0)
+ goto out1;
+
+ pm_runtime_irq_safe(&pd->dev);
+ pm_runtime_enable(&pd->dev);
+
+ err = ssi_hw_init(ssi);
+ if (err < 0)
+ goto out2;
+#ifdef CONFIG_DEBUG_FS
+ err = ssi_debug_add_ctrl(ssi);
+ if (err < 0)
+ goto out2;
+#endif
+
+ for_each_available_child_of_node(np, child) {
+ if (!of_device_is_compatible(child, "ti,omap3-ssi-port"))
+ continue;
+
+ childpdev = of_platform_device_create(child, NULL, &pd->dev);
+ if (!childpdev) {
+ err = -ENODEV;
+ dev_err(&pd->dev, "failed to create ssi controller port\n");
+ goto out3;
+ }
+ }
+
+ dev_info(&pd->dev, "ssi controller %d initialized (%d ports)!\n",
+ ssi->id, num_ports);
+ return err;
+out3:
+ device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
+out2:
+ ssi_remove_controller(ssi);
+out1:
+ platform_set_drvdata(pd, NULL);
+ pm_runtime_disable(&pd->dev);
+
+ return err;
+}
+
+static int ssi_remove(struct platform_device *pd)
+{
+ struct hsi_controller *ssi = platform_get_drvdata(pd);
+
+ /* cleanup of of_platform_populate() call */
+ device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
+
+#ifdef CONFIG_DEBUG_FS
+ ssi_debug_remove_ctrl(ssi);
+#endif
+ ssi_remove_controller(ssi);
+ platform_set_drvdata(pd, NULL);
+
+ pm_runtime_disable(&pd->dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int omap_ssi_runtime_suspend(struct device *dev)
+{
+ struct hsi_controller *ssi = dev_get_drvdata(dev);
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+
+ dev_dbg(dev, "runtime suspend!\n");
+
+ if (omap_ssi->get_loss)
+ omap_ssi->loss_count =
+ omap_ssi->get_loss(ssi->device.parent);
+
+ return 0;
+}
+
+static int omap_ssi_runtime_resume(struct device *dev)
+{
+ struct hsi_controller *ssi = dev_get_drvdata(dev);
+ struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
+
+ dev_dbg(dev, "runtime resume!\n");
+
+ if ((omap_ssi->get_loss) && (omap_ssi->loss_count ==
+ omap_ssi->get_loss(ssi->device.parent)))
+ return 0;
+
+ writel_relaxed(omap_ssi->gdd_gcr, omap_ssi->gdd + SSI_GDD_GCR_REG);
+
+ return 0;
+}
+
+static const struct dev_pm_ops omap_ssi_pm_ops = {
+ SET_RUNTIME_PM_OPS(omap_ssi_runtime_suspend, omap_ssi_runtime_resume,
+ NULL)
+};
+
+#define DEV_PM_OPS (&omap_ssi_pm_ops)
+#else
+#define DEV_PM_OPS NULL
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id omap_ssi_of_match[] = {
+ { .compatible = "ti,omap3-ssi", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, omap_ssi_of_match);
+#else
+#define omap_ssi_of_match NULL
+#endif
+
+static struct platform_driver ssi_pdriver = {
+ .probe = ssi_probe,
+ .remove = ssi_remove,
+ .driver = {
+ .name = "omap_ssi",
+ .pm = DEV_PM_OPS,
+ .of_match_table = omap_ssi_of_match,
+ },
+};
+
+static int __init ssi_init(void) {
+ int ret;
+
+ ret = platform_driver_register(&ssi_pdriver);
+ if (ret)
+ return ret;
+
+ return platform_driver_register(&ssi_port_pdriver);
+}
+module_init(ssi_init);
+
+static void __exit ssi_exit(void) {
+ platform_driver_unregister(&ssi_port_pdriver);
+ platform_driver_unregister(&ssi_pdriver);
+}
+module_exit(ssi_exit);
+
+MODULE_ALIAS("platform:omap_ssi");
+MODULE_AUTHOR("Carlos Chinea <carlos.chinea@nokia.com>");
+MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
+MODULE_DESCRIPTION("Synchronous Serial Interface Driver");
+MODULE_LICENSE("GPL v2");
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/pm_runtime.h>
+#include <linux/delay.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/debugfs.h>
#include "omap_ssi_regs.h"
static inline unsigned int ssi_wakein(struct hsi_port *port)
{
struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
- return gpio_get_value(omap_port->wake_gpio);
+ return gpiod_get_value(omap_port->wake_gpio);
}
#ifdef CONFIG_DEBUG_FS
DEFINE_SIMPLE_ATTRIBUTE(ssi_sst_div_fops, ssi_div_get, ssi_div_set, "%llu\n");
-static int __init ssi_debug_add_port(struct omap_ssi_port *omap_port,
+static int ssi_debug_add_port(struct omap_ssi_port *omap_port,
struct dentry *dir)
{
struct hsi_port *port = to_hsi_port(omap_port->dev);
pm_runtime_get_sync(omap_port->pdev);
spin_lock_bh(&omap_port->lock);
+
+ /* stop all ssi communication */
+ pinctrl_pm_select_idle_state(omap_port->pdev);
+ udelay(1); /* wait for racing frames */
+
/* Stop all DMA transfers */
for (i = 0; i < SSI_MAX_GDD_LCH; i++) {
msg = omap_ssi->gdd_trn[i].msg;
ssi_flush_queue(&omap_port->rxqueue[i], NULL);
}
ssi_flush_queue(&omap_port->brkqueue, NULL);
+
+ /* Resume SSI communication */
+ pinctrl_pm_select_default_state(omap_port->pdev);
+
spin_unlock_bh(&omap_port->lock);
pm_runtime_put_sync(omap_port->pdev);
return IRQ_HANDLED;
}
-static int __init ssi_port_irq(struct hsi_port *port,
+static int ssi_port_irq(struct hsi_port *port,
struct platform_device *pd)
{
struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
return err;
}
-static int __init ssi_wake_irq(struct hsi_port *port,
+static int ssi_wake_irq(struct hsi_port *port,
struct platform_device *pd)
{
struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
int cawake_irq;
int err;
- if (omap_port->wake_gpio == -1) {
+ if (!omap_port->wake_gpio) {
omap_port->wake_irq = -1;
return 0;
}
- cawake_irq = gpio_to_irq(omap_port->wake_gpio);
+ cawake_irq = gpiod_to_irq(omap_port->wake_gpio);
omap_port->wake_irq = cawake_irq;
tasklet_init(&omap_port->wake_tasklet, ssi_wake_tasklet,
return err;
}
-static void __init ssi_queues_init(struct omap_ssi_port *omap_port)
+static void ssi_queues_init(struct omap_ssi_port *omap_port)
{
unsigned int ch;
INIT_LIST_HEAD(&omap_port->brkqueue);
}
-static int __init ssi_port_get_iomem(struct platform_device *pd,
+static int ssi_port_get_iomem(struct platform_device *pd,
const char *name, void __iomem **pbase, dma_addr_t *phy)
{
struct hsi_port *port = platform_get_drvdata(pd);
return 0;
}
-static int __init ssi_port_probe(struct platform_device *pd)
+static int ssi_port_probe(struct platform_device *pd)
{
struct device_node *np = pd->dev.of_node;
struct hsi_port *port;
struct omap_ssi_port *omap_port;
struct hsi_controller *ssi = dev_get_drvdata(pd->dev.parent);
struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi);
- int cawake_gpio = 0;
+ struct gpio_desc *cawake_gpio = NULL;
u32 port_id;
int err;
dev_dbg(&pd->dev, "init ssi port...\n");
- if (!try_module_get(ssi->owner)) {
- dev_err(&pd->dev, "could not increment parent module refcount\n");
- return -ENODEV;
- }
-
if (!ssi->port || !omap_ssi->port) {
dev_err(&pd->dev, "ssi controller not initialized!\n");
err = -ENODEV;
goto error;
}
- err = of_get_named_gpio(np, "ti,ssi-cawake-gpio", 0);
- if (err < 0) {
- dev_err(&pd->dev, "DT data is missing cawake gpio (err=%d)\n",
- err);
- goto error;
- }
- cawake_gpio = err;
-
- err = devm_gpio_request_one(&port->device, cawake_gpio, GPIOF_DIR_IN,
- "cawake");
- if (err) {
- dev_err(&pd->dev, "could not request cawake gpio (err=%d)!\n",
- err);
- err = -ENXIO;
+ cawake_gpio = devm_gpiod_get(&pd->dev, "ti,ssi-cawake", GPIOD_IN);
+ if (IS_ERR(cawake_gpio)) {
+ err = PTR_ERR(cawake_gpio);
+ dev_err(&pd->dev, "couldn't get cawake gpio (err=%d)!\n", err);
goto error;
}
hsi_add_clients_from_dt(port, np);
- dev_info(&pd->dev, "ssi port %u successfully initialized (cawake=%d)\n",
- port_id, cawake_gpio);
+ dev_info(&pd->dev, "ssi port %u successfully initialized\n", port_id);
return 0;
return err;
}
-static int __exit ssi_port_remove(struct platform_device *pd)
+static int ssi_port_remove(struct platform_device *pd)
{
struct hsi_port *port = platform_get_drvdata(pd);
struct omap_ssi_port *omap_port = hsi_port_drvdata(port);
omap_ssi->port[omap_port->port_id] = NULL;
platform_set_drvdata(pd, NULL);
- module_put(ssi->owner);
pm_runtime_disable(&pd->dev);
return 0;
}
+static int ssi_restore_divisor(struct omap_ssi_port *omap_port)
+{
+ writel_relaxed(omap_port->sst.divisor,
+ omap_port->sst_base + SSI_SST_DIVISOR_REG);
+
+ return 0;
+}
+
+void omap_ssi_port_update_fclk(struct hsi_controller *ssi,
+ struct omap_ssi_port *omap_port)
+{
+ /* update divisor */
+ u32 div = ssi_calculate_div(ssi);
+ omap_port->sst.divisor = div;
+ ssi_restore_divisor(omap_port);
+}
+
#ifdef CONFIG_PM
static int ssi_save_port_ctx(struct omap_ssi_port *omap_port)
{
return 0;
}
-static int ssi_restore_divisor(struct omap_ssi_port *omap_port)
-{
- writel_relaxed(omap_port->sst.divisor,
- omap_port->sst_base + SSI_SST_DIVISOR_REG);
-
- return 0;
-}
-
static int omap_ssi_port_runtime_suspend(struct device *dev)
{
struct hsi_port *port = dev_get_drvdata(dev);
#define omap_ssi_port_of_match NULL
#endif
-static struct platform_driver ssi_port_pdriver = {
- .remove = __exit_p(ssi_port_remove),
+struct platform_driver ssi_port_pdriver = {
+ .probe = ssi_port_probe,
+ .remove = ssi_port_remove,
.driver = {
.name = "omap_ssi_port",
.of_match_table = omap_ssi_port_of_match,
.pm = DEV_PM_OPS,
},
};
-
-module_platform_driver_probe(ssi_port_pdriver, ssi_port_probe);
-
-MODULE_ALIAS("platform:omap_ssi_port");
-MODULE_AUTHOR("Carlos Chinea <carlos.chinea@nokia.com>");
-MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
-MODULE_DESCRIPTION("Synchronous Serial Interface Port Driver");
-MODULE_LICENSE("GPL v2");
if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex))
return;
- /* drop page _counts */
+ /* drop page _refcounts */
for (pg = 0; pg < msc->nr_pages; pg++) {
struct page *page = msc_buffer_get_page(msc, pg);
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
-ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
+ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
- roce_gid_mgmt.o
+ roce_gid_mgmt.o mr_pool.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
if (id->device != pd->device)
return -EINVAL;
+ qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
if (IS_ERR(qp))
return PTR_ERR(qp);
if (ret)
goto err;
- if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
+ if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
+ cma_cb_table))
pr_warn("RDMA CMA: failed to add netlink callback\n");
cma_configfs_init();
if (pm_addr->ss_family == AF_INET) {
struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
- if (pm4_addr->sin_addr.s_addr == INADDR_ANY) {
+ if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
struct sockaddr_in *cm4_addr =
(struct sockaddr_in *)cm_addr;
struct sockaddr_in *cm4_outaddr =
if (ret)
pr_err("iw_cm: couldn't init iwpm\n");
- ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS,
+ ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
iwcm_nl_cb_table);
if (ret)
pr_err("iw_cm: couldn't register netlink callbacks\n");
if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+ dev_kfree_skb(skb);
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
--- /dev/null
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <rdma/ib_verbs.h>
+#include <rdma/mr_pool.h>
+
+struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ mr = list_first_entry_or_null(list, struct ib_mr, qp_entry);
+ if (mr) {
+ list_del(&mr->qp_entry);
+ qp->mrs_used++;
+ }
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_mr_pool_get);
+
+void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ list_add(&mr->qp_entry, list);
+ qp->mrs_used--;
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+}
+EXPORT_SYMBOL(ib_mr_pool_put);
+
+int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
+ enum ib_mr_type type, u32 max_num_sg)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+ int ret, i;
+
+ for (i = 0; i < nr; i++) {
+ mr = ib_alloc_mr(qp->pd, type, max_num_sg);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto out;
+ }
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ list_add_tail(&mr->qp_entry, list);
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+ }
+
+ return 0;
+out:
+ ib_mr_pool_destroy(qp, list);
+ return ret;
+}
+EXPORT_SYMBOL(ib_mr_pool_init);
+
+void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ while (!list_empty(list)) {
+ mr = list_first_entry(list, struct ib_mr, qp_entry);
+ list_del(&mr->qp_entry);
+
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+ ib_dereg_mr(mr);
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+}
+EXPORT_SYMBOL(ib_mr_pool_destroy);
struct ibnl_client *client;
int type = nlh->nlmsg_type;
int index = RDMA_NL_GET_CLIENT(type);
- int op = RDMA_NL_GET_OP(type);
+ unsigned int op = RDMA_NL_GET_OP(type);
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
- if (op < 0 || op >= client->nops ||
- !client->cb_table[op].dump)
+ if (op >= client->nops || !client->cb_table[op].dump)
return -EINVAL;
/*
--- /dev/null
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <rdma/mr_pool.h>
+#include <rdma/rw.h>
+
+enum {
+ RDMA_RW_SINGLE_WR,
+ RDMA_RW_MULTI_WR,
+ RDMA_RW_MR,
+ RDMA_RW_SIG_MR,
+};
+
+static bool rdma_rw_force_mr;
+module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
+MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
+
+/*
+ * Check if the device might use memory registration. This is currently only
+ * true for iWarp devices. In the future we can hopefully fine tune this based
+ * on HCA driver input.
+ */
+static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
+{
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (unlikely(rdma_rw_force_mr))
+ return true;
+ return false;
+}
+
+/*
+ * Check if the device will use memory registration for this RW operation.
+ * We currently always use memory registrations for iWarp RDMA READs, and
+ * have a debug option to force usage of MRs.
+ *
+ * XXX: In the future we can hopefully fine tune this based on HCA driver
+ * input.
+ */
+static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+ enum dma_data_direction dir, int dma_nents)
+{
+ if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
+ return true;
+ if (unlikely(rdma_rw_force_mr))
+ return true;
+ return false;
+}
+
+static inline u32 rdma_rw_max_sge(struct ib_device *dev,
+ enum dma_data_direction dir)
+{
+ return dir == DMA_TO_DEVICE ?
+ dev->attrs.max_sge : dev->attrs.max_sge_rd;
+}
+
+static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+{
+ /* arbitrary limit to avoid allocating gigantic resources */
+ return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+}
+
+static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+ struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
+ u32 sg_cnt, u32 offset)
+{
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 nents = min(sg_cnt, pages_per_mr);
+ int count = 0, ret;
+
+ reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
+ if (!reg->mr)
+ return -EAGAIN;
+
+ if (reg->mr->need_inval) {
+ reg->inv_wr.opcode = IB_WR_LOCAL_INV;
+ reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
+ reg->inv_wr.next = ®->reg_wr.wr;
+ count++;
+ } else {
+ reg->inv_wr.next = NULL;
+ }
+
+ ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
+ if (ret < nents) {
+ ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
+ return -EINVAL;
+ }
+
+ reg->reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg->reg_wr.mr = reg->mr;
+ reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(qp->device, port_num))
+ reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
+ count++;
+
+ reg->sge.addr = reg->mr->iova;
+ reg->sge.length = reg->mr->length;
+ return count;
+}
+
+static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ int i, j, ret = 0, count = 0;
+
+ ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
+ if (!ctx->reg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < ctx->nr_ops; i++) {
+ struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL;
+ struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
+ u32 nents = min(sg_cnt, pages_per_mr);
+
+ ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
+ offset);
+ if (ret < 0)
+ goto out_free;
+ count += ret;
+
+ if (prev) {
+ if (reg->mr->need_inval)
+ prev->wr.wr.next = ®->inv_wr;
+ else
+ prev->wr.wr.next = ®->reg_wr.wr;
+ }
+
+ reg->reg_wr.wr.next = ®->wr.wr;
+
+ reg->wr.wr.sg_list = ®->sge;
+ reg->wr.wr.num_sge = 1;
+ reg->wr.remote_addr = remote_addr;
+ reg->wr.rkey = rkey;
+ if (dir == DMA_TO_DEVICE) {
+ reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
+ } else if (!rdma_cap_read_inv(qp->device, port_num)) {
+ reg->wr.wr.opcode = IB_WR_RDMA_READ;
+ } else {
+ reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
+ }
+ count++;
+
+ remote_addr += reg->sge.length;
+ sg_cnt -= nents;
+ for (j = 0; j < nents; j++)
+ sg = sg_next(sg);
+ offset = 0;
+ }
+
+ ctx->type = RDMA_RW_MR;
+ return count;
+
+out_free:
+ while (--i >= 0)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
+ kfree(ctx->reg);
+out:
+ return ret;
+}
+
+static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 max_sge = rdma_rw_max_sge(dev, dir);
+ struct ib_sge *sge;
+ u32 total_len = 0, i, j;
+
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);
+
+ ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
+ if (!ctx->map.sges)
+ goto out;
+
+ ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
+ if (!ctx->map.wrs)
+ goto out_free_sges;
+
+ for (i = 0; i < ctx->nr_ops; i++) {
+ struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
+ u32 nr_sge = min(sg_cnt, max_sge);
+
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->remote_addr = remote_addr + total_len;
+ rdma_wr->rkey = rkey;
+ rdma_wr->wr.sg_list = sge;
+
+ for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
+ rdma_wr->wr.num_sge++;
+
+ sge->addr = ib_sg_dma_address(dev, sg) + offset;
+ sge->length = ib_sg_dma_len(dev, sg) - offset;
+ sge->lkey = qp->pd->local_dma_lkey;
+
+ total_len += sge->length;
+ sge++;
+ sg_cnt--;
+ offset = 0;
+ }
+
+ if (i + 1 < ctx->nr_ops)
+ rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
+ }
+
+ ctx->type = RDMA_RW_MULTI_WR;
+ return ctx->nr_ops;
+
+out_free_sges:
+ kfree(ctx->map.sges);
+out:
+ return -ENOMEM;
+}
+
+static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
+ enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
+
+ ctx->nr_ops = 1;
+
+ ctx->single.sge.lkey = qp->pd->local_dma_lkey;
+ ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
+ ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
+
+ memset(rdma_wr, 0, sizeof(*rdma_wr));
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->wr.sg_list = &ctx->single.sge;
+ rdma_wr->wr.num_sge = 1;
+ rdma_wr->remote_addr = remote_addr;
+ rdma_wr->rkey = rkey;
+
+ ctx->type = RDMA_RW_SINGLE_WR;
+ return 1;
+}
+
+/**
+ * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
+ * @ctx: context to initialize
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist to READ/WRITE from/to
+ * @sg_cnt: number of entries in @sg
+ * @sg_offset: current byte offset into @sg
+ * @remote_addr:remote address to read/write (relative to @rkey)
+ * @rkey: remote key to operate on
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ *
+ * Returns the number of WQEs that will be needed on the workqueue if
+ * successful, or a negative error code.
+ */
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ int ret;
+
+ ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+ if (!ret)
+ return -ENOMEM;
+ sg_cnt = ret;
+
+ /*
+ * Skip to the S/G entry that sg_offset falls into:
+ */
+ for (;;) {
+ u32 len = ib_sg_dma_len(dev, sg);
+
+ if (sg_offset < len)
+ break;
+
+ sg = sg_next(sg);
+ sg_offset -= len;
+ sg_cnt--;
+ }
+
+ ret = -EIO;
+ if (WARN_ON_ONCE(sg_cnt == 0))
+ goto out_unmap_sg;
+
+ if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
+ ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
+ sg_offset, remote_addr, rkey, dir);
+ } else if (sg_cnt > 1) {
+ ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
+ remote_addr, rkey, dir);
+ } else {
+ ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
+ remote_addr, rkey, dir);
+ }
+
+ if (ret < 0)
+ goto out_unmap_sg;
+ return ret;
+
+out_unmap_sg:
+ ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_init);
+
+/**
+ * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * @ctx: context to initialize
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist to READ/WRITE from/to
+ * @sg_cnt: number of entries in @sg
+ * @prot_sg: scatterlist to READ/WRITE protection information from/to
+ * @prot_sg_cnt: number of entries in @prot_sg
+ * @sig_attrs: signature offloading algorithms
+ * @remote_addr:remote address to read/write (relative to @rkey)
+ * @rkey: remote key to operate on
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ *
+ * Returns the number of WQEs that will be needed on the workqueue if
+ * successful, or a negative error code.
+ */
+int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ struct ib_sig_attrs *sig_attrs,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ struct ib_rdma_wr *rdma_wr;
+ struct ib_send_wr *prev_wr = NULL;
+ int count = 0, ret;
+
+ if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
+ pr_err("SG count too large\n");
+ return -EINVAL;
+ }
+
+ ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+ if (!ret)
+ return -ENOMEM;
+ sg_cnt = ret;
+
+ ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+ prot_sg_cnt = ret;
+
+ ctx->type = RDMA_RW_SIG_MR;
+ ctx->nr_ops = 1;
+ ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
+ if (!ctx->sig) {
+ ret = -ENOMEM;
+ goto out_unmap_prot_sg;
+ }
+
+ ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
+ if (ret < 0)
+ goto out_free_ctx;
+ count += ret;
+ prev_wr = &ctx->sig->data.reg_wr.wr;
+
+ if (prot_sg_cnt) {
+ ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
+ prot_sg, prot_sg_cnt, 0);
+ if (ret < 0)
+ goto out_destroy_data_mr;
+ count += ret;
+
+ if (ctx->sig->prot.inv_wr.next)
+ prev_wr->next = &ctx->sig->prot.inv_wr;
+ else
+ prev_wr->next = &ctx->sig->prot.reg_wr.wr;
+ prev_wr = &ctx->sig->prot.reg_wr.wr;
+ } else {
+ ctx->sig->prot.mr = NULL;
+ }
+
+ ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
+ if (!ctx->sig->sig_mr) {
+ ret = -EAGAIN;
+ goto out_destroy_prot_mr;
+ }
+
+ if (ctx->sig->sig_mr->need_inval) {
+ memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
+
+ ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
+ ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
+
+ prev_wr->next = &ctx->sig->sig_inv_wr;
+ prev_wr = &ctx->sig->sig_inv_wr;
+ }
+
+ ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
+ ctx->sig->sig_wr.wr.wr_cqe = NULL;
+ ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
+ ctx->sig->sig_wr.wr.num_sge = 1;
+ ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
+ ctx->sig->sig_wr.sig_attrs = sig_attrs;
+ ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
+ if (prot_sg_cnt)
+ ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
+ prev_wr->next = &ctx->sig->sig_wr.wr;
+ prev_wr = &ctx->sig->sig_wr.wr;
+ count++;
+
+ ctx->sig->sig_sge.addr = 0;
+ ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
+ if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
+ ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
+
+ rdma_wr = &ctx->sig->data.wr;
+ rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
+ rdma_wr->wr.num_sge = 1;
+ rdma_wr->remote_addr = remote_addr;
+ rdma_wr->rkey = rkey;
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ prev_wr->next = &rdma_wr->wr;
+ prev_wr = &rdma_wr->wr;
+ count++;
+
+ return count;
+
+out_destroy_prot_mr:
+ if (prot_sg_cnt)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+out_destroy_data_mr:
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+out_free_ctx:
+ kfree(ctx->sig);
+out_unmap_prot_sg:
+ ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+out_unmap_sg:
+ ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
+
+/*
+ * Now that we are going to post the WRs we can update the lkey and need_inval
+ * state on the MRs. If we were doing this at init time, we would get double
+ * or missing invalidations if a context was initialized but not actually
+ * posted.
+ */
+static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
+{
+ reg->mr->need_inval = need_inval;
+ ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
+ reg->reg_wr.key = reg->mr->lkey;
+ reg->sge.lkey = reg->mr->lkey;
+}
+
+/**
+ * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
+ * @ctx: context to operate on
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @cqe: completion queue entry for the last WR
+ * @chain_wr: WR to append to the posted chain
+ *
+ * Return the WR chain for the set of RDMA READ/WRITE operations described by
+ * @ctx, as well as any memory registration operations needed. If @chain_wr
+ * is non-NULL the WR it points to will be appended to the chain of WRs posted.
+ * If @chain_wr is not set @cqe must be set so that the caller gets a
+ * completion notification.
+ */
+struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct ib_send_wr *first_wr, *last_wr;
+ int i;
+
+ switch (ctx->type) {
+ case RDMA_RW_SIG_MR:
+ rdma_rw_update_lkey(&ctx->sig->data, true);
+ if (ctx->sig->prot.mr)
+ rdma_rw_update_lkey(&ctx->sig->prot, true);
+
+ ctx->sig->sig_mr->need_inval = true;
+ ib_update_fast_reg_key(ctx->sig->sig_mr,
+ ib_inc_rkey(ctx->sig->sig_mr->lkey));
+ ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
+
+ if (ctx->sig->data.inv_wr.next)
+ first_wr = &ctx->sig->data.inv_wr;
+ else
+ first_wr = &ctx->sig->data.reg_wr.wr;
+ last_wr = &ctx->sig->data.wr.wr;
+ break;
+ case RDMA_RW_MR:
+ for (i = 0; i < ctx->nr_ops; i++) {
+ rdma_rw_update_lkey(&ctx->reg[i],
+ ctx->reg[i].wr.wr.opcode !=
+ IB_WR_RDMA_READ_WITH_INV);
+ }
+
+ if (ctx->reg[0].inv_wr.next)
+ first_wr = &ctx->reg[0].inv_wr;
+ else
+ first_wr = &ctx->reg[0].reg_wr.wr;
+ last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
+ break;
+ case RDMA_RW_MULTI_WR:
+ first_wr = &ctx->map.wrs[0].wr;
+ last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
+ break;
+ case RDMA_RW_SINGLE_WR:
+ first_wr = &ctx->single.wr.wr;
+ last_wr = &ctx->single.wr.wr;
+ break;
+ default:
+ BUG();
+ }
+
+ if (chain_wr) {
+ last_wr->next = chain_wr;
+ } else {
+ last_wr->wr_cqe = cqe;
+ last_wr->send_flags |= IB_SEND_SIGNALED;
+ }
+
+ return first_wr;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_wrs);
+
+/**
+ * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
+ * @ctx: context to operate on
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @cqe: completion queue entry for the last WR
+ * @chain_wr: WR to append to the posted chain
+ *
+ * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
+ * any memory registration operations needed. If @chain_wr is non-NULL the
+ * WR it points to will be appended to the chain of WRs posted. If @chain_wr
+ * is not set @cqe must be set so that the caller gets a completion
+ * notification.
+ */
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct ib_send_wr *first_wr, *bad_wr;
+
+ first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
+ return ib_post_send(qp, first_wr, &bad_wr);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_post);
+
+/**
+ * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
+ * @ctx: context to release
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist that was used for the READ/WRITE
+ * @sg_cnt: number of entries in @sg
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ */
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
+{
+ int i;
+
+ switch (ctx->type) {
+ case RDMA_RW_MR:
+ for (i = 0; i < ctx->nr_ops; i++)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
+ kfree(ctx->reg);
+ break;
+ case RDMA_RW_MULTI_WR:
+ kfree(ctx->map.wrs);
+ kfree(ctx->map.sges);
+ break;
+ case RDMA_RW_SINGLE_WR:
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_destroy);
+
+/**
+ * rdma_rw_ctx_destroy_signature - release all resources allocated by
+ * rdma_rw_ctx_init_signature
+ * @ctx: context to release
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist that was used for the READ/WRITE
+ * @sg_cnt: number of entries in @sg
+ * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
+ * @prot_sg_cnt: number of entries in @prot_sg
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ */
+void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ enum dma_data_direction dir)
+{
+ if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
+ return;
+
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+
+ if (ctx->sig->prot.mr) {
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+ ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+ }
+
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
+ kfree(ctx->sig);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
+
+void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
+{
+ u32 factor;
+
+ WARN_ON_ONCE(attr->port_num == 0);
+
+ /*
+ * Each context needs at least one RDMA READ or WRITE WR.
+ *
+ * For some hardware we might need more, eventually we should ask the
+ * HCA driver for a multiplier here.
+ */
+ factor = 1;
+
+ /*
+ * If the devices needs MRs to perform RDMA READ or WRITE operations,
+ * we'll need two additional MRs for the registrations and the
+ * invalidation.
+ */
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+ factor += 6; /* (inv + reg) * (data + prot + sig) */
+ else if (rdma_rw_can_use_mr(dev, attr->port_num))
+ factor += 2; /* inv + reg */
+
+ attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
+
+ /*
+ * But maybe we were just too high in the sky and the device doesn't
+ * even support all we need, and we'll have to live with what we get..
+ */
+ attr->cap.max_send_wr =
+ min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
+}
+
+int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 nr_mrs = 0, nr_sig_mrs = 0;
+ int ret = 0;
+
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
+ nr_sig_mrs = attr->cap.max_rdma_ctxs;
+ nr_mrs = attr->cap.max_rdma_ctxs * 2;
+ } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
+ nr_mrs = attr->cap.max_rdma_ctxs;
+ }
+
+ if (nr_mrs) {
+ ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
+ IB_MR_TYPE_MEM_REG,
+ rdma_rw_fr_page_list_len(dev));
+ if (ret) {
+ pr_err("%s: failed to allocated %d MRs\n",
+ __func__, nr_mrs);
+ return ret;
+ }
+ }
+
+ if (nr_sig_mrs) {
+ ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
+ IB_MR_TYPE_SIGNATURE, 2);
+ if (ret) {
+ pr_err("%s: failed to allocated %d SIG MRs\n",
+ __func__, nr_mrs);
+ goto out_free_rdma_mrs;
+ }
+ }
+
+ return 0;
+
+out_free_rdma_mrs:
+ ib_mr_pool_destroy(qp, &qp->rdma_mrs);
+ return ret;
+}
+
+void rdma_rw_cleanup_mrs(struct ib_qp *qp)
+{
+ ib_mr_pool_destroy(qp, &qp->sig_mrs);
+ ib_mr_pool_destroy(qp, &qp->rdma_mrs);
+}
data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
if (!data) {
- kfree_skb(skb);
+ nlmsg_free(skb);
return -EMSGSIZE;
}
goto err3;
}
- if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS,
+ if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
ib_sa_cb_table)) {
pr_err("Failed to add netlink callback\n");
ret = -EINVAL;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV)) {
+ IB_QP_CREATE_MANAGED_RECV |
+ IB_QP_CREATE_SCATTER_FCS)) {
ret = -EINVAL;
goto err_put;
}
if (cmd.comp_mask)
return -EINVAL;
- if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
- !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ if (!capable(CAP_NET_RAW))
return -EPERM;
if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
resp.hca_core_clock = attr.hca_core_clock;
resp.response_length += sizeof(resp.hca_core_clock);
+ if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
+ goto end;
+
+ resp.device_cap_flags_ex = attr.device_cap_flags;
+ resp.response_length += sizeof(resp.device_cap_flags_ex);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+#include <rdma/rw.h>
#include "core_priv.h"
}
EXPORT_SYMBOL(ib_open_qp);
+static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct ib_qp *real_qp = qp;
+
+ qp->event_handler = __ib_shared_qp_event_handler;
+ qp->qp_context = qp;
+ qp->pd = NULL;
+ qp->send_cq = qp->recv_cq = NULL;
+ qp->srq = NULL;
+ qp->xrcd = qp_init_attr->xrcd;
+ atomic_inc(&qp_init_attr->xrcd->usecnt);
+ INIT_LIST_HEAD(&qp->open_list);
+
+ qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+ qp_init_attr->qp_context);
+ if (!IS_ERR(qp))
+ __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ else
+ real_qp->device->destroy_qp(real_qp);
+ return qp;
+}
+
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
- struct ib_qp *qp, *real_qp;
- struct ib_device *device;
+ struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
+ struct ib_qp *qp;
+ int ret;
+
+ /*
+ * If the callers is using the RDMA API calculate the resources
+ * needed for the RDMA READ/WRITE operations.
+ *
+ * Note that these callers need to pass in a port number.
+ */
+ if (qp_init_attr->cap.max_rdma_ctxs)
+ rdma_rw_init_qp(device, qp_init_attr);
- device = pd ? pd->device : qp_init_attr->xrcd->device;
qp = device->create_qp(pd, qp_init_attr, NULL);
+ if (IS_ERR(qp))
+ return qp;
+
+ qp->device = device;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = qp_init_attr->qp_type;
+
+ atomic_set(&qp->usecnt, 0);
+ qp->mrs_used = 0;
+ spin_lock_init(&qp->mr_lock);
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
+ if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
+ return ib_create_xrc_qp(qp, qp_init_attr);
+
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
+ if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+ qp->recv_cq = NULL;
+ qp->srq = NULL;
+ } else {
+ qp->recv_cq = qp_init_attr->recv_cq;
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ qp->srq = qp_init_attr->srq;
+ if (qp->srq)
+ atomic_inc(&qp_init_attr->srq->usecnt);
+ }
- if (!IS_ERR(qp)) {
- qp->device = device;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = qp_init_attr->qp_type;
-
- atomic_set(&qp->usecnt, 0);
- if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
- qp->event_handler = __ib_shared_qp_event_handler;
- qp->qp_context = qp;
- qp->pd = NULL;
- qp->send_cq = qp->recv_cq = NULL;
- qp->srq = NULL;
- qp->xrcd = qp_init_attr->xrcd;
- atomic_inc(&qp_init_attr->xrcd->usecnt);
- INIT_LIST_HEAD(&qp->open_list);
-
- real_qp = qp;
- qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
- qp_init_attr->qp_context);
- if (!IS_ERR(qp))
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
- else
- real_qp->device->destroy_qp(real_qp);
- } else {
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
- qp->recv_cq = NULL;
- qp->srq = NULL;
- } else {
- qp->recv_cq = qp_init_attr->recv_cq;
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- qp->srq = qp_init_attr->srq;
- if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
- }
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->xrcd = NULL;
- qp->pd = pd;
- qp->send_cq = qp_init_attr->send_cq;
- qp->xrcd = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
- atomic_inc(&pd->usecnt);
- atomic_inc(&qp_init_attr->send_cq->usecnt);
+ if (qp_init_attr->cap.max_rdma_ctxs) {
+ ret = rdma_rw_init_mrs(qp, qp_init_attr);
+ if (ret) {
+ pr_err("failed to init MR pool ret= %d\n", ret);
+ ib_destroy_qp(qp);
+ qp = ERR_PTR(ret);
}
}
struct ib_srq *srq;
int ret;
+ WARN_ON_ONCE(qp->mrs_used > 0);
+
if (atomic_read(&qp->usecnt))
return -EBUSY;
rcq = qp->recv_cq;
srq = qp->srq;
+ if (!qp->uobject)
+ rdma_rw_cleanup_mrs(qp);
+
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
}
return mr;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
}
return mr;
* @mr: memory region
* @sg: dma mapped scatterlist
* @sg_nents: number of entries in sg
+ * @sg_offset: offset in bytes into sg
* @page_size: page vector desired page size
*
* Constraints:
* After this completes successfully, the memory region
* is ready for registration.
*/
-int ib_map_mr_sg(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int page_size)
+int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset, unsigned int page_size)
{
if (unlikely(!mr->device->map_mr_sg))
return -ENOSYS;
mr->page_size = page_size;
- return mr->device->map_mr_sg(mr, sg, sg_nents);
+ return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
}
EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
+ * @sg_offset_p: IN: start offset in bytes into sg
+ * OUT: offset in bytes for element n of the sg of the first
+ * byte that has not been processed where n is the return
+ * value of this function.
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
* Returns the number of sg elements that were assigned to
* a page vector.
*/
-int ib_sg_to_pages(struct ib_mr *mr,
- struct scatterlist *sgl,
- int sg_nents,
- int (*set_page)(struct ib_mr *, u64))
+int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
+ unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
{
struct scatterlist *sg;
u64 last_end_dma_addr = 0;
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
- mr->iova = sg_dma_address(&sgl[0]);
+ if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
+ return -EINVAL;
+
+ mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
mr->length = 0;
for_each_sg(sgl, sg, sg_nents, i) {
- u64 dma_addr = sg_dma_address(sg);
- unsigned int dma_len = sg_dma_len(sg);
+ u64 dma_addr = sg_dma_address(sg) + sg_offset;
+ u64 prev_addr = dma_addr;
+ unsigned int dma_len = sg_dma_len(sg) - sg_offset;
u64 end_dma_addr = dma_addr + dma_len;
u64 page_addr = dma_addr & page_mask;
do {
ret = set_page(mr, page_addr);
- if (unlikely(ret < 0))
- return i ? : ret;
+ if (unlikely(ret < 0)) {
+ sg_offset = prev_addr - sg_dma_address(sg);
+ mr->length += prev_addr - dma_addr;
+ if (sg_offset_p)
+ *sg_offset_p = sg_offset;
+ return i || sg_offset ? i : ret;
+ }
+ prev_addr = page_addr;
next_page:
page_addr += mr->page_size;
} while (page_addr < end_dma_addr);
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
last_page_off = end_dma_addr & ~page_mask;
+
+ sg_offset = 0;
}
+ if (sg_offset_p)
+ *sg_offset_p = 0;
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
return 0;
}
-static int iwch_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
struct iwch_mr *mhp = to_iwch_mr(ibmr);
mhp->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
}
static int iwch_destroy_qp(struct ib_qp *ib_qp)
static int mpa_rev = 2;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
- "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+ "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
" compliant (default=2)");
static int markers_enabled;
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(unsigned long arg);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;
+static void deref_cm_id(struct c4iw_ep_common *epc)
+{
+ epc->cm_id->rem_ref(epc->cm_id);
+ epc->cm_id = NULL;
+ set_bit(CM_ID_DEREFED, &epc->history);
+}
+
+static void ref_cm_id(struct c4iw_ep_common *epc)
+{
+ set_bit(CM_ID_REFED, &epc->history);
+ epc->cm_id->add_ref(epc->cm_id);
+}
+
static void deref_qp(struct c4iw_ep *ep)
{
c4iw_qp_rem_ref(&ep->com.qp->ibqp);
clear_bit(QP_REFERENCED, &ep->com.flags);
+ set_bit(QP_DEREFED, &ep->com.history);
}
static void ref_qp(struct c4iw_ep *ep)
{
set_bit(QP_REFERENCED, &ep->com.flags);
+ set_bit(QP_REFED, &ep->com.history);
c4iw_qp_add_ref(&ep->com.qp->ibqp);
}
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
if (error < 0)
kfree_skb(skb);
+ else if (error == NET_XMIT_DROP)
+ return -ENOMEM;
return error < 0 ? error : 0;
}
return epc;
}
+static void remove_ep_tid(struct c4iw_ep *ep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->com.dev->lock, flags);
+ _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+}
+
+static void insert_ep_tid(struct c4iw_ep *ep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->com.dev->lock, flags);
+ _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0);
+ spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+}
+
+/*
+ * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
+ */
+static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
+{
+ struct c4iw_ep *ep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ ep = idr_find(&dev->hwtid_idr, tid);
+ if (ep)
+ c4iw_get_ep(&ep->com);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return ep;
+}
+
+/*
+ * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
+ */
+static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
+ unsigned int stid)
+{
+ struct c4iw_listen_ep *ep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ ep = idr_find(&dev->stid_idr, stid);
+ if (ep)
+ c4iw_get_ep(&ep->com);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return ep;
+}
+
void _c4iw_free_ep(struct kref *kref)
{
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
- remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
+ if (ep->mpa_skb)
+ kfree_skb(ep->mpa_skb);
}
kfree(ep);
}
static void release_ep_resources(struct c4iw_ep *ep)
{
set_bit(RELEASE_RESOURCES, &ep->com.flags);
+
+ /*
+ * If we have a hwtid, then remove it from the idr table
+ * so lookups will no longer find this endpoint. Otherwise
+ * we have a race where one thread finds the ep ptr just
+ * before the other thread is freeing the ep memory.
+ */
+ if (ep->hwtid != -1)
+ remove_ep_tid(ep);
c4iw_put_ep(&ep->com);
}
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p\n", __func__, handle);
+ pr_err(MOD "ARP failure\n");
kfree_skb(skb);
}
+static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
+{
+ pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
+}
+
+enum {
+ NUM_FAKE_CPLS = 2,
+ FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
+ FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
+};
+
+static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+
+ ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
+ release_ep_resources(ep);
+ return 0;
+}
+
+static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+
+ ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
+ c4iw_put_ep(&ep->parent_ep->com);
+ release_ep_resources(ep);
+ return 0;
+}
+
+/*
+ * Fake up a special CPL opcode and call sched() so process_work() will call
+ * _put_ep_safe() in a safe context to free the ep resources. This is needed
+ * because ARP error handlers are called in an ATOMIC context, and
+ * _c4iw_free_ep() needs to block.
+ */
+static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
+ int cpl)
+{
+ struct cpl_act_establish *rpl = cplhdr(skb);
+
+ /* Set our special ARP_FAILURE opcode */
+ rpl->ot.opcode = cpl;
+
+ /*
+ * Save ep in the skb->cb area, after where sched() will save the dev
+ * ptr.
+ */
+ *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
+ sched(ep->com.dev, skb);
+}
+
+/* Handle an ARP failure for an accept */
+static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep = handle;
+
+ pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ ep->hwtid);
+
+ __state_set(&ep->com, DEAD);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
+}
+
/*
* Handle an ARP failure for an active open.
*/
struct c4iw_ep *ep = handle;
printk(KERN_ERR MOD "ARP failure during connect\n");
- kfree_skb(skb);
connect_reply_upcall(ep, -EHOSTUNREACH);
- state_set(&ep->com, DEAD);
+ __state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)&ep->com.local_addr;
}
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
- dst_release(ep->dst);
- cxgb4_l2t_release(ep->l2t);
- c4iw_put_ep(&ep->com);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
}
/*
*/
static void abort_arp_failure(void *handle, struct sk_buff *skb)
{
- struct c4iw_rdev *rdev = handle;
+ int ret;
+ struct c4iw_ep *ep = handle;
+ struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
PDBG("%s rdev %p\n", __func__, rdev);
req->cmd = CPL_ABORT_NO_RST;
- c4iw_ofld_send(rdev, skb);
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret) {
+ __state_set(&ep->com, DEAD);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
+ }
}
-static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
+static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
{
unsigned int flowclen = 80;
struct fw_flowc_wr *flowc;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- c4iw_ofld_send(&ep->com.dev->rdev, skb);
+ return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
+ t4_set_arp_err_handler(skb, ep, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
return ret;
}
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
- u8 mpa_rev_to_use)
+static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+ u8 mpa_rev_to_use)
{
- int mpalen, wrlen;
+ int mpalen, wrlen, ret;
struct fw_ofld_tx_data_wr *req;
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
skb = get_skb(skb, wrlen, GFP_KERNEL);
if (!skb) {
connect_reply_upcall(ep, -ENOMEM);
- return;
+ return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ if (ret)
+ return ret;
start_ep_timer(ep);
__state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
ep->snd_seq += mpalen;
- return;
+ return ret;
}
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
*/
skb_get(skb);
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
ep->snd_seq += mpalen;
* Function fw4_ack() will deref it.
*/
skb_get(skb);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
__state_set(&ep->com, MPA_REP_SENT);
ep->snd_seq += mpalen;
unsigned int tid = GET_TID(req);
unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
struct tid_info *t = dev->rdev.lldi.tids;
+ int ret;
ep = lookup_atid(t, atid);
/* setup the hwtid for this connection */
ep->hwtid = tid;
cxgb4_insert_tid(t, ep, tid);
- insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
+ insert_ep_tid(ep);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
set_bit(ACT_ESTAB, &ep->com.history);
/* start MPA negotiation */
- send_flowc(ep, NULL);
+ ret = send_flowc(ep, NULL);
+ if (ret)
+ goto err;
if (ep->retry_with_mpa_v1)
- send_mpa_req(ep, skb, 1);
+ ret = send_mpa_req(ep, skb, 1);
else
- send_mpa_req(ep, skb, mpa_rev);
+ ret = send_mpa_req(ep, skb, mpa_rev);
+ if (ret)
+ goto err;
mutex_unlock(&ep->com.mutex);
return 0;
+err:
+ mutex_unlock(&ep->com.mutex);
+ connect_reply_upcall(ep, -ENOMEM);
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ return 0;
}
static void close_complete_upcall(struct c4iw_ep *ep, int status)
PDBG("close complete delivered ep %p cm_id %p tid %u\n",
ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
+ deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
}
}
-static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- __state_set(&ep->com, ABORTING);
- set_bit(ABORT_CONN, &ep->com.history);
- return send_abort(ep, skb, gfp);
-}
-
static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
+ deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
}
}
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- if (status < 0) {
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- }
+ if (status < 0)
+ deref_cm_id(&ep->com);
}
static int connect_request_upcall(struct c4iw_ep *ep)
#define RELAXED_IRD_NEGOTIATION 1
+/*
+ * process_mpa_reply - process streaming mode MPA reply
+ *
+ * Returns:
+ *
+ * 0 upon success indicating a connect request was delivered to the ULP
+ * or the mpa request is incomplete but valid so far.
+ *
+ * 1 if a failure requires the caller to close the connection.
+ *
+ * 2 if a failure requires the caller to abort the connection.
+ */
static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- /*
- * Stop mpa timer. If it expired, then
- * we ignore the MPA reply. process_timeout()
- * will abort the connection.
- */
- if (stop_ep_timer(ep))
- return 0;
-
/*
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
err = -EINVAL;
- goto err;
+ goto err_stop_timer;
}
/*
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
plen = ntohs(mpa->private_data_size);
*/
if (plen > MPA_MAX_PRIVATE_DATA) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
/*
*/
if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
ep->plen = (u8) plen;
if (mpa->flags & MPA_REJECT) {
err = -ECONNREFUSED;
- goto err;
+ goto err_stop_timer;
}
+ /*
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
+ */
+ if (stop_ep_timer(ep))
+ return 0;
+
/*
* If we get here we have accumulated the entire mpa
* start reply message including private data. And
goto out;
}
goto out;
+err_stop_timer:
+ stop_ep_timer(ep);
err:
- __state_set(&ep->com, ABORTING);
- send_abort(ep, skb, GFP_KERNEL);
+ disconnect = 2;
out:
connect_reply_upcall(ep, err);
return disconnect;
}
-static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+/*
+ * process_mpa_request - process streaming mode MPA request
+ *
+ * Returns:
+ *
+ * 0 upon success indicating a connect request was delivered to the ULP
+ * or the mpa request is incomplete but valid so far.
+ *
+ * 1 if a failure requires the caller to close the connection.
+ *
+ * 2 if a failure requires the caller to abort the connection.
+ */
+static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
struct mpa_v2_conn_params *mpa_v2_params;
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
+ goto err_stop_timer;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
* We'll continue process when more data arrives.
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
+ return 0;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
if (mpa->revision > mpa_rev) {
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
+ goto err_stop_timer;
}
- if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
+ goto err_stop_timer;
plen = ntohs(mpa->private_data_size);
/*
* Fail if there's too much private data.
*/
- if (plen > MPA_MAX_PRIVATE_DATA) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (plen > MPA_MAX_PRIVATE_DATA)
+ goto err_stop_timer;
/*
* If plen does not account for pkt size
*/
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
+ goto err_stop_timer;
ep->plen = (u8) plen;
/*
* If we don't have all the pdata yet, then bail.
*/
if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
+ return 0;
/*
* If we get here we have accumulated the entire mpa
ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
ep->mpa_attr.p2p_type);
- /*
- * If the endpoint timer already expired, then we ignore
- * the start request. process_timeout() will abort
- * the connection.
- */
- if (!stop_ep_timer(ep)) {
- __state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- mutex_lock_nested(&ep->parent_ep->com.mutex,
- SINGLE_DEPTH_NESTING);
- if (ep->parent_ep->com.state != DEAD) {
- if (connect_request_upcall(ep))
- abort_connection(ep, skb, GFP_KERNEL);
- } else {
- abort_connection(ep, skb, GFP_KERNEL);
- }
- mutex_unlock(&ep->parent_ep->com.mutex);
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ goto err_unlock_parent;
+ } else {
+ goto err_unlock_parent;
}
- return;
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ return 0;
+
+err_unlock_parent:
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ goto err_out;
+err_stop_timer:
+ (void)stop_ep_timer(ep);
+err_out:
+ return 2;
}
static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_rx_data *hdr = cplhdr(skb);
unsigned int dlen = ntohs(hdr->len);
unsigned int tid = GET_TID(hdr);
- struct tid_info *t = dev->rdev.lldi.tids;
__u8 status = hdr->status;
int disconnect = 0;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
break;
case MPA_REQ_WAIT:
ep->rcv_seq += dlen;
- process_mpa_request(ep, skb);
+ disconnect = process_mpa_request(ep, skb);
break;
case FPDU_MODE: {
struct c4iw_qp_attributes attrs;
}
mutex_unlock(&ep->com.mutex);
if (disconnect)
- c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
return 0;
}
struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
int release = 0;
unsigned int tid = GET_TID(rpl);
- struct tid_info *t = dev->rdev.lldi.tids;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
if (!ep) {
printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
return 0;
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
-static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
+static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
{
struct sk_buff *skb;
struct fw_ofld_connection_wr *req;
req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
set_bit(ACT_OFLD_CONN, &ep->com.history);
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
/*
PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
+ c4iw_init_wr_wait(&ep->com.wr_wait);
/*
* Allocate an active TID to initiate a TCP connection.
struct sockaddr_in *ra;
struct sockaddr_in6 *la6;
struct sockaddr_in6 *ra6;
+ int ret = 0;
ep = lookup_atid(t, atid);
la = (struct sockaddr_in *)&ep->com.local_addr;
mutex_unlock(&dev->rdev.stats.lock);
if (ep->com.local_addr.ss_family == AF_INET &&
dev->rdev.lldi.enable_fw_ofld_conn) {
- send_fw_act_open_req(ep,
- TID_TID_G(AOPEN_ATID_G(
- ntohl(rpl->atid_status))));
+ ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
+ ntohl(rpl->atid_status))));
+ if (ret)
+ goto fail;
return 0;
}
break;
break;
}
+fail:
connect_reply_upcall(ep, status2errno(status));
state_set(&ep->com, DEAD);
static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_pass_open_rpl *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int stid = GET_TID(rpl);
- struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+ struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
PDBG("%s stid %d lookup failure!\n", __func__, stid);
PDBG("%s ep %p status %d error %d\n", __func__, ep,
rpl->status, status2errno(rpl->status));
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
-
+ c4iw_put_ep(&ep->com);
out:
return 0;
}
static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int stid = GET_TID(rpl);
- struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+ struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
PDBG("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+ c4iw_put_ep(&ep->com);
return 0;
}
-static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
- struct cpl_pass_accept_req *req)
+static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
+ struct cpl_pass_accept_req *req)
{
struct cpl_pass_accept_rpl *rpl;
unsigned int mtu_idx;
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
- return;
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
unsigned short hdrs;
u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
- parent_ep = lookup_stid(t, stid);
+ parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
PDBG("%s connect request on invalid stid %d\n", __func__, stid);
goto reject;
init_timer(&child_ep->timer);
cxgb4_insert_tid(t, child_ep, hwtid);
- insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
- accept_cr(child_ep, skb, req);
- set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
+ insert_ep_tid(child_ep);
+ if (accept_cr(child_ep, skb, req)) {
+ c4iw_put_ep(&parent_ep->com);
+ release_ep_resources(child_ep);
+ } else {
+ set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
+ }
if (iptype == 6) {
sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
goto out;
reject:
reject_cr(dev, hwtid, skb);
+ if (parent_ep)
+ c4iw_put_ep(&parent_ep->com);
out:
return 0;
}
{
struct c4iw_ep *ep;
struct cpl_pass_establish *req = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
+ int ret;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
set_emss(ep, ntohs(req->tcp_opt));
dst_confirm(ep->dst);
- state_set(&ep->com, MPA_REQ_WAIT);
+ mutex_lock(&ep->com.mutex);
+ ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
- send_flowc(ep, skb);
set_bit(PASS_ESTAB, &ep->com.history);
+ ret = send_flowc(ep, skb);
+ mutex_unlock(&ep->com.mutex);
+ if (ret)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
return 0;
}
struct c4iw_qp_attributes attrs;
int disconnect = 1;
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(hdr);
int ret;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
dst_confirm(ep->dst);
c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
if (is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
- return 0;
+ goto deref_ep;
}
PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
ep->com.state);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case CONNECTING:
+ c4iw_put_ep(&ep->parent_ep->com);
break;
case MPA_REQ_WAIT:
(void)stop_ep_timer(ep);
case DEAD:
PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
- return 0;
+ goto deref_ep;
default:
BUG_ON(1);
break;
c4iw_reconnect(ep);
}
+deref_ep:
+ c4iw_put_ep(&ep->com);
+ /* Dereferencing ep, referenced in peer_abort_intr() */
+ c4iw_put_ep(&ep->com);
return 0;
}
struct c4iw_qp_attributes attrs;
struct cpl_close_con_rpl *rpl = cplhdr(skb);
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(rpl);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
mutex_lock(&ep->com.mutex);
+ set_bit(CLOSE_CON_RPL, &ep->com.history);
switch (ep->com.state) {
case CLOSING:
__state_set(&ep->com, MORIBUND);
mutex_unlock(&ep->com.mutex);
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_rdma_terminate *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(rpl);
struct c4iw_ep *ep;
struct c4iw_qp_attributes attrs;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
BUG_ON(!ep);
if (ep && ep->com.qp) {
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
} else
printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+ c4iw_put_ep(&ep->com);
return 0;
}
struct cpl_fw4_ack *hdr = cplhdr(skb);
u8 credits = hdr->credits;
unsigned int tid = GET_TID(hdr);
- struct tid_info *t = dev->rdev.lldi.tids;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
if (credits == 0) {
PDBG("%s 0 credit ack ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, state_read(&ep->com));
- return 0;
+ goto out;
}
dst_confirm(ep->dst);
state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
+ mutex_lock(&ep->com.mutex);
+ if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
+ stop_ep_timer(ep);
+ mutex_unlock(&ep->com.mutex);
}
+out:
+ c4iw_put_ep(&ep->com);
return 0;
}
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
- if (ep->com.state == DEAD) {
+ if (ep->com.state != MPA_REQ_RCVD) {
mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return -ECONNRESET;
}
set_bit(ULP_REJECT, &ep->com.history);
- BUG_ON(ep->com.state != MPA_REQ_RCVD);
if (mpa_rev == 0)
- abort_connection(ep, NULL, GFP_KERNEL);
+ disconnect = 2;
else {
err = send_mpa_reject(ep, pdata, pdata_len);
disconnect = 1;
}
mutex_unlock(&ep->com.mutex);
- if (disconnect)
- err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ if (disconnect) {
+ stop_ep_timer(ep);
+ err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
+ }
c4iw_put_ep(&ep->com);
return 0;
}
struct c4iw_ep *ep = to_ep(cm_id);
struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
+ int abort = 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
- if (ep->com.state == DEAD) {
+ if (ep->com.state != MPA_REQ_RCVD) {
err = -ECONNRESET;
- goto err;
+ goto err_out;
}
- BUG_ON(ep->com.state != MPA_REQ_RCVD);
BUG_ON(!qp);
set_bit(ULP_ACCEPT, &ep->com.history);
if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
(conn_param->ird > cur_max_read_depth(ep->com.dev))) {
- abort_connection(ep, NULL, GFP_KERNEL);
err = -EINVAL;
- goto err;
+ goto err_abort;
}
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
ep->ord = conn_param->ord;
send_mpa_reject(ep, conn_param->private_data,
conn_param->private_data_len);
- abort_connection(ep, NULL, GFP_KERNEL);
err = -ENOMEM;
- goto err;
+ goto err_abort;
}
}
if (conn_param->ird < ep->ord) {
ep->ord <= h->rdev.lldi.max_ordird_qp) {
conn_param->ird = ep->ord;
} else {
- abort_connection(ep, NULL, GFP_KERNEL);
err = -ENOMEM;
- goto err;
+ goto err_abort;
}
}
}
PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
- cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
ep->com.qp = qp;
ref_qp(ep);
err = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
if (err)
- goto err1;
+ goto err_deref_cm_id;
+
+ set_bit(STOP_MPA_TIMER, &ep->com.flags);
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
- goto err1;
+ goto err_deref_cm_id;
__state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return 0;
-err1:
- ep->com.cm_id = NULL;
- abort_connection(ep, NULL, GFP_KERNEL);
- cm_id->rem_ref(cm_id);
-err:
+err_deref_cm_id:
+ deref_cm_id(&ep->com);
+err_abort:
+ abort = 1;
+err_out:
mutex_unlock(&ep->com.mutex);
+ if (abort)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return err;
}
if (peer2peer && ep->ord == 0)
ep->ord = 1;
- cm_id->add_ref(cm_id);
- ep->com.dev = dev;
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
+ ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
/*
* Handle loopback requests to INADDR_ANY.
*/
- if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
+ if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
goto fail1;
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail1:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
out:
return err;
goto fail1;
}
PDBG("%s ep %p\n", __func__, ep);
- cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
ep->com.dev = dev;
ep->backlog = backlog;
memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
fail1:
out:
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
done:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
return err;
}
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt);
+ /*
+ * Ref the ep here in case we have fatal errors causing the
+ * ep to be released and freed.
+ */
+ c4iw_get_ep(&ep->com);
+
rdev = &ep->com.dev->rdev;
if (c4iw_fatal_error(rdev)) {
fatal = 1;
set_bit(EP_DISC_CLOSE, &ep->com.history);
ret = send_halfclose(ep, gfp);
}
- if (ret)
+ if (ret) {
+ set_bit(EP_DISC_FAIL, &ep->com.history);
+ if (!abrupt) {
+ stop_ep_timer(ep);
+ close_complete_upcall(ep, -EIO);
+ }
+ if (ep->com.qp) {
+ struct c4iw_qp_attributes attrs;
+
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ ret = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ pr_err(MOD
+ "%s - qp <- error failed!\n",
+ __func__);
+ }
fatal = 1;
+ }
}
mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
if (fatal)
release_ep_resources(ep);
return ret;
struct cpl_pass_accept_req *req = (void *)(rss + 1);
struct l2t_entry *e;
struct dst_entry *dst;
- struct c4iw_ep *lep;
+ struct c4iw_ep *lep = NULL;
u16 window;
struct port_info *pi;
struct net_device *pdev;
*/
stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
- lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
+ lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
PDBG("%s connect request on invalid stid %d\n", __func__, stid);
goto reject;
free_dst:
dst_release(dst);
reject:
+ if (lep)
+ c4iw_put_ep(&lep->com);
return 0;
}
* These are the real handlers that are called from a
* work queue.
*/
-static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
+static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
[CPL_ACT_ESTABLISH] = act_establish,
[CPL_ACT_OPEN_RPL] = act_open_rpl,
[CPL_RX_DATA] = rx_data,
[CPL_RDMA_TERMINATE] = terminate,
[CPL_FW4_ACK] = fw4_ack,
[CPL_FW6_MSG] = deferred_fw6_msg,
- [CPL_RX_PKT] = rx_pkt
+ [CPL_RX_PKT] = rx_pkt,
+ [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
+ [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
};
static void process_timeout(struct c4iw_ep *ep)
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
- __state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
- __state_set(&ep->com, ABORTING);
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
break;
case CLOSING:
case MORIBUND:
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- __state_set(&ep->com, ABORTING);
close_complete_upcall(ep, -ETIMEDOUT);
break;
case ABORTING:
__func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- if (abort)
- abort_connection(ep, NULL, GFP_KERNEL);
mutex_unlock(&ep->com.mutex);
+ if (abort)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
}
{
struct cpl_abort_req_rss *req = cplhdr(skb);
struct c4iw_ep *ep;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ /* This EP will be dereferenced in peer_abort() */
if (!ep) {
printk(KERN_WARNING MOD
"Abort on non-existent endpoint, tid %d\n", tid);
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
- ep->stats.abort_neg_adv++;
- dev->rdev.stats.neg_adv++;
- kfree_skb(skb);
- return 0;
+ goto out;
}
PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
ep->com.state);
- /*
- * Wake up any threads in rdma_init() or rdma_fini().
- * However, if we are on MPAv2 and want to retry with MPAv1
- * then, don't wake up yet.
- */
- if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
- if (ep->com.state != MPA_REQ_SENT)
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
- } else
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+out:
sched(dev, skb);
return 0;
}
CLOSE_SENT = 3,
TIMEOUT = 4,
QP_REFERENCED = 5,
+ STOP_MPA_TIMER = 7,
};
enum c4iw_ep_history {
EP_DISC_ABORT = 18,
CONN_RPL_UPCALL = 19,
ACT_RETRY_NOMEM = 20,
- ACT_RETRY_INUSE = 21
+ ACT_RETRY_INUSE = 21,
+ CLOSE_CON_RPL = 22,
+ EP_DISC_FAIL = 24,
+ QP_REFED = 25,
+ QP_DEREFED = 26,
+ CM_ID_REFED = 27,
+ CM_ID_DEREFED = 28,
};
struct c4iw_ep_common {
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int c4iw_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
(wait ? FW_WR_COMPL_F : 0));
req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
- req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE));
- req->cmd |= cpu_to_be32(T5_ULP_MEMIO_ORDER_V(1));
+ req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
+ T5_ULP_MEMIO_ORDER_V(1) |
+ T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0]));
req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5));
req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16));
req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr));
return 0;
}
-int c4iw_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
mhp->mpl_len = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page);
}
int c4iw_dereg_mr(struct ib_mr *ib_mr)
#include <rdma/ib_pack.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
-#include <rdma/iw_portmap.h>
-#include <rdma/rdma_netlink.h>
#include <crypto/hash.h>
#include "i40iw_status.h"
u32 arp_table_size;
u32 next_arp_index;
spinlock_t resource_lock; /* hw resource access */
+ spinlock_t qptable_lock;
u32 vendor_id;
u32 vendor_part_id;
u32 of_device_registered;
void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
unsigned char *mac_addr,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u32 action);
struct i40iw_qp_flush_info *info,
bool wait);
-void i40iw_copy_ip_ntohl(u32 *dst, u32 *src);
+void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
u64 addr,
u64 size,
{
struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+ u16 ctrl_ird, ctrl_ord;
/* initialize the upper 5 bytes of the frame */
i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
/* initialize RTR msg */
if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- rtr_msg->ctrl_ird = IETF_NO_IRD_ORD;
- rtr_msg->ctrl_ord = IETF_NO_IRD_ORD;
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
} else {
- rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+ ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
IETF_NO_IRD_ORD : cm_node->ird_size;
- rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+ ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
IETF_NO_IRD_ORD : cm_node->ord_size;
}
- rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
- rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+ ctrl_ird |= IETF_PEER_TO_PEER;
+ ctrl_ird |= IETF_FLPDU_ZERO_LEN;
switch (mpa_key) {
case MPA_KEY_REQUEST:
- rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
- rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_READ;
break;
case MPA_KEY_REPLY:
switch (cm_node->send_rdma0_op) {
case SEND_RDMA_WRITE_ZERO:
- rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_WRITE;
break;
case SEND_RDMA_READ_ZERO:
- rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ ctrl_ord |= IETF_RDMA0_READ;
break;
}
break;
default:
break;
}
- rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
- rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+ rtr_msg->ctrl_ird = htons(ctrl_ird);
+ rtr_msg->ctrl_ord = htons(ctrl_ord);
}
/**
struct in6_addr raddr6;
i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
- return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6));
+ return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
}
/**
cm_node->tcp_cntxt.rcv_wnd =
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
cm_node->tcp_cntxt.mss = iwdev->mss;
cm_node->iwdev = iwdev;
if (cm_node->listener) {
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
- if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) &&
+ if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
cm_node->apbvt_set && cm_node->iwdev) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
void *private_data,
struct i40iw_cm_info *cm_info)
{
- int ret;
struct i40iw_cm_node *cm_node;
struct i40iw_cm_listener *loopback_remotelistener;
struct i40iw_cm_node *loopback_remotenode;
memcpy(cm_node->pdata_buf, private_data, private_data_len);
cm_node->state = I40IW_CM_STATE_SYN_SENT;
- ret = i40iw_send_syn(cm_node, 0);
-
- if (ret) {
- if (cm_node->ipv4)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_node->rem_addr);
- else
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_node->rem_addr);
- i40iw_rem_ref_cm_node(cm_node);
- cm_node = NULL;
- }
-
- if (cm_node)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_port,
- cm_node,
- cm_node->cm_id);
-
return cm_node;
}
tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev,
- &tcp_info->dest_ip_addr3,
- true,
- NULL,
- I40IW_ARP_RESOLVE));
+ tcp_info->arp_idx =
+ cpu_to_le16((u16)i40iw_arp_table(
+ iwqp->iwdev,
+ &tcp_info->dest_ip_addr3,
+ true,
+ NULL,
+ I40IW_ARP_RESOLVE));
} else {
tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
- tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr0,
- false,
- NULL,
- I40IW_ARP_RESOLVE));
+ tcp_info->arp_idx =
+ cpu_to_le16((u16)i40iw_arp_table(
+ iwqp->iwdev,
+ &tcp_info->dest_ip_addr0,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE));
}
}
struct i40iw_cm_node *cm_node;
struct ib_qp_attr attr;
int passive_state;
- struct i40iw_ib_device *iwibdev;
struct ib_mr *ibmr;
struct i40iw_pd *iwpd;
u16 buf_len = 0;
!i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
(!cm_node->ipv4 &&
!i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
- iwibdev = iwdev->iwibdev;
iwpd = iwqp->iwpd;
tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
struct sockaddr_in *raddr;
struct sockaddr_in6 *laddr6;
struct sockaddr_in6 *raddr6;
+ bool qhash_set = false;
int apbvt_set = 0;
enum i40iw_status_code status;
true);
if (status)
return -EINVAL;
+ qhash_set = true;
}
status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
if (status) {
conn_param->private_data_len,
(void *)conn_param->private_data,
&cm_info);
- if (!cm_node) {
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
-
- if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
- cm_info.loc_port))
- i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_DEL);
- cm_id->rem_ref(cm_id);
- iwdev->cm_core.stats_connect_errs++;
- return -ENOMEM;
- }
+ if (!cm_node)
+ goto err;
i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
cm_node->ord_size = 1;
cm_node->apbvt_set = apbvt_set;
- cm_node->qhash_set = true;
+ cm_node->qhash_set = qhash_set;
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
i40iw_add_ref(&iwqp->ibqp);
+
+ if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
+ if (i40iw_send_syn(cm_node, 0)) {
+ i40iw_rem_ref_cm_node(cm_node);
+ goto err;
+ }
+ }
+
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_port,
+ cm_node,
+ cm_node->cm_id);
return 0;
+
+err:
+ if (cm_node) {
+ if (cm_node->ipv4)
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI4",
+ cm_node->rem_addr);
+ else
+ i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI6",
+ cm_node->rem_addr);
+ }
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+
+ if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
+ cm_info.loc_port))
+ i40iw_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ I40IW_MANAGE_APBVT_DEL);
+ cm_id->rem_ref(cm_id);
+ iwdev->cm_core.stats_connect_errs++;
+ return -ENOMEM;
}
/**
/*******************************************************************************
*
-* Copyright (c) 2015 Intel Corporation. All rights reserved.
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
u8 loc_mac[ETH_ALEN];
u32 loc_addr[4];
u16 loc_port;
- u32 map_loc_addr[4];
- u16 map_loc_port;
struct iw_cm_id *cm_id;
atomic_t ref_count;
struct i40iw_device *iwdev;
struct i40iw_cm_node {
u32 loc_addr[4], rem_addr[4];
u16 loc_port, rem_port;
- u32 map_loc_addr[4], map_rem_addr[4];
- u16 map_loc_port, map_rem_port;
u16 vlan_id;
enum i40iw_cm_node_state state;
u8 loc_mac[ETH_ALEN];
u16 rem_port;
u32 loc_addr[4];
u32 rem_addr[4];
- u16 map_loc_port;
- u16 map_rem_port;
- u32 map_loc_addr[4];
- u32 map_rem_addr[4];
u16 vlan_id;
int backlog;
u16 user_pri;
* i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
* @buf: ptr to fpm commit buffer
* @info: ptr to i40iw_hmc_obj_info struct
+ * @sd: number of SDs for HMC objects
*
* parses fpm commit info and copy base value
* of hmc objects in hmc_info
*/
static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
u64 *buf,
- struct i40iw_hmc_obj_info *info)
+ struct i40iw_hmc_obj_info *info,
+ u32 *sd)
{
u64 temp;
+ u64 size;
+ u64 base = 0;
u32 i, j;
+ u32 k = 0;
u32 low;
/* copy base values in obj_info */
i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
get_64bit_val(buf, j, &temp);
info[i].base = RS_64_1(temp, 32) * 512;
+ if (info[i].base > base) {
+ base = info[i].base;
+ k = i;
+ }
low = (u32)(temp);
if (low)
info[i].cnt = low;
}
+ size = info[k].cnt * info[k].size + info[k].base;
+ if (size & 0x1FFFFF)
+ *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
+ else
+ *sd = (u32)(size >> 21);
+
return 0;
}
return 0;
}
+/**
+ * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
+ * @qp: sc qp struct
+ * @info: fast mr info
+ * @post_sq: flag for cqp db to ring
+ */
+enum i40iw_status_code i40iw_sc_mr_fast_register(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_fast_reg_stag_info *info,
+ bool post_sq)
+{
+ u64 temp, header;
+ u64 *wqe;
+ u32 wqe_idx;
+
+ wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
+ 0, info->wr_id);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
+ __func__, info->wr_id, wqe_idx,
+ &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
+ temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
+ set_64bit_val(wqe, 0, temp);
+
+ temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI);
+ set_64bit_val(wqe,
+ 8,
+ LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) |
+ LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR));
+
+ set_64bit_val(wqe,
+ 16,
+ info->total_len |
+ LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO));
+
+ header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) |
+ LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
+ LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
+ LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
+ LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
+ LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
+ LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
+ LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE",
+ wqe, I40IW_QP_WQE_MIN_SIZE);
+
+ if (post_sq)
+ i40iw_qp_post_wr(&qp->qp_uk);
+ return 0;
+}
+
/**
* i40iw_sc_send_lsmm - send last streaming mode message
* @qp: sc qp struct
i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
/* parse the fpm_commit_buf and fill hmc obj info */
- i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj);
+ i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt);
mem_size = sizeof(struct i40iw_hmc_sd_entry) *
(hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
/* parse the fpm_commit_buf and fill hmc obj info */
if (!ret_code)
- ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj);
+ ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf,
+ hmc_info->hmc_obj,
+ &hmc_info->sd_table.sd_cnt);
i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
return I40IW_RING_FULL_ERR(cqp->sq_ring);
}
+/**
+ * i40iw_est_sd - returns approximate number of SDs for HMC
+ * @dev: sc device struct
+ * @hmc_info: hmc structure, size and count for HMC objects
+ */
+static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info)
+{
+ int i;
+ u64 size = 0;
+ u64 sd;
+
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++)
+ size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size;
+
+ if (dev->is_pf)
+ size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
+
+ if (size & 0x1FFFFF)
+ sd = (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd = size >> 21;
+
+ if (!dev->is_pf) {
+ /* 2MB alignment for VF PBLE HMC */
+ size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
+ if (size & 0x1FFFFF)
+ sd += (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd += size >> 21;
+ }
+
+ return sd;
+}
+
/**
* i40iw_config_fpm_values - configure HMC objects
* @dev: sc device struct
u32 i, mem_size;
u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
u32 powerof2;
- u64 sd_needed, bytes_needed;
+ u64 sd_needed;
u32 loop_count = 0;
struct i40iw_hmc_info *hmc_info;
return ret_code;
}
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
- bytes_needed +=
- (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
- __func__, i, hmc_info->hmc_obj[i].max_cnt,
- hmc_info->hmc_obj[i].size);
- }
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
+ sd_needed = i40iw_est_sd(dev, hmc_info);
i40iw_debug(dev, I40IW_DEBUG_HMC,
"%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
__func__, sd_needed, hmc_info->first_sd_index);
i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n",
- __func__, bytes_needed, hmc_info->sd_table.sd_cnt,
+ "%s: sd count %d where max sd is %d\n",
+ __func__, hmc_info->sd_table.sd_cnt,
hmc_fpm_misc->max_sds);
qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
/* How much memory is needed for all the objects. */
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
- bytes_needed +=
- (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
+ sd_needed = i40iw_est_sd(dev, hmc_info);
if ((loop_count > 1000) ||
((!(loop_count % 10)) &&
(qpwanted > qpwantedoriginal * 2 / 3))) {
pblewanted -= FPM_MULTIPLIER * 1000;
} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
- bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
- __func__, i, hmc_info->hmc_obj[i].cnt,
- hmc_info->hmc_obj[i].size);
- }
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */
+ sd_needed = i40iw_est_sd(dev, hmc_info);
i40iw_debug(dev, I40IW_DEBUG_HMC,
"loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
return ret_code;
}
- hmc_info->sd_table.sd_cnt = (u32)sd_needed;
-
mem_size = sizeof(struct i40iw_hmc_sd_entry) *
(hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
*/
static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
{
- u16 *mpa;
+ __be16 *mpa;
u32 opcode = 0xffffffff;
if (info->q2_data_written) {
- mpa = (u16 *)pkt;
+ mpa = (__be16 *)pkt;
opcode = ntohs(mpa[1]) & 0xf;
}
return opcode;
if (info->q2_data_written) {
/* Use data from offending packet to fill in ddp & rdma hdrs */
pkt = i40iw_locate_mpa(pkt);
- ddp_seg_len = ntohs(*(u16 *)pkt);
+ ddp_seg_len = ntohs(*(__be16 *)pkt);
if (ddp_seg_len) {
copy_len = 2;
termhdr->hdrct = DDP_LEN_FLAG;
void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
{
u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- u32 *mpa;
+ __be32 *mpa;
u8 ddp_ctl;
u8 rdma_ctl;
u16 aeq_id = 0;
struct i40iw_terminate_hdr *termhdr;
- mpa = (u32 *)i40iw_locate_mpa(pkt);
+ mpa = (__be32 *)i40iw_locate_mpa(pkt);
if (info->q2_data_written) {
/* did not validate the frame - do it now */
ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
};
static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
- i40iw_sc_qp_init,
- i40iw_sc_qp_create,
- i40iw_sc_qp_modify,
- i40iw_sc_qp_destroy,
- i40iw_sc_qp_flush_wqes,
- i40iw_sc_qp_upload_context,
- i40iw_sc_qp_setctx,
- i40iw_sc_send_lsmm,
- i40iw_sc_send_lsmm_nostag,
- i40iw_sc_send_rtt,
- i40iw_sc_post_wqe0,
+ .qp_init = i40iw_sc_qp_init,
+ .qp_create = i40iw_sc_qp_create,
+ .qp_modify = i40iw_sc_qp_modify,
+ .qp_destroy = i40iw_sc_qp_destroy,
+ .qp_flush_wqes = i40iw_sc_qp_flush_wqes,
+ .qp_upload_context = i40iw_sc_qp_upload_context,
+ .qp_setctx = i40iw_sc_qp_setctx,
+ .qp_send_lsmm = i40iw_sc_send_lsmm,
+ .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag,
+ .qp_send_rtt = i40iw_sc_send_rtt,
+ .qp_post_wqe0 = i40iw_sc_post_wqe0,
+ .iw_mr_fast_register = i40iw_sc_mr_fast_register
};
static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
/* wqe size considering 32 bytes per wqe*/
#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
-#define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */
+#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */
#define I40IWQP_OP_RDMA_WRITE 0
#define I40IWQP_OP_RDMA_READ 1
I40IW_SD_BUF_ALIGNMENT = 0x100
};
+#define I40IW_WQE_SIZE_64 64
+
#define I40IW_QP_WQE_MIN_SIZE 32
#define I40IW_QP_WQE_MAX_SIZE 128
set_bit(2, iwdev->allocated_pds);
spin_lock_init(&iwdev->resource_lock);
- mrdrvbits = 24 - get_count_order(iwdev->max_mr);
+ spin_lock_init(&iwdev->qptable_lock);
+ /* stag index mask has a minimum of 14 bits */
+ mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14);
iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
return 0;
}
"%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
__func__, info->ae_id, info->qp, info->qp_cq_id);
if (info->qp) {
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
iwqp = iwdev->qp_table[info->qp_cq_id];
if (!iwqp) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
continue;
}
+ i40iw_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
qp = &iwqp->sc_qp;
spin_lock_irqsave(&iwqp->lock, flags);
iwqp->hw_tcp_state = info->tcp_state;
i40iw_terminate_connection(qp, info);
break;
}
+ if (info->qp)
+ i40iw_rem_ref(&iwqp->ibqp);
} while (1);
if (aeqcnt)
*/
void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
unsigned char *mac_addr,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u32 action)
{
cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
info = &cqp_info->in.u.add_arp_cache_entry.info;
memset(info, 0, sizeof(*info));
- info->arp_index = cpu_to_le32(arp_index);
+ info->arp_index = cpu_to_le16((u16)arp_index);
info->permanent = true;
ether_addr_copy(info->mac_addr, mac_addr);
cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
else
i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
- synchronize_irq(msix_vec->irq);
free_irq(msix_vec->irq, dev_id);
}
if (!status) {
status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
(u8)iwdev->mac_ip_table_idx);
- if (!status)
- status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
- (u8)iwdev->mac_ip_table_idx);
- else
+ if (status)
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
}
return status;
struct net_device *ip_dev;
struct inet6_dev *idev;
struct inet6_ifaddr *ifp;
- __be32 local_ipaddr6[4];
+ u32 local_ipaddr6[4];
rcu_read_lock();
for_each_netdev_rcu(&init_net, ip_dev) {
I40IW_HMC_PROFILE_DEFAULT;
iwdev->max_rdma_vfs =
(iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
+ iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
iwdev->netdev = ldev->netdev;
hdl->client = client;
iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
goto exit;
iwdev->obj_next = iwdev->obj_mem;
iwdev->push_mode = push_mode;
+
init_waitqueue_head(&iwdev->vchnl_waitq);
+ init_waitqueue_head(&dev->vf_reqs);
+
status = i40iw_initialize_dev(iwdev, ldev);
exit:
if (status) {
for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
continue;
-
/* free all resources allocated on behalf of vf */
tmp_vfdev = dev->vf_dev[i];
spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
dev = &hdl->device.sc_dev;
iwdev = dev->back_dev;
- i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
-
if (dev->vchnl_if.vchnl_recv) {
ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
if (!dev->is_pf) {
return ret_code;
}
+/**
+ * i40iw_vf_clear_to_send - wait to send virtual channel message
+ * @dev: iwarp device *
+ * Wait for until virtual channel is clear
+ * before sending the next message
+ *
+ * Returns false if error
+ * Returns true if clear to send
+ */
+bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
+{
+ struct i40iw_device *iwdev;
+ wait_queue_t wait;
+
+ iwdev = dev->back_dev;
+
+ if (!wq_has_sleeper(&dev->vf_reqs) &&
+ (atomic_read(&iwdev->vchnl_msgs) == 0))
+ return true; /* virtual channel is clear */
+
+ init_wait(&wait);
+ add_wait_queue_exclusive(&dev->vf_reqs, &wait);
+
+ if (!wait_event_timeout(dev->vf_reqs,
+ (atomic_read(&iwdev->vchnl_msgs) == 0),
+ I40IW_VCHNL_EVENT_TIMEOUT))
+ dev->vchnl_up = false;
+
+ remove_wait_queue(&dev->vf_reqs, &wait);
+
+ return dev->vchnl_up;
+}
+
/**
* i40iw_virtchnl_send - send a message through the virtual channel
* @dev: iwarp device
{
struct i40iw_device *iwdev;
struct i40e_info *ldev;
- enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
if (!dev || !dev->back_dev)
- return ret_code;
+ return I40IW_ERR_BAD_PTR;
iwdev = dev->back_dev;
ldev = iwdev->ldev;
if (ldev && ldev->ops && ldev->ops->virtchnl_send)
- ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
-
- return ret_code;
+ return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
+ return I40IW_ERR_BAD_PTR;
}
/* client interface functions */
u8 __iomem *i40iw_get_hw_addr(void *dev);
void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
+bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev);
enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
u32 length, u32 value);
struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
if (sd_entry->valid)
return 0;
- if (dev->is_pf)
+ if (dev->is_pf) {
ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
sd_reg_val, idx->sd_idx,
sd_entry->entry_type, true);
- if (ret_code) {
- i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
- goto error;
+ if (ret_code) {
+ i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
+ goto error;
+ }
}
sd_entry->valid = true;
ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
while (datalen) {
- fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap));
+ fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap));
if (fpdu_len > pfpdu->max_fpdu_data) {
i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
"%s: error bad fpdu_len\n", __func__);
I40IW_ERR_INVALID_MAC_ADDR = -65,
I40IW_ERR_BAD_STAG = -66,
I40IW_ERR_CQ_COMPL_ERROR = -67,
+ I40IW_ERR_QUEUE_DESTROYED = -68
};
#endif
struct i40iw_virt_mem ieq_mem;
struct i40iw_puda_rsrc *ieq;
- struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
+ const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
struct i40iw_hmc_fpm_misc hmc_fpm_misc;
u16 qs_handle;
- u32 debug_mask;
+ u32 debug_mask;
u16 exception_lan_queue;
u8 hmc_fn_id;
bool is_pf;
bool vchnl_up;
u8 vf_id;
+ wait_queue_head_t vf_reqs;
u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
u8 hw_rev;
u32 qp_num;
u32 dest_ip[4];
u32 src_ip[4];
- u32 dest_port;
- u32 src_port;
+ u16 dest_port;
+ u16 src_port;
};
struct i40iw_local_mac_ipaddr_entry_info {
void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
+ enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *,
+ struct i40iw_fast_reg_stag_info *,
+ bool);
};
struct i40iw_priv_cq_ops {
enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
struct i40iw_hmc_fpm_misc *);
enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *);
+ enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd);
enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
struct i40iw_hmc_create_obj_info *);
enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE;
+
peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
wqe_0 = qp->sq_base[peek_head].elem;
if (peek_head)
*/
u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
u32 *wqe_idx,
- u8 wqe_size)
+ u8 wqe_size,
+ u32 total_size,
+ u64 wr_id
+ )
{
u64 *wqe = NULL;
u64 wqe_ptr;
if (!*wqe_idx)
qp->swqe_polarity = !qp->swqe_polarity;
}
+
+ if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) {
+ i40iw_nop_1(qp);
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+ *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ }
+
for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
if (ret_code)
peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
wqe_0 = qp->sq_base[peek_head].elem;
- if (peek_head & 0x3)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) {
+ if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity)
+ wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+ }
+
+ qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id;
+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size;
return wqe;
}
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
set_64bit_val(wqe, 16,
LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
if (!op_info->rem_addr.stag)
ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
local_fence |= info->local_fence;
set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
set_64bit_val(wqe, 16, 0);
header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
LS_64(info->op_type, I40IWQPSQ_OPCODE) |
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
set_64bit_val(wqe, 16,
LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
LS_64(info->op_type, I40IWQPSQ_OPCODE) |
LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
op_info = &info->op.inv_local_stag;
local_fence = info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, 0);
set_64bit_val(wqe, 8,
LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
op_info = &info->op.bind_window;
local_fence |= info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
set_64bit_val(wqe, 8,
LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
enum i40iw_status_code ret_code2 = 0;
bool move_cq_head = true;
u8 polarity;
- u8 addl_frag_cnt, addl_wqes = 0;
+ u8 addl_wqes = 0;
if (cq->avoid_mem_cflct)
cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+ if (!qp) {
+ ret_code = I40IW_ERR_QUEUE_DESTROYED;
+ goto exit;
+ }
wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
- addl_frag_cnt =
- (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
- i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
- addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE;
I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
} else {
do {
get_64bit_val(sw_wqe, 24, &wqe_qword);
op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
info->op_type = op_type;
- addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
- i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
- addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE;
I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
if (op_type != I40IWQP_OP_NOP) {
info->wr_id = qp->sq_wrtrk_array[tail].wrid;
ret_code = 0;
+exit:
if (!ret_code &&
(info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
if (pring && (I40IW_RING_MORE_WORK(*pring)))
* i40iw_get_wqe_shift - get shift count for maximum wqe size
* @wqdepth: depth of wq required.
* @sge: Maximum Scatter Gather Elements wqe
+ * @inline_data: Maximum inline data size
* @shift: Returns the shift needed based on sge
*
- * Shift can be used to left shift the wqe size based on sge.
- * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1
- * (64 bytes wqes) and 2 otherwise (128 bytes wqe).
+ * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size.
+ * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes).
+ * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
+ * Shift of 2 otherwise (wqe size of 128 bytes).
*/
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift)
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift)
{
u32 size;
*shift = 0;
- if (sge > 1)
- *shift = (sge < 4) ? 1 : 2;
+ if (sge > 1 || inline_data > 16)
+ *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
/* check if wqdepth is multiple of 2 or not */
if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
return I40IW_ERR_INVALID_FRAG_COUNT;
- ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift);
+ ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
if (ret_code)
return ret_code;
- ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift);
+ ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
if (ret_code)
return ret_code;
u64 header, *wqe;
u32 wqe_idx;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, 0);
set_64bit_val(wqe, 8, 0);
set_64bit_val(wqe, 16, 0);
* @frag_cnt: number of fragments
* @wqe_size: size of sq wqe returned
*/
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size)
{
switch (frag_cnt) {
case 0:
* @frag_cnt: number of fragments
* @wqe_size: size of rq wqe returned
*/
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size)
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size)
{
switch (frag_cnt) {
case 0:
I40IW_MAX_CQ_SIZE = 1048575,
I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
I40IW_DB_ID_ZERO = 0,
- I40IW_MAX_WQ_FRAGMENT_COUNT = 6,
+ I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
I40IW_MAX_SGE_RD = 1,
I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
I40IW_MAX_VF_FPM_ID = 47,
I40IW_MAX_VF_PER_PF = 127,
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
- I40IW_MAX_INLINE_DATA_SIZE = 112,
- I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112,
+ I40IW_MAX_INLINE_DATA_SIZE = 48,
+ I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
I40IW_MAX_IRD_SIZE = 32,
I40IW_QPCTX_ENCD_MAXIRD = 3,
I40IW_MAX_WQ_ENTRIES = 2048,
#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
+#define I40IW_MAX_MR_SIZE 0x10000000000L
+
struct i40iw_qp_uk;
struct i40iw_cq_uk;
struct i40iw_srq_uk;
struct i40iw_post_send {
i40iw_sgl sg_list;
- u8 num_sges;
+ u32 num_sges;
};
struct i40iw_post_inline_send {
struct i40iw_rdma_write {
i40iw_sgl lo_sg_list;
- u8 num_lo_sges;
+ u32 num_lo_sges;
struct i40iw_sge rem_addr;
};
struct i40iw_sq_uk_wr_trk_info {
u64 wrid;
- u64 wr_len;
+ u32 wr_len;
+ u8 wqe_size;
+ u8 reserved[3];
};
struct i40iw_qp_quanta {
u32 qp_id;
u32 sq_size;
u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
struct i40iw_qp_uk_ops ops;
bool use_srq;
u8 swqe_polarity;
u8 rwqe_polarity;
u8 rq_wqe_size;
u8 rq_wqe_size_multiplier;
- u8 max_sq_frag_cnt;
- u8 max_rq_frag_cnt;
bool deferred_flag;
};
u32 qp_id;
u32 sq_size;
u32 rq_size;
- u8 max_sq_frag_cnt;
- u8 max_rq_frag_cnt;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
};
void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
- u8 wqe_size);
+ u8 wqe_size,
+ u32 total_size,
+ u64 wr_id
+ );
u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
bool signaled, bool post_sq);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
u8 *wqe_size);
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift);
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift);
#endif
* @action: modify, delete or add
*/
int i40iw_arp_table(struct i40iw_device *iwdev,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u8 *mac_addr,
u32 action)
struct net_device *upper_dev;
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
- __be32 local_ipaddr;
+ u32 local_ipaddr;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
switch (event) {
case NETDEV_DOWN:
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
- local_ipaddr = ntohl(local_ipaddr);
+ local_ipaddr = ntohl(ifa->ifa_address);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
return NOTIFY_OK;
case NETDEV_UP:
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
- local_ipaddr = ntohl(local_ipaddr);
+ local_ipaddr = ntohl(ifa->ifa_address);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
case NETDEV_CHANGEADDR:
/* Add the address to the IP table */
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
+ local_ipaddr = ntohl(ifa->ifa_address);
- local_ipaddr = ntohl(local_ipaddr);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
struct net_device *netdev;
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
- __be32 local_ipaddr6[4];
+ u32 local_ipaddr6[4];
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
struct cqp_commands_info *cqp_info;
struct i40iw_device *iwdev;
u32 qp_num;
+ unsigned long flags;
iwqp = to_iwqp(ibqp);
- if (!atomic_dec_and_test(&iwqp->refcount))
+ iwdev = iwqp->iwdev;
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
+ if (!atomic_dec_and_test(&iwqp->refcount)) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
return;
+ }
- iwdev = iwqp->iwdev;
qp_num = iwqp->ibqp.qp_num;
iwdev->qp_table[qp_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
if (!cqp_request)
return;
enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
{
struct i40iw_device *iwdev = dev->back_dev;
- enum i40iw_status_code err_code = 0;
int timeout_ret;
i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
__func__, __LINE__, dev, iwdev);
- atomic_add(2, &iwdev->vchnl_msgs);
+
+ atomic_set(&iwdev->vchnl_msgs, 2);
timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
(atomic_read(&iwdev->vchnl_msgs) == 1),
I40IW_VCHNL_EVENT_TIMEOUT);
atomic_dec(&iwdev->vchnl_msgs);
if (!timeout_ret) {
i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
- err_code = I40IW_ERR_TIMEOUT;
+ atomic_set(&iwdev->vchnl_msgs, 0);
+ dev->vchnl_up = false;
+ return I40IW_ERR_TIMEOUT;
}
- return err_code;
+ wake_up(&dev->vf_reqs);
+ return 0;
}
/**
ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
props->fw_ver = I40IW_FW_VERSION;
props->device_cap_flags = iwdev->device_cap_flags;
- props->vendor_id = iwdev->vendor_id;
- props->vendor_part_id = iwdev->vendor_part_id;
+ props->vendor_id = iwdev->ldev->pcidev->vendor;
+ props->vendor_part_id = iwdev->ldev->pcidev->device;
props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
props->max_qp = iwdev->max_qp;
props->max_cqe = iwdev->max_cqe;
props->max_mr = iwdev->max_mr;
props->max_pd = iwdev->max_pd;
- props->max_sge_rd = 1;
+ props->max_sge_rd = I40IW_MAX_SGE_RD;
props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
props->max_qp_init_rd_atom = props->max_qp_rd_atom;
props->atomic_cap = IB_ATOMIC_NONE;
props->pkey_tbl_len = 1;
props->active_width = IB_WIDTH_4X;
props->active_speed = 1;
- props->max_msg_sz = 0x80000000;
+ props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
return 0;
}
kfree(iwqp->kqp.wrid_mem);
iwqp->kqp.wrid_mem = NULL;
kfree(iwqp->allocated_buffer);
- iwqp->allocated_buffer = NULL;
}
/**
enum i40iw_status_code status;
struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
- ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
- status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift);
+ status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
if (!status)
- status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift);
+ status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
if (status)
return -ENOSYS;
if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+ if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
memset(&init_info, 0, sizeof(init_info));
sq_size = init_attr->cap.max_send_wr;
init_info.qp_uk_init_info.rq_size = rq_size;
init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+ init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
if (!mem)
iwarp_info = &iwqp->iwarp_info;
iwarp_info->rd_enable = true;
iwarp_info->wr_rdresp_en = true;
- if (!iwqp->user_mode)
+ if (!iwqp->user_mode) {
+ iwarp_info->fast_reg_en = true;
iwarp_info->priv_mode_en = true;
+ }
iwarp_info->ddp_ver = 1;
iwarp_info->rdmap_ver = 1;
return ERR_PTR(err_code);
}
}
+ init_completion(&iwqp->sq_drained);
+ init_completion(&iwqp->rq_drained);
return &iwqp->ibqp;
error:
return err;
}
+/**
+ * i40iw_hw_alloc_stag - cqp command to allocate stag
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ */
+static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
+{
+ struct i40iw_allocate_stag_info *info;
+ struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ enum i40iw_status_code status;
+ int err = 0;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.alloc_stag.info;
+ memset(info, 0, sizeof(*info));
+ info->page_size = PAGE_SIZE;
+ info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->total_len = iwmr->length;
+ cqp_info->cqp_cmd = OP_ALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
+ cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
+
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status) {
+ err = -ENOMEM;
+ i40iw_pr_err("CQP-OP MR Reg fail");
+ }
+ return err;
+}
+
+/**
+ * i40iw_alloc_mr - register stag for fast memory registration
+ * @pd: ibpd pointer
+ * @mr_type: memory for stag registrion
+ * @max_num_sg: man number of pages
+ */
+static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct i40iw_pd *iwpd = to_iwpd(pd);
+ struct i40iw_device *iwdev = to_iwdev(pd->device);
+ struct i40iw_pble_alloc *palloc;
+ struct i40iw_pbl *iwpbl;
+ struct i40iw_mr *iwmr;
+ enum i40iw_status_code status;
+ u32 stag;
+ int err_code = -ENOMEM;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ stag = i40iw_create_stag(iwdev);
+ if (!stag) {
+ err_code = -EOVERFLOW;
+ goto err;
+ }
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IW_MEMREG_TYPE_MEM;
+ palloc = &iwpbl->pble_alloc;
+ iwmr->page_cnt = max_num_sg;
+ mutex_lock(&iwdev->pbl_mutex);
+ status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
+ mutex_unlock(&iwdev->pbl_mutex);
+ if (!status)
+ goto err1;
+
+ if (palloc->level != I40IW_LEVEL_1)
+ goto err2;
+ err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
+ if (err_code)
+ goto err2;
+ iwpbl->pbl_allocated = true;
+ i40iw_add_pdusecount(iwpd);
+ return &iwmr->ibmr;
+err2:
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+err1:
+ i40iw_free_stag(iwdev, stag);
+err:
+ kfree(iwmr);
+ return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_set_page - populate pbl list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @addr: page dma address fro pbl list
+ */
+static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct i40iw_mr *iwmr = to_iwmr(ibmr);
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ u64 *pbl;
+
+ if (unlikely(iwmr->npages == iwmr->page_cnt))
+ return -ENOMEM;
+
+ pbl = (u64 *)palloc->level1.addr;
+ pbl[iwmr->npages++] = cpu_to_le64(addr);
+ return 0;
+}
+
+/**
+ * i40iw_map_mr_sg - map of sg list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @sg: scatter gather list for fmr
+ * @sg_nents: number of sg pages
+ */
+static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct i40iw_mr *iwmr = to_iwmr(ibmr);
+
+ iwmr->npages = 0;
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
+}
+
+/**
+ * i40iw_drain_sq - drain the send queue
+ * @ibqp: ib qp pointer
+ */
+static void i40iw_drain_sq(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ wait_for_completion(&iwqp->sq_drained);
+}
+
+/**
+ * i40iw_drain_rq - drain the receive queue
+ * @ibqp: ib qp pointer
+ */
+static void i40iw_drain_rq(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
+ wait_for_completion(&iwqp->rq_drained);
+}
+
/**
* i40iw_hwreg_mr - send cqp command for memory registration
* @iwdev: iwarp device
struct i40iw_mr *iwmr;
struct ib_umem *region;
struct i40iw_mem_reg_req req;
- u32 pbl_depth = 0;
+ u64 pbl_depth = 0;
u32 stag = 0;
u16 access;
- u32 region_length;
+ u64 region_length;
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
+ if (length > I40IW_MAX_MR_SIZE)
+ return ERR_PTR(-EINVAL);
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region))
return (struct ib_mr *)region;
palloc = &iwpbl->pble_alloc;
iwmr->type = req.reg_type;
- iwmr->page_cnt = pbl_depth;
+ iwmr->page_cnt = (u32)pbl_depth;
switch (req.reg_type) {
case IW_MEMREG_TYPE_QP:
enum i40iw_status_code ret;
int err = 0;
unsigned long flags;
+ bool inv_stag;
iwqp = (struct i40iw_qp *)ibqp;
ukqp = &iwqp->sc_qp.qp_uk;
spin_lock_irqsave(&iwqp->lock, flags);
while (ib_wr) {
+ inv_stag = false;
memset(&info, 0, sizeof(info));
info.wr_id = (u64)(ib_wr->wr_id);
if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
switch (ib_wr->opcode) {
case IB_WR_SEND:
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL;
- else
- info.op_type = I40IW_OP_TYPE_SEND;
+ /* fall-through */
+ case IB_WR_SEND_WITH_INV:
+ if (ib_wr->opcode == IB_WR_SEND) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = I40IW_OP_TYPE_SEND_SOL;
+ else
+ info.op_type = I40IW_OP_TYPE_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
+ else
+ info.op_type = I40IW_OP_TYPE_SEND_INV;
+ }
if (ib_wr->send_flags & IB_SEND_INLINE) {
info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
info.op.inline_send.len = ib_wr->sg_list[0].length;
- ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
} else {
info.op.send.num_sges = ib_wr->num_sge;
info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
- ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
}
if (ret)
if (ret)
err = -EIO;
break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ inv_stag = true;
+ /* fall-through*/
case IB_WR_RDMA_READ:
+ if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
+ err = -EINVAL;
+ break;
+ }
info.op_type = I40IW_OP_TYPE_RDMA_READ;
info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false);
+ ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
if (ret)
err = -EIO;
break;
+ case IB_WR_LOCAL_INV:
+ info.op_type = I40IW_OP_TYPE_INV_STAG;
+ info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+ ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
+ if (ret)
+ err = -EIO;
+ break;
+ case IB_WR_REG_MR:
+ {
+ struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
+ int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
+ int flags = reg_wr(ib_wr)->access;
+ struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
+ struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
+ struct i40iw_fast_reg_stag_info info;
+
+ info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
+ info.access_rights |= i40iw_get_user_access(flags);
+ info.stag_key = reg_wr(ib_wr)->key & 0xff;
+ info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ info.wr_id = ib_wr->wr_id;
+
+ info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
+ info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
+ info.total_len = iwmr->ibmr.length;
+ info.first_pm_pbl_index = palloc->level1.idx;
+ info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
+ info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
+
+ if (page_shift == 21)
+ info.page_size = 1; /* 2M page */
+
+ ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
+ if (ret)
+ err = -EIO;
+ break;
+ }
default:
err = -EINVAL;
i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
enum i40iw_status_code ret;
struct i40iw_cq_uk *ukcq;
struct i40iw_sc_qp *qp;
+ struct i40iw_qp *iwqp;
unsigned long flags;
iwcq = (struct i40iw_cq *)ibcq;
ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
if (ret == I40IW_ERR_QUEUE_EMPTY) {
break;
+ } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
+ continue;
} else if (ret) {
if (!cqe_count)
cqe_count = -1;
}
entry->wc_flags = 0;
entry->wr_id = cq_poll_info.wr_id;
- if (!cq_poll_info.error)
- entry->status = IB_WC_SUCCESS;
- else
+ if (cq_poll_info.error) {
entry->status = IB_WC_WR_FLUSH_ERR;
+ entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
+ } else {
+ entry->status = IB_WC_SUCCESS;
+ }
switch (cq_poll_info.op_type) {
case I40IW_OP_TYPE_RDMA_WRITE:
break;
}
- entry->vendor_err =
- cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
entry->ex.imm_data = 0;
qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
entry->qp = (struct ib_qp *)qp->back_qp;
entry->src_qp = cq_poll_info.qp_id;
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
+ if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ complete(&iwqp->sq_drained);
+ if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
+ complete(&iwqp->rq_drained);
+ }
entry->byte_len = cq_poll_info.bytes_xfered;
entry++;
cqe_count++;
struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
struct timespec curr_time;
static struct timespec last_rd_time = {0, 0};
- enum i40iw_status_code status = 0;
unsigned long flags;
curr_time = current_kernel_time();
spin_unlock_irqrestore(&devstat->stats_lock, flags);
} else {
if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
- status = i40iw_vchnl_vf_get_pe_stats(dev,
- &devstat->hw_stats);
-
- if (status)
- return -ENOSYS;
+ if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
+ return -ENOSYS;
}
stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
iwibdev->ibdev.query_device = i40iw_query_device;
iwibdev->ibdev.create_ah = i40iw_create_ah;
iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
+ iwibdev->ibdev.drain_sq = i40iw_drain_sq;
+ iwibdev->ibdev.drain_rq = i40iw_drain_rq;
+ iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
+ iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
struct ib_umem *region;
u16 type;
u32 page_cnt;
+ u32 npages;
u32 stag;
u64 length;
u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
struct i40iw_pbl *iwpbl;
struct i40iw_dma_mem q2_ctx_mem;
struct i40iw_dma_mem ietf_mem;
+ struct completion sq_drained;
+ struct completion rq_drained;
};
#endif
return 0;
}
-struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
+const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
i40iw_manage_vf_pble_bp
};
u64 scratch,
bool post_sq);
-extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
+extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
#endif
static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
u32 vf_id,
struct i40iw_virtchnl_op_buf *vchnl_msg,
- struct i40iw_dev_hw_stats hw_stats)
+ struct i40iw_dev_hw_stats *hw_stats)
{
enum i40iw_status_code ret_code;
u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats;
+ *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats;
ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
if (ret_code)
i40iw_debug(dev, I40IW_DEBUG_VIRT,
vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
return I40IW_SUCCESS;
}
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT;
- iw_vf_idx++) {
+ for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
if (!dev->vf_dev[iw_vf_idx]) {
- if (first_avail_iw_vf ==
- I40IW_MAX_PE_ENABLED_VF_COUNT)
+ if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT)
first_avail_iw_vf = iw_vf_idx;
continue;
}
devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats);
+ vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
break;
default:
i40iw_debug(dev, I40IW_DEBUG_VIRT,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = vchnl_ver;
vchnl_req.parm_len = sizeof(*vchnl_ver);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = hmc_fcn;
vchnl_req.parm_len = sizeof(*hmc_fcn);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
&vchnl_req,
rsrc_type,
start_index,
rsrc_count);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
&vchnl_req,
rsrc_type,
start_index,
rsrc_count);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = hw_stats;
vchnl_req.parm_len = sizeof(*hw_stats);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
u8 scope_join_state;
u8 proxy_join;
u8 reserved[2];
-};
+} __packed __aligned(4);
struct mcast_group {
struct ib_sa_mcmember_data rec;
__be64 tid,
union ib_gid *new_mgid)
{
- struct mcast_group *group = NULL, *cur_group;
+ struct mcast_group *group = NULL, *cur_group, *n;
struct mcast_req *req;
- struct list_head *pos;
- struct list_head *n;
mutex_lock(&ctx->mcg_table_lock);
- list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
- group = list_entry(pos, struct mcast_group, mgid0_list);
+ list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
mutex_lock(&group->lock);
if (group->last_req_tid == tid) {
if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
return 0;
}
-int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int rc;
sizeof(u64) * mr->max_pages,
DMA_TO_DEVICE);
- rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
sizeof(u64) * mr->max_pages,
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
cq->mcq.irqn = irqn;
- cq->mcq.comp = mlx5_ib_cq_comp;
+ if (context)
+ cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
+ else
+ cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
INIT_LIST_HEAD(&cq->wc_list);
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
+#if defined(CONFIG_X86)
+#include <asm/pat.h>
+#endif
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
return get_arg(offset);
}
+static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
+{
+ switch (cmd) {
+ case MLX5_IB_MMAP_WC_PAGE:
+ return "WC";
+ case MLX5_IB_MMAP_REGULAR_PAGE:
+ return "best effort WC";
+ case MLX5_IB_MMAP_NC_PAGE:
+ return "NC";
+ default:
+ return NULL;
+ }
+}
+
+static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
+ struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+{
+ int err;
+ unsigned long idx;
+ phys_addr_t pfn, pa;
+ pgprot_t prot;
+
+ switch (cmd) {
+ case MLX5_IB_MMAP_WC_PAGE:
+/* Some architectures don't support WC memory */
+#if defined(CONFIG_X86)
+ if (!pat_enabled())
+ return -EPERM;
+#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
+ return -EPERM;
+#endif
+ /* fall through */
+ case MLX5_IB_MMAP_REGULAR_PAGE:
+ /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ break;
+ case MLX5_IB_MMAP_NC_PAGE:
+ prot = pgprot_noncached(vma->vm_page_prot);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ idx = get_index(vma->vm_pgoff);
+ if (idx >= uuari->num_uars)
+ return -EINVAL;
+
+ pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
+
+ vma->vm_page_prot = prot;
+ err = io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot);
+ if (err) {
+ mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
+ err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+ return -EAGAIN;
+ }
+
+ pa = pfn << PAGE_SHIFT;
+ mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
+ vma->vm_start, &pa);
+
+ return 0;
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
- unsigned long idx;
phys_addr_t pfn;
command = get_command(vma->vm_pgoff);
switch (command) {
+ case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
- mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
- (unsigned long long)pfn);
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
- vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
- break;
+ return uar_mmap(dev, command, vma, uuari);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ if (vma->vm_flags & VM_WRITE)
return -EPERM;
/* Don't expose to user-space information it shouldn't have */
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ MLX5_IB_MMAP_WC_PAGE = 2,
+ MLX5_IB_MMAP_NC_PAGE = 3,
/* 5 is chosen in order to be compatible with old versions of libmlx5 */
MLX5_IB_MMAP_CORE_CLOCK = 5,
};
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
/* QP uses 1 as its source QP number */
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
+ MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
};
struct mlx5_umr_wr {
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
- unsigned short sg_nents)
+ unsigned short sg_nents,
+ unsigned int *sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
int i;
- mr->ibmr.iova = sg_dma_address(sg);
+ mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
if (unlikely(i > mr->max_descs))
break;
- klms[i].va = cpu_to_be64(sg_dma_address(sg));
- klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+ klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
+ klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
klms[i].key = cpu_to_be32(lkey);
mr->ibmr.length += sg_dma_len(sg);
+
+ sg_offset = 0;
}
+ if (sg_offset_p)
+ *sg_offset_p = sg_offset;
+
return i;
}
return 0;
}
-int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int n;
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
else
- n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+ mlx5_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin)
{
+ struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
__be64 *qp_pas;
void *in;
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+ if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+
wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, end_padding_mode,
}
if (qp->rq.wqe_cnt) {
+ rq->base.container_mibqp = qp;
+
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
- rq->base.container_mibqp = qp;
err = create_raw_packet_qp_tir(dev, rq, tdn);
if (err)
return -EOPNOTSUPP;
}
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
+ return -EOPNOTSUPP;
+ }
+ if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
+ !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
+ }
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <asm/io.h>
#include <asm/irq.h>
*/
void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
{
- char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
- 'a', 'b', 'c', 'd', 'e', 'f'};
- char *ptr;
- char hex_buf[80];
- char ascii_buf[20];
- int num_char;
- int num_ascii;
- int num_hex;
-
if (!(nes_debug_level & dump_debug_level)) {
return;
}
- ptr = addr;
if (length > 0x100) {
nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
length = 0x100;
}
- nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length);
-
- memset(ascii_buf, 0, 20);
- memset(hex_buf, 0, 80);
-
- num_ascii = 0;
- num_hex = 0;
- for (num_char = 0; num_char < length; num_char++) {
- if (num_ascii == 8) {
- ascii_buf[num_ascii++] = ' ';
- hex_buf[num_hex++] = '-';
- hex_buf[num_hex++] = ' ';
- }
-
- if (*ptr < 0x20 || *ptr > 0x7e)
- ascii_buf[num_ascii++] = '.';
- else
- ascii_buf[num_ascii++] = *ptr;
- hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)];
- hex_buf[num_hex++] = xlate[*ptr & 0x0f];
- hex_buf[num_hex++] = ' ';
- ptr++;
-
- if (num_ascii >= 17) {
- /* output line and reset */
- nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
- memset(ascii_buf, 0, 20);
- memset(hex_buf, 0, 80);
- num_ascii = 0;
- num_hex = 0;
- }
- }
+ nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length);
- /* output the rest */
- if (num_ascii) {
- while (num_ascii < 17) {
- if (num_ascii == 8) {
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- }
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- num_ascii++;
- }
-
- nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
- }
+ print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true);
}
return 0;
}
-static int nes_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
struct nes_mr *nesmr = to_nesmr(ibmr);
nesmr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page);
}
/**
/**
* nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
*/
-static inline void nes_free_qp_mem(struct nes_device *nesdev,
+static void nes_free_qp_mem(struct nes_device *nesdev,
struct nes_qp *nesqp, int virt_wqs)
{
unsigned long flags;
nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
+ init_completion(&nesqp->sq_drained);
+ init_completion(&nesqp->rq_drained);
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
init_timer(&nesqp->terminate_timer);
return err;
}
+/**
+ * nes_drain_sq - drain sq
+ * @ibqp: pointer to ibqp
+ */
+static void nes_drain_sq(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+
+ if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)
+ wait_for_completion(&nesqp->sq_drained);
+}
+
+/**
+ * nes_drain_rq - drain rq
+ * @ibqp: pointer to ibqp
+ */
+static void nes_drain_rq(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+
+ if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)
+ wait_for_completion(&nesqp->rq_drained);
+}
/**
* nes_poll_cq
}
}
+ if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) {
+ if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head)
+ complete(&nesqp->sq_drained);
+ if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head)
+ complete(&nesqp->rq_drained);
+ }
+
entry->wr_id = wrid;
entry++;
cqe_count++;
nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
nesibdev->ibdev.post_send = nes_post_send;
nesibdev->ibdev.post_recv = nes_post_recv;
+ nesibdev->ibdev.drain_sq = nes_drain_sq;
+ nesibdev->ibdev.drain_rq = nes_drain_rq;
nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
if (nesibdev->ibdev.iwcm == NULL) {
u8 pau_pending;
u8 pau_state;
__u64 nesuqp_addr;
+ struct completion sq_drained;
+ struct completion rq_drained;
};
struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return 0;
}
-int ocrdma_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
mr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
}
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int ocrdma_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
#endif /* __OCRDMA_VERBS_H__ */
phys = dd->physaddr + piobufs;
#if defined(__powerpc__)
- /* There isn't a generic way to specify writethrough mappings */
- pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
- pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
- pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
#endif
/*
qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
free_percpu(dd->int_counter);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
}
u64 qib_int_counter(struct qib_devdata *dd)
bail:
if (!list_empty(&dd->list))
list_del_init(&dd->list);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
return ERR_PTR(ret);
}
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
-#if defined(__powerpc__)
- /* There isn't a generic way to specify writethrough mappings */
- dd->kregbase = __ioremap(addr, len, _PAGE_NO_CACHE | _PAGE_WRITETHRU);
-#else
dd->kregbase = ioremap_nocache(addr, len);
-#endif
-
if (!dd->kregbase)
return -ENOMEM;
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_rc_req(struct rvt_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
struct qib_qp_priv *priv = qp->priv;
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- int (*make_req)(struct rvt_qp *qp);
+ int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->s_hdrwords = 0;
spin_lock_irqsave(&qp->s_lock, flags);
}
- } while (make_req(qp));
+ } while (make_req(qp, &flags));
spin_unlock_irqrestore(&qp->s_lock, flags);
}
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_uc_req(struct rvt_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_ud_req(struct rvt_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
- unsigned long flags;
+ unsigned long tflags = *flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
goto bail;
}
qp->s_cur = next_cur;
- local_irq_save(flags);
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, tflags);
qib_ud_loopback(qp, wqe);
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(&qp->s_lock, tflags);
+ *flags = tflags;
qib_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
void qib_send_rc_ack(struct rvt_qp *qp);
-int qib_make_rc_req(struct rvt_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
-int qib_make_uc_req(struct rvt_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags);
-int qib_make_ud_req(struct rvt_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags);
int qib_register_ib_device(struct qib_devdata *);
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_UD;
break;
case IB_QPT_RC:
- qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_RC;
break;
case IB_QPT_UC:
- qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_UC;
break;
default:
ret = ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(rvt_alloc_device);
+/**
+ * rvt_dealloc_device - deallocate rdi
+ * @rdi: structure to free
+ *
+ * Free a structure allocated with rvt_alloc_device()
+ */
+void rvt_dealloc_device(struct rvt_dev_info *rdi)
+{
+ kfree(rdi->ports);
+ ib_dealloc_device(&rdi->ibdev);
+}
+EXPORT_SYMBOL(rvt_dealloc_device);
+
static int rvt_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
#include "ipoib.h"
+struct ipoib_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int stat_offset;
+};
+
+#define IPOIB_NETDEV_STAT(m) { \
+ .stat_string = #m, \
+ .stat_offset = offsetof(struct rtnl_link_stats64, m) }
+
+static const struct ipoib_stats ipoib_gstrings_stats[] = {
+ IPOIB_NETDEV_STAT(rx_packets),
+ IPOIB_NETDEV_STAT(tx_packets),
+ IPOIB_NETDEV_STAT(rx_bytes),
+ IPOIB_NETDEV_STAT(tx_bytes),
+ IPOIB_NETDEV_STAT(tx_errors),
+ IPOIB_NETDEV_STAT(rx_dropped),
+ IPOIB_NETDEV_STAT(tx_dropped)
+};
+
+#define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats)
+
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
return 0;
}
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ int i;
+ struct net_device_stats *net_stats = &dev->stats;
+ u8 *p = (u8 *)net_stats;
+
+ for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++)
+ data[i] = *(u64 *)(p + ipoib_gstrings_stats[i].stat_offset);
+
+}
+static void ipoib_get_strings(struct net_device __always_unused *dev,
+ u32 stringset, u8 *data)
+{
+ u8 *p = data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) {
+ memcpy(p, ipoib_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ break;
+ case ETH_SS_TEST:
+ default:
+ break;
+ }
+}
+static int ipoib_get_sset_count(struct net_device __always_unused *dev,
+ int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return IPOIB_GLOBAL_STATS_LEN;
+ case ETH_SS_TEST:
+ default:
+ break;
+ }
+ return -EOPNOTSUPP;
+}
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
+ .get_strings = ipoib_get_strings,
+ .get_ethtool_stats = ipoib_get_ethtool_stats,
+ .get_sset_count = ipoib_get_sset_count,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
"Enable data path debug tracing if > 0");
#endif
-static DEFINE_MUTEX(pkey_mutex);
-
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr)
{
page_vec->npages = 0;
page_vec->fake_mr.page_size = SIZE_4K;
plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
- mem->size, iser_set_page);
+ mem->size, NULL, iser_set_page);
if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
- n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
+ n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
if (unlikely(n != mem->size)) {
iser_err("failed to map sg (%d/%d)\n",
n, mem->size);
#define ISERT_MAX_CONN 8
#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN (ISERT_QP_MAX_REQ_DTOS * ISERT_MAX_CONN)
+#define ISER_MAX_TX_CQ_LEN \
+ ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN)
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
ISERT_MAX_CONN)
static struct workqueue_struct *isert_comp_wq;
static struct workqueue_struct *isert_release_wq;
-static void
-isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
-static int
-isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
-static void
-isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
-static int
-isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
static int
isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
static int
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
+ attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
device->ib_device->attrs.max_sge_rd);
device->ib_device->num_comp_vectors));
isert_info("Using %d CQs, %s supports %d vectors support "
- "Fast registration %d pi_capable %d\n",
+ "pi_capable %d\n",
device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, device->use_fastreg,
+ device->ib_device->num_comp_vectors,
device->pi_capable);
device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
- /* asign function handlers */
- if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
- device->use_fastreg = 1;
- device->reg_rdma_mem = isert_reg_rdma;
- device->unreg_rdma_mem = isert_unreg_rdma;
- } else {
- device->use_fastreg = 0;
- device->reg_rdma_mem = isert_map_rdma;
- device->unreg_rdma_mem = isert_unmap_cmd;
- }
-
ret = isert_alloc_comps(device);
if (ret)
goto out;
return device;
}
-static void
-isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
-{
- struct fast_reg_descriptor *fr_desc, *tmp;
- int i = 0;
-
- if (list_empty(&isert_conn->fr_pool))
- return;
-
- isert_info("Freeing conn %p fastreg pool", isert_conn);
-
- list_for_each_entry_safe(fr_desc, tmp,
- &isert_conn->fr_pool, list) {
- list_del(&fr_desc->list);
- ib_dereg_mr(fr_desc->data_mr);
- if (fr_desc->pi_ctx) {
- ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
- ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
- kfree(fr_desc->pi_ctx);
- }
- kfree(fr_desc);
- ++i;
- }
-
- if (i < isert_conn->fr_pool_size)
- isert_warn("Pool still has %d regions registered\n",
- isert_conn->fr_pool_size - i);
-}
-
-static int
-isert_create_pi_ctx(struct fast_reg_descriptor *desc,
- struct ib_device *device,
- struct ib_pd *pd)
-{
- struct pi_context *pi_ctx;
- int ret;
-
- pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
- if (!pi_ctx) {
- isert_err("Failed to allocate pi context\n");
- return -ENOMEM;
- }
-
- pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(pi_ctx->prot_mr)) {
- isert_err("Failed to allocate prot frmr err=%ld\n",
- PTR_ERR(pi_ctx->prot_mr));
- ret = PTR_ERR(pi_ctx->prot_mr);
- goto err_pi_ctx;
- }
- desc->ind |= ISERT_PROT_KEY_VALID;
-
- pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
- if (IS_ERR(pi_ctx->sig_mr)) {
- isert_err("Failed to allocate signature enabled mr err=%ld\n",
- PTR_ERR(pi_ctx->sig_mr));
- ret = PTR_ERR(pi_ctx->sig_mr);
- goto err_prot_mr;
- }
-
- desc->pi_ctx = pi_ctx;
- desc->ind |= ISERT_SIG_KEY_VALID;
- desc->ind &= ~ISERT_PROTECTED;
-
- return 0;
-
-err_prot_mr:
- ib_dereg_mr(pi_ctx->prot_mr);
-err_pi_ctx:
- kfree(pi_ctx);
-
- return ret;
-}
-
-static int
-isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
- struct fast_reg_descriptor *fr_desc)
-{
- fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(fr_desc->data_mr)) {
- isert_err("Failed to allocate data frmr err=%ld\n",
- PTR_ERR(fr_desc->data_mr));
- return PTR_ERR(fr_desc->data_mr);
- }
- fr_desc->ind |= ISERT_DATA_KEY_VALID;
-
- isert_dbg("Created fr_desc %p\n", fr_desc);
-
- return 0;
-}
-
-static int
-isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
-{
- struct fast_reg_descriptor *fr_desc;
- struct isert_device *device = isert_conn->device;
- struct se_session *se_sess = isert_conn->conn->sess->se_sess;
- struct se_node_acl *se_nacl = se_sess->se_node_acl;
- int i, ret, tag_num;
- /*
- * Setup the number of FRMRs based upon the number of tags
- * available to session in iscsi_target_locate_portal().
- */
- tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
- tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
-
- isert_conn->fr_pool_size = 0;
- for (i = 0; i < tag_num; i++) {
- fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
- if (!fr_desc) {
- isert_err("Failed to allocate fast_reg descriptor\n");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = isert_create_fr_desc(device->ib_device,
- device->pd, fr_desc);
- if (ret) {
- isert_err("Failed to create fastreg descriptor err=%d\n",
- ret);
- kfree(fr_desc);
- goto err;
- }
-
- list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
- isert_conn->fr_pool_size++;
- }
-
- isert_dbg("Creating conn %p fastreg pool size=%d",
- isert_conn, isert_conn->fr_pool_size);
-
- return 0;
-
-err:
- isert_conn_free_fastreg_pool(isert_conn);
- return ret;
-}
-
static void
isert_init_conn(struct isert_conn *isert_conn)
{
init_completion(&isert_conn->login_req_comp);
kref_init(&isert_conn->kref);
mutex_init(&isert_conn->mutex);
- spin_lock_init(&isert_conn->pool_lock);
- INIT_LIST_HEAD(&isert_conn->fr_pool);
INIT_WORK(&isert_conn->release_work, isert_release_work);
}
BUG_ON(!device);
- if (device->use_fastreg)
- isert_conn_free_fastreg_pool(isert_conn);
-
isert_free_rx_descriptors(isert_conn);
if (isert_conn->cm_id)
rdma_destroy_id(isert_conn->cm_id);
{
struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
- isert_cmd->iser_ib_op = ISER_IB_SEND;
tx_desc->tx_cqe.done = isert_send_done;
send_wr->wr_cqe = &tx_desc->tx_cqe;
}
if (!login->login_failed) {
if (login->login_complete) {
- if (!conn->sess->sess_ops->SessionType &&
- isert_conn->device->use_fastreg) {
- ret = isert_conn_create_fastreg_pool(isert_conn);
- if (ret) {
- isert_err("Conn: %p failed to create"
- " fastreg pool\n", isert_conn);
- return ret;
- }
- }
-
ret = isert_alloc_rx_descriptors(isert_conn);
if (ret)
return ret;
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
}
-static int
-isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct scatterlist *sg, u32 nents, u32 length, u32 offset,
- enum iser_ib_op_code op, struct isert_data_buf *data)
-{
- struct ib_device *ib_dev = isert_conn->cm_id->device;
-
- data->dma_dir = op == ISER_IB_RDMA_WRITE ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
- data->len = length - offset;
- data->offset = offset;
- data->sg_off = data->offset / PAGE_SIZE;
-
- data->sg = &sg[data->sg_off];
- data->nents = min_t(unsigned int, nents - data->sg_off,
- ISCSI_ISER_SG_TABLESIZE);
- data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE *
- PAGE_SIZE);
-
- data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
- data->dma_dir);
- if (unlikely(!data->dma_nents)) {
- isert_err("Cmd: unable to dma map SGs %p\n", sg);
- return -EINVAL;
- }
-
- isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
- isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
-
- return 0;
-}
-
static void
-isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
+isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn)
{
- struct ib_device *ib_dev = isert_conn->cm_id->device;
-
- ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
- memset(data, 0, sizeof(*data));
-}
-
-
-
-static void
-isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
-{
- isert_dbg("Cmd %p\n", isert_cmd);
+ struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
- if (isert_cmd->data.sg) {
- isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
- }
-
- if (isert_cmd->rdma_wr) {
- isert_dbg("Cmd %p free send_wr\n", isert_cmd);
- kfree(isert_cmd->rdma_wr);
- isert_cmd->rdma_wr = NULL;
- }
-
- if (isert_cmd->ib_sge) {
- isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
- kfree(isert_cmd->ib_sge);
- isert_cmd->ib_sge = NULL;
- }
-}
-
-static void
-isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
-{
- isert_dbg("Cmd %p\n", isert_cmd);
-
- if (isert_cmd->fr_desc) {
- isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, isert_cmd->fr_desc);
- if (isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
- spin_lock_bh(&isert_conn->pool_lock);
- list_add_tail(&isert_cmd->fr_desc->list, &isert_conn->fr_pool);
- spin_unlock_bh(&isert_conn->pool_lock);
- isert_cmd->fr_desc = NULL;
- }
+ if (!cmd->rw.nr_ops)
+ return;
- if (isert_cmd->data.sg) {
- isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
+ if (isert_prot_cmd(conn, se_cmd)) {
+ rdma_rw_ctx_destroy_signature(&cmd->rw, conn->qp,
+ conn->cm_id->port_num, se_cmd->t_data_sg,
+ se_cmd->t_data_nents, se_cmd->t_prot_sg,
+ se_cmd->t_prot_nents, dir);
+ } else {
+ rdma_rw_ctx_destroy(&cmd->rw, conn->qp, conn->cm_id->port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents, dir);
}
- isert_cmd->ib_sge = NULL;
- isert_cmd->rdma_wr = NULL;
+ cmd->rw.nr_ops = 0;
}
static void
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
struct iscsi_conn *conn = isert_conn->conn;
- struct isert_device *device = isert_conn->device;
struct iscsi_text_rsp *hdr;
isert_dbg("Cmd %p\n", isert_cmd);
}
}
- device->unreg_rdma_mem(isert_cmd, isert_conn);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
transport_generic_free_cmd(&cmd->se_cmd, 0);
break;
case ISCSI_OP_SCSI_TMFUNC:
isert_dbg("Cmd %p\n", isert_cmd);
- if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- ret = isert_check_pi_status(cmd,
- isert_cmd->fr_desc->pi_ctx->sig_mr);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
+ ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
- device->unreg_rdma_mem(isert_cmd, isert_conn);
- isert_cmd->rdma_wr_num = 0;
if (ret)
transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
else
isert_dbg("Cmd %p\n", isert_cmd);
- if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- ret = isert_check_pi_status(se_cmd,
- isert_cmd->fr_desc->pi_ctx->sig_mr);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
-
iscsit_stop_dataout_timer(cmd);
- device->unreg_rdma_mem(isert_cmd, isert_conn);
- cmd->write_data_done = isert_cmd->data.len;
- isert_cmd->rdma_wr_num = 0;
+
+ if (isert_prot_cmd(isert_conn, se_cmd))
+ ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
+ cmd->write_data_done = 0;
isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
spin_lock_bh(&cmd->istate_lock);
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
if (cmd->data_direction == DMA_TO_DEVICE)
iscsit_stop_dataout_timer(cmd);
-
- device->unreg_rdma_mem(isert_cmd, isert_conn);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
}
static enum target_prot_op
return isert_post_response(isert_conn, isert_cmd);
}
-static int
-isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
- u32 data_left, u32 offset)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct scatterlist *sg_start, *tmp_sg;
- struct isert_device *device = isert_conn->device;
- struct ib_device *ib_dev = device->ib_device;
- u32 sg_off, page_off;
- int i = 0, sg_nents;
-
- sg_off = offset / PAGE_SIZE;
- sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
- page_off = offset % PAGE_SIZE;
-
- rdma_wr->wr.sg_list = ib_sge;
- rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
-
- /*
- * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
- */
- for_each_sg(sg_start, tmp_sg, sg_nents, i) {
- isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
- "page_off: %u\n",
- (unsigned long long)tmp_sg->dma_address,
- tmp_sg->length, page_off);
-
- ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
- ib_sge->length = min_t(u32, data_left,
- ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
- ib_sge->lkey = device->pd->local_dma_lkey;
-
- isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n",
- ib_sge->addr, ib_sge->length, ib_sge->lkey);
- page_off = 0;
- data_left -= ib_sge->length;
- if (!data_left)
- break;
- ib_sge++;
- isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
- }
-
- rdma_wr->wr.num_sge = ++i;
- isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
- rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
-
- return rdma_wr->wr.num_sge;
-}
-
-static int
-isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_conn *isert_conn = conn->context;
- struct isert_data_buf *data = &isert_cmd->data;
- struct ib_rdma_wr *rdma_wr;
- struct ib_sge *ib_sge;
- u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
- int ret = 0, i, ib_sge_cnt;
-
- offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
- cmd->write_data_done : 0;
- ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
- se_cmd->t_data_nents, se_cmd->data_length,
- offset, isert_cmd->iser_ib_op,
- &isert_cmd->data);
- if (ret)
- return ret;
-
- data_left = data->len;
- offset = data->offset;
-
- ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
- if (!ib_sge) {
- isert_warn("Unable to allocate ib_sge\n");
- ret = -ENOMEM;
- goto unmap_cmd;
- }
- isert_cmd->ib_sge = ib_sge;
-
- isert_cmd->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
- isert_cmd->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) *
- isert_cmd->rdma_wr_num, GFP_KERNEL);
- if (!isert_cmd->rdma_wr) {
- isert_dbg("Unable to allocate isert_cmd->rdma_wr\n");
- ret = -ENOMEM;
- goto unmap_cmd;
- }
-
- rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
-
- for (i = 0; i < isert_cmd->rdma_wr_num; i++) {
- rdma_wr = &isert_cmd->rdma_wr[i];
- data_len = min(data_left, rdma_write_max);
-
- rdma_wr->wr.send_flags = 0;
- if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
- rdma_wr->remote_addr = isert_cmd->read_va + offset;
- rdma_wr->rkey = isert_cmd->read_stag;
- if (i + 1 == isert_cmd->rdma_wr_num)
- rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
- else
- rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
- } else {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
- rdma_wr->rkey = isert_cmd->write_stag;
- if (i + 1 == isert_cmd->rdma_wr_num)
- rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
- else
- rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
- }
-
- ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
- rdma_wr, data_len, offset);
- ib_sge += ib_sge_cnt;
-
- offset += data_len;
- va_offset += data_len;
- data_left -= data_len;
- }
-
- return 0;
-unmap_cmd:
- isert_unmap_data_buf(isert_conn, data);
-
- return ret;
-}
-
-static inline void
-isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
-{
- u32 rkey;
-
- memset(inv_wr, 0, sizeof(*inv_wr));
- inv_wr->wr_cqe = NULL;
- inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->ex.invalidate_rkey = mr->rkey;
-
- /* Bump the key */
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
-}
-
-static int
-isert_fast_reg_mr(struct isert_conn *isert_conn,
- struct fast_reg_descriptor *fr_desc,
- struct isert_data_buf *mem,
- enum isert_indicator ind,
- struct ib_sge *sge)
-{
- struct isert_device *device = isert_conn->device;
- struct ib_device *ib_dev = device->ib_device;
- struct ib_mr *mr;
- struct ib_reg_wr reg_wr;
- struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
- int ret, n;
-
- if (mem->dma_nents == 1) {
- sge->lkey = device->pd->local_dma_lkey;
- sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
- sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
- isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
- sge->addr, sge->length, sge->lkey);
- return 0;
- }
-
- if (ind == ISERT_DATA_KEY_VALID)
- /* Registering data buffer */
- mr = fr_desc->data_mr;
- else
- /* Registering protection buffer */
- mr = fr_desc->pi_ctx->prot_mr;
-
- if (!(fr_desc->ind & ind)) {
- isert_inv_rkey(&inv_wr, mr);
- wr = &inv_wr;
- }
-
- n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
- if (unlikely(n != mem->nents)) {
- isert_err("failed to map mr sg (%d/%d)\n",
- n, mem->nents);
- return n < 0 ? n : -EINVAL;
- }
-
- isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
- fr_desc, mem->nents, mem->offset);
-
- reg_wr.wr.next = NULL;
- reg_wr.wr.opcode = IB_WR_REG_MR;
- reg_wr.wr.wr_cqe = NULL;
- reg_wr.wr.send_flags = 0;
- reg_wr.wr.num_sge = 0;
- reg_wr.mr = mr;
- reg_wr.key = mr->lkey;
- reg_wr.access = IB_ACCESS_LOCAL_WRITE;
-
- if (!wr)
- wr = ®_wr.wr;
- else
- wr->next = ®_wr.wr;
-
- ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
- if (ret) {
- isert_err("fast registration failed, ret:%d\n", ret);
- return ret;
- }
- fr_desc->ind &= ~ind;
-
- sge->lkey = mr->lkey;
- sge->addr = mr->iova;
- sge->length = mr->length;
-
- isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
- sge->addr, sge->length, sge->lkey);
-
- return ret;
-}
-
static inline void
isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
struct ib_sig_domain *domain)
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
{
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
switch (se_cmd->prot_op) {
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
return -EINVAL;
}
+ sig_attrs->check_mask =
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
return 0;
}
-static inline u8
-isert_set_prot_checks(u8 prot_checks)
-{
- return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
- (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
- (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
-}
-
-static int
-isert_reg_sig_mr(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd,
- struct fast_reg_descriptor *fr_desc)
-{
- struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
- struct ib_sig_handover_wr sig_wr;
- struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
- struct pi_context *pi_ctx = fr_desc->pi_ctx;
- struct ib_sig_attrs sig_attrs;
- int ret;
-
- memset(&sig_attrs, 0, sizeof(sig_attrs));
- ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
- if (ret)
- goto err;
-
- sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
-
- if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
- isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
- wr = &inv_wr;
- }
-
- memset(&sig_wr, 0, sizeof(sig_wr));
- sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
- sig_wr.wr.wr_cqe = NULL;
- sig_wr.wr.sg_list = &isert_cmd->ib_sg[DATA];
- sig_wr.wr.num_sge = 1;
- sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
- sig_wr.sig_attrs = &sig_attrs;
- sig_wr.sig_mr = pi_ctx->sig_mr;
- if (se_cmd->t_prot_sg)
- sig_wr.prot = &isert_cmd->ib_sg[PROT];
-
- if (!wr)
- wr = &sig_wr.wr;
- else
- wr->next = &sig_wr.wr;
-
- ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
- if (ret) {
- isert_err("fast registration failed, ret:%d\n", ret);
- goto err;
- }
- fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
-
- isert_cmd->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
- isert_cmd->ib_sg[SIG].addr = 0;
- isert_cmd->ib_sg[SIG].length = se_cmd->data_length;
- if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
- se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
- /*
- * We have protection guards on the wire
- * so we need to set a larget transfer
- */
- isert_cmd->ib_sg[SIG].length += se_cmd->prot_length;
-
- isert_dbg("sig_sge: addr: 0x%llx length: %u lkey: %x\n",
- isert_cmd->ib_sg[SIG].addr, isert_cmd->ib_sg[SIG].length,
- isert_cmd->ib_sg[SIG].lkey);
-err:
- return ret;
-}
-
static int
-isert_handle_prot_cmd(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd)
-{
- struct isert_device *device = isert_conn->device;
- struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
+isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
+ struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
+ u8 port_num = conn->cm_id->port_num;
+ u64 addr;
+ u32 rkey, offset;
int ret;
- if (!isert_cmd->fr_desc->pi_ctx) {
- ret = isert_create_pi_ctx(isert_cmd->fr_desc,
- device->ib_device,
- device->pd);
- if (ret) {
- isert_err("conn %p failed to allocate pi_ctx\n",
- isert_conn);
- return ret;
- }
- }
-
- if (se_cmd->t_prot_sg) {
- ret = isert_map_data_buf(isert_conn, isert_cmd,
- se_cmd->t_prot_sg,
- se_cmd->t_prot_nents,
- se_cmd->prot_length,
- 0,
- isert_cmd->iser_ib_op,
- &isert_cmd->prot);
- if (ret) {
- isert_err("conn %p failed to map protection buffer\n",
- isert_conn);
- return ret;
- }
-
- memset(&isert_cmd->ib_sg[PROT], 0, sizeof(isert_cmd->ib_sg[PROT]));
- ret = isert_fast_reg_mr(isert_conn, isert_cmd->fr_desc,
- &isert_cmd->prot,
- ISERT_PROT_KEY_VALID,
- &isert_cmd->ib_sg[PROT]);
- if (ret) {
- isert_err("conn %p failed to fast reg mr\n",
- isert_conn);
- goto unmap_prot_cmd;
- }
- }
-
- ret = isert_reg_sig_mr(isert_conn, isert_cmd, isert_cmd->fr_desc);
- if (ret) {
- isert_err("conn %p failed to fast reg mr\n",
- isert_conn);
- goto unmap_prot_cmd;
- }
- isert_cmd->fr_desc->ind |= ISERT_PROTECTED;
-
- return 0;
-
-unmap_prot_cmd:
- if (se_cmd->t_prot_sg)
- isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
-
- return ret;
-}
-
-static int
-isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_conn *isert_conn = conn->context;
- struct fast_reg_descriptor *fr_desc = NULL;
- struct ib_rdma_wr *rdma_wr;
- struct ib_sge *ib_sg;
- u32 offset;
- int ret = 0;
- unsigned long flags;
-
- offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
- cmd->write_data_done : 0;
- ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
- se_cmd->t_data_nents, se_cmd->data_length,
- offset, isert_cmd->iser_ib_op,
- &isert_cmd->data);
- if (ret)
- return ret;
-
- if (isert_cmd->data.dma_nents != 1 ||
- isert_prot_cmd(isert_conn, se_cmd)) {
- spin_lock_irqsave(&isert_conn->pool_lock, flags);
- fr_desc = list_first_entry(&isert_conn->fr_pool,
- struct fast_reg_descriptor, list);
- list_del(&fr_desc->list);
- spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
- isert_cmd->fr_desc = fr_desc;
- }
-
- ret = isert_fast_reg_mr(isert_conn, fr_desc, &isert_cmd->data,
- ISERT_DATA_KEY_VALID, &isert_cmd->ib_sg[DATA]);
- if (ret)
- goto unmap_cmd;
-
- if (isert_prot_cmd(isert_conn, se_cmd)) {
- ret = isert_handle_prot_cmd(isert_conn, isert_cmd);
- if (ret)
- goto unmap_cmd;
-
- ib_sg = &isert_cmd->ib_sg[SIG];
+ if (dir == DMA_FROM_DEVICE) {
+ addr = cmd->write_va;
+ rkey = cmd->write_stag;
+ offset = cmd->iscsi_cmd->write_data_done;
} else {
- ib_sg = &isert_cmd->ib_sg[DATA];
+ addr = cmd->read_va;
+ rkey = cmd->read_stag;
+ offset = 0;
}
- memcpy(&isert_cmd->s_ib_sge, ib_sg, sizeof(*ib_sg));
- isert_cmd->ib_sge = &isert_cmd->s_ib_sge;
- isert_cmd->rdma_wr_num = 1;
- memset(&isert_cmd->s_rdma_wr, 0, sizeof(isert_cmd->s_rdma_wr));
- isert_cmd->rdma_wr = &isert_cmd->s_rdma_wr;
+ if (isert_prot_cmd(conn, se_cmd)) {
+ struct ib_sig_attrs sig_attrs;
- rdma_wr = &isert_cmd->s_rdma_wr;
- rdma_wr->wr.sg_list = &isert_cmd->s_ib_sge;
- rdma_wr->wr.num_sge = 1;
- rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
- if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+ ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
+ if (ret)
+ return ret;
- rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
- rdma_wr->remote_addr = isert_cmd->read_va;
- rdma_wr->rkey = isert_cmd->read_stag;
- rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
- 0 : IB_SEND_SIGNALED;
+ WARN_ON_ONCE(offset);
+ ret = rdma_rw_ctx_signature_init(&cmd->rw, conn->qp, port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents,
+ se_cmd->t_prot_sg, se_cmd->t_prot_nents,
+ &sig_attrs, addr, rkey, dir);
} else {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- rdma_wr->remote_addr = isert_cmd->write_va;
- rdma_wr->rkey = isert_cmd->write_stag;
- rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
+ ret = rdma_rw_ctx_init(&cmd->rw, conn->qp, port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents,
+ offset, addr, rkey, dir);
}
-
- return 0;
-
-unmap_cmd:
- if (fr_desc) {
- spin_lock_irqsave(&isert_conn->pool_lock, flags);
- list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
- spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
+ if (ret < 0) {
+ isert_err("Cmd: %p failed to prepare RDMA res\n", cmd);
+ return ret;
}
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
+ ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr);
+ if (ret < 0)
+ isert_err("Cmd: %p failed to post RDMA res\n", cmd);
return ret;
}
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
- struct ib_send_wr *wr_failed;
+ struct ib_cqe *cqe = NULL;
+ struct ib_send_wr *chain_wr = NULL;
int rc;
isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
isert_cmd, se_cmd->data_length);
- isert_cmd->iser_ib_op = ISER_IB_RDMA_WRITE;
- rc = device->reg_rdma_mem(isert_cmd, conn);
- if (rc) {
- isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
- return rc;
- }
-
- if (!isert_prot_cmd(isert_conn, se_cmd)) {
+ if (isert_prot_cmd(isert_conn, se_cmd)) {
+ isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+ cqe = &isert_cmd->tx_desc.tx_cqe;
+ } else {
/*
* Build isert_conn->tx_desc for iSCSI response PDU and attach
*/
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
&isert_cmd->tx_desc.send_wr);
- isert_cmd->s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
- isert_cmd->rdma_wr_num += 1;
rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
if (rc) {
isert_err("ib_post_recv failed with %d\n", rc);
return rc;
}
- }
- rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
- if (rc)
- isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-
- if (!isert_prot_cmd(isert_conn, se_cmd))
- isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
- "READ\n", isert_cmd);
- else
- isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
- isert_cmd);
+ chain_wr = &isert_cmd->tx_desc.send_wr;
+ }
+ isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+ isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
return 1;
}
static int
isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
{
- struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
- struct ib_send_wr *wr_failed;
- int rc;
isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
- isert_cmd, se_cmd->data_length, cmd->write_data_done);
- isert_cmd->iser_ib_op = ISER_IB_RDMA_READ;
- rc = device->reg_rdma_mem(isert_cmd, conn);
- if (rc) {
- isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
- return rc;
- }
+ isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
- rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
- if (rc)
- isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
+ isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
+ isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+ &isert_cmd->tx_desc.tx_cqe, NULL);
isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
isert_cmd);
-
return 0;
}
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
#include <scsi/iser.h>
#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \
ISERT_MAX_TX_MISC_PDUS + \
ISERT_MAX_RX_MISC_PDUS)
ISCSI_TX_DATAIN
};
-enum iser_ib_op_code {
- ISER_IB_RECV,
- ISER_IB_SEND,
- ISER_IB_RDMA_WRITE,
- ISER_IB_RDMA_READ,
-};
-
enum iser_conn_state {
ISER_CONN_INIT,
ISER_CONN_UP,
return container_of(cqe, struct iser_tx_desc, tx_cqe);
}
-
-enum isert_indicator {
- ISERT_PROTECTED = 1 << 0,
- ISERT_DATA_KEY_VALID = 1 << 1,
- ISERT_PROT_KEY_VALID = 1 << 2,
- ISERT_SIG_KEY_VALID = 1 << 3,
-};
-
-struct pi_context {
- struct ib_mr *prot_mr;
- struct ib_mr *sig_mr;
-};
-
-struct fast_reg_descriptor {
- struct list_head list;
- struct ib_mr *data_mr;
- u8 ind;
- struct pi_context *pi_ctx;
-};
-
-struct isert_data_buf {
- struct scatterlist *sg;
- int nents;
- u32 sg_off;
- u32 len; /* cur_rdma_length */
- u32 offset;
- unsigned int dma_nents;
- enum dma_data_direction dma_dir;
-};
-
-enum {
- DATA = 0,
- PROT = 1,
- SIG = 2,
-};
-
struct isert_cmd {
uint32_t read_stag;
uint32_t write_stag;
struct iscsi_cmd *iscsi_cmd;
struct iser_tx_desc tx_desc;
struct iser_rx_desc *rx_desc;
- enum iser_ib_op_code iser_ib_op;
- struct ib_sge *ib_sge;
- struct ib_sge s_ib_sge;
- int rdma_wr_num;
- struct ib_rdma_wr *rdma_wr;
- struct ib_rdma_wr s_rdma_wr;
- struct ib_sge ib_sg[3];
- struct isert_data_buf data;
- struct isert_data_buf prot;
- struct fast_reg_descriptor *fr_desc;
+ struct rdma_rw_ctx rw;
struct work_struct comp_work;
struct scatterlist sg;
};
struct isert_device *device;
struct mutex mutex;
struct kref kref;
- struct list_head fr_pool;
- int fr_pool_size;
- /* lock to protect fastreg pool */
- spinlock_t pool_lock;
struct work_struct release_work;
bool logout_posted;
bool snd_w_inv;
};
struct isert_device {
- int use_fastreg;
bool pi_capable;
int refcount;
struct ib_device *ib_device;
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- int (*reg_rdma_mem)(struct isert_cmd *isert_cmd,
- struct iscsi_conn *conn);
- void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
- struct isert_conn *isert_conn);
};
struct isert_np {
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
+static bool never_register;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
+module_param(never_register, bool, 0444);
+MODULE_PARM_DESC(never_register, "Never register memory");
+
static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
struct ib_fmr_pool_param fmr_param;
memset(&fmr_param, 0, sizeof(fmr_param));
- fmr_param.pool_size = target->scsi_host->can_queue;
+ fmr_param.pool_size = target->mr_pool_size;
fmr_param.dirty_watermark = fmr_param.pool_size / 4;
fmr_param.cache = 1;
fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
{
struct srp_device *dev = target->srp_host->srp_dev;
- return srp_create_fr_pool(dev->dev, dev->pd,
- target->scsi_host->can_queue,
+ return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
dev->max_pages_per_mr);
}
/**
* srp_destroy_qp() - destroy an RDMA queue pair
- * @ch: SRP RDMA channel.
+ * @qp: RDMA queue pair.
*
* Drain the qp before destroying it. This avoids that the receive
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct srp_rdma_ch *ch)
+static void srp_destroy_qp(struct ib_qp *qp)
{
- ib_drain_rq(ch->qp);
- ib_destroy_qp(ch->qp);
+ ib_drain_rq(qp);
+ ib_destroy_qp(qp);
}
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
- const int m = dev->use_fast_reg ? 3 : 1;
+ const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
}
if (ch->qp)
- srp_destroy_qp(ch);
+ srp_destroy_qp(ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
return 0;
err_qp:
- srp_destroy_qp(ch);
+ srp_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch);
+ srp_destroy_qp(ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
for (i = 0; i < target->req_ring_size; ++i) {
req = &ch->req_ring[i];
- mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+ mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
GFP_KERNEL);
if (!mr_list)
goto out;
}
/**
- * srp_free_req() - Unmap data and add request to the free request list.
+ * srp_free_req() - Unmap data and adjust ch->req_lim.
* @ch: SRP RDMA channel.
* @req: Request to be freed.
* @scmnd: SCSI command associated with @req.
srp_handle_qp_err(cq, wc, "FAST REG");
}
+/*
+ * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
+ * where to start in the first element. If sg_offset_p != NULL then
+ * *sg_offset_p is updated to the offset in state->sg[retval] of the first
+ * byte that has not yet been mapped.
+ */
static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_request *req,
- struct srp_rdma_ch *ch, int sg_nents)
+ struct srp_rdma_ch *ch, int sg_nents,
+ unsigned int *sg_offset_p)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
WARN_ON_ONCE(!dev->use_fast_reg);
- if (sg_nents == 0)
- return 0;
-
if (sg_nents == 1 && target->global_mr) {
- srp_map_desc(state, sg_dma_address(state->sg),
- sg_dma_len(state->sg),
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+
+ srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
+ sg_dma_len(state->sg) - sg_offset,
target->global_mr->rkey);
+ if (sg_offset_p)
+ *sg_offset_p = 0;
return 1;
}
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
- n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
- if (unlikely(n < 0))
+ n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
+ dev->mr_page_size);
+ if (unlikely(n < 0)) {
+ srp_fr_pool_put(ch->fr_pool, &desc, 1);
+ pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
+ dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
+ sg_offset_p ? *sg_offset_p : -1, n);
return n;
+ }
+
+ WARN_ON_ONCE(desc->mr->length == 0);
req->reg_cqe.done = srp_reg_mr_err_done;
desc->mr->length, desc->mr->rkey);
err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (unlikely(err))
+ if (unlikely(err)) {
+ WARN_ON_ONCE(err == -ENOMEM);
return err;
+ }
return n;
}
/*
* If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
- * boundries.
+ * boundaries.
*/
ret = 0;
if (len != dev->mr_page_size)
struct scatterlist *sg;
int i, ret;
- state->desc = req->indirect_desc;
state->pages = req->map_page;
state->fmr.next = req->fmr_list;
- state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
+ state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
for_each_sg(scat, sg, count, i) {
ret = srp_map_sg_entry(state, ch, sg, i);
if (ret)
return ret;
- req->nmdesc = state->nmdesc;
-
return 0;
}
struct srp_request *req, struct scatterlist *scat,
int count)
{
+ unsigned int sg_offset = 0;
+
state->desc = req->indirect_desc;
state->fr.next = req->fr_list;
- state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+ state->fr.end = req->fr_list + ch->target->mr_per_cmd;
state->sg = scat;
+ if (count == 0)
+ return 0;
+
while (count) {
int i, n;
- n = srp_map_finish_fr(state, req, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
if (unlikely(n < 0))
return n;
state->sg = sg_next(state->sg);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
target->global_mr->rkey);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
if (dev->use_fast_reg) {
state.sg = idb_sg;
- sg_set_buf(idb_sg, req->indirect_desc, idb_len);
+ sg_init_one(idb_sg, req->indirect_desc, idb_len);
idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, req, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
if (ret < 0)
return ret;
+ WARN_ON_ONCE(ret < 1);
} else if (dev->use_fmr) {
state.pages = idb_pages;
state.pages[0] = (req->indirect_dma_addr &
return 0;
}
+#if defined(DYNAMIC_DATA_DEBUG)
+static void srp_check_mapping(struct srp_map_state *state,
+ struct srp_rdma_ch *ch, struct srp_request *req,
+ struct scatterlist *scat, int count)
+{
+ struct srp_device *dev = ch->target->srp_host->srp_dev;
+ struct srp_fr_desc **pfr;
+ u64 desc_len = 0, mr_len = 0;
+ int i;
+
+ for (i = 0; i < state->ndesc; i++)
+ desc_len += be32_to_cpu(req->indirect_desc[i].len);
+ if (dev->use_fast_reg)
+ for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
+ mr_len += (*pfr)->mr->length;
+ else if (dev->use_fmr)
+ for (i = 0; i < state->nmdesc; i++)
+ mr_len += be32_to_cpu(req->indirect_desc[i].len);
+ if (desc_len != scsi_bufflen(req->scmnd) ||
+ mr_len > scsi_bufflen(req->scmnd))
+ pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
+ scsi_bufflen(req->scmnd), desc_len, mr_len,
+ state->ndesc, state->nmdesc);
+}
+#endif
+
+/**
+ * srp_map_data() - map SCSI data buffer onto an SRP request
+ * @scmnd: SCSI command to map
+ * @ch: SRP RDMA channel
+ * @req: SRP request
+ *
+ * Returns the length in bytes of the SRP_CMD IU or a negative value if
+ * mapping failed.
+ */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
memset(&state, 0, sizeof(state));
if (dev->use_fast_reg)
- srp_map_sg_fr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fr(&state, ch, req, scat, count);
else if (dev->use_fmr)
- srp_map_sg_fmr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fmr(&state, ch, req, scat, count);
else
- srp_map_sg_dma(&state, ch, req, scat, count);
+ ret = srp_map_sg_dma(&state, ch, req, scat, count);
+ req->nmdesc = state.nmdesc;
+ if (ret < 0)
+ goto unmap;
+
+#if defined(DYNAMIC_DEBUG)
+ {
+ DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
+ "Memory mapping consistency check");
+ if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+ srp_check_mapping(&state, ch, req, scat, count);
+ }
+#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
!target->allow_ext_sg)) {
shost_printk(KERN_ERR, target->scsi_host,
"Could not fit S/G list into SRP_CMD\n");
- return -EIO;
+ ret = -EIO;
+ goto unmap;
}
count = min(state.ndesc, target->cmd_sg_cnt);
ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
idb_len, &idb_rkey);
if (ret < 0)
- return ret;
+ goto unmap;
req->nmdesc++;
} else {
idb_rkey = cpu_to_be32(target->global_mr->rkey);
cmd->buf_fmt = fmt;
return len;
+
+unmap:
+ srp_unmap_data(scmnd, ch, req);
+ if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
+ ret = -E2BIG;
+ return ret;
}
/*
return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
+static int srp_slave_alloc(struct scsi_device *sdev)
+{
+ struct Scsi_Host *shost = sdev->host;
+ struct srp_target_port *target = host_to_target(shost);
+ struct srp_device *srp_dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+
+ if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ blk_queue_virt_boundary(sdev->request_queue,
+ ~srp_dev->mr_page_mask);
+
+ return 0;
+}
+
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
+ .slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
goto out;
}
- pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+ pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
dev_name(&target->scsi_host->shost_gendev),
srp_sdev_count(target->scsi_host));
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
int ret, node_idx, node, cpu, i;
+ unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
target_host = scsi_host_alloc(&srp_template,
target->sg_tablesize = target->cmd_sg_cnt;
}
+ if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
+ /*
+ * FR and FMR can only map one HCA page per entry. If the
+ * start address is not aligned on a HCA page boundary two
+ * entries will be used for the head and the tail although
+ * these two entries combined contain at most one HCA page of
+ * data. Hence the "+ 1" in the calculation below.
+ *
+ * The indirect data buffer descriptor is contiguous so the
+ * memory for that buffer will only be registered if
+ * register_always is true. Hence add one to mr_per_cmd if
+ * register_always has been set.
+ */
+ max_sectors_per_mr = srp_dev->max_pages_per_mr <<
+ (ilog2(srp_dev->mr_page_size) - 9);
+ mr_per_cmd = register_always +
+ (target->scsi_host->max_sectors + 1 +
+ max_sectors_per_mr - 1) / max_sectors_per_mr;
+ pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
+ target->scsi_host->max_sectors,
+ srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+ max_sectors_per_mr, mr_per_cmd);
+ }
+
target_host->sg_tablesize = target->sg_tablesize;
+ target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
+ target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
target->max_iu_len = sizeof (struct srp_cmd) +
if (!srp_dev)
return;
- srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- if (!srp_dev->has_fmr && !srp_dev->has_fr)
- dev_warn(&device->dev, "neither FMR nor FR is supported\n");
-
- srp_dev->use_fast_reg = (srp_dev->has_fr &&
- (!srp_dev->has_fmr || prefer_fr));
- srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
-
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
+ pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
+ device->attrs.max_mr_size, srp_dev->mr_page_size,
+ max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
+
+ srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
+ dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+ } else if (!never_register &&
+ device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+ srp_dev->use_fast_reg = (srp_dev->has_fr &&
+ (!srp_dev->has_fmr || prefer_fr));
+ srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
+ }
+
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
if (IS_ERR(srp_dev->pd))
goto free_dev;
- if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+ if (never_register || !register_always ||
+ (!srp_dev->has_fmr && !srp_dev->has_fr)) {
srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
char target_name[32];
unsigned int scsi_id;
unsigned int sg_tablesize;
+ int mr_pool_size;
+ int mr_per_cmd;
int queue_size;
int req_ring_size;
int comp_vector;
return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}
-/**
- * srpt_post_send() - Post an IB send request.
- *
- * Returns zero upon success and a non-zero value upon failure.
- */
-static int srpt_post_send(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx, int len)
-{
- struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
- struct srpt_device *sdev = ch->sport->sdev;
- int ret;
-
- atomic_inc(&ch->req_lim);
-
- ret = -ENOMEM;
- if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
- pr_warn("IB send queue full (needed 1)\n");
- goto out;
- }
-
- ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
- DMA_TO_DEVICE);
-
- list.addr = ioctx->ioctx.dma;
- list.length = len;
- list.lkey = sdev->pd->local_dma_lkey;
-
- ioctx->ioctx.cqe.done = srpt_send_done;
- wr.next = NULL;
- wr.wr_cqe = &ioctx->ioctx.cqe;
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
-
- ret = ib_post_send(ch->qp, &wr, &bad_wr);
-
-out:
- if (ret < 0) {
- atomic_inc(&ch->sq_wr_avail);
- atomic_dec(&ch->req_lim);
- }
- return ret;
-}
-
/**
* srpt_zerolength_write() - Perform a zero-length RDMA write.
*
}
}
+static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx,
+ struct srp_direct_buf *db, int nbufs, struct scatterlist **sg,
+ unsigned *sg_cnt)
+{
+ enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
+ struct srpt_rdma_ch *ch = ioctx->ch;
+ struct scatterlist *prev = NULL;
+ unsigned prev_nents;
+ int ret, i;
+
+ if (nbufs == 1) {
+ ioctx->rw_ctxs = &ioctx->s_rw_ctx;
+ } else {
+ ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs),
+ GFP_KERNEL);
+ if (!ioctx->rw_ctxs)
+ return -ENOMEM;
+ }
+
+ for (i = ioctx->n_rw_ctx; i < nbufs; i++, db++) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+ u64 remote_addr = be64_to_cpu(db->va);
+ u32 size = be32_to_cpu(db->len);
+ u32 rkey = be32_to_cpu(db->key);
+
+ ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false,
+ i < nbufs - 1);
+ if (ret)
+ goto unwind;
+
+ ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, 0, remote_addr, rkey, dir);
+ if (ret < 0) {
+ target_free_sgl(ctx->sg, ctx->nents);
+ goto unwind;
+ }
+
+ ioctx->n_rdma += ret;
+ ioctx->n_rw_ctx++;
+
+ if (prev) {
+ sg_unmark_end(&prev[prev_nents - 1]);
+ sg_chain(prev, prev_nents + 1, ctx->sg);
+ } else {
+ *sg = ctx->sg;
+ }
+
+ prev = ctx->sg;
+ prev_nents = ctx->nents;
+
+ *sg_cnt += ctx->nents;
+ }
+
+ return 0;
+
+unwind:
+ while (--i >= 0) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, dir);
+ target_free_sgl(ctx->sg, ctx->nents);
+ }
+ if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
+ kfree(ioctx->rw_ctxs);
+ return ret;
+}
+
+static void srpt_free_rw_ctxs(struct srpt_rdma_ch *ch,
+ struct srpt_send_ioctx *ioctx)
+{
+ enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
+ int i;
+
+ for (i = 0; i < ioctx->n_rw_ctx; i++) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, dir);
+ target_free_sgl(ctx->sg, ctx->nents);
+ }
+
+ if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
+ kfree(ioctx->rw_ctxs);
+}
+
+static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
+{
+ /*
+ * The pointer computations below will only be compiled correctly
+ * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
+ * whether srp_cmd::add_data has been declared as a byte pointer.
+ */
+ BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) &&
+ !__same_type(srp_cmd->add_data[0], (u8)0));
+
+ /*
+ * According to the SRP spec, the lower two bits of the 'ADDITIONAL
+ * CDB LENGTH' field are reserved and the size in bytes of this field
+ * is four times the value specified in bits 3..7. Hence the "& ~3".
+ */
+ return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3);
+}
+
/**
* srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
* @ioctx: Pointer to the I/O context associated with the request.
* -ENOMEM when memory allocation fails and zero upon success.
*/
static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
- struct srp_cmd *srp_cmd,
- enum dma_data_direction *dir, u64 *data_len)
+ struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
+ struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len)
{
- struct srp_indirect_buf *idb;
- struct srp_direct_buf *db;
- unsigned add_cdb_offset;
- int ret;
-
- /*
- * The pointer computations below will only be compiled correctly
- * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
- * whether srp_cmd::add_data has been declared as a byte pointer.
- */
- BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
- && !__same_type(srp_cmd->add_data[0], (u8)0));
-
BUG_ON(!dir);
BUG_ON(!data_len);
- ret = 0;
- *data_len = 0;
-
/*
* The lower four bits of the buffer format field contain the DATA-IN
* buffer descriptor format, and the highest four bits contain the
* DATA-OUT buffer descriptor format.
*/
- *dir = DMA_NONE;
if (srp_cmd->buf_fmt & 0xf)
/* DATA-IN: transfer data from target to initiator (read). */
*dir = DMA_FROM_DEVICE;
else if (srp_cmd->buf_fmt >> 4)
/* DATA-OUT: transfer data from initiator to target (write). */
*dir = DMA_TO_DEVICE;
+ else
+ *dir = DMA_NONE;
+
+ /* initialize data_direction early as srpt_alloc_rw_ctxs needs it */
+ ioctx->cmd.data_direction = *dir;
- /*
- * According to the SRP spec, the lower two bits of the 'ADDITIONAL
- * CDB LENGTH' field are reserved and the size in bytes of this field
- * is four times the value specified in bits 3..7. Hence the "& ~3".
- */
- add_cdb_offset = srp_cmd->add_cdb_len & ~3;
if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
- ioctx->n_rbuf = 1;
- ioctx->rbufs = &ioctx->single_rbuf;
+ struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
- db = (struct srp_direct_buf *)(srp_cmd->add_data
- + add_cdb_offset);
- memcpy(ioctx->rbufs, db, sizeof(*db));
*data_len = be32_to_cpu(db->len);
+ return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt);
} else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
- idb = (struct srp_indirect_buf *)(srp_cmd->add_data
- + add_cdb_offset);
+ struct srp_indirect_buf *idb = srpt_get_desc_buf(srp_cmd);
+ int nbufs = be32_to_cpu(idb->table_desc.len) /
+ sizeof(struct srp_direct_buf);
- ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);
-
- if (ioctx->n_rbuf >
+ if (nbufs >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
pr_err("received unsupported SRP_CMD request"
" type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt,
be32_to_cpu(idb->table_desc.len),
- sizeof(*db));
- ioctx->n_rbuf = 0;
- ret = -EINVAL;
- goto out;
- }
-
- if (ioctx->n_rbuf == 1)
- ioctx->rbufs = &ioctx->single_rbuf;
- else {
- ioctx->rbufs =
- kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
- if (!ioctx->rbufs) {
- ioctx->n_rbuf = 0;
- ret = -ENOMEM;
- goto out;
- }
+ sizeof(struct srp_direct_buf));
+ return -EINVAL;
}
- db = idb->desc_list;
- memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
*data_len = be32_to_cpu(idb->len);
+ return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs,
+ sg, sg_cnt);
+ } else {
+ *data_len = 0;
+ return 0;
}
-out:
- return ret;
}
/**
return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE);
}
-/**
- * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list.
- */
-static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct scatterlist *sg;
- enum dma_data_direction dir;
-
- BUG_ON(!ch);
- BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
-
- while (ioctx->n_rdma)
- kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
-
- kfree(ioctx->rdma_wrs);
- ioctx->rdma_wrs = NULL;
-
- if (ioctx->mapped_sg_count) {
- sg = ioctx->sg;
- WARN_ON(!sg);
- dir = ioctx->cmd.data_direction;
- BUG_ON(dir == DMA_NONE);
- ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
- target_reverse_dma_direction(&ioctx->cmd));
- ioctx->mapped_sg_count = 0;
- }
-}
-
-/**
- * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list.
- */
-static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct ib_device *dev = ch->sport->sdev->device;
- struct se_cmd *cmd;
- struct scatterlist *sg, *sg_orig;
- int sg_cnt;
- enum dma_data_direction dir;
- struct ib_rdma_wr *riu;
- struct srp_direct_buf *db;
- dma_addr_t dma_addr;
- struct ib_sge *sge;
- u64 raddr;
- u32 rsize;
- u32 tsize;
- u32 dma_len;
- int count, nrdma;
- int i, j, k;
-
- BUG_ON(!ch);
- BUG_ON(!ioctx);
- cmd = &ioctx->cmd;
- dir = cmd->data_direction;
- BUG_ON(dir == DMA_NONE);
-
- ioctx->sg = sg = sg_orig = cmd->t_data_sg;
- ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
-
- count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
- target_reverse_dma_direction(cmd));
- if (unlikely(!count))
- return -EAGAIN;
-
- ioctx->mapped_sg_count = count;
-
- if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
- nrdma = ioctx->n_rdma_wrs;
- else {
- nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
- + ioctx->n_rbuf;
-
- ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
- GFP_KERNEL);
- if (!ioctx->rdma_wrs)
- goto free_mem;
-
- ioctx->n_rdma_wrs = nrdma;
- }
-
- db = ioctx->rbufs;
- tsize = cmd->data_length;
- dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_wrs;
-
- /*
- * For each remote desc - calculate the #ib_sge.
- * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then
- * each remote desc rdma_iu is required a rdma wr;
- * else
- * we need to allocate extra rdma_iu to carry extra #ib_sge in
- * another rdma wr
- */
- for (i = 0, j = 0;
- j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
- rsize = be32_to_cpu(db->len);
- raddr = be64_to_cpu(db->va);
- riu->remote_addr = raddr;
- riu->rkey = be32_to_cpu(db->key);
- riu->wr.num_sge = 0;
-
- /* calculate how many sge required for this remote_buf */
- while (rsize > 0 && tsize > 0) {
-
- if (rsize >= dma_len) {
- tsize -= dma_len;
- rsize -= dma_len;
- raddr += dma_len;
-
- if (tsize > 0) {
- ++j;
- if (j < count) {
- sg = sg_next(sg);
- dma_len = ib_sg_dma_len(
- dev, sg);
- }
- }
- } else {
- tsize -= rsize;
- dma_len -= rsize;
- rsize = 0;
- }
-
- ++riu->wr.num_sge;
-
- if (rsize > 0 &&
- riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
- ++ioctx->n_rdma;
- riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
- sizeof(*riu->wr.sg_list),
- GFP_KERNEL);
- if (!riu->wr.sg_list)
- goto free_mem;
-
- ++riu;
- riu->wr.num_sge = 0;
- riu->remote_addr = raddr;
- riu->rkey = be32_to_cpu(db->key);
- }
- }
-
- ++ioctx->n_rdma;
- riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
- sizeof(*riu->wr.sg_list),
- GFP_KERNEL);
- if (!riu->wr.sg_list)
- goto free_mem;
- }
-
- db = ioctx->rbufs;
- tsize = cmd->data_length;
- riu = ioctx->rdma_wrs;
- sg = sg_orig;
- dma_len = ib_sg_dma_len(dev, &sg[0]);
- dma_addr = ib_sg_dma_address(dev, &sg[0]);
-
- /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */
- for (i = 0, j = 0;
- j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
- rsize = be32_to_cpu(db->len);
- sge = riu->wr.sg_list;
- k = 0;
-
- while (rsize > 0 && tsize > 0) {
- sge->addr = dma_addr;
- sge->lkey = ch->sport->sdev->pd->local_dma_lkey;
-
- if (rsize >= dma_len) {
- sge->length =
- (tsize < dma_len) ? tsize : dma_len;
- tsize -= dma_len;
- rsize -= dma_len;
-
- if (tsize > 0) {
- ++j;
- if (j < count) {
- sg = sg_next(sg);
- dma_len = ib_sg_dma_len(
- dev, sg);
- dma_addr = ib_sg_dma_address(
- dev, sg);
- }
- }
- } else {
- sge->length = (tsize < rsize) ? tsize : rsize;
- tsize -= rsize;
- dma_len -= rsize;
- dma_addr += rsize;
- rsize = 0;
- }
-
- ++k;
- if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
- ++riu;
- sge = riu->wr.sg_list;
- k = 0;
- } else if (rsize > 0 && tsize > 0)
- ++sge;
- }
- }
-
- return 0;
-
-free_mem:
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
-
- return -ENOMEM;
-}
-
/**
* srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
*/
BUG_ON(ioctx->ch != ch);
spin_lock_init(&ioctx->spinlock);
ioctx->state = SRPT_STATE_NEW;
- ioctx->n_rbuf = 0;
- ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_wrs = 0;
- ioctx->rdma_wrs = NULL;
- ioctx->mapped_sg_count = 0;
+ ioctx->n_rw_ctx = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
/*
* SRP_RSP sending failed or the SRP_RSP send completion has
* not been received in time.
*/
- srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
break;
case SRPT_STATE_MGMT_RSP_SENT:
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
+ ioctx->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
__LINE__, srpt_get_cmd_state(ioctx));
}
-static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct srpt_send_ioctx *ioctx =
- container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
-
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- /*
- * Note: if an RDMA write error completion is received that
- * means that a SEND also has been posted. Defer further
- * processing of the associated command until the send error
- * completion has been received.
- */
- pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
- ioctx, wc->status);
- }
-}
-
/**
* srpt_build_cmd_rsp() - Build an SRP_RSP response.
* @ch: RDMA channel through which the request has been received.
{
struct se_cmd *cmd;
struct srp_cmd *srp_cmd;
+ struct scatterlist *sg = NULL;
+ unsigned sg_cnt = 0;
u64 data_len;
enum dma_data_direction dir;
int rc;
break;
}
- if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
- pr_err("0x%llx: parsing SRP descriptor table failed.\n",
- srp_cmd->tag);
+ rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt,
+ &data_len);
+ if (rc) {
+ if (rc != -EAGAIN) {
+ pr_err("0x%llx: parsing SRP descriptor table failed.\n",
+ srp_cmd->tag);
+ }
goto release_ioctx;
}
- rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
+ rc = target_submit_cmd_map_sgls(cmd, ch->sess, srp_cmd->cdb,
&send_ioctx->sense_data[0],
scsilun_to_int(&srp_cmd->lun), data_len,
- TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+ TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF,
+ sg, sg_cnt, NULL, 0, NULL, 0);
if (rc != 0) {
pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
srp_cmd->tag);
recv_ioctx->ioctx.dma, srp_max_req_size,
DMA_FROM_DEVICE);
- if (unlikely(ch->state == CH_CONNECTING)) {
- list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
- goto out;
- }
+ if (unlikely(ch->state == CH_CONNECTING))
+ goto out_wait;
if (unlikely(ch->state != CH_LIVE))
- goto out;
+ return;
srp_cmd = recv_ioctx->ioctx.buf;
if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
- if (!send_ioctx)
+ if (!send_ioctx) {
+ if (!list_empty(&ch->cmd_wait_list))
+ goto out_wait;
send_ioctx = srpt_get_send_ioctx(ch);
- if (unlikely(!send_ioctx)) {
- list_add_tail(&recv_ioctx->wait_list,
- &ch->cmd_wait_list);
- goto out;
}
+ if (unlikely(!send_ioctx))
+ goto out_wait;
}
switch (srp_cmd->opcode) {
}
srpt_post_recv(ch->sport->sdev, recv_ioctx);
-out:
return;
+
+out_wait:
+ list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
}
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
state != SRPT_STATE_MGMT_RSP_SENT);
- atomic_inc(&ch->sq_wr_avail);
+ atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS)
pr_info("sending response for ioctx 0x%p failed"
" with status %d\n", ioctx, wc->status);
if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
pr_err("IB completion has been received too late for"
qp_init->srq = sdev->srq;
qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
qp_init->qp_type = IB_QPT_RC;
- qp_init->cap.max_send_wr = srp_sq_size;
- qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
+ /*
+ * We divide up our send queue size into half SEND WRs to send the
+ * completions, and half R/W contexts to actually do the RDMA
+ * READ/WRITE transfers. Note that we need to allocate CQ slots for
+ * both both, as RDMA contexts will also post completions for the
+ * RDMA READ case.
+ */
+ qp_init->cap.max_send_wr = srp_sq_size / 2;
+ qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
+ qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
+ sdev->device->attrs.max_sge);
+ qp_init->port_num = ch->sport->port;
ch->qp = ib_create_qp(sdev->pd, qp_init);
if (IS_ERR(ch->qp)) {
return ret;
}
-/**
- * srpt_perform_rdmas() - Perform IB RDMA.
- *
- * Returns zero upon success or a negative number upon failure.
- */
-static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct ib_send_wr *bad_wr;
- int sq_wr_avail, ret, i;
- enum dma_data_direction dir;
- const int n_rdma = ioctx->n_rdma;
-
- dir = ioctx->cmd.data_direction;
- if (dir == DMA_TO_DEVICE) {
- /* write */
- ret = -ENOMEM;
- sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
- if (sq_wr_avail < 0) {
- pr_warn("IB send queue full (needed %d)\n",
- n_rdma);
- goto out;
- }
- }
-
- for (i = 0; i < n_rdma; i++) {
- struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
-
- wr->opcode = (dir == DMA_FROM_DEVICE) ?
- IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
-
- if (i == n_rdma - 1) {
- /* only get completion event for the last rdma read */
- if (dir == DMA_TO_DEVICE) {
- wr->send_flags = IB_SEND_SIGNALED;
- ioctx->rdma_cqe.done = srpt_rdma_read_done;
- } else {
- ioctx->rdma_cqe.done = srpt_rdma_write_done;
- }
- wr->wr_cqe = &ioctx->rdma_cqe;
- wr->next = NULL;
- } else {
- wr->wr_cqe = NULL;
- wr->next = &ioctx->rdma_wrs[i + 1].wr;
- }
- }
-
- ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
- if (ret)
- pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
- __func__, __LINE__, ret, i, n_rdma);
-out:
- if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
- atomic_add(n_rdma, &ch->sq_wr_avail);
- return ret;
-}
-
-/**
- * srpt_xfer_data() - Start data transfer from initiator to target.
- */
-static int srpt_xfer_data(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- int ret;
-
- ret = srpt_map_sg_to_ib_sge(ch, ioctx);
- if (ret) {
- pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
- goto out;
- }
-
- ret = srpt_perform_rdmas(ch, ioctx);
- if (ret) {
- if (ret == -EAGAIN || ret == -ENOMEM)
- pr_info("%s[%d] queue full -- ret=%d\n",
- __func__, __LINE__, ret);
- else
- pr_err("%s[%d] fatal error -- ret=%d\n",
- __func__, __LINE__, ret);
- goto out_unmap;
- }
-
-out:
- return ret;
-out_unmap:
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- goto out;
-}
-
static int srpt_write_pending_status(struct se_cmd *se_cmd)
{
struct srpt_send_ioctx *ioctx;
struct srpt_send_ioctx *ioctx =
container_of(se_cmd, struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
+ struct ib_send_wr *first_wr = NULL, *bad_wr;
+ struct ib_cqe *cqe = &ioctx->rdma_cqe;
enum srpt_command_state new_state;
+ int ret, i;
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
- return srpt_xfer_data(ch, ioctx);
+
+ if (atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail) < 0) {
+ pr_warn("%s: IB send queue full (needed %d)\n",
+ __func__, ioctx->n_rdma);
+ ret = -ENOMEM;
+ goto out_undo;
+ }
+
+ cqe->done = srpt_rdma_read_done;
+ for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port,
+ cqe, first_wr);
+ cqe = NULL;
+ }
+
+ ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+ if (ret) {
+ pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n",
+ __func__, ret, ioctx->n_rdma,
+ atomic_read(&ch->sq_wr_avail));
+ goto out_undo;
+ }
+
+ return 0;
+out_undo:
+ atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
+ return ret;
}
static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
*/
static void srpt_queue_response(struct se_cmd *cmd)
{
- struct srpt_rdma_ch *ch;
- struct srpt_send_ioctx *ioctx;
+ struct srpt_send_ioctx *ioctx =
+ container_of(cmd, struct srpt_send_ioctx, cmd);
+ struct srpt_rdma_ch *ch = ioctx->ch;
+ struct srpt_device *sdev = ch->sport->sdev;
+ struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr;
+ struct ib_sge sge;
enum srpt_command_state state;
unsigned long flags;
- int ret;
- enum dma_data_direction dir;
- int resp_len;
+ int resp_len, ret, i;
u8 srp_tm_status;
- ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
- ch = ioctx->ch;
BUG_ON(!ch);
spin_lock_irqsave(&ioctx->spinlock, flags);
return;
}
- dir = ioctx->cmd.data_direction;
-
/* For read commands, transfer the data to the initiator. */
- if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length &&
+ if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
+ ioctx->cmd.data_length &&
!ioctx->queue_status_only) {
- ret = srpt_xfer_data(ch, ioctx);
- if (ret) {
- pr_err("xfer_data failed for tag %llu\n",
- ioctx->cmd.tag);
- return;
+ for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp,
+ ch->sport->port, NULL,
+ first_wr ? first_wr : &send_wr);
}
+ } else {
+ first_wr = &send_wr;
}
if (state != SRPT_STATE_MGMT)
resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status,
ioctx->cmd.tag);
}
- ret = srpt_post_send(ch, ioctx, resp_len);
- if (ret) {
- pr_err("sending cmd response failed for tag %llu\n",
- ioctx->cmd.tag);
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
- target_put_sess_cmd(&ioctx->cmd);
+
+ atomic_inc(&ch->req_lim);
+
+ if (unlikely(atomic_sub_return(1 + ioctx->n_rdma,
+ &ch->sq_wr_avail) < 0)) {
+ pr_warn("%s: IB send queue full (needed %d)\n",
+ __func__, ioctx->n_rdma);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, resp_len,
+ DMA_TO_DEVICE);
+
+ sge.addr = ioctx->ioctx.dma;
+ sge.length = resp_len;
+ sge.lkey = sdev->pd->local_dma_lkey;
+
+ ioctx->ioctx.cqe.done = srpt_send_done;
+ send_wr.next = NULL;
+ send_wr.wr_cqe = &ioctx->ioctx.cqe;
+ send_wr.sg_list = &sge;
+ send_wr.num_sge = 1;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+ if (ret < 0) {
+ pr_err("%s: sending cmd response failed for tag %llu (%d)\n",
+ __func__, ioctx->cmd.tag, ret);
+ goto out;
}
+
+ return;
+
+out:
+ atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
+ atomic_dec(&ch->req_lim);
+ srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+ target_put_sess_cmd(&ioctx->cmd);
}
static int srpt_queue_data_in(struct se_cmd *cmd)
static void srpt_aborted_task(struct se_cmd *cmd)
{
- struct srpt_send_ioctx *ioctx = container_of(cmd,
- struct srpt_send_ioctx, cmd);
-
- srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
}
static int srpt_queue_status(struct se_cmd *cmd)
unsigned long flags;
WARN_ON(ioctx->state != SRPT_STATE_DONE);
- WARN_ON(ioctx->mapped_sg_count != 0);
- if (ioctx->n_rbuf > 1) {
- kfree(ioctx->rbufs);
- ioctx->rbufs = NULL;
- ioctx->n_rbuf = 0;
+ if (ioctx->n_rw_ctx) {
+ srpt_free_rw_ctxs(ch, ioctx);
+ ioctx->n_rw_ctx = 0;
}
spin_lock_irqsave(&ch->spinlock, flags);
#include <rdma/ib_verbs.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_cm.h>
+#include <rdma/rw.h>
#include <scsi/srp.h>
SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
SRPT_DEF_SG_TABLESIZE = 128,
- SRPT_DEF_SG_PER_WQE = 16,
MIN_SRPT_SQ_SIZE = 16,
DEF_SRPT_SQ_SIZE = 4096,
struct srpt_ioctx ioctx;
struct list_head wait_list;
};
+
+struct srpt_rw_ctx {
+ struct rdma_rw_ctx rw;
+ struct scatterlist *sg;
+ unsigned int nents;
+};
/**
* struct srpt_send_ioctx - SRPT send I/O context.
* @ioctx: See above.
* @ch: Channel pointer.
- * @free_list: Node in srpt_rdma_ch.free_list.
- * @n_rbuf: Number of data buffers in the received SRP command.
- * @rbufs: Pointer to SRP data buffer array.
- * @single_rbuf: SRP data buffer if the command has only a single buffer.
- * @sg: Pointer to sg-list associated with this I/O context.
- * @sg_cnt: SG-list size.
- * @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_wrs: Number of elements in the rdma_wrs array.
- * @rdma_wrs: Array with information about the RDMA mapping.
- * @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
* @cmd: Target core command data structure.
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct ib_rdma_wr *rdma_wrs;
+
+ struct srpt_rw_ctx s_rw_ctx;
+ struct srpt_rw_ctx *rw_ctxs;
+
struct ib_cqe rdma_cqe;
- struct srp_direct_buf *rbufs;
- struct srp_direct_buf single_rbuf;
- struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
struct se_cmd cmd;
struct completion tx_done;
- int sg_cnt;
- int mapped_sg_count;
- u16 n_rdma_wrs;
u8 n_rdma;
- u8 n_rbuf;
+ u8 n_rw_ctx;
bool queue_status_only;
u8 sense_data[TRANSPORT_SENSE_BUFFER];
};
#include "io-pgtable.h"
/* Maximum number of stream IDs assigned to a single device */
-#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS
+#define MAX_MASTER_STREAMIDS 128
/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS 128
struct iommu_domain domain;
};
+struct arm_smmu_phandle_args {
+ struct device_node *np;
+ int args_count;
+ uint32_t args[MAX_MASTER_STREAMIDS];
+};
+
static DEFINE_SPINLOCK(arm_smmu_devices_lock);
static LIST_HEAD(arm_smmu_devices);
static int register_smmu_master(struct arm_smmu_device *smmu,
struct device *dev,
- struct of_phandle_args *masterspec)
+ struct arm_smmu_phandle_args *masterspec)
{
int i;
struct arm_smmu_master *master;
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
struct rb_node *node;
- struct of_phandle_args masterspec;
+ struct of_phandle_iterator it;
+ struct arm_smmu_phandle_args *masterspec;
int num_irqs, i, err;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
i = 0;
smmu->masters = RB_ROOT;
- while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters",
- "#stream-id-cells", i,
- &masterspec)) {
- err = register_smmu_master(smmu, dev, &masterspec);
+
+ err = -ENOMEM;
+ /* No need to zero the memory for masterspec */
+ masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL);
+ if (!masterspec)
+ goto out_put_masters;
+
+ of_for_each_phandle(&it, err, dev->of_node,
+ "mmu-masters", "#stream-id-cells", 0) {
+ int count = of_phandle_iterator_args(&it, masterspec->args,
+ MAX_MASTER_STREAMIDS);
+ masterspec->np = of_node_get(it.node);
+ masterspec->args_count = count;
+
+ err = register_smmu_master(smmu, dev, masterspec);
if (err) {
dev_err(dev, "failed to add master %s\n",
- masterspec.np->name);
+ masterspec->np->name);
+ kfree(masterspec);
goto out_put_masters;
}
i++;
}
+
dev_notice(dev, "registered %d master devices\n", i);
+ kfree(masterspec);
+
parse_driver_options(smmu);
if (smmu->version == ARM_SMMU_V2 &&
config EZNPS_GIC
bool "NPS400 Global Interrupt Manager (GIM)"
+ depends on ARC || (COMPILE_TEST && !64BIT)
select IRQ_DOMAIN
help
Support the EZchip NPS400 global interrupt controller
DBDMA_DO_STOP(rm->dma_regs);
return;
}
- memset(rdma->buf1, 0, SAMPLE_COUNT & sizeof(u32));
- memset(rdma->buf2, 0, SAMPLE_COUNT & sizeof(u32));
+ memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1));
+ memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2));
rm->dma_buf_v->mark = 0;
total_idle_ticks = get_cpu_idle_time(cpu);
idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
+ idle_ticks = min(idle_ticks, total_ticks);
rcpu->prev_idle = total_idle_ticks;
/* We do a very dumb calculation to update the LEDs for now,
_set_L2CR(save_l2cr);
/* Restore userland MMU context */
- switch_mmu_context(NULL, current->active_mm);
+ switch_mmu_context(NULL, current->active_mm, NULL);
/* Power things up */
pmu_unlock();
_set_L3CR(save_l3cr);
/* Restore userland MMU context */
- switch_mmu_context(NULL, current->active_mm);
+ switch_mmu_context(NULL, current->active_mm, NULL);
/* Tell PMU we are ready */
pmu_unlock();
if (nb_packet_buffer_size < 1)
nb_packet_buffer_size = 1;
- /* get the fimware version */
+ /* get the firmware version */
usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
REQUEST_GET_VERSION,
USB_TYPE_VENDOR | USB_DIR_IN, 0, 0,
select MFD_CORE
select CHROME_PLATFORMS
select CROS_EC_PROTO
- depends on X86 || ARM || COMPILE_TEST
+ depends on X86 || ARM || ARM64 || COMPILE_TEST
help
If you say Y here you get support for the ChromeOS Embedded
Controller (EC) providing keyboard, battery and power services.
menus in order to enable them.
We communicate with the Hi6421 via memory-mapped I/O.
+config MFD_HI655X_PMIC
+ tristate "HiSilicon Hi655X series PMU/Codec IC"
+ depends on ARCH_HISI || COMPILE_TEST
+ depends on OF
+ select MFD_CORE
+ select REGMAP_MMIO
+ select REGMAP_IRQ
+ help
+ Select this option to enable Hisilicon hi655x series pmic driver.
+
config HTC_EGPIO
bool "HTC EGPIO support"
depends on GPIOLIB && ARM
additional drivers must be enabled in order to use the functionality
of the device.
+config MFD_MAX77620
+ bool "Maxim Semiconductor MAX77620 and MAX20024 PMIC Support"
+ depends on I2C=y
+ depends on OF
+ select MFD_CORE
+ select REGMAP_I2C
+ select REGMAP_IRQ
+ select IRQ_DOMAIN
+ help
+ Say yes here to add support for Maxim Semiconductor MAX77620 and
+ MAX20024 which are Power Management IC with General purpose pins,
+ RTC, regulators, clock generator, watchdog etc. This driver
+ provides common support for accessing the device; additional drivers
+ must be enabled in order to use the functionality of the device.
+
config MFD_MAX77686
tristate "Maxim Semiconductor MAX77686/802 PMIC Support"
depends on I2C
of the device.
config MFD_MAX77693
- bool "Maxim Semiconductor MAX77693 PMIC Support"
- depends on I2C=y
+ tristate "Maxim Semiconductor MAX77693 PMIC Support"
+ depends on I2C
select MFD_CORE
select REGMAP_I2C
select REGMAP_IRQ
config MFD_VEXPRESS_SYSREG
bool "Versatile Express System Registers"
- depends on VEXPRESS_CONFIG && GPIOLIB
+ depends on VEXPRESS_CONFIG && GPIOLIB && !ARCH_USES_GETTIMEOFFSET
default y
select CLKSRC_MMIO
select GPIO_GENERIC_PLATFORM
obj-$(CONFIG_MFD_DA9150) += da9150-core.o
obj-$(CONFIG_MFD_MAX14577) += max14577.o
+obj-$(CONFIG_MFD_MAX77620) += max77620.o
obj-$(CONFIG_MFD_MAX77686) += max77686.o
obj-$(CONFIG_MFD_MAX77693) += max77693.o
obj-$(CONFIG_MFD_MAX77843) += max77843.o
obj-$(CONFIG_MFD_IPAQ_MICRO) += ipaq-micro.o
obj-$(CONFIG_MFD_MENF21BMC) += menf21bmc.o
obj-$(CONFIG_MFD_HI6421_PMIC) += hi6421-pmic-core.o
+obj-$(CONFIG_MFD_HI655X_PMIC) += hi655x-pmic.o
obj-$(CONFIG_MFD_DLN2) += dln2.o
obj-$(CONFIG_MFD_RT5033) += rt5033.o
obj-$(CONFIG_MFD_SKY81452) += sky81452.o
i2c_set_clientdata(i2c, regmap);
- ret = mfd_add_devices(&i2c->dev, PLATFORM_DEVID_NONE, act8945a_devs,
- ARRAY_SIZE(act8945a_devs), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&i2c->dev, PLATFORM_DEVID_NONE,
+ act8945a_devs, ARRAY_SIZE(act8945a_devs),
+ NULL, 0, NULL);
if (ret) {
dev_err(&i2c->dev, "Failed to add sub devices\n");
return ret;
return 0;
}
-static int act8945a_i2c_remove(struct i2c_client *i2c)
-{
- mfd_remove_devices(&i2c->dev);
-
- return 0;
-}
-
static const struct i2c_device_id act8945a_i2c_id[] = {
{ "act8945a", 0 },
{}
.of_match_table = of_match_ptr(act8945a_of_match),
},
.probe = act8945a_i2c_probe,
- .remove = act8945a_i2c_remove,
.id_table = act8945a_i2c_id,
};
static const struct mfd_cell wm5102_devs[] = {
{ .name = "arizona-micsupp" },
+ { .name = "arizona-gpio" },
{
.name = "arizona-extcon",
.parent_supplies = wm5102_supplies,
.num_parent_supplies = 1, /* We only need MICVDD */
},
- { .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
{ .name = "arizona-pwm" },
{
static const struct mfd_cell wm5110_devs[] = {
{ .name = "arizona-micsupp" },
+ { .name = "arizona-gpio" },
{
.name = "arizona-extcon",
.parent_supplies = wm5102_supplies,
.num_parent_supplies = 1, /* We only need MICVDD */
},
- { .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
{ .name = "arizona-pwm" },
{
static const struct mfd_cell wm8997_devs[] = {
{ .name = "arizona-micsupp" },
+ { .name = "arizona-gpio" },
{
.name = "arizona-extcon",
.parent_supplies = wm8997_supplies,
.num_parent_supplies = 1, /* We only need MICVDD */
},
- { .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
{ .name = "arizona-pwm" },
{
};
static const struct mfd_cell wm8998_devs[] = {
+ { .name = "arizona-micsupp" },
+ { .name = "arizona-gpio" },
{
.name = "arizona-extcon",
.parent_supplies = wm5102_supplies,
.num_parent_supplies = 1, /* We only need MICVDD */
},
- { .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
{ .name = "arizona-pwm" },
{
.parent_supplies = wm5102_supplies,
.num_parent_supplies = ARRAY_SIZE(wm5102_supplies),
},
- { .name = "arizona-micsupp" },
};
int arizona_dev_init(struct arizona *arizona)
.irq_set_wake = arizona_irq_set_wake,
};
+static struct lock_class_key arizona_irq_lock_class;
+
static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
irq_hw_number_t hw)
{
struct arizona *data = h->host_data;
irq_set_chip_data(virq, data);
+ irq_set_lockdep_class(virq, &arizona_irq_lock_class);
irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
irq_set_nested_thread(virq, 1);
irq_set_noprobe(virq);
as3711_subdevs[AS3711_BACKLIGHT].pdata_size = 0;
}
- ret = mfd_add_devices(as3711->dev, -1, as3711_subdevs,
- ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(as3711->dev, -1, as3711_subdevs,
+ ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL);
if (ret < 0)
dev_err(&client->dev, "add mfd devices failed: %d\n", ret);
return ret;
}
-static int as3711_i2c_remove(struct i2c_client *client)
-{
- struct as3711 *as3711 = i2c_get_clientdata(client);
-
- mfd_remove_devices(as3711->dev);
- return 0;
-}
-
static const struct i2c_device_id as3711_i2c_id[] = {
{.name = "as3711", .driver_data = 0},
{}
.of_match_table = of_match_ptr(as3711_of_match),
},
.probe = as3711_i2c_probe,
- .remove = as3711_i2c_remove,
.id_table = as3711_i2c_id,
};
return ret;
irq_flags = as3722->irq_flags | IRQF_ONESHOT;
- ret = regmap_add_irq_chip(as3722->regmap, as3722->chip_irq,
- irq_flags, -1, &as3722_irq_chip,
- &as3722->irq_data);
+ ret = devm_regmap_add_irq_chip(as3722->dev, as3722->regmap,
+ as3722->chip_irq,
+ irq_flags, -1, &as3722_irq_chip,
+ &as3722->irq_data);
if (ret < 0) {
dev_err(as3722->dev, "Failed to add regmap irq: %d\n", ret);
return ret;
ret = as3722_configure_pullups(as3722);
if (ret < 0)
- goto scrub;
+ return ret;
- ret = mfd_add_devices(&i2c->dev, -1, as3722_devs,
- ARRAY_SIZE(as3722_devs), NULL, 0,
- regmap_irq_get_domain(as3722->irq_data));
+ ret = devm_mfd_add_devices(&i2c->dev, -1, as3722_devs,
+ ARRAY_SIZE(as3722_devs), NULL, 0,
+ regmap_irq_get_domain(as3722->irq_data));
if (ret) {
dev_err(as3722->dev, "Failed to add MFD devices: %d\n", ret);
- goto scrub;
+ return ret;
}
device_init_wakeup(as3722->dev, true);
dev_dbg(as3722->dev, "AS3722 core driver initialized successfully\n");
return 0;
-
-scrub:
- regmap_del_irq_chip(as3722->chip_irq, as3722->irq_data);
- return ret;
-}
-
-static int as3722_i2c_remove(struct i2c_client *i2c)
-{
- struct as3722 *as3722 = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(as3722->dev);
- regmap_del_irq_chip(as3722->chip_irq, as3722->irq_data);
- return 0;
}
static int __maybe_unused as3722_i2c_suspend(struct device *dev)
.pm = &as3722_pm_ops,
},
.probe = as3722_i2c_probe,
- .remove = as3722_i2c_remove,
.id_table = as3722_i2c_id,
};
unsigned long flags;
struct asic3 *asic;
- asic = container_of(chip, struct asic3, gpio);
+ asic = gpiochip_get_data(chip);
gpio_base = ASIC3_GPIO_TO_BASE(offset);
if (gpio_base > ASIC3_GPIO_D_BASE) {
u32 mask = ASIC3_GPIO_TO_MASK(offset);
struct asic3 *asic;
- asic = container_of(chip, struct asic3, gpio);
+ asic = gpiochip_get_data(chip);
gpio_base = ASIC3_GPIO_TO_BASE(offset);
if (gpio_base > ASIC3_GPIO_D_BASE) {
unsigned long flags;
struct asic3 *asic;
- asic = container_of(chip, struct asic3, gpio);
+ asic = gpiochip_get_data(chip);
gpio_base = ASIC3_GPIO_TO_BASE(offset);
if (gpio_base > ASIC3_GPIO_D_BASE) {
static int asic3_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
{
- struct asic3 *asic = container_of(chip, struct asic3, gpio);
+ struct asic3 *asic = gpiochip_get_data(chip);
return asic->irq_base + offset;
}
alt_reg[i]);
}
- return gpiochip_add(&asic->gpio);
+ return gpiochip_add_data(&asic->gpio, asic);
}
static int asic3_gpio_remove(struct platform_device *pdev)
dev_set_drvdata(dev, hlcdc);
- return mfd_add_devices(dev, -1, atmel_hlcdc_cells,
- ARRAY_SIZE(atmel_hlcdc_cells),
- NULL, 0, NULL);
-}
-
-static int atmel_hlcdc_remove(struct platform_device *pdev)
-{
- mfd_remove_devices(&pdev->dev);
-
- return 0;
+ return devm_mfd_add_devices(dev, -1, atmel_hlcdc_cells,
+ ARRAY_SIZE(atmel_hlcdc_cells),
+ NULL, 0, NULL);
}
static const struct of_device_id atmel_hlcdc_match[] = {
static struct platform_driver atmel_hlcdc_driver = {
.probe = atmel_hlcdc_probe,
- .remove = atmel_hlcdc_remove,
.driver = {
.name = "atmel-hlcdc",
.of_match_table = atmel_hlcdc_match,
static const struct of_device_id axp20x_rsb_of_match[] = {
{ .compatible = "x-powers,axp223", .data = (void *)AXP223_ID },
+ { .compatible = "x-powers,axp809", .data = (void *)AXP809_ID },
{ },
};
MODULE_DEVICE_TABLE(of, axp20x_rsb_of_match);
"AXP221",
"AXP223",
"AXP288",
+ "AXP809",
};
static const struct regmap_range axp152_writeable_ranges[] = {
.n_yes_ranges = ARRAY_SIZE(axp20x_volatile_ranges),
};
+/* AXP22x ranges are shared with the AXP809, as they cover the same range */
static const struct regmap_range axp22x_writeable_ranges[] = {
regmap_reg_range(AXP20X_DATACACHE(0), AXP20X_IRQ5_STATE),
regmap_reg_range(AXP20X_DCDC_MODE, AXP22X_BATLOW_THRES1),
DEFINE_RES_IRQ_NAMED(AXP152_IRQ_PEK_FAL_EDGE, "PEK_DBF"),
};
+static struct resource axp20x_ac_power_supply_resources[] = {
+ DEFINE_RES_IRQ_NAMED(AXP20X_IRQ_ACIN_PLUGIN, "ACIN_PLUGIN"),
+ DEFINE_RES_IRQ_NAMED(AXP20X_IRQ_ACIN_REMOVAL, "ACIN_REMOVAL"),
+ DEFINE_RES_IRQ_NAMED(AXP20X_IRQ_ACIN_OVER_V, "ACIN_OVER_V"),
+};
+
static struct resource axp20x_pek_resources[] = {
{
.name = "PEK_DBR",
},
};
+static struct resource axp809_pek_resources[] = {
+ {
+ .name = "PEK_DBR",
+ .start = AXP809_IRQ_PEK_RIS_EDGE,
+ .end = AXP809_IRQ_PEK_RIS_EDGE,
+ .flags = IORESOURCE_IRQ,
+ }, {
+ .name = "PEK_DBF",
+ .start = AXP809_IRQ_PEK_FAL_EDGE,
+ .end = AXP809_IRQ_PEK_FAL_EDGE,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
static const struct regmap_config axp152_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
INIT_REGMAP_IRQ(AXP288, BC_USB_CHNG, 5, 1),
};
+static const struct regmap_irq axp809_regmap_irqs[] = {
+ INIT_REGMAP_IRQ(AXP809, ACIN_OVER_V, 0, 7),
+ INIT_REGMAP_IRQ(AXP809, ACIN_PLUGIN, 0, 6),
+ INIT_REGMAP_IRQ(AXP809, ACIN_REMOVAL, 0, 5),
+ INIT_REGMAP_IRQ(AXP809, VBUS_OVER_V, 0, 4),
+ INIT_REGMAP_IRQ(AXP809, VBUS_PLUGIN, 0, 3),
+ INIT_REGMAP_IRQ(AXP809, VBUS_REMOVAL, 0, 2),
+ INIT_REGMAP_IRQ(AXP809, VBUS_V_LOW, 0, 1),
+ INIT_REGMAP_IRQ(AXP809, BATT_PLUGIN, 1, 7),
+ INIT_REGMAP_IRQ(AXP809, BATT_REMOVAL, 1, 6),
+ INIT_REGMAP_IRQ(AXP809, BATT_ENT_ACT_MODE, 1, 5),
+ INIT_REGMAP_IRQ(AXP809, BATT_EXIT_ACT_MODE, 1, 4),
+ INIT_REGMAP_IRQ(AXP809, CHARG, 1, 3),
+ INIT_REGMAP_IRQ(AXP809, CHARG_DONE, 1, 2),
+ INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_HIGH, 2, 7),
+ INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_HIGH_END, 2, 6),
+ INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_LOW, 2, 5),
+ INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_LOW_END, 2, 4),
+ INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_HIGH, 2, 3),
+ INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_HIGH_END, 2, 2),
+ INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_LOW, 2, 1),
+ INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_LOW_END, 2, 0),
+ INIT_REGMAP_IRQ(AXP809, DIE_TEMP_HIGH, 3, 7),
+ INIT_REGMAP_IRQ(AXP809, LOW_PWR_LVL1, 3, 1),
+ INIT_REGMAP_IRQ(AXP809, LOW_PWR_LVL2, 3, 0),
+ INIT_REGMAP_IRQ(AXP809, TIMER, 4, 7),
+ INIT_REGMAP_IRQ(AXP809, PEK_RIS_EDGE, 4, 6),
+ INIT_REGMAP_IRQ(AXP809, PEK_FAL_EDGE, 4, 5),
+ INIT_REGMAP_IRQ(AXP809, PEK_SHORT, 4, 4),
+ INIT_REGMAP_IRQ(AXP809, PEK_LONG, 4, 3),
+ INIT_REGMAP_IRQ(AXP809, PEK_OVER_OFF, 4, 2),
+ INIT_REGMAP_IRQ(AXP809, GPIO1_INPUT, 4, 1),
+ INIT_REGMAP_IRQ(AXP809, GPIO0_INPUT, 4, 0),
+};
+
static const struct regmap_irq_chip axp152_regmap_irq_chip = {
.name = "axp152_irq_chip",
.status_base = AXP152_IRQ1_STATE,
};
+static const struct regmap_irq_chip axp809_regmap_irq_chip = {
+ .name = "axp809",
+ .status_base = AXP20X_IRQ1_STATE,
+ .ack_base = AXP20X_IRQ1_STATE,
+ .mask_base = AXP20X_IRQ1_EN,
+ .mask_invert = true,
+ .init_ack_masked = true,
+ .irqs = axp809_regmap_irqs,
+ .num_irqs = ARRAY_SIZE(axp809_regmap_irqs),
+ .num_regs = 5,
+};
+
static struct mfd_cell axp20x_cells[] = {
{
.name = "axp20x-pek",
.resources = axp20x_pek_resources,
}, {
.name = "axp20x-regulator",
+ }, {
+ .name = "axp20x-ac-power-supply",
+ .of_compatible = "x-powers,axp202-ac-power-supply",
+ .num_resources = ARRAY_SIZE(axp20x_ac_power_supply_resources),
+ .resources = axp20x_ac_power_supply_resources,
}, {
.name = "axp20x-usb-power-supply",
.of_compatible = "x-powers,axp202-usb-power-supply",
},
};
+static struct mfd_cell axp809_cells[] = {
+ {
+ .name = "axp20x-pek",
+ .num_resources = ARRAY_SIZE(axp809_pek_resources),
+ .resources = axp809_pek_resources,
+ }, {
+ .name = "axp20x-regulator",
+ },
+};
+
static struct axp20x_dev *axp20x_pm_power_off;
static void axp20x_power_off(void)
{
axp20x->regmap_cfg = &axp288_regmap_config;
axp20x->regmap_irq_chip = &axp288_regmap_irq_chip;
break;
+ case AXP809_ID:
+ axp20x->nr_cells = ARRAY_SIZE(axp809_cells);
+ axp20x->cells = axp809_cells;
+ axp20x->regmap_cfg = &axp22x_regmap_config;
+ axp20x->regmap_irq_chip = &axp809_regmap_irq_chip;
+ break;
default:
dev_err(dev, "unsupported AXP20X ID %lu\n", axp20x->variant);
return -EINVAL;
goto err;
}
- ret = mfd_add_devices(&i2c_pri->dev, -1, bcm590xx_devs,
- ARRAY_SIZE(bcm590xx_devs), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&i2c_pri->dev, -1, bcm590xx_devs,
+ ARRAY_SIZE(bcm590xx_devs), NULL, 0, NULL);
if (ret < 0) {
dev_err(&i2c_pri->dev, "failed to add sub-devices: %d\n", ret);
goto err;
return ret;
}
-static int bcm590xx_i2c_remove(struct i2c_client *i2c)
-{
- mfd_remove_devices(&i2c->dev);
- return 0;
-}
-
static const struct of_device_id bcm590xx_of_match[] = {
{ .compatible = "brcm,bcm59056" },
{ }
.of_match_table = of_match_ptr(bcm590xx_of_match),
},
.probe = bcm590xx_i2c_probe,
- .remove = bcm590xx_i2c_remove,
.id_table = bcm590xx_i2c_id,
};
module_i2c_driver(bcm590xx_i2c_driver);
#define DA9063_REG_EVENT_B_OFFSET 1
#define DA9063_REG_EVENT_C_OFFSET 2
#define DA9063_REG_EVENT_D_OFFSET 3
-#define EVENTS_BUF_LEN 4
-
-static const u8 mask_events_buf[] = { [0 ... (EVENTS_BUF_LEN - 1)] = ~0 };
-
-struct da9063_irq_data {
- u16 reg;
- u8 mask;
-};
static const struct regmap_irq da9063_irqs[] = {
/* DA9063 event A register */
* This driver was tested with firmware revision A4.
*/
-#if defined(CONFIG_INPUT_DM355EVM) || defined(CONFIG_INPUT_DM355EVM_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_DM355EVM)
#define msp_has_keyboard() true
#else
#define msp_has_keyboard() false
#endif
-#if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE)
+#if IS_ENABLED(CONFIG_LEDS_GPIO)
#define msp_has_leds() true
#else
#define msp_has_leds() false
#endif
-#if defined(CONFIG_RTC_DRV_DM355EVM) || defined(CONFIG_RTC_DRV_DM355EVM_MODULE)
+#if IS_ENABLED(CONFIG_RTC_DRV_DM355EVM)
#define msp_has_rtc() true
#else
#define msp_has_rtc() false
#endif
-#if defined(CONFIG_VIDEO_TVP514X) || defined(CONFIG_VIDEO_TVP514X_MODULE)
+#if IS_ENABLED(CONFIG_VIDEO_TVP514X)
#define msp_has_tvp() true
#else
#define msp_has_tvp() false
/* GPIO-ish stuff */
dm355evm_msp_gpio.parent = &client->dev;
- status = gpiochip_add(&dm355evm_msp_gpio);
+ status = gpiochip_add_data(&dm355evm_msp_gpio, NULL);
if (status < 0)
return status;
platform_set_drvdata(pdev, pmic);
- ret = mfd_add_devices(&pdev->dev, 0, hi6421_devs,
- ARRAY_SIZE(hi6421_devs), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&pdev->dev, 0, hi6421_devs,
+ ARRAY_SIZE(hi6421_devs), NULL, 0, NULL);
if (ret) {
dev_err(&pdev->dev, "add mfd devices failed: %d\n", ret);
return ret;
return 0;
}
-static int hi6421_pmic_remove(struct platform_device *pdev)
-{
- mfd_remove_devices(&pdev->dev);
-
- return 0;
-}
-
static const struct of_device_id of_hi6421_pmic_match_tbl[] = {
{ .compatible = "hisilicon,hi6421-pmic", },
{ },
.of_match_table = of_hi6421_pmic_match_tbl,
},
.probe = hi6421_pmic_probe,
- .remove = hi6421_pmic_remove,
};
module_platform_driver(hi6421_pmic_driver);
--- /dev/null
+/*
+ * Device driver for MFD hi655x PMIC
+ *
+ * Copyright (c) 2016 Hisilicon.
+ *
+ * Authors:
+ * Chen Feng <puck.chen@hisilicon.com>
+ * Fei Wang <w.f@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/hi655x-pmic.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+static const struct mfd_cell hi655x_pmic_devs[] = {
+ { .name = "hi655x-regulator", },
+};
+
+static const struct regmap_irq hi655x_irqs[] = {
+ { .reg_offset = 0, .mask = OTMP_D1R_INT },
+ { .reg_offset = 0, .mask = VSYS_2P5_R_INT },
+ { .reg_offset = 0, .mask = VSYS_UV_D3R_INT },
+ { .reg_offset = 0, .mask = VSYS_6P0_D200UR_INT },
+ { .reg_offset = 0, .mask = PWRON_D4SR_INT },
+ { .reg_offset = 0, .mask = PWRON_D20F_INT },
+ { .reg_offset = 0, .mask = PWRON_D20R_INT },
+ { .reg_offset = 0, .mask = RESERVE_INT },
+};
+
+static const struct regmap_irq_chip hi655x_irq_chip = {
+ .name = "hi655x-pmic",
+ .irqs = hi655x_irqs,
+ .num_regs = 1,
+ .num_irqs = ARRAY_SIZE(hi655x_irqs),
+ .status_base = HI655X_IRQ_STAT_BASE,
+ .mask_base = HI655X_IRQ_MASK_BASE,
+};
+
+static struct regmap_config hi655x_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = HI655X_STRIDE,
+ .val_bits = 8,
+ .max_register = HI655X_BUS_ADDR(0xFFF),
+};
+
+static void hi655x_local_irq_clear(struct regmap *map)
+{
+ int i;
+
+ regmap_write(map, HI655X_ANA_IRQM_BASE, HI655X_IRQ_CLR);
+ for (i = 0; i < HI655X_IRQ_ARRAY; i++) {
+ regmap_write(map, HI655X_IRQ_STAT_BASE + i * HI655X_STRIDE,
+ HI655X_IRQ_CLR);
+ }
+}
+
+static int hi655x_pmic_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct hi655x_pmic *pmic;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ void __iomem *base;
+
+ pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL);
+ if (!pmic)
+ return -ENOMEM;
+ pmic->dev = dev;
+
+ pmic->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!pmic->res)
+ return -ENOENT;
+
+ base = devm_ioremap_resource(dev, pmic->res);
+ if (!base)
+ return -ENOMEM;
+
+ pmic->regmap = devm_regmap_init_mmio_clk(dev, NULL, base,
+ &hi655x_regmap_config);
+
+ regmap_read(pmic->regmap, HI655X_BUS_ADDR(HI655X_VER_REG), &pmic->ver);
+ if ((pmic->ver < PMU_VER_START) || (pmic->ver > PMU_VER_END)) {
+ dev_warn(dev, "PMU version %d unsupported\n", pmic->ver);
+ return -EINVAL;
+ }
+
+ hi655x_local_irq_clear(pmic->regmap);
+
+ pmic->gpio = of_get_named_gpio(np, "pmic-gpios", 0);
+ if (!gpio_is_valid(pmic->gpio)) {
+ dev_err(dev, "Failed to get the pmic-gpios\n");
+ return -ENODEV;
+ }
+
+ ret = devm_gpio_request_one(dev, pmic->gpio, GPIOF_IN,
+ "hi655x_pmic_irq");
+ if (ret < 0) {
+ dev_err(dev, "Failed to request gpio %d ret = %d\n",
+ pmic->gpio, ret);
+ return ret;
+ }
+
+ ret = regmap_add_irq_chip(pmic->regmap, gpio_to_irq(pmic->gpio),
+ IRQF_TRIGGER_LOW | IRQF_NO_SUSPEND, 0,
+ &hi655x_irq_chip, &pmic->irq_data);
+ if (ret) {
+ dev_err(dev, "Failed to obtain 'hi655x_pmic_irq' %d\n", ret);
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, pmic);
+
+ ret = mfd_add_devices(dev, PLATFORM_DEVID_AUTO, hi655x_pmic_devs,
+ ARRAY_SIZE(hi655x_pmic_devs), NULL, 0, NULL);
+ if (ret) {
+ dev_err(dev, "Failed to register device %d\n", ret);
+ regmap_del_irq_chip(gpio_to_irq(pmic->gpio), pmic->irq_data);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hi655x_pmic_remove(struct platform_device *pdev)
+{
+ struct hi655x_pmic *pmic = platform_get_drvdata(pdev);
+
+ regmap_del_irq_chip(gpio_to_irq(pmic->gpio), pmic->irq_data);
+ mfd_remove_devices(&pdev->dev);
+ return 0;
+}
+
+static const struct of_device_id hi655x_pmic_match[] = {
+ { .compatible = "hisilicon,hi655x-pmic", },
+ {},
+};
+
+static struct platform_driver hi655x_pmic_driver = {
+ .driver = {
+ .name = "hi655x-pmic",
+ .of_match_table = of_match_ptr(hi655x_pmic_match),
+ },
+ .probe = hi655x_pmic_probe,
+ .remove = hi655x_pmic_remove,
+};
+module_platform_driver(hi655x_pmic_driver);
+
+MODULE_AUTHOR("Chen Feng <puck.chen@hisilicon.com>");
+MODULE_DESCRIPTION("Hisilicon hi655x PMIC driver");
+MODULE_LICENSE("GPL v2");
pr_debug("egpio_get_value(%d)\n", chip->base + offset);
- egpio = container_of(chip, struct egpio_chip, chip);
+ egpio = gpiochip_get_data(chip);
ei = dev_get_drvdata(egpio->dev);
bit = egpio_bit(ei, offset);
reg = egpio->reg_start + egpio_pos(ei, offset);
{
struct egpio_chip *egpio;
- egpio = container_of(chip, struct egpio_chip, chip);
+ egpio = gpiochip_get_data(chip);
return test_bit(offset, &egpio->is_out) ? -EINVAL : 0;
}
pr_debug("egpio_set(%s, %d(%d), %d)\n",
chip->label, offset, offset+chip->base, value);
- egpio = container_of(chip, struct egpio_chip, chip);
+ egpio = gpiochip_get_data(chip);
ei = dev_get_drvdata(egpio->dev);
bit = egpio_bit(ei, offset);
pos = egpio_pos(ei, offset);
{
struct egpio_chip *egpio;
- egpio = container_of(chip, struct egpio_chip, chip);
+ egpio = gpiochip_get_data(chip);
if (test_bit(offset, &egpio->is_out)) {
egpio_set(chip, offset, value);
return 0;
chip->base = pdata->chip[i].gpio_base;
chip->ngpio = pdata->chip[i].num_gpios;
- gpiochip_add(chip);
+ gpiochip_add_data(chip, &ei->chip[i]);
}
/* Set initial pin values */
static void htcpld_chip_set(struct gpio_chip *chip, unsigned offset, int val)
{
struct i2c_client *client;
- struct htcpld_chip *chip_data =
- container_of(chip, struct htcpld_chip, chip_out);
+ struct htcpld_chip *chip_data = gpiochip_get_data(chip);
unsigned long flags;
client = chip_data->client;
static int htcpld_chip_get(struct gpio_chip *chip, unsigned offset)
{
- struct htcpld_chip *chip_data;
+ struct htcpld_chip *chip_data = gpiochip_get_data(chip);
u8 cache;
if (!strncmp(chip->label, "htcpld-out", 10)) {
- chip_data = container_of(chip, struct htcpld_chip, chip_out);
cache = chip_data->cache_out;
} else if (!strncmp(chip->label, "htcpld-in", 9)) {
- chip_data = container_of(chip, struct htcpld_chip, chip_in);
cache = chip_data->cache_in;
} else
return -EINVAL;
static int htcpld_chip_to_irq(struct gpio_chip *chip, unsigned offset)
{
- struct htcpld_chip *chip_data;
-
- chip_data = container_of(chip, struct htcpld_chip, chip_in);
+ struct htcpld_chip *chip_data = gpiochip_get_data(chip);
if (offset < chip_data->nirqs)
return chip_data->irq_start + offset;
gpio_chip->ngpio = plat_chip_data->num_gpios;
/* Add the GPIO chips */
- ret = gpiochip_add(&(chip->chip_out));
+ ret = gpiochip_add_data(&(chip->chip_out), chip);
if (ret) {
dev_warn(dev, "Unable to register output GPIOs for 0x%x: %d\n",
plat_chip_data->addr, ret);
return ret;
}
- ret = gpiochip_add(&(chip->chip_in));
+ ret = gpiochip_add_data(&(chip->chip_in), chip);
if (ret) {
dev_warn(dev, "Unable to register input GPIOs for 0x%x: %d\n",
plat_chip_data->addr, ret);
#define LPSS_DEV_SIZE 0x200
#define LPSS_PRIV_OFFSET 0x200
#define LPSS_PRIV_SIZE 0x100
+#define LPSS_PRIV_REG_COUNT (LPSS_PRIV_SIZE / 4)
#define LPSS_IDMA64_OFFSET 0x800
#define LPSS_IDMA64_SIZE 0x800
struct mfd_cell *cell;
struct device *dev;
void __iomem *priv;
+ u32 priv_ctx[LPSS_PRIV_REG_COUNT];
int devid;
u32 caps;
u32 active_ltr;
return 0;
/* Root clock */
- clk = clk_register_fixed_rate(NULL, dev_name(lpss->dev), NULL,
- CLK_IS_ROOT, lpss->info->clk_rate);
+ clk = clk_register_fixed_rate(NULL, dev_name(lpss->dev), NULL, 0,
+ lpss->info->clk_rate);
if (IS_ERR(clk))
return PTR_ERR(clk);
int intel_lpss_suspend(struct device *dev)
{
+ struct intel_lpss *lpss = dev_get_drvdata(dev);
+ unsigned int i;
+
+ /* Save device context */
+ for (i = 0; i < LPSS_PRIV_REG_COUNT; i++)
+ lpss->priv_ctx[i] = readl(lpss->priv + i * 4);
+
+ /* Put the device into reset state */
+ writel(0, lpss->priv + LPSS_PRIV_RESETS);
+
return 0;
}
EXPORT_SYMBOL_GPL(intel_lpss_suspend);
int intel_lpss_resume(struct device *dev)
{
struct intel_lpss *lpss = dev_get_drvdata(dev);
+ unsigned int i;
- intel_lpss_init_dev(lpss);
+ intel_lpss_deassert_reset(lpss);
+
+ /* Restore device context */
+ for (i = 0; i < LPSS_PRIV_REG_COUNT; i++)
+ writel(lpss->priv_ctx[i], lpss->priv + i * 4);
return 0;
}
#define INTEL_QUARK_I2C_CLK_HZ 33000000
struct intel_quark_mfd {
- struct pci_dev *pdev;
+ struct device *dev;
struct clk *i2c_clk;
struct clk_lookup *i2c_clk_lookup;
};
};
MODULE_DEVICE_TABLE(pci, intel_quark_mfd_ids);
-static int intel_quark_register_i2c_clk(struct intel_quark_mfd *quark_mfd)
+static int intel_quark_register_i2c_clk(struct device *dev)
{
- struct pci_dev *pdev = quark_mfd->pdev;
+ struct intel_quark_mfd *quark_mfd = dev_get_drvdata(dev);
struct clk *i2c_clk;
- i2c_clk = clk_register_fixed_rate(&pdev->dev,
+ i2c_clk = clk_register_fixed_rate(dev,
INTEL_QUARK_I2C_CONTROLLER_CLK, NULL,
- CLK_IS_ROOT, INTEL_QUARK_I2C_CLK_HZ);
+ 0, INTEL_QUARK_I2C_CLK_HZ);
if (IS_ERR(i2c_clk))
return PTR_ERR(i2c_clk);
INTEL_QUARK_I2C_CONTROLLER_CLK);
if (!quark_mfd->i2c_clk_lookup) {
- dev_err(&pdev->dev, "Fixed clk register failed\n");
+ clk_unregister(quark_mfd->i2c_clk);
+ dev_err(dev, "Fixed clk register failed\n");
return -ENOMEM;
}
return 0;
}
-static void intel_quark_unregister_i2c_clk(struct pci_dev *pdev)
+static void intel_quark_unregister_i2c_clk(struct device *dev)
{
- struct intel_quark_mfd *quark_mfd = dev_get_drvdata(&pdev->dev);
+ struct intel_quark_mfd *quark_mfd = dev_get_drvdata(dev);
- if (!quark_mfd->i2c_clk || !quark_mfd->i2c_clk_lookup)
+ if (!quark_mfd->i2c_clk_lookup)
return;
clkdev_drop(quark_mfd->i2c_clk_lookup);
quark_mfd = devm_kzalloc(&pdev->dev, sizeof(*quark_mfd), GFP_KERNEL);
if (!quark_mfd)
return -ENOMEM;
- quark_mfd->pdev = pdev;
- ret = intel_quark_register_i2c_clk(quark_mfd);
+ quark_mfd->dev = &pdev->dev;
+ dev_set_drvdata(&pdev->dev, quark_mfd);
+
+ ret = intel_quark_register_i2c_clk(&pdev->dev);
if (ret)
return ret;
- dev_set_drvdata(&pdev->dev, quark_mfd);
-
ret = intel_quark_i2c_setup(pdev, &intel_quark_mfd_cells[1]);
if (ret)
- return ret;
+ goto err_unregister_i2c_clk;
ret = intel_quark_gpio_setup(pdev, &intel_quark_mfd_cells[0]);
if (ret)
- return ret;
+ goto err_unregister_i2c_clk;
+
+ ret = mfd_add_devices(&pdev->dev, 0, intel_quark_mfd_cells,
+ ARRAY_SIZE(intel_quark_mfd_cells), NULL, 0,
+ NULL);
+ if (ret)
+ goto err_unregister_i2c_clk;
+
+ return 0;
- return mfd_add_devices(&pdev->dev, 0, intel_quark_mfd_cells,
- ARRAY_SIZE(intel_quark_mfd_cells), NULL, 0,
- NULL);
+err_unregister_i2c_clk:
+ intel_quark_unregister_i2c_clk(&pdev->dev);
+ return ret;
}
static void intel_quark_mfd_remove(struct pci_dev *pdev)
{
- intel_quark_unregister_i2c_clk(pdev);
+ intel_quark_unregister_i2c_clk(&pdev->dev);
mfd_remove_devices(&pdev->dev);
}
.table = {
/* Panel EN/DISABLE */
GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH),
+ { },
},
};
lp3943->mux_cfg = lp3943_mux_cfg;
i2c_set_clientdata(cl, lp3943);
- return mfd_add_devices(dev, -1, lp3943_devs, ARRAY_SIZE(lp3943_devs),
- NULL, 0, NULL);
-}
-
-static int lp3943_remove(struct i2c_client *cl)
-{
- struct lp3943 *lp3943 = i2c_get_clientdata(cl);
-
- mfd_remove_devices(lp3943->dev);
- return 0;
+ return devm_mfd_add_devices(dev, -1, lp3943_devs,
+ ARRAY_SIZE(lp3943_devs),
+ NULL, 0, NULL);
}
static const struct i2c_device_id lp3943_ids[] = {
static struct i2c_driver lp3943_driver = {
.probe = lp3943_probe,
- .remove = lp3943_remove,
.driver = {
.name = "lp3943",
.of_match_table = of_match_ptr(lp3943_of_match),
struct lp8788_irq_data *irqd = ptr;
struct lp8788 *lp = irqd->lp;
u8 status[NUM_REGS], addr, mask;
- bool handled;
+ bool handled = false;
int i;
if (lp8788_read_multi_bytes(lp, LP8788_INT_1, status, NUM_REGS))
--- /dev/null
+/*
+ * Maxim MAX77620 MFD Driver
+ *
+ * Copyright (C) 2016 NVIDIA CORPORATION. All rights reserved.
+ *
+ * Author:
+ * Laxman Dewangan <ldewangan@nvidia.com>
+ * Chaitanya Bandi <bandik@nvidia.com>
+ * Mallikarjun Kasoju <mkasoju@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/****************** Teminology used in driver ********************
+ * Here are some terminology used from datasheet for quick reference:
+ * Flexible Power Sequence (FPS):
+ * The Flexible Power Sequencer (FPS) allows each regulator to power up under
+ * hardware or software control. Additionally, each regulator can power on
+ * independently or among a group of other regulators with an adjustable
+ * power-up and power-down delays (sequencing). GPIO1, GPIO2, and GPIO3 can
+ * be programmed to be part of a sequence allowing external regulators to be
+ * sequenced along with internal regulators. 32KHz clock can be programmed to
+ * be part of a sequence.
+ * There is 3 FPS confguration registers and all resources are configured to
+ * any of these FPS or no FPS.
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/max77620.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+static struct resource gpio_resources[] = {
+ DEFINE_RES_IRQ(MAX77620_IRQ_TOP_GPIO),
+};
+
+static struct resource power_resources[] = {
+ DEFINE_RES_IRQ(MAX77620_IRQ_LBT_MBATLOW),
+};
+
+static struct resource rtc_resources[] = {
+ DEFINE_RES_IRQ(MAX77620_IRQ_TOP_RTC),
+};
+
+static struct resource thermal_resources[] = {
+ DEFINE_RES_IRQ(MAX77620_IRQ_LBT_TJALRM1),
+ DEFINE_RES_IRQ(MAX77620_IRQ_LBT_TJALRM2),
+};
+
+static const struct regmap_irq max77620_top_irqs[] = {
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_GLBL, 0, MAX77620_IRQ_TOP_GLBL_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_SD, 0, MAX77620_IRQ_TOP_SD_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_LDO, 0, MAX77620_IRQ_TOP_LDO_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_GPIO, 0, MAX77620_IRQ_TOP_GPIO_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_RTC, 0, MAX77620_IRQ_TOP_RTC_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_32K, 0, MAX77620_IRQ_TOP_32K_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_TOP_ONOFF, 0, MAX77620_IRQ_TOP_ONOFF_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_LBT_MBATLOW, 1, MAX77620_IRQ_LBM_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_LBT_TJALRM1, 1, MAX77620_IRQ_TJALRM1_MASK),
+ REGMAP_IRQ_REG(MAX77620_IRQ_LBT_TJALRM2, 1, MAX77620_IRQ_TJALRM2_MASK),
+};
+
+static const struct mfd_cell max77620_children[] = {
+ { .name = "max77620-pinctrl", },
+ { .name = "max77620-clock", },
+ { .name = "max77620-pmic", },
+ { .name = "max77620-watchdog", },
+ {
+ .name = "max77620-gpio",
+ .resources = gpio_resources,
+ .num_resources = ARRAY_SIZE(gpio_resources),
+ }, {
+ .name = "max77620-rtc",
+ .resources = rtc_resources,
+ .num_resources = ARRAY_SIZE(rtc_resources),
+ }, {
+ .name = "max77620-power",
+ .resources = power_resources,
+ .num_resources = ARRAY_SIZE(power_resources),
+ }, {
+ .name = "max77620-thermal",
+ .resources = thermal_resources,
+ .num_resources = ARRAY_SIZE(thermal_resources),
+ },
+};
+
+static const struct mfd_cell max20024_children[] = {
+ { .name = "max20024-pinctrl", },
+ { .name = "max77620-clock", },
+ { .name = "max20024-pmic", },
+ { .name = "max77620-watchdog", },
+ {
+ .name = "max77620-gpio",
+ .resources = gpio_resources,
+ .num_resources = ARRAY_SIZE(gpio_resources),
+ }, {
+ .name = "max77620-rtc",
+ .resources = rtc_resources,
+ .num_resources = ARRAY_SIZE(rtc_resources),
+ }, {
+ .name = "max20024-power",
+ .resources = power_resources,
+ .num_resources = ARRAY_SIZE(power_resources),
+ },
+};
+
+static struct regmap_irq_chip max77620_top_irq_chip = {
+ .name = "max77620-top",
+ .irqs = max77620_top_irqs,
+ .num_irqs = ARRAY_SIZE(max77620_top_irqs),
+ .num_regs = 2,
+ .status_base = MAX77620_REG_IRQTOP,
+ .mask_base = MAX77620_REG_IRQTOPM,
+};
+
+static const struct regmap_range max77620_readable_ranges[] = {
+ regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_DVSSD4),
+};
+
+static const struct regmap_access_table max77620_readable_table = {
+ .yes_ranges = max77620_readable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(max77620_readable_ranges),
+};
+
+static const struct regmap_range max20024_readable_ranges[] = {
+ regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_DVSSD4),
+ regmap_reg_range(MAX20024_REG_MAX_ADD, MAX20024_REG_MAX_ADD),
+};
+
+static const struct regmap_access_table max20024_readable_table = {
+ .yes_ranges = max20024_readable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(max20024_readable_ranges),
+};
+
+static const struct regmap_range max77620_writable_ranges[] = {
+ regmap_reg_range(MAX77620_REG_CNFGGLBL1, MAX77620_REG_DVSSD4),
+};
+
+static const struct regmap_access_table max77620_writable_table = {
+ .yes_ranges = max77620_writable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(max77620_writable_ranges),
+};
+
+static const struct regmap_range max77620_cacheable_ranges[] = {
+ regmap_reg_range(MAX77620_REG_SD0_CFG, MAX77620_REG_LDO_CFG3),
+ regmap_reg_range(MAX77620_REG_FPS_CFG0, MAX77620_REG_FPS_SD3),
+};
+
+static const struct regmap_access_table max77620_volatile_table = {
+ .no_ranges = max77620_cacheable_ranges,
+ .n_no_ranges = ARRAY_SIZE(max77620_cacheable_ranges),
+};
+
+static const struct regmap_config max77620_regmap_config = {
+ .name = "power-slave",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX77620_REG_DVSSD4 + 1,
+ .cache_type = REGCACHE_RBTREE,
+ .rd_table = &max77620_readable_table,
+ .wr_table = &max77620_writable_table,
+ .volatile_table = &max77620_volatile_table,
+};
+
+static const struct regmap_config max20024_regmap_config = {
+ .name = "power-slave",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX20024_REG_MAX_ADD + 1,
+ .cache_type = REGCACHE_RBTREE,
+ .rd_table = &max20024_readable_table,
+ .wr_table = &max77620_writable_table,
+ .volatile_table = &max77620_volatile_table,
+};
+
+/* max77620_get_fps_period_reg_value: Get FPS bit field value from
+ * requested periods.
+ * MAX77620 supports the FPS period of 40, 80, 160, 320, 540, 1280, 2560
+ * and 5120 microseconds. MAX20024 supports the FPS period of 20, 40, 80,
+ * 160, 320, 540, 1280 and 2560 microseconds.
+ * The FPS register has 3 bits field to set the FPS period as
+ * bits max77620 max20024
+ * 000 40 20
+ * 001 80 40
+ * :::
+*/
+static int max77620_get_fps_period_reg_value(struct max77620_chip *chip,
+ int tperiod)
+{
+ int fps_min_period;
+ int i;
+
+ switch (chip->chip_id) {
+ case MAX20024:
+ fps_min_period = MAX20024_FPS_PERIOD_MIN_US;
+ break;
+ case MAX77620:
+ fps_min_period = MAX77620_FPS_PERIOD_MIN_US;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 7; i++) {
+ if (fps_min_period >= tperiod)
+ return i;
+ fps_min_period *= 2;
+ }
+
+ return i;
+}
+
+/* max77620_config_fps: Configure FPS configuration registers
+ * based on platform specific information.
+ */
+static int max77620_config_fps(struct max77620_chip *chip,
+ struct device_node *fps_np)
+{
+ struct device *dev = chip->dev;
+ unsigned int mask = 0, config = 0;
+ u32 fps_max_period;
+ u32 param_val;
+ int tperiod, fps_id;
+ int ret;
+ char fps_name[10];
+
+ switch (chip->chip_id) {
+ case MAX20024:
+ fps_max_period = MAX20024_FPS_PERIOD_MAX_US;
+ break;
+ case MAX77620:
+ fps_max_period = MAX77620_FPS_PERIOD_MAX_US;
+ default:
+ return -EINVAL;
+ }
+
+ for (fps_id = 0; fps_id < MAX77620_FPS_COUNT; fps_id++) {
+ sprintf(fps_name, "fps%d", fps_id);
+ if (!strcmp(fps_np->name, fps_name))
+ break;
+ }
+
+ if (fps_id == MAX77620_FPS_COUNT) {
+ dev_err(dev, "FPS node name %s is not valid\n", fps_np->name);
+ return -EINVAL;
+ }
+
+ ret = of_property_read_u32(fps_np, "maxim,shutdown-fps-time-period-us",
+ ¶m_val);
+ if (!ret) {
+ mask |= MAX77620_FPS_TIME_PERIOD_MASK;
+ chip->shutdown_fps_period[fps_id] = min(param_val,
+ fps_max_period);
+ tperiod = max77620_get_fps_period_reg_value(chip,
+ chip->shutdown_fps_period[fps_id]);
+ config |= tperiod << MAX77620_FPS_TIME_PERIOD_SHIFT;
+ }
+
+ ret = of_property_read_u32(fps_np, "maxim,suspend-fps-time-period-us",
+ ¶m_val);
+ if (!ret)
+ chip->suspend_fps_period[fps_id] = min(param_val,
+ fps_max_period);
+
+ ret = of_property_read_u32(fps_np, "maxim,fps-event-source",
+ ¶m_val);
+ if (!ret) {
+ if (param_val > 2) {
+ dev_err(dev, "FPS%d event-source invalid\n", fps_id);
+ return -EINVAL;
+ }
+ mask |= MAX77620_FPS_EN_SRC_MASK;
+ config |= param_val << MAX77620_FPS_EN_SRC_SHIFT;
+ if (param_val == 2) {
+ mask |= MAX77620_FPS_ENFPS_SW_MASK;
+ config |= MAX77620_FPS_ENFPS_SW;
+ }
+ }
+
+ if (!chip->sleep_enable && !chip->enable_global_lpm) {
+ ret = of_property_read_u32(fps_np,
+ "maxim,device-state-on-disabled-event",
+ ¶m_val);
+ if (!ret) {
+ if (param_val == 0)
+ chip->sleep_enable = true;
+ else if (param_val == 1)
+ chip->enable_global_lpm = true;
+ }
+ }
+
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_FPS_CFG0 + fps_id,
+ mask, config);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update FPS CFG: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int max77620_initialise_fps(struct max77620_chip *chip)
+{
+ struct device *dev = chip->dev;
+ struct device_node *fps_np, *fps_child;
+ u8 config;
+ int fps_id;
+ int ret;
+
+ for (fps_id = 0; fps_id < MAX77620_FPS_COUNT; fps_id++) {
+ chip->shutdown_fps_period[fps_id] = -1;
+ chip->suspend_fps_period[fps_id] = -1;
+ }
+
+ fps_np = of_get_child_by_name(dev->of_node, "fps");
+ if (!fps_np)
+ goto skip_fps;
+
+ for_each_child_of_node(fps_np, fps_child) {
+ ret = max77620_config_fps(chip, fps_child);
+ if (ret < 0)
+ return ret;
+ }
+
+ config = chip->enable_global_lpm ? MAX77620_ONOFFCNFG2_SLP_LPM_MSK : 0;
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
+ MAX77620_ONOFFCNFG2_SLP_LPM_MSK, config);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update SLP_LPM: %d\n", ret);
+ return ret;
+ }
+
+skip_fps:
+ /* Enable wake on EN0 pin */
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
+ MAX77620_ONOFFCNFG2_WK_EN0,
+ MAX77620_ONOFFCNFG2_WK_EN0);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update WK_EN0: %d\n", ret);
+ return ret;
+ }
+
+ /* For MAX20024, SLPEN will be POR reset if CLRSE is b11 */
+ if ((chip->chip_id == MAX20024) && chip->sleep_enable) {
+ config = MAX77620_ONOFFCNFG1_SLPEN | MAX20024_ONOFFCNFG1_CLRSE;
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG1,
+ config, config);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update SLPEN: %d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int max77620_read_es_version(struct max77620_chip *chip)
+{
+ unsigned int val;
+ u8 cid_val[6];
+ int i;
+ int ret;
+
+ for (i = MAX77620_REG_CID0; i <= MAX77620_REG_CID5; i++) {
+ ret = regmap_read(chip->rmap, i, &val);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to read CID: %d\n", ret);
+ return ret;
+ }
+ dev_dbg(chip->dev, "CID%d: 0x%02x\n",
+ i - MAX77620_REG_CID0, val);
+ cid_val[i - MAX77620_REG_CID0] = val;
+ }
+
+ /* CID4 is OTP Version and CID5 is ES version */
+ dev_info(chip->dev, "PMIC Version OTP:0x%02X and ES:0x%X\n",
+ cid_val[4], MAX77620_CID5_DIDM(cid_val[5]));
+
+ return ret;
+}
+
+static int max77620_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ const struct regmap_config *rmap_config;
+ struct max77620_chip *chip;
+ const struct mfd_cell *mfd_cells;
+ int n_mfd_cells;
+ int ret;
+
+ chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, chip);
+ chip->dev = &client->dev;
+ chip->irq_base = -1;
+ chip->chip_irq = client->irq;
+ chip->chip_id = (enum max77620_chip_id)id->driver_data;
+
+ switch (chip->chip_id) {
+ case MAX77620:
+ mfd_cells = max77620_children;
+ n_mfd_cells = ARRAY_SIZE(max77620_children);
+ rmap_config = &max77620_regmap_config;
+ break;
+ case MAX20024:
+ mfd_cells = max20024_children;
+ n_mfd_cells = ARRAY_SIZE(max20024_children);
+ rmap_config = &max20024_regmap_config;
+ break;
+ default:
+ dev_err(chip->dev, "ChipID is invalid %d\n", chip->chip_id);
+ return -EINVAL;
+ }
+
+ chip->rmap = devm_regmap_init_i2c(client, rmap_config);
+ if (IS_ERR(chip->rmap)) {
+ ret = PTR_ERR(chip->rmap);
+ dev_err(chip->dev, "Failed to intialise regmap: %d\n", ret);
+ return ret;
+ }
+
+ ret = max77620_read_es_version(chip);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_regmap_add_irq_chip(chip->dev, chip->rmap, client->irq,
+ IRQF_ONESHOT | IRQF_SHARED,
+ chip->irq_base, &max77620_top_irq_chip,
+ &chip->top_irq_data);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to add regmap irq: %d\n", ret);
+ return ret;
+ }
+
+ ret = max77620_initialise_fps(chip);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_mfd_add_devices(chip->dev, PLATFORM_DEVID_NONE,
+ mfd_cells, n_mfd_cells, NULL, 0,
+ regmap_irq_get_domain(chip->top_irq_data));
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to add MFD children: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int max77620_set_fps_period(struct max77620_chip *chip,
+ int fps_id, int time_period)
+{
+ int period = max77620_get_fps_period_reg_value(chip, time_period);
+ int ret;
+
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_FPS_CFG0 + fps_id,
+ MAX77620_FPS_TIME_PERIOD_MASK,
+ period << MAX77620_FPS_TIME_PERIOD_SHIFT);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to update FPS period: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int max77620_i2c_suspend(struct device *dev)
+{
+ struct max77620_chip *chip = dev_get_drvdata(dev);
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned int config;
+ int fps;
+ int ret;
+
+ for (fps = 0; fps < MAX77620_FPS_COUNT; fps++) {
+ if (chip->suspend_fps_period[fps] < 0)
+ continue;
+
+ ret = max77620_set_fps_period(chip, fps,
+ chip->suspend_fps_period[fps]);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * For MAX20024: No need to configure SLPEN on suspend as
+ * it will be configured on Init.
+ */
+ if (chip->chip_id == MAX20024)
+ goto out;
+
+ config = (chip->sleep_enable) ? MAX77620_ONOFFCNFG1_SLPEN : 0;
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG1,
+ MAX77620_ONOFFCNFG1_SLPEN,
+ config);
+ if (ret < 0) {
+ dev_err(dev, "Failed to configure sleep in suspend: %d\n", ret);
+ return ret;
+ }
+
+ /* Disable WK_EN0 */
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
+ MAX77620_ONOFFCNFG2_WK_EN0, 0);
+ if (ret < 0) {
+ dev_err(dev, "Failed to configure WK_EN in suspend: %d\n", ret);
+ return ret;
+ }
+
+out:
+ disable_irq(client->irq);
+
+ return 0;
+}
+
+static int max77620_i2c_resume(struct device *dev)
+{
+ struct max77620_chip *chip = dev_get_drvdata(dev);
+ struct i2c_client *client = to_i2c_client(dev);
+ int ret;
+ int fps;
+
+ for (fps = 0; fps < MAX77620_FPS_COUNT; fps++) {
+ if (chip->shutdown_fps_period[fps] < 0)
+ continue;
+
+ ret = max77620_set_fps_period(chip, fps,
+ chip->shutdown_fps_period[fps]);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * For MAX20024: No need to configure WKEN0 on resume as
+ * it is configured on Init.
+ */
+ if (chip->chip_id == MAX20024)
+ goto out;
+
+ /* Enable WK_EN0 */
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
+ MAX77620_ONOFFCNFG2_WK_EN0,
+ MAX77620_ONOFFCNFG2_WK_EN0);
+ if (ret < 0) {
+ dev_err(dev, "Failed to configure WK_EN0 n resume: %d\n", ret);
+ return ret;
+ }
+
+out:
+ enable_irq(client->irq);
+
+ return 0;
+}
+#endif
+
+static const struct i2c_device_id max77620_id[] = {
+ {"max77620", MAX77620},
+ {"max20024", MAX20024},
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, max77620_id);
+
+static const struct dev_pm_ops max77620_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(max77620_i2c_suspend, max77620_i2c_resume)
+};
+
+static struct i2c_driver max77620_driver = {
+ .driver = {
+ .name = "max77620",
+ .pm = &max77620_pm_ops,
+ },
+ .probe = max77620_probe,
+ .id_table = max77620_id,
+};
+
+module_i2c_driver(max77620_driver);
+
+MODULE_DESCRIPTION("MAX77620/MAX20024 Multi Function Device Core Driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_AUTHOR("Chaitanya Bandi <bandik@nvidia.com>");
+MODULE_AUTHOR("Mallikarjun Kasoju <mkasoju@nvidia.com>");
+MODULE_LICENSE("GPL v2");
* max77686.c - mfd core driver for the Maxim 77686/802
*
* Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@smasung.com>
+ * Chiwoong Byun <woong.byun@samsung.com>
* Jonghwa Lee <jonghwa3.lee@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
return -ENODEV;
}
- ret = regmap_add_irq_chip(max77686->regmap, max77686->irq,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
- IRQF_SHARED, 0, irq_chip,
- &max77686->irq_data);
+ ret = devm_regmap_add_irq_chip(&i2c->dev, max77686->regmap,
+ max77686->irq,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+ IRQF_SHARED, 0, irq_chip,
+ &max77686->irq_data);
if (ret < 0) {
dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret);
return ret;
}
- ret = mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL, 0, NULL);
+ ret = devm_mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL,
+ 0, NULL);
if (ret < 0) {
dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret);
- goto err_del_irqc;
+ return ret;
}
- return 0;
-
-err_del_irqc:
- regmap_del_irq_chip(max77686->irq, max77686->irq_data);
-
- return ret;
-}
-
-static int max77686_i2c_remove(struct i2c_client *i2c)
-{
- struct max77686_dev *max77686 = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(max77686->dev);
-
- regmap_del_irq_chip(max77686->irq, max77686->irq_data);
-
return 0;
}
.of_match_table = of_match_ptr(max77686_pmic_dt_match),
},
.probe = max77686_i2c_probe,
- .remove = max77686_i2c_remove,
.id_table = max77686_i2c_id,
};
-static int __init max77686_i2c_init(void)
-{
- return i2c_add_driver(&max77686_i2c_driver);
-}
-/* init early so consumer devices can complete system boot */
-subsys_initcall(max77686_i2c_init);
-
-static void __exit max77686_i2c_exit(void)
-{
- i2c_del_driver(&max77686_i2c_driver);
-}
-module_exit(max77686_i2c_exit);
+module_i2c_driver(max77686_i2c_driver);
MODULE_DESCRIPTION("MAXIM 77686/802 multi-function core driver");
MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>");
* max77693.c - mfd core driver for the MAX 77693
*
* Copyright (C) 2012 Samsung Electronics
- * SangYoung Son <hello.son@smasung.com>
+ * SangYoung Son <hello.son@samsung.com>
*
* This program is not provided / owned by Maxim Integrated Products.
*
{ .compatible = "maxim,max77693" },
{},
};
+MODULE_DEVICE_TABLE(of, max77693_dt_match);
#endif
static struct i2c_driver max77693_i2c_driver = {
.id_table = max77693_i2c_id,
};
-static int __init max77693_i2c_init(void)
-{
- return i2c_add_driver(&max77693_i2c_driver);
-}
-/* init early so consumer devices can complete system boot */
-subsys_initcall(max77693_i2c_init);
-
-static void __exit max77693_i2c_exit(void)
-{
- i2c_del_driver(&max77693_i2c_driver);
-}
-module_exit(max77693_i2c_exit);
+module_i2c_driver(max77693_i2c_driver);
MODULE_DESCRIPTION("MAXIM 77693 multi-function core driver");
MODULE_AUTHOR("SangYoung, Son <hello.son@samsung.com>");
return ret;
}
- ret = mfd_add_devices(&client->dev, 0, menf21bmc_cell,
- ARRAY_SIZE(menf21bmc_cell), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&client->dev, 0, menf21bmc_cell,
+ ARRAY_SIZE(menf21bmc_cell), NULL, 0, NULL);
if (ret < 0) {
dev_err(&client->dev, "failed to add BMC sub-devices\n");
return ret;
return 0;
}
-static int menf21bmc_remove(struct i2c_client *client)
-{
- mfd_remove_devices(&client->dev);
- return 0;
-}
-
static const struct i2c_device_id menf21bmc_id_table[] = {
{ "menf21bmc" },
{ }
.driver.name = "menf21bmc",
.id_table = menf21bmc_id_table,
.probe = menf21bmc_probe,
- .remove = menf21bmc_remove,
};
module_i2c_driver(menf21bmc_driver);
strlcpy(ids[0].id, match->pnpid, sizeof(ids[0].id));
list_for_each_entry(child, &parent->children, node) {
- if (acpi_match_device_ids(child, ids)) {
+ if (!acpi_match_device_ids(child, ids)) {
adev = child;
break;
}
}
EXPORT_SYMBOL(mfd_remove_devices);
+static void devm_mfd_dev_release(struct device *dev, void *res)
+{
+ mfd_remove_devices(dev);
+}
+
+/**
+ * devm_mfd_add_devices - Resource managed version of mfd_add_devices()
+ *
+ * Returns 0 on success or an appropriate negative error number on failure.
+ * All child-devices of the MFD will automatically be removed when it gets
+ * unbinded.
+ */
+int devm_mfd_add_devices(struct device *dev, int id,
+ const struct mfd_cell *cells, int n_devs,
+ struct resource *mem_base,
+ int irq_base, struct irq_domain *domain)
+{
+ struct device **ptr;
+ int ret;
+
+ ptr = devres_alloc(devm_mfd_dev_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ ret = mfd_add_devices(dev, id, cells, n_devs, mem_base,
+ irq_base, domain);
+ if (ret < 0) {
+ devres_free(ptr);
+ return ret;
+ }
+
+ *ptr = dev;
+ devres_add(dev, ptr);
+
+ return ret;
+}
+EXPORT_SYMBOL(devm_mfd_add_devices);
+
int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
{
struct mfd_cell cell_entry;
ret = regmap_read(pmic->regmap, MT6397_CID, &id);
if (ret) {
dev_err(pmic->dev, "Failed to read chip id: %d\n", ret);
- goto fail_irq;
+ return ret;
}
+ pmic->irq = platform_get_irq(pdev, 0);
+ if (pmic->irq <= 0)
+ return pmic->irq;
+
switch (id & 0xff) {
case MT6323_CID_CODE:
pmic->int_con[0] = MT6323_INT_CON0;
pmic->int_con[1] = MT6323_INT_CON1;
pmic->int_status[0] = MT6323_INT_STATUS0;
pmic->int_status[1] = MT6323_INT_STATUS1;
- ret = mfd_add_devices(&pdev->dev, -1, mt6323_devs,
- ARRAY_SIZE(mt6323_devs), NULL, 0, NULL);
+ ret = mt6397_irq_init(pmic);
+ if (ret)
+ return ret;
+
+ ret = devm_mfd_add_devices(&pdev->dev, -1, mt6323_devs,
+ ARRAY_SIZE(mt6323_devs), NULL,
+ 0, NULL);
break;
case MT6397_CID_CODE:
pmic->int_con[1] = MT6397_INT_CON1;
pmic->int_status[0] = MT6397_INT_STATUS0;
pmic->int_status[1] = MT6397_INT_STATUS1;
- ret = mfd_add_devices(&pdev->dev, -1, mt6397_devs,
- ARRAY_SIZE(mt6397_devs), NULL, 0, NULL);
+ ret = mt6397_irq_init(pmic);
+ if (ret)
+ return ret;
+
+ ret = devm_mfd_add_devices(&pdev->dev, -1, mt6397_devs,
+ ARRAY_SIZE(mt6397_devs), NULL,
+ 0, NULL);
break;
default:
break;
}
- pmic->irq = platform_get_irq(pdev, 0);
- if (pmic->irq > 0) {
- ret = mt6397_irq_init(pmic);
- if (ret)
- return ret;
- }
-
-fail_irq:
if (ret) {
irq_domain_remove(pmic->irq_domain);
dev_err(&pdev->dev, "failed to add child devices: %d\n", ret);
return ret;
}
-static int mt6397_remove(struct platform_device *pdev)
-{
- mfd_remove_devices(&pdev->dev);
-
- return 0;
-}
-
static const struct of_device_id mt6397_of_match[] = {
{ .compatible = "mediatek,mt6397" },
{ .compatible = "mediatek,mt6323" },
static struct platform_driver mt6397_driver = {
.probe = mt6397_probe,
- .remove = mt6397_remove,
.driver = {
.name = "mt6397",
.of_match_table = of_match_ptr(mt6397_of_match),
if (IS_ERR(tll->ch_clk[i]))
dev_dbg(dev, "can't get clock : %s\n", clkname);
+ else
+ clk_prepare(tll->ch_clk[i]);
}
pm_runtime_put_sync(dev);
tll_dev = NULL;
spin_unlock(&tll_lock);
- for (i = 0; i < tll->nch; i++)
- if (!IS_ERR(tll->ch_clk[i]))
+ for (i = 0; i < tll->nch; i++) {
+ if (!IS_ERR(tll->ch_clk[i])) {
+ clk_unprepare(tll->ch_clk[i]);
clk_put(tll->ch_clk[i]);
+ }
+ }
pm_runtime_disable(&pdev->dev);
return 0;
if (IS_ERR(tll->ch_clk[i]))
continue;
- r = clk_prepare_enable(tll->ch_clk[i]);
+ r = clk_enable(tll->ch_clk[i]);
if (r) {
dev_err(tll_dev,
"Error enabling ch %d clock: %d\n", i, r);
for (i = 0; i < tll->nch; i++) {
if (omap_usb_mode_needs_tll(pdata->port_mode[i])) {
if (!IS_ERR(tll->ch_clk[i]))
- clk_disable_unprepare(tll->ch_clk[i]);
+ clk_disable(tll->ch_clk[i]);
}
}
irq_clear_status_flags(__irq, IRQ_NOREQUEST);
}
- ret = request_threaded_irq(irq, NULL, rc5t583_irq, IRQF_ONESHOT,
- "rc5t583", rc5t583);
+ ret = devm_request_threaded_irq(rc5t583->dev, irq, NULL, rc5t583_irq,
+ IRQF_ONESHOT, "rc5t583", rc5t583);
if (ret < 0)
dev_err(rc5t583->dev,
"Error in registering interrupt error: %d\n", ret);
return ret;
}
-
-int rc5t583_irq_exit(struct rc5t583 *rc5t583)
-{
- if (rc5t583->chip_irq)
- free_irq(rc5t583->chip_irq, rc5t583);
- return 0;
-}
struct rc5t583 *rc5t583;
struct rc5t583_platform_data *pdata = dev_get_platdata(&i2c->dev);
int ret;
- bool irq_init_success = false;
if (!pdata) {
dev_err(&i2c->dev, "Err: Platform data not found\n");
/* Still continue with warning, if irq init fails */
if (ret)
dev_warn(&i2c->dev, "IRQ init failed: %d\n", ret);
- else
- irq_init_success = true;
}
- ret = mfd_add_devices(rc5t583->dev, -1, rc5t583_subdevs,
- ARRAY_SIZE(rc5t583_subdevs), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(rc5t583->dev, -1, rc5t583_subdevs,
+ ARRAY_SIZE(rc5t583_subdevs), NULL, 0, NULL);
if (ret) {
dev_err(&i2c->dev, "add mfd devices failed: %d\n", ret);
- goto err_add_devs;
+ return ret;
}
return 0;
-
-err_add_devs:
- if (irq_init_success)
- rc5t583_irq_exit(rc5t583);
- return ret;
-}
-
-static int rc5t583_i2c_remove(struct i2c_client *i2c)
-{
- struct rc5t583 *rc5t583 = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(rc5t583->dev);
- rc5t583_irq_exit(rc5t583);
- return 0;
}
static const struct i2c_device_id rc5t583_i2c_id[] = {
.name = "rc5t583",
},
.probe = rc5t583_i2c_probe,
- .remove = rc5t583_i2c_remove,
.id_table = rc5t583_i2c_id,
};
rdc321x_gpio_pdata.sb_pdev = pdev;
rdc321x_wdt_pdata.sb_pdev = pdev;
- return mfd_add_devices(&pdev->dev, -1,
- rdc321x_sb_cells, ARRAY_SIZE(rdc321x_sb_cells),
- NULL, 0, NULL);
-}
-
-static void rdc321x_sb_remove(struct pci_dev *pdev)
-{
- mfd_remove_devices(&pdev->dev);
+ return devm_mfd_add_devices(&pdev->dev, -1,
+ rdc321x_sb_cells,
+ ARRAY_SIZE(rdc321x_sb_cells),
+ NULL, 0, NULL);
}
static const struct pci_device_id rdc321x_sb_table[] = {
.name = "RDC321x Southbridge",
.id_table = rdc321x_sb_table,
.probe = rdc321x_sb_probe,
- .remove = rdc321x_sb_remove,
};
module_pci_driver(rdc321x_sb_driver);
rk808->i2c = client;
i2c_set_clientdata(client, rk808);
- ret = mfd_add_devices(&client->dev, -1,
- rk808s, ARRAY_SIZE(rk808s),
- NULL, 0, regmap_irq_get_domain(rk808->irq_data));
+ ret = devm_mfd_add_devices(&client->dev, -1,
+ rk808s, ARRAY_SIZE(rk808s), NULL, 0,
+ regmap_irq_get_domain(rk808->irq_data));
if (ret) {
dev_err(&client->dev, "failed to add MFD devices %d\n", ret);
goto err_irq;
struct rk808 *rk808 = i2c_get_clientdata(client);
regmap_del_irq_chip(client->irq, rk808->irq_data);
- mfd_remove_devices(&client->dev);
pm_power_off = NULL;
return 0;
return ret;
}
- ret = mfd_add_devices(&i2c->dev, -1, rn5t618_cells,
- ARRAY_SIZE(rn5t618_cells), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&i2c->dev, -1, rn5t618_cells,
+ ARRAY_SIZE(rn5t618_cells), NULL, 0, NULL);
if (ret) {
dev_err(&i2c->dev, "failed to add sub-devices: %d\n", ret);
return ret;
pm_power_off = NULL;
}
- mfd_remove_devices(&i2c->dev);
return 0;
}
return ret;
}
- ret = mfd_add_devices(rt5033->dev, -1, rt5033_devs,
- ARRAY_SIZE(rt5033_devs), NULL, 0,
- regmap_irq_get_domain(rt5033->irq_data));
+ ret = devm_mfd_add_devices(rt5033->dev, -1, rt5033_devs,
+ ARRAY_SIZE(rt5033_devs), NULL, 0,
+ regmap_irq_get_domain(rt5033->irq_data));
if (ret < 0) {
dev_err(&i2c->dev, "Failed to add RT5033 child devices.\n");
return ret;
return 0;
}
-static int rt5033_i2c_remove(struct i2c_client *i2c)
-{
- mfd_remove_devices(&i2c->dev);
-
- return 0;
-}
-
static const struct i2c_device_id rt5033_i2c_id[] = {
{ "rt5033", },
{ }
.of_match_table = of_match_ptr(rt5033_dt_match),
},
.probe = rt5033_i2c_probe,
- .remove = rt5033_i2c_remove,
.id_table = rt5033_i2c_id,
};
module_i2c_driver(rt5033_driver);
/* If this happens the probe function is problem */
BUG();
}
- ret = mfd_add_devices(sec_pmic->dev, -1, sec_devs, num_sec_devs, NULL,
- 0, NULL);
+ ret = devm_mfd_add_devices(sec_pmic->dev, -1, sec_devs, num_sec_devs,
+ NULL, 0, NULL);
if (ret)
- goto err_mfd;
+ return ret;
device_init_wakeup(sec_pmic->dev, sec_pmic->wakeup);
sec_pmic_configure(sec_pmic);
sec_pmic_dump_rev(sec_pmic);
return ret;
-
-err_mfd:
- sec_irq_exit(sec_pmic);
- return ret;
-}
-
-static int sec_pmic_remove(struct i2c_client *i2c)
-{
- struct sec_pmic_dev *sec_pmic = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(sec_pmic->dev);
- sec_irq_exit(sec_pmic);
- return 0;
}
static void sec_pmic_shutdown(struct i2c_client *i2c)
.of_match_table = of_match_ptr(sec_dt_match),
},
.probe = sec_pmic_probe,
- .remove = sec_pmic_remove,
.shutdown = sec_pmic_shutdown,
.id_table = sec_pmic_id,
};
return -EINVAL;
}
- ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- sec_pmic->irq_base, sec_irq_chip,
- &sec_pmic->irq_data);
+ ret = devm_regmap_add_irq_chip(sec_pmic->dev, sec_pmic->regmap_pmic,
+ sec_pmic->irq,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ sec_pmic->irq_base, sec_irq_chip,
+ &sec_pmic->irq_data);
if (ret != 0) {
dev_err(sec_pmic->dev, "Failed to register IRQ chip: %d\n", ret);
return ret;
return 0;
}
-
-void sec_irq_exit(struct sec_pmic_dev *sec_pmic)
-{
- regmap_del_irq_chip(sec_pmic->irq, sec_pmic->irq_data);
-}
cells[1].platform_data = pdata->regulator_init_data;
cells[1].pdata_size = sizeof(*pdata->regulator_init_data);
- ret = mfd_add_devices(dev, -1, cells, ARRAY_SIZE(cells), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(dev, -1, cells, ARRAY_SIZE(cells),
+ NULL, 0, NULL);
if (ret)
dev_err(dev, "failed to add child devices. err=%d\n", ret);
return ret;
}
-static int sky81452_remove(struct i2c_client *client)
-{
- mfd_remove_devices(&client->dev);
- return 0;
-}
-
static const struct i2c_device_id sky81452_ids[] = {
{ "sky81452" },
{ }
.of_match_table = of_match_ptr(sky81452_of_match),
},
.probe = sky81452_probe,
- .remove = sky81452_remove,
.id_table = sky81452_ids,
};
#ifdef CONFIG_MFD_SM501_GPIO
-static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
-{
- return container_of(gc, struct sm501_gpio_chip, gpio);
-}
-
static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
{
return container_of(gpio, struct sm501_devdata, gpio);
static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
+ struct sm501_gpio_chip *smgpio = gpiochip_get_data(chip);
unsigned long result;
result = smc501_readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
{
- struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio_chip *smchip = gpiochip_get_data(chip);
struct sm501_gpio *smgpio = smchip->ourgpio;
unsigned long bit = 1 << offset;
void __iomem *regs = smchip->regbase;
static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
{
- struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio_chip *smchip = gpiochip_get_data(chip);
struct sm501_gpio *smgpio = smchip->ourgpio;
void __iomem *regs = smchip->regbase;
unsigned long bit = 1 << offset;
static int sm501_gpio_output(struct gpio_chip *chip,
unsigned offset, int value)
{
- struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio_chip *smchip = gpiochip_get_data(chip);
struct sm501_gpio *smgpio = smchip->ourgpio;
unsigned long bit = 1 << offset;
void __iomem *regs = smchip->regbase;
gchip->base = base;
chip->ourgpio = gpio;
- return gpiochip_add(gchip);
+ return gpiochip_add_data(gchip, chip);
}
static int sm501_register_gpio(struct sm501_devdata *sm)
return ret;
}
-static int smsc_i2c_remove(struct i2c_client *i2c)
-{
- struct smsc *smsc = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(smsc->dev);
-
- return 0;
-}
-
static const struct i2c_device_id smsc_i2c_id[] = {
{ "smscece1099", 0},
{},
.name = "smsc",
},
.probe = smsc_i2c_probe,
- .remove = smsc_i2c_remove,
.id_table = smsc_i2c_id,
};
stw481x_cells[i].pdata_size = sizeof(*stw481x);
}
- ret = mfd_add_devices(&client->dev, 0, stw481x_cells,
- ARRAY_SIZE(stw481x_cells), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(&client->dev, 0, stw481x_cells,
+ ARRAY_SIZE(stw481x_cells), NULL, 0, NULL);
if (ret)
return ret;
return ret;
}
-static int stw481x_remove(struct i2c_client *client)
-{
- mfd_remove_devices(&client->dev);
- return 0;
-}
-
/*
* This ID table is completely unused, as this is a pure
* device-tree probed driver, but it has to be here due to
.of_match_table = stw481x_match,
},
.probe = stw481x_probe,
- .remove = stw481x_remove,
.id_table = stw481x_id,
};
#include <linux/mfd/core.h>
#include <linux/mfd/tmio.h>
#include <linux/mfd/tc6393xb.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/slab.h>
#define SCR_REVID 0x08 /* b Revision ID */
static int tc6393xb_gpio_get(struct gpio_chip *chip,
unsigned offset)
{
- struct tc6393xb *tc6393xb = container_of(chip, struct tc6393xb, gpio);
+ struct tc6393xb *tc6393xb = gpiochip_get_data(chip);
/* XXX: does dsr also represent inputs? */
return !!(tmio_ioread8(tc6393xb->scr + SCR_GPO_DSR(offset / 8))
static void __tc6393xb_gpio_set(struct gpio_chip *chip,
unsigned offset, int value)
{
- struct tc6393xb *tc6393xb = container_of(chip, struct tc6393xb, gpio);
+ struct tc6393xb *tc6393xb = gpiochip_get_data(chip);
u8 dsr;
dsr = tmio_ioread8(tc6393xb->scr + SCR_GPO_DSR(offset / 8));
static void tc6393xb_gpio_set(struct gpio_chip *chip,
unsigned offset, int value)
{
- struct tc6393xb *tc6393xb = container_of(chip, struct tc6393xb, gpio);
+ struct tc6393xb *tc6393xb = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&tc6393xb->lock, flags);
static int tc6393xb_gpio_direction_input(struct gpio_chip *chip,
unsigned offset)
{
- struct tc6393xb *tc6393xb = container_of(chip, struct tc6393xb, gpio);
+ struct tc6393xb *tc6393xb = gpiochip_get_data(chip);
unsigned long flags;
u8 doecr;
static int tc6393xb_gpio_direction_output(struct gpio_chip *chip,
unsigned offset, int value)
{
- struct tc6393xb *tc6393xb = container_of(chip, struct tc6393xb, gpio);
+ struct tc6393xb *tc6393xb = gpiochip_get_data(chip);
unsigned long flags;
u8 doecr;
tc6393xb->gpio.direction_input = tc6393xb_gpio_direction_input;
tc6393xb->gpio.direction_output = tc6393xb_gpio_direction_output;
- return gpiochip_add(&tc6393xb->gpio);
+ return gpiochip_add_data(&tc6393xb->gpio, tc6393xb);
}
/*--------------------------------------------------------------------------*/
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/regulator/driver.h>
#include <linux/mfd/core.h>
#include <linux/mfd/tps6105x.h>
#include <linux/i2c/tps65010.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
/*-------------------------------------------------------------------------*/
if (offset < 4) {
struct tps65010 *tps;
- tps = container_of(chip, struct tps65010, chip);
+ tps = gpiochip_get_data(chip);
if (!(tps->outmask & (1 << offset)))
return -EINVAL;
tps65010_set_gpio_out_value(offset + 1, value);
int value;
struct tps65010 *tps;
- tps = container_of(chip, struct tps65010, chip);
+ tps = gpiochip_get_data(chip);
if (offset < 4) {
value = i2c_smbus_read_byte_data(tps->client, TPS_DEFGPIO);
tps->chip.ngpio = 7;
tps->chip.can_sleep = 1;
- status = gpiochip_add(&tps->chip);
+ status = gpiochip_add_data(&tps->chip, tps);
if (status < 0)
dev_err(&client->dev, "can't add gpiochip, err %d\n",
status);
tps6507x->read_dev = tps6507x_i2c_read_device;
tps6507x->write_dev = tps6507x_i2c_write_device;
- return mfd_add_devices(tps6507x->dev, -1, tps6507x_devs,
- ARRAY_SIZE(tps6507x_devs), NULL, 0, NULL);
-}
-
-static int tps6507x_i2c_remove(struct i2c_client *i2c)
-{
- struct tps6507x_dev *tps6507x = i2c_get_clientdata(i2c);
-
- mfd_remove_devices(tps6507x->dev);
- return 0;
+ return devm_mfd_add_devices(tps6507x->dev, -1, tps6507x_devs,
+ ARRAY_SIZE(tps6507x_devs), NULL, 0, NULL);
}
static const struct i2c_device_id tps6507x_i2c_id[] = {
.of_match_table = of_match_ptr(tps6507x_of_match),
},
.probe = tps6507x_i2c_probe,
- .remove = tps6507x_i2c_remove,
.id_table = tps6507x_i2c_id,
};
return ret;
}
- ret = mfd_add_devices(tps->dev, -1, tps65217s,
- ARRAY_SIZE(tps65217s), NULL, 0, NULL);
+ ret = devm_mfd_add_devices(tps->dev, -1, tps65217s,
+ ARRAY_SIZE(tps65217s), NULL, 0, NULL);
if (ret < 0) {
dev_err(tps->dev, "mfd_add_devices failed: %d\n", ret);
return ret;
return 0;
}
-static int tps65217_remove(struct i2c_client *client)
-{
- struct tps65217 *tps = i2c_get_clientdata(client);
-
- mfd_remove_devices(tps->dev);
-
- return 0;
-}
-
static const struct i2c_device_id tps65217_id_table[] = {
{"tps65217", TPS65217},
{ /* sentinel */ }
},
.id_table = tps65217_id_table,
.probe = tps65217_probe,
- .remove = tps65217_remove,
};
static int __init tps65217_init(void)
}
tps65910->chip_irq = irq;
- ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
- IRQF_ONESHOT, pdata->irq_base,
- tps6591x_irqs_chip, &tps65910->irq_data);
+ ret = devm_regmap_add_irq_chip(tps65910->dev, tps65910->regmap,
+ tps65910->chip_irq,
+ IRQF_ONESHOT, pdata->irq_base,
+ tps6591x_irqs_chip, &tps65910->irq_data);
if (ret < 0) {
dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret);
tps65910->chip_irq = 0;
return ret;
}
-static int tps65910_irq_exit(struct tps65910 *tps65910)
-{
- if (tps65910->chip_irq > 0)
- regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
- return 0;
-}
-
static bool is_volatile_reg(struct device *dev, unsigned int reg)
{
struct tps65910 *tps65910 = dev_get_drvdata(dev);
pm_power_off = tps65910_power_off;
}
- ret = mfd_add_devices(tps65910->dev, -1,
- tps65910s, ARRAY_SIZE(tps65910s),
- NULL, 0,
- regmap_irq_get_domain(tps65910->irq_data));
+ ret = devm_mfd_add_devices(tps65910->dev, -1,
+ tps65910s, ARRAY_SIZE(tps65910s),
+ NULL, 0,
+ regmap_irq_get_domain(tps65910->irq_data));
if (ret < 0) {
dev_err(&i2c->dev, "mfd_add_devices failed: %d\n", ret);
- tps65910_irq_exit(tps65910);
return ret;
}
return ret;
}
-static int tps65910_i2c_remove(struct i2c_client *i2c)
-{
- struct tps65910 *tps65910 = i2c_get_clientdata(i2c);
-
- tps65910_irq_exit(tps65910);
- mfd_remove_devices(tps65910->dev);
-
- return 0;
-}
-
static const struct i2c_device_id tps65910_i2c_id[] = {
{ "tps65910", TPS65910 },
{ "tps65911", TPS65911 },
.of_match_table = of_match_ptr(tps65910_of_match),
},
.probe = tps65910_i2c_probe,
- .remove = tps65910_i2c_remove,
.id_table = tps65910_i2c_id,
};
/*
- * linux/drivers/i2c/chips/twl4030-power.c
*
* Handle TWL4030 Power initialization
*
if (twl6040->power_count++)
goto out;
- clk_prepare_enable(twl6040->clk32k);
+ ret = clk_prepare_enable(twl6040->clk32k);
+ if (ret) {
+ twl6040->power_count = 0;
+ goto out;
+ }
/* Allow writes to the chip */
regcache_cache_only(twl6040->regmap, false);
/* use automatic power-up sequence */
ret = twl6040_power_up_automatic(twl6040);
if (ret) {
+ clk_disable_unprepare(twl6040->clk32k);
twl6040->power_count = 0;
goto out;
}
/* use manual power-up sequence */
ret = twl6040_power_up_manual(twl6040);
if (ret) {
+ clk_disable_unprepare(twl6040->clk32k);
twl6040->power_count = 0;
goto out;
}
#include <linux/mutex.h>
#include <linux/mfd/ucb1x00.h>
#include <linux/pm.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
static DEFINE_MUTEX(ucb1x00_mutex);
static LIST_HEAD(ucb1x00_drivers);
static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
{
- struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ struct ucb1x00 *ucb = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&ucb->io_lock, flags);
static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ struct ucb1x00 *ucb = gpiochip_get_data(chip);
unsigned val;
ucb1x00_enable(ucb);
static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
{
- struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ struct ucb1x00 *ucb = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&ucb->io_lock, flags);
static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset
, int value)
{
- struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ struct ucb1x00 *ucb = gpiochip_get_data(chip);
unsigned long flags;
unsigned old, mask = 1 << offset;
static int ucb1x00_to_irq(struct gpio_chip *chip, unsigned offset)
{
- struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ struct ucb1x00 *ucb = gpiochip_get_data(chip);
return ucb->irq_base > 0 ? ucb->irq_base + offset : -ENXIO;
}
ucb->gpio.direction_input = ucb1x00_gpio_direction_input;
ucb->gpio.direction_output = ucb1x00_gpio_direction_output;
ucb->gpio.to_irq = ucb1x00_to_irq;
- ret = gpiochip_add(&ucb->gpio);
+ ret = gpiochip_add_data(&ucb->gpio, ucb);
if (ret)
goto err_gpio_add;
} else
bgpio_init(mmc_gpio_chip, &pdev->dev, 0x4, base + SYS_MCI,
NULL, NULL, NULL, NULL, 0);
mmc_gpio_chip->ngpio = 2;
- gpiochip_add(mmc_gpio_chip);
+ gpiochip_add_data(mmc_gpio_chip, NULL);
return mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO,
vexpress_sysreg_cells,
return 0;
}
-static int wl1273_core_remove(struct i2c_client *client)
-{
- dev_dbg(&client->dev, "%s\n", __func__);
-
- mfd_remove_devices(&client->dev);
-
- return 0;
-}
-
static int wl1273_core_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
dev_dbg(&client->dev, "%s: number of children: %d.\n",
__func__, children);
- r = mfd_add_devices(&client->dev, -1, core->cells,
- children, NULL, 0, NULL);
+ r = devm_mfd_add_devices(&client->dev, -1, core->cells,
+ children, NULL, 0, NULL);
if (r)
goto err;
},
.probe = wl1273_core_probe,
.id_table = wl1273_driver_id_table,
- .remove = wl1273_core_remove,
};
static int __init wl1273_core_init(void)
case ARIZONA_AOD_IRQ_RAW_STATUS:
case ARIZONA_FX_CTRL2:
case ARIZONA_ASRC_STATUS:
+ case ARIZONA_CLOCK_CONTROL:
case ARIZONA_DSP_STATUS:
case ARIZONA_DSP1_STATUS_1:
case ARIZONA_DSP1_STATUS_2:
}
}
-/**
- * wm8400_reg_read - Single register read
- *
- * @wm8400: Pointer to wm8400 control structure
- * @reg: Register to read
- *
- * @return Read value
- */
-u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg)
-{
- unsigned int val;
- int ret;
-
- ret = regmap_read(wm8400->regmap, reg, &val);
- if (ret < 0)
- return ret;
-
- return val;
-}
-EXPORT_SYMBOL_GPL(wm8400_reg_read);
-
int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data)
{
return regmap_bulk_read(wm8400->regmap, reg, data, count);
.pdata_size = sizeof(*wm8400),
};
- return mfd_add_devices(wm8400->dev, -1, &cell, 1, NULL, 0, NULL);
+ return devm_mfd_add_devices(wm8400->dev, -1, &cell, 1, NULL, 0, NULL);
}
/*
ret = wm8400_register_codec(wm8400);
if (ret != 0) {
dev_err(wm8400->dev, "Failed to register codec\n");
- goto err_children;
+ return ret;
}
if (pdata && pdata->platform_init) {
if (ret != 0) {
dev_err(wm8400->dev, "Platform init failed: %d\n",
ret);
- goto err_children;
+ return ret;
}
} else
dev_warn(wm8400->dev, "No platform initialisation supplied\n");
return 0;
-
-err_children:
- mfd_remove_devices(wm8400->dev);
- return ret;
-}
-
-static void wm8400_release(struct wm8400 *wm8400)
-{
- mfd_remove_devices(wm8400->dev);
}
static const struct regmap_config wm8400_regmap_config = {
}
EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
-#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+#if IS_ENABLED(CONFIG_I2C)
static int wm8400_i2c_probe(struct i2c_client *i2c,
const struct i2c_device_id *id)
{
return wm8400_init(wm8400, dev_get_platdata(&i2c->dev));
}
-static int wm8400_i2c_remove(struct i2c_client *i2c)
-{
- struct wm8400 *wm8400 = i2c_get_clientdata(i2c);
-
- wm8400_release(wm8400);
-
- return 0;
-}
-
static const struct i2c_device_id wm8400_i2c_id[] = {
{ "wm8400", 0 },
{ }
.name = "WM8400",
},
.probe = wm8400_i2c_probe,
- .remove = wm8400_i2c_remove,
.id_table = wm8400_i2c_id,
};
#endif
{
int ret = -ENODEV;
-#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+#if IS_ENABLED(CONFIG_I2C)
ret = i2c_add_driver(&wm8400_i2c_driver);
if (ret != 0)
pr_err("Failed to register I2C driver: %d\n", ret);
static void __exit wm8400_module_exit(void)
{
-#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+#if IS_ENABLED(CONFIG_I2C)
i2c_del_driver(&wm8400_i2c_driver);
#endif
}
}
EXPORT_SYMBOL_GPL(cxl_get_context);
-struct device *cxl_get_phys_dev(struct pci_dev *dev)
-{
- struct cxl_afu *afu;
-
- afu = cxl_pci_to_afu(dev);
-
- return afu->adapter->dev.parent;
-}
-
int cxl_release_context(struct cxl_context *ctx)
{
if (ctx->status >= STARTED)
ctx->pid = get_task_pid(task, PIDTYPE_PID);
ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
kernel = false;
+ ctx->real_mode = false;
}
cxl_ctx_get();
}
EXPORT_SYMBOL_GPL(cxl_set_master);
+int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
+{
+ if (ctx->status == STARTED) {
+ /*
+ * We could potentially update the PE and issue an update LLCMD
+ * to support this, but it doesn't seem to have a good use case
+ * since it's trivial to just create a second kernel context
+ * with different translation modes, so until someone convinces
+ * me otherwise:
+ */
+ return -EBUSY;
+ }
+
+ ctx->real_mode = real_mode;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_translation_mode);
+
/* wrappers around afu_* file ops which are EXPORTED */
int cxl_fd_open(struct inode *inode, struct file *file)
{
if (ctx->kernelapi)
kfree(ctx->mapping);
- if (ctx->irq_bitmap)
- kfree(ctx->irq_bitmap);
+ kfree(ctx->irq_bitmap);
/* Drop ref to the afu device taken during cxl_context_init */
cxl_afu_put(ctx->afu);
#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */
#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */
-/****** CXL_PSL_LLCMD_An ****************************************************/
-#define CXL_LLCMD_TERMINATE 0x0001000000000000ULL
-#define CXL_LLCMD_REMOVE 0x0002000000000000ULL
-#define CXL_LLCMD_SUSPEND 0x0003000000000000ULL
-#define CXL_LLCMD_RESUME 0x0004000000000000ULL
-#define CXL_LLCMD_ADD 0x0005000000000000ULL
-#define CXL_LLCMD_UPDATE 0x0006000000000000ULL
-#define CXL_LLCMD_HANDLE_MASK 0x000000000000ffffULL
-
/****** CXL_PSL_ID_An ****************************************************/
#define CXL_PSL_ID_An_F (1ull << (63-31))
#define CXL_PSL_ID_An_L (1ull << (63-30))
};
struct cxl_afu_guest {
+ struct cxl_afu *parent;
u64 handle;
phys_addr_t p2n_phys;
u64 p2n_size;
int max_ints;
- struct mutex recovery_lock;
+ bool handle_err;
+ struct delayed_work work_err;
int previous_state;
};
bool pe_inserted;
bool master;
bool kernel;
+ bool real_mode;
bool pending_irq;
bool pending_fault;
bool pending_afu_err;
bool perst_loads_image;
bool perst_select_user;
bool perst_same_image;
+ bool psl_timebase_synced;
};
int cxl_pci_alloc_one_irq(struct cxl *adapter);
* update_mmu_cache() will not have loaded the hash since current->trap
* is not a 0x400 or 0x300, so just call hash_page_mm() here.
*/
- access = _PAGE_PRESENT;
+ access = _PAGE_PRESENT | _PAGE_READ;
if (dsisr & CXL_PSL_DSISR_An_S)
- access |= _PAGE_RW;
- if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
- access |= _PAGE_USER;
+ access |= _PAGE_WRITE;
+
+ access |= _PAGE_PRIVILEGED;
+ if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID))
+ access &= ~_PAGE_PRIVILEGED;
if (dsisr & DSISR_NOHPTE)
inv_flags |= HPTE_NOHPTE_UPDATE;
u64 state;
int rc = 0;
+ if (!afu)
+ return -EIO;
+
rc = cxl_h_read_error_state(afu->guest->handle, &state);
if (!rc) {
WARN_ON(state != H_STATE_NORMAL &&
elem->common.sstp0 = cpu_to_be64(ctx->sstp0);
elem->common.sstp1 = cpu_to_be64(ctx->sstp1);
+
+ /*
+ * Ensure we have at least one interrupt allocated to take faults for
+ * kernel contexts that may not have allocated any AFU IRQs at all:
+ */
+ if (ctx->irqs.range[0] == 0) {
+ rc = afu_register_irqs(ctx, 0);
+ if (rc)
+ goto out_free;
+ }
+
for (r = 0; r < CXL_IRQ_RANGES; r++) {
for (i = 0; i < ctx->irqs.range[r]; i++) {
if (r == 0 && i == 0) {
enable_afu_irqs(ctx);
}
+out_free:
free_page((u64)elem);
return rc;
}
{
pr_devel("in %s\n", __func__);
+ if (ctx->real_mode)
+ return -EPERM;
+
ctx->kernel = kernel;
if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
return attach_afu_directed(ctx, wed, amr);
switch (cur_state) {
case H_STATE_NORMAL:
afu->guest->previous_state = cur_state;
- rc = 1;
break;
case H_STATE_DISABLE:
pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
pci_channel_io_normal);
pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
- rc = 1;
}
afu->guest->previous_state = 0;
break;
return rc;
}
-static int afu_do_recovery(struct cxl_afu *afu)
+static void afu_handle_errstate(struct work_struct *work)
{
- int rc;
+ struct cxl_afu_guest *afu_guest =
+ container_of(to_delayed_work(work), struct cxl_afu_guest, work_err);
- /* many threads can arrive here, in case of detach_all for example.
- * Only one needs to drive the recovery
- */
- if (mutex_trylock(&afu->guest->recovery_lock)) {
- rc = afu_update_state(afu);
- mutex_unlock(&afu->guest->recovery_lock);
- return rc;
- }
- return 0;
+ if (!afu_update_state(afu_guest->parent) &&
+ afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE)
+ return;
+
+ if (afu_guest->handle_err == true)
+ schedule_delayed_work(&afu_guest->work_err,
+ msecs_to_jiffies(3000));
}
static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu)
{
int state;
- if (afu) {
- if (afu_read_error_state(afu, &state) ||
- state != H_STATE_NORMAL) {
- if (afu_do_recovery(afu) > 0) {
- /* check again in case we've just fixed it */
- if (!afu_read_error_state(afu, &state) &&
- state == H_STATE_NORMAL)
- return true;
- }
- return false;
- }
+ if (afu && (!afu_read_error_state(afu, &state))) {
+ if (state == H_STATE_NORMAL)
+ return true;
}
- return true;
+ return false;
}
static int afu_properties_look_ok(struct cxl_afu *afu)
return -ENOMEM;
}
- mutex_init(&afu->guest->recovery_lock);
-
if ((rc = dev_set_name(&afu->dev, "afu%i.%i",
adapter->adapter_num,
slice)))
afu->enabled = true;
+ /*
+ * wake up the cpu periodically to check the state
+ * of the AFU using "afu" stored in the guest structure.
+ */
+ afu->guest->parent = afu;
+ afu->guest->handle_err = true;
+ INIT_DELAYED_WORK(&afu->guest->work_err, afu_handle_errstate);
+ schedule_delayed_work(&afu->guest->work_err, msecs_to_jiffies(1000));
+
if ((rc = cxl_pci_vphb_add(afu)))
dev_info(&afu->dev, "Can't register vPHB\n");
if (!afu)
return;
+ /* flush and stop pending job */
+ afu->guest->handle_err = false;
+ flush_delayed_work(&afu->guest->work_err);
+
cxl_pci_vphb_remove(afu);
cxl_sysfs_afu_remove(afu);
adapter->dev.release = release_adapter;
dev_set_drvdata(&pdev->dev, adapter);
+ /*
+ * Hypervisor controls PSL timebase initialization (p1 register).
+ * On FW840, PSL is initialized.
+ */
+ adapter->psl_timebase_synced = true;
+
if ((rc = cxl_of_read_adapter_handle(adapter, np)))
goto err1;
int cxl_alloc_spa(struct cxl_afu *afu)
{
+ unsigned spa_size;
+
/* Work out how many pages to allocate */
afu->native->spa_order = 0;
do {
afu->native->spa_order++;
- afu->native->spa_size = (1 << afu->native->spa_order) * PAGE_SIZE;
+ spa_size = (1 << afu->native->spa_order) * PAGE_SIZE;
+
+ if (spa_size > 0x100000) {
+ dev_warn(&afu->dev, "num_of_processes too large for the SPA, limiting to %i (0x%x)\n",
+ afu->native->spa_max_procs, afu->native->spa_size);
+ afu->num_procs = afu->native->spa_max_procs;
+ break;
+ }
+
+ afu->native->spa_size = spa_size;
afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size);
} while (afu->native->spa_max_procs < afu->num_procs);
- WARN_ON(afu->native->spa_size > 0x100000); /* Max size supported by the hardware */
-
if (!(afu->native->spa = (struct cxl_process_element *)
__get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) {
pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
if (mfspr(SPRN_LPCR) & LPCR_TC)
sr |= CXL_PSL_SR_An_TC;
if (ctx->kernel) {
- sr |= CXL_PSL_SR_An_R | (mfmsr() & MSR_SF);
- sr |= CXL_PSL_SR_An_HV;
+ if (!ctx->real_mode)
+ sr |= CXL_PSL_SR_An_R;
+ sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV;
} else {
sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
sr &= ~(CXL_PSL_SR_An_HV);
ctx->elem->common.sstp0 = cpu_to_be64(ctx->sstp0);
ctx->elem->common.sstp1 = cpu_to_be64(ctx->sstp1);
+ /*
+ * Ensure we have the multiplexed PSL interrupt set up to take faults
+ * for kernel contexts that may not have allocated any AFU IRQs at all:
+ */
+ if (ctx->irqs.range[0] == 0) {
+ ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
+ ctx->irqs.range[0] = 1;
+ }
+
for (r = 0; r < CXL_IRQ_RANGES; r++) {
ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]);
ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]);
#include <asm/msi_bitmap.h>
#include <asm/pnv-pci.h>
#include <asm/io.h>
+#include <asm/reg.h>
#include "cxl.h"
#include <misc/cxl.h>
#undef show_reg
}
+#define CAPP_UNIT0_ID 0xBA
+#define CAPP_UNIT1_ID 0XBE
+
+static u64 get_capp_unit_id(struct device_node *np)
+{
+ u32 phb_index;
+
+ /*
+ * For chips other than POWER8NVL, we only have CAPP 0,
+ * irrespective of which PHB is used.
+ */
+ if (!pvr_version_is(PVR_POWER8NVL))
+ return CAPP_UNIT0_ID;
+
+ /*
+ * For POWER8NVL, assume CAPP 0 is attached to PHB0 and
+ * CAPP 1 is attached to PHB1.
+ */
+ if (of_property_read_u32(np, "ibm,phb-index", &phb_index))
+ return 0;
+
+ if (phb_index == 0)
+ return CAPP_UNIT0_ID;
+
+ if (phb_index == 1)
+ return CAPP_UNIT1_ID;
+
+ return 0;
+}
+
static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
{
struct device_node *np;
const __be32 *prop;
u64 psl_dsnctl;
u64 chipid;
+ u64 capp_unit_id;
if (!(np = pnv_pci_get_phb_node(dev)))
return -ENODEV;
if (!np)
return -ENODEV;
chipid = be32_to_cpup(prop);
+ capp_unit_id = get_capp_unit_id(np);
of_node_put(np);
+ if (!capp_unit_id) {
+ pr_err("cxl: invalid capp unit id\n");
+ return -ENODEV;
+ }
+ psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */
+ psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */
/* Tell PSL where to route data to */
- psl_dsnctl = 0x02E8900002000000ULL | (chipid << (63-5));
+ psl_dsnctl |= (chipid << (63-5));
+ psl_dsnctl |= (capp_unit_id << (63-13));
+
cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
/* snoop write mask */
#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6))
#define _2048_250MHZ_CYCLES 1
-static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
{
u64 psl_tb;
int delta;
unsigned int retry = 0;
struct device_node *np;
+ adapter->psl_timebase_synced = false;
+
if (!(np = pnv_pci_get_phb_node(dev)))
- return -ENODEV;
+ return;
/* Do not fail when CAPP timebase sync is not supported by OPAL */
of_node_get(np);
if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) {
of_node_put(np);
- pr_err("PSL: Timebase sync: OPAL support missing\n");
- return 0;
+ dev_info(&dev->dev, "PSL timebase inactive: OPAL support missing\n");
+ return;
}
of_node_put(np);
do {
msleep(1);
if (retry++ > 5) {
- pr_err("PSL: Timebase sync: giving up!\n");
- return -EIO;
+ dev_info(&dev->dev, "PSL timebase can't synchronize\n");
+ return;
}
psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
delta = mftb() - psl_tb;
delta = -delta;
} while (tb_to_ns(delta) > 16000);
- return 0;
+ adapter->psl_timebase_synced = true;
+ return;
}
static int init_implementation_afu_regs(struct cxl_afu *afu)
if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
goto err;
- if ((rc = cxl_setup_psl_timebase(adapter, dev)))
- goto err;
+ /* Ignore error, adapter init is not dependant on timebase sync */
+ cxl_setup_psl_timebase(adapter, dev);
if ((rc = cxl_native_register_psl_err_irq(adapter)))
goto err;
return scnprintf(buf, PAGE_SIZE, "factory\n");
}
+static ssize_t psl_timebase_synced_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
+}
+
static ssize_t reset_adapter_store(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
__ATTR_RO(psl_revision),
__ATTR_RO(base_image),
__ATTR_RO(image_loaded),
+ __ATTR_RO(psl_timebase_synced),
__ATTR_RW(load_image_on_perst),
__ATTR_RW(perst_reloads_same_image),
__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
info->map.name);
return -ENOMEM;
}
- info->map.cached = memremap(info->map.phys, info->map.size,
- MEMREMAP_WB);
+ info->map.cached =
+ ioremap_cached(info->map.phys, info->map.size);
if (!info->map.cached)
printk(KERN_WARNING "Failed to ioremap cached %s\n",
info->map.name);
map_destroy(info->mtd);
iounmap(info->map.virt);
if (info->map.cached)
- memunmap(info->map.cached);
+ iounmap(info->map.cached);
kfree(info);
return 0;
}
if (!nic->rb_pageref || !nic->rb_page)
return;
- atomic_add(nic->rb_pageref, &nic->rb_page->_count);
+ page_ref_add(nic->rb_page, nic->rb_pageref);
nic->rb_pageref = 0;
}
#define T5_ULP_MEMIO_ORDER_V(x) ((x) << T5_ULP_MEMIO_ORDER_S)
#define T5_ULP_MEMIO_ORDER_F T5_ULP_MEMIO_ORDER_V(1U)
+#define T5_ULP_MEMIO_FID_S 4
+#define T5_ULP_MEMIO_FID_M 0x7ff
+#define T5_ULP_MEMIO_FID_V(x) ((x) << T5_ULP_MEMIO_FID_S)
+
/* ulp_mem_io.lock_addr fields */
#define ULP_MEMIO_ADDR_S 0
#define ULP_MEMIO_ADDR_V(x) ((x) << ULP_MEMIO_ADDR_S)
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx5_cq_tasklet_cb(unsigned long data)
+{
+ unsigned long flags;
+ unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+ struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_core_cq *mcq;
+ struct mlx5_core_cq *temp;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ list_splice_tail_init(&ctx->list, &ctx->process_list);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ list_for_each_entry_safe(mcq, temp, &ctx->process_list,
+ tasklet_ctx.list) {
+ list_del_init(&mcq->tasklet_ctx.list);
+ mcq->tasklet_ctx.comp(mcq);
+ if (atomic_dec_and_test(&mcq->refcount))
+ complete(&mcq->free);
+ if (time_after(jiffies, end))
+ break;
+ }
+
+ if (!list_empty(&ctx->process_list))
+ tasklet_schedule(&ctx->task);
+}
+
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+{
+ unsigned long flags;
+ struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+
+ spin_lock_irqsave(&tasklet_ctx->lock, flags);
+ /* When migrating CQs between EQs will be implemented, please note
+ * that you need to sync this point. It is possible that
+ * while migrating a CQ, completions on the old EQs could
+ * still arrive.
+ */
+ if (list_empty_careful(&cq->tasklet_ctx.list)) {
+ atomic_inc(&cq->refcount);
+ list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+ }
+ spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
{
struct mlx5_core_cq *cq;
struct mlx5_create_cq_mbox_out out;
struct mlx5_destroy_cq_mbox_in din;
struct mlx5_destroy_cq_mbox_out dout;
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn);
+ struct mlx5_eq *eq;
+
+ eq = mlx5_eqn2eq(dev, eqn);
+ if (IS_ERR(eq))
+ return PTR_ERR(eq);
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
memset(&out, 0, sizeof(out));
cq->arm_sn = 0;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
+ if (!cq->comp)
+ cq->comp = mlx5_add_cq_to_tasklet;
+ /* assuming CQ will be deleted before the EQ */
+ cq->tasklet_ctx.priv = &eq->tasklet_ctx;
+ INIT_LIST_HEAD(&cq->tasklet_ctx.list);
spin_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, cq->cqn, cq);
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
goto err_unmap;
- atomic_add(mlx5e_mpwqe_strides_per_page(rq),
- &wi->umr.dma_info[i].page->_count);
+ page_ref_add(wi->umr.dma_info[i].page,
+ mlx5e_mpwqe_strides_per_page(rq));
wi->skbs_frags[i] = 0;
}
while (--i >= 0) {
dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
- atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
- &wi->umr.dma_info[i].page->_count);
+ page_ref_sub(wi->umr.dma_info[i].page,
+ mlx5e_mpwqe_strides_per_page(rq));
put_page(wi->umr.dma_info[i].page);
}
dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
- atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
- &wi->umr.dma_info[i].page->_count);
+ page_ref_sub(wi->umr.dma_info[i].page,
+ mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
put_page(wi->umr.dma_info[i].page);
}
dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
*/
split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
- atomic_add(mlx5e_mpwqe_strides_per_page(rq),
- &wi->dma_info.page[i]._count);
+ page_ref_add(&wi->dma_info.page[i],
+ mlx5e_mpwqe_strides_per_page(rq));
wi->skbs_frags[i] = 0;
}
dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
PCI_DMA_FROMDEVICE);
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
- atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
- &wi->dma_info.page[i]._count);
+ page_ref_sub(&wi->dma_info.page[i],
+ mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
put_page(&wi->dma_info.page[i]);
}
}
struct mlx5_eqe *eqe;
int eqes_found = 0;
int set_ci = 0;
- u32 cqn;
+ u32 cqn = -1;
u32 rsn;
u8 port;
eq_update_ci(eq, 1);
+ if (cqn != -1)
+ tasklet_schedule(&eq->tasklet_ctx.task);
+
return eqes_found;
}
if (err)
goto err_irq;
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
/* EQs are created in ARMED state
*/
eq_update_ci(eq, 1);
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
mlx5_buf_free(dev, &eq->buf);
return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+ spin_lock(&table->lock);
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ if (eq->eqn == eqn) {
+ spin_unlock(&table->lock);
+ return eq;
+ }
+
+ spin_unlock(&table->lock);
+
+ return ERR_PTR(-ENOENT);
+}
+
static void free_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = &dev->priv.eq_table;
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
+void mlx5_cq_tasklet_cb(unsigned long data);
void mlx5e_init(void);
void mlx5e_cleanup(void);
* network stack to take the ownership of the page
* which can be recycled multiple times by the driver.
*/
- atomic_inc(&curr_cons->data->_count);
+ page_ref_inc(curr_cons->data);
qede_reuse_page(edev, rxq, curr_cons);
}
/* Incr page ref count to reuse on allocation failure
* so that it doesn't get freed while freeing SKB.
*/
- atomic_inc(¤t_bd->data->_count);
+ page_ref_inc(current_bd->data);
goto out;
}
* freeing SKB.
*/
- atomic_inc(&sw_rx_data->data->_count);
+ page_ref_inc(sw_rx_data->data);
rxq->rx_alloc_errors++;
qede_recycle_rx_bd_ring(rxq, edev,
fp_cqe->bd_num);
* before booting secondary cores. This function uses arch_match_cpu_phys_id
* which can be overridden by architecture specific implementation.
*
- * Returns a node pointer for the logical cpu if found, else NULL.
+ * Returns a node pointer for the logical cpu with refcount incremented, use
+ * of_node_put() on it when done. Returns NULL if not found.
*/
struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
{
printk("\n");
}
-static int __of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name,
- const char *cells_name,
- int cell_count, int index,
- struct of_phandle_args *out_args)
+int of_phandle_iterator_init(struct of_phandle_iterator *it,
+ const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count)
{
- const __be32 *list, *list_end;
- int rc = 0, size, cur_index = 0;
- uint32_t count = 0;
- struct device_node *node = NULL;
- phandle phandle;
+ const __be32 *list;
+ int size;
+
+ memset(it, 0, sizeof(*it));
- /* Retrieve the phandle list property */
list = of_get_property(np, list_name, &size);
if (!list)
return -ENOENT;
- list_end = list + size / sizeof(*list);
- /* Loop over the phandles until all the requested entry is found */
- while (list < list_end) {
- rc = -EINVAL;
- count = 0;
+ it->cells_name = cells_name;
+ it->cell_count = cell_count;
+ it->parent = np;
+ it->list_end = list + size / sizeof(*list);
+ it->phandle_end = list;
+ it->cur = list;
+
+ return 0;
+}
+
+int of_phandle_iterator_next(struct of_phandle_iterator *it)
+{
+ uint32_t count = 0;
+
+ if (it->node) {
+ of_node_put(it->node);
+ it->node = NULL;
+ }
+
+ if (!it->cur || it->phandle_end >= it->list_end)
+ return -ENOENT;
+
+ it->cur = it->phandle_end;
+
+ /* If phandle is 0, then it is an empty entry with no arguments. */
+ it->phandle = be32_to_cpup(it->cur++);
+
+ if (it->phandle) {
/*
- * If phandle is 0, then it is an empty entry with no
- * arguments. Skip forward to the next entry.
+ * Find the provider node and parse the #*-cells property to
+ * determine the argument length.
*/
- phandle = be32_to_cpup(list++);
- if (phandle) {
- /*
- * Find the provider node and parse the #*-cells
- * property to determine the argument length.
- *
- * This is not needed if the cell count is hard-coded
- * (i.e. cells_name not set, but cell_count is set),
- * except when we're going to return the found node
- * below.
- */
- if (cells_name || cur_index == index) {
- node = of_find_node_by_phandle(phandle);
- if (!node) {
- pr_err("%s: could not find phandle\n",
- np->full_name);
- goto err;
- }
- }
+ it->node = of_find_node_by_phandle(it->phandle);
- if (cells_name) {
- if (of_property_read_u32(node, cells_name,
- &count)) {
- pr_err("%s: could not get %s for %s\n",
- np->full_name, cells_name,
- node->full_name);
- goto err;
- }
- } else {
- count = cell_count;
+ if (it->cells_name) {
+ if (!it->node) {
+ pr_err("%s: could not find phandle\n",
+ it->parent->full_name);
+ goto err;
}
- /*
- * Make sure that the arguments actually fit in the
- * remaining property data length
- */
- if (list + count > list_end) {
- pr_err("%s: arguments longer than property\n",
- np->full_name);
+ if (of_property_read_u32(it->node, it->cells_name,
+ &count)) {
+ pr_err("%s: could not get %s for %s\n",
+ it->parent->full_name,
+ it->cells_name,
+ it->node->full_name);
goto err;
}
+ } else {
+ count = it->cell_count;
+ }
+
+ /*
+ * Make sure that the arguments actually fit in the remaining
+ * property data length
+ */
+ if (it->cur + count > it->list_end) {
+ pr_err("%s: arguments longer than property\n",
+ it->parent->full_name);
+ goto err;
}
+ }
+
+ it->phandle_end = it->cur + count;
+ it->cur_count = count;
+
+ return 0;
+
+err:
+ if (it->node) {
+ of_node_put(it->node);
+ it->node = NULL;
+ }
+
+ return -EINVAL;
+}
+
+int of_phandle_iterator_args(struct of_phandle_iterator *it,
+ uint32_t *args,
+ int size)
+{
+ int i, count;
+
+ count = it->cur_count;
+
+ if (WARN_ON(size < count))
+ count = size;
+
+ for (i = 0; i < count; i++)
+ args[i] = be32_to_cpup(it->cur++);
+
+ return count;
+}
+
+static int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count, int index,
+ struct of_phandle_args *out_args)
+{
+ struct of_phandle_iterator it;
+ int rc, cur_index = 0;
+ /* Loop over the phandles until all the requested entry is found */
+ of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) {
/*
- * All of the error cases above bail out of the loop, so at
+ * All of the error cases bail out of the loop, so at
* this point, the parsing is successful. If the requested
* index matches, then fill the out_args structure and return,
* or return -ENOENT for an empty entry.
*/
rc = -ENOENT;
if (cur_index == index) {
- if (!phandle)
+ if (!it.phandle)
goto err;
if (out_args) {
- int i;
- if (WARN_ON(count > MAX_PHANDLE_ARGS))
- count = MAX_PHANDLE_ARGS;
- out_args->np = node;
- out_args->args_count = count;
- for (i = 0; i < count; i++)
- out_args->args[i] = be32_to_cpup(list++);
+ int c;
+
+ c = of_phandle_iterator_args(&it,
+ out_args->args,
+ MAX_PHANDLE_ARGS);
+ out_args->np = it.node;
+ out_args->args_count = c;
} else {
- of_node_put(node);
+ of_node_put(it.node);
}
/* Found it! return success */
return 0;
}
- of_node_put(node);
- node = NULL;
- list += count;
cur_index++;
}
* Unlock node before returning result; will be one of:
* -ENOENT : index is for empty phandle
* -EINVAL : parsing error on data
- * [1..n] : Number of phandle (count mode; when index = -1)
*/
- rc = index < 0 ? cur_index : -ENOENT;
+
err:
- if (node)
- of_node_put(node);
+ if (it.node)
+ of_node_put(it.node);
return rc;
}
int of_count_phandle_with_args(const struct device_node *np, const char *list_name,
const char *cells_name)
{
- return __of_parse_phandle_with_args(np, list_name, cells_name, 0, -1,
- NULL);
+ struct of_phandle_iterator it;
+ int rc, cur_index = 0;
+
+ rc = of_phandle_iterator_init(&it, np, list_name, cells_name, 0);
+ if (rc)
+ return rc;
+
+ while ((rc = of_phandle_iterator_next(&it)) == 0)
+ cur_index += 1;
+
+ if (rc != -ENOENT)
+ return rc;
+
+ return cur_index;
}
EXPORT_SYMBOL(of_count_phandle_with_args);
unsigned long flags;
int rc;
+ if (!prop)
+ return -ENODEV;
+
mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
return rc;
}
+EXPORT_SYMBOL_GPL(of_detach_node);
/**
* of_node_release() - release a dynamically allocated node
case OF_RECONFIG_UPDATE_PROPERTY:
rce->old_prop = ce->prop;
rce->prop = ce->old_prop;
+ /* update was used but original property did not exist */
+ if (!rce->prop) {
+ rce->action = OF_RECONFIG_REMOVE_PROPERTY;
+ rce->prop = ce->prop;
+ }
break;
}
}
return res;
}
-/**
- * unflatten_dt_node - Alloc and populate a device_node from the flat tree
- * @blob: The parent device tree blob
- * @mem: Memory chunk to use for allocating device nodes and properties
- * @poffset: pointer to node in flat tree
- * @dad: Parent struct device_node
- * @nodepp: The device_node tree created by the call
- * @fpsize: Size of the node path up at the current depth.
- * @dryrun: If true, do not allocate device nodes but still calculate needed
- * memory size
- */
-static void * unflatten_dt_node(const void *blob,
- void *mem,
- int *poffset,
- struct device_node *dad,
- struct device_node **nodepp,
- unsigned long fpsize,
+static void populate_properties(const void *blob,
+ int offset,
+ void **mem,
+ struct device_node *np,
+ const char *nodename,
bool dryrun)
{
- const __be32 *p;
+ struct property *pp, **pprev = NULL;
+ int cur;
+ bool has_name = false;
+
+ pprev = &np->properties;
+ for (cur = fdt_first_property_offset(blob, offset);
+ cur >= 0;
+ cur = fdt_next_property_offset(blob, cur)) {
+ const __be32 *val;
+ const char *pname;
+ u32 sz;
+
+ val = fdt_getprop_by_offset(blob, cur, &pname, &sz);
+ if (!val) {
+ pr_warn("%s: Cannot locate property at 0x%x\n",
+ __func__, cur);
+ continue;
+ }
+
+ if (!pname) {
+ pr_warn("%s: Cannot find property name at 0x%x\n",
+ __func__, cur);
+ continue;
+ }
+
+ if (!strcmp(pname, "name"))
+ has_name = true;
+
+ pp = unflatten_dt_alloc(mem, sizeof(struct property),
+ __alignof__(struct property));
+ if (dryrun)
+ continue;
+
+ /* We accept flattened tree phandles either in
+ * ePAPR-style "phandle" properties, or the
+ * legacy "linux,phandle" properties. If both
+ * appear and have different values, things
+ * will get weird. Don't do that.
+ */
+ if (!strcmp(pname, "phandle") ||
+ !strcmp(pname, "linux,phandle")) {
+ if (!np->phandle)
+ np->phandle = be32_to_cpup(val);
+ }
+
+ /* And we process the "ibm,phandle" property
+ * used in pSeries dynamic device tree
+ * stuff
+ */
+ if (!strcmp(pname, "ibm,phandle"))
+ np->phandle = be32_to_cpup(val);
+
+ pp->name = (char *)pname;
+ pp->length = sz;
+ pp->value = (__be32 *)val;
+ *pprev = pp;
+ pprev = &pp->next;
+ }
+
+ /* With version 0x10 we may not have the name property,
+ * recreate it here from the unit name if absent
+ */
+ if (!has_name) {
+ const char *p = nodename, *ps = p, *pa = NULL;
+ int len;
+
+ while (*p) {
+ if ((*p) == '@')
+ pa = p;
+ else if ((*p) == '/')
+ ps = p + 1;
+ p++;
+ }
+
+ if (pa < ps)
+ pa = p;
+ len = (pa - ps) + 1;
+ pp = unflatten_dt_alloc(mem, sizeof(struct property) + len,
+ __alignof__(struct property));
+ if (!dryrun) {
+ pp->name = "name";
+ pp->length = len;
+ pp->value = pp + 1;
+ *pprev = pp;
+ pprev = &pp->next;
+ memcpy(pp->value, ps, len - 1);
+ ((char *)pp->value)[len - 1] = 0;
+ pr_debug("fixed up name for %s -> %s\n",
+ nodename, (char *)pp->value);
+ }
+ }
+
+ if (!dryrun)
+ *pprev = NULL;
+}
+
+static unsigned int populate_node(const void *blob,
+ int offset,
+ void **mem,
+ struct device_node *dad,
+ unsigned int fpsize,
+ struct device_node **pnp,
+ bool dryrun)
+{
struct device_node *np;
- struct property *pp, **prev_pp = NULL;
const char *pathp;
unsigned int l, allocl;
- static int depth;
- int old_depth;
- int offset;
- int has_name = 0;
int new_format = 0;
- pathp = fdt_get_name(blob, *poffset, &l);
- if (!pathp)
- return mem;
+ pathp = fdt_get_name(blob, offset, &l);
+ if (!pathp) {
+ *pnp = NULL;
+ return 0;
+ }
allocl = ++l;
}
}
- np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
+ np = unflatten_dt_alloc(mem, sizeof(struct device_node) + allocl,
__alignof__(struct device_node));
if (!dryrun) {
char *fn;
}
memcpy(fn, pathp, l);
- prev_pp = &np->properties;
if (dad != NULL) {
np->parent = dad;
np->sibling = dad->child;
dad->child = np;
}
}
- /* process properties */
- for (offset = fdt_first_property_offset(blob, *poffset);
- (offset >= 0);
- (offset = fdt_next_property_offset(blob, offset))) {
- const char *pname;
- u32 sz;
-
- if (!(p = fdt_getprop_by_offset(blob, offset, &pname, &sz))) {
- offset = -FDT_ERR_INTERNAL;
- break;
- }
- if (pname == NULL) {
- pr_info("Can't find property name in list !\n");
- break;
- }
- if (strcmp(pname, "name") == 0)
- has_name = 1;
- pp = unflatten_dt_alloc(&mem, sizeof(struct property),
- __alignof__(struct property));
- if (!dryrun) {
- /* We accept flattened tree phandles either in
- * ePAPR-style "phandle" properties, or the
- * legacy "linux,phandle" properties. If both
- * appear and have different values, things
- * will get weird. Don't do that. */
- if ((strcmp(pname, "phandle") == 0) ||
- (strcmp(pname, "linux,phandle") == 0)) {
- if (np->phandle == 0)
- np->phandle = be32_to_cpup(p);
- }
- /* And we process the "ibm,phandle" property
- * used in pSeries dynamic device tree
- * stuff */
- if (strcmp(pname, "ibm,phandle") == 0)
- np->phandle = be32_to_cpup(p);
- pp->name = (char *)pname;
- pp->length = sz;
- pp->value = (__be32 *)p;
- *prev_pp = pp;
- prev_pp = &pp->next;
- }
- }
- /* with version 0x10 we may not have the name property, recreate
- * it here from the unit name if absent
- */
- if (!has_name) {
- const char *p1 = pathp, *ps = pathp, *pa = NULL;
- int sz;
-
- while (*p1) {
- if ((*p1) == '@')
- pa = p1;
- if ((*p1) == '/')
- ps = p1 + 1;
- p1++;
- }
- if (pa < ps)
- pa = p1;
- sz = (pa - ps) + 1;
- pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
- __alignof__(struct property));
- if (!dryrun) {
- pp->name = "name";
- pp->length = sz;
- pp->value = pp + 1;
- *prev_pp = pp;
- prev_pp = &pp->next;
- memcpy(pp->value, ps, sz - 1);
- ((char *)pp->value)[sz - 1] = 0;
- pr_debug("fixed up name for %s -> %s\n", pathp,
- (char *)pp->value);
- }
- }
+ populate_properties(blob, offset, mem, np, pathp, dryrun);
if (!dryrun) {
- *prev_pp = NULL;
np->name = of_get_property(np, "name", NULL);
np->type = of_get_property(np, "device_type", NULL);
np->type = "<NULL>";
}
- old_depth = depth;
- *poffset = fdt_next_node(blob, *poffset, &depth);
- if (depth < 0)
- depth = 0;
- while (*poffset > 0 && depth > old_depth)
- mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
- fpsize, dryrun);
+ *pnp = np;
+ return fpsize;
+}
+
+static void reverse_nodes(struct device_node *parent)
+{
+ struct device_node *child, *next;
+
+ /* In-depth first */
+ child = parent->child;
+ while (child) {
+ reverse_nodes(child);
+
+ child = child->sibling;
+ }
+
+ /* Reverse the nodes in the child list */
+ child = parent->child;
+ parent->child = NULL;
+ while (child) {
+ next = child->sibling;
+
+ child->sibling = parent->child;
+ parent->child = child;
+ child = next;
+ }
+}
+
+/**
+ * unflatten_dt_nodes - Alloc and populate a device_node from the flat tree
+ * @blob: The parent device tree blob
+ * @mem: Memory chunk to use for allocating device nodes and properties
+ * @dad: Parent struct device_node
+ * @nodepp: The device_node tree created by the call
+ *
+ * It returns the size of unflattened device tree or error code
+ */
+static int unflatten_dt_nodes(const void *blob,
+ void *mem,
+ struct device_node *dad,
+ struct device_node **nodepp)
+{
+ struct device_node *root;
+ int offset = 0, depth = 0;
+#define FDT_MAX_DEPTH 64
+ unsigned int fpsizes[FDT_MAX_DEPTH];
+ struct device_node *nps[FDT_MAX_DEPTH];
+ void *base = mem;
+ bool dryrun = !base;
- if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
- pr_err("unflatten: error %d processing FDT\n", *poffset);
+ if (nodepp)
+ *nodepp = NULL;
+
+ root = dad;
+ fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
+ nps[depth] = dad;
+ for (offset = 0;
+ offset >= 0 && depth >= 0;
+ offset = fdt_next_node(blob, offset, &depth)) {
+ if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))
+ continue;
+
+ fpsizes[depth+1] = populate_node(blob, offset, &mem,
+ nps[depth],
+ fpsizes[depth],
+ &nps[depth+1], dryrun);
+ if (!fpsizes[depth+1])
+ return mem - base;
+
+ if (!dryrun && nodepp && !*nodepp)
+ *nodepp = nps[depth+1];
+ if (!dryrun && !root)
+ root = nps[depth+1];
+ }
+
+ if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
+ pr_err("%s: Error %d processing FDT\n", __func__, offset);
+ return -EINVAL;
+ }
/*
* Reverse the child list. Some drivers assumes node order matches .dts
* node order
*/
- if (!dryrun && np->child) {
- struct device_node *child = np->child;
- np->child = NULL;
- while (child) {
- struct device_node *next = child->sibling;
- child->sibling = np->child;
- np->child = child;
- child = next;
- }
- }
-
- if (nodepp)
- *nodepp = np;
+ if (!dryrun)
+ reverse_nodes(root);
- return mem;
+ return mem - base;
}
/**
* pointers of the nodes so the normal device-tree walking functions
* can be used.
* @blob: The blob to expand
+ * @dad: Parent device node
* @mynodes: The device_node tree created by the call
* @dt_alloc: An allocator that provides a virtual address to memory
* for the resulting tree
+ *
+ * Returns NULL on failure or the memory chunk containing the unflattened
+ * device tree on success.
*/
-static void __unflatten_device_tree(const void *blob,
- struct device_node **mynodes,
- void * (*dt_alloc)(u64 size, u64 align))
+static void *__unflatten_device_tree(const void *blob,
+ struct device_node *dad,
+ struct device_node **mynodes,
+ void *(*dt_alloc)(u64 size, u64 align))
{
- unsigned long size;
- int start;
+ int size;
void *mem;
pr_debug(" -> unflatten_device_tree()\n");
if (!blob) {
pr_debug("No device tree pointer\n");
- return;
+ return NULL;
}
pr_debug("Unflattening device tree:\n");
if (fdt_check_header(blob)) {
pr_err("Invalid device tree blob header\n");
- return;
+ return NULL;
}
/* First pass, scan for size */
- start = 0;
- size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
- size = ALIGN(size, 4);
+ size = unflatten_dt_nodes(blob, NULL, dad, NULL);
+ if (size < 0)
+ return NULL;
- pr_debug(" size is %lx, allocating...\n", size);
+ size = ALIGN(size, 4);
+ pr_debug(" size is %d, allocating...\n", size);
/* Allocate memory for the expanded device tree */
mem = dt_alloc(size + 4, __alignof__(struct device_node));
pr_debug(" unflattening %p...\n", mem);
/* Second pass, do actual unflattening */
- start = 0;
- unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
+ unflatten_dt_nodes(blob, mem, dad, mynodes);
if (be32_to_cpup(mem + size) != 0xdeadbeef)
pr_warning("End of tree marker overwritten: %08x\n",
be32_to_cpup(mem + size));
pr_debug(" <- unflatten_device_tree()\n");
+ return mem;
}
static void *kernel_tree_alloc(u64 size, u64 align)
/**
* of_fdt_unflatten_tree - create tree of device_nodes from flat blob
+ * @blob: Flat device tree blob
+ * @dad: Parent device node
+ * @mynodes: The device tree created by the call
*
* unflattens the device-tree passed by the firmware, creating the
* tree of struct device_node. It also fills the "name" and "type"
* pointers of the nodes so the normal device-tree walking functions
* can be used.
+ *
+ * Returns NULL on failure or the memory chunk containing the unflattened
+ * device tree on success.
*/
-void of_fdt_unflatten_tree(const unsigned long *blob,
- struct device_node **mynodes)
+void *of_fdt_unflatten_tree(const unsigned long *blob,
+ struct device_node *dad,
+ struct device_node **mynodes)
{
+ void *mem;
+
mutex_lock(&of_fdt_unflatten_mutex);
- __unflatten_device_tree(blob, mynodes, &kernel_tree_alloc);
+ mem = __unflatten_device_tree(blob, dad, mynodes, &kernel_tree_alloc);
mutex_unlock(&of_fdt_unflatten_mutex);
+
+ return mem;
}
EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
* is set in which case we override whatever was found earlier.
*/
#ifdef CONFIG_CMDLINE
-#ifndef CONFIG_CMDLINE_FORCE
+#if defined(CONFIG_CMDLINE_EXTEND)
+ strlcat(data, " ", COMMAND_LINE_SIZE);
+ strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#elif defined(CONFIG_CMDLINE_FORCE)
+ strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#else
+ /* No arguments from boot loader, use kernel's cmdl*/
if (!((char *)data)[0])
-#endif
strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#endif
#endif /* CONFIG_CMDLINE */
pr_debug("Command line is: %s\n", (char*)data);
*/
void __init unflatten_device_tree(void)
{
- __unflatten_device_tree(initial_boot_params, &of_root,
+ __unflatten_device_tree(initial_boot_params, NULL, &of_root,
early_init_dt_alloc_memory_arch);
/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/hashtable.h>
-#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_irq.h>
"not running tests\n", __func__);
return -ENOMEM;
}
- of_fdt_unflatten_tree(unittest_data, &unittest_data_node);
+ of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node);
if (!unittest_data_node) {
pr_warn("%s: No tree to attach; not running tests\n", __func__);
return -ENODATA;
struct pci_dev *dev;
struct pci_controller *phb;
- if (pcibios_find_pci_bus(dn))
+ if (pci_find_bus_by_node(dn))
return -EINVAL;
/* Add pci bus */
struct pci_dn *pdn;
int rc = 0;
- if (!pcibios_find_pci_bus(dn))
+ if (!pci_find_bus_by_node(dn))
return -EINVAL;
/* If pci slot is hotpluggable, use hotplug to remove it */
pci_lock_rescan_remove();
- bus = pcibios_find_pci_bus(dn);
+ bus = pci_find_bus_by_node(dn);
if (!bus) {
ret = -EINVAL;
goto out;
}
/* Remove all devices below slot */
- pcibios_remove_pci_devices(bus);
+ pci_hp_remove_devices(bus);
/* Unmap PCI IO space */
if (pcibios_unmap_io_space(bus)) {
if (state == PRESENT) {
pci_lock_rescan_remove();
- pcibios_add_pci_devices(slot->bus);
+ pci_hp_add_devices(slot->bus);
pci_unlock_rescan_remove();
slot->state = CONFIGURED;
} else if (state == EMPTY) {
return -EINVAL;
pci_lock_rescan_remove();
- pcibios_remove_pci_devices(slot->bus);
+ pci_hp_remove_devices(slot->bus);
pci_unlock_rescan_remove();
vm_unmap_aliases();
if (rc)
return rc;
- bus = pcibios_find_pci_bus(slot->dn);
+ bus = pci_find_bus_by_node(slot->dn);
if (!bus) {
err("%s: no pci_bus for dn %s\n", __func__, slot->dn->full_name);
return -EINVAL;
}
if (list_empty(&bus->devices))
- pcibios_add_pci_devices(bus);
+ pci_hp_add_devices(bus);
if (!list_empty(&bus->devices)) {
info->adapter_status = CONFIGURED;
if (!cf->mem_base || !cf->io_virt || !cf->gpio_base ||
(__ioremap_at(io.start, cf->io_virt, cf->io_size,
- _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)) {
+ pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)) {
dev_err(device, "can't ioremap ranges\n");
status = -ENOMEM;
goto fail1;
return 0;
ac_err:
- power_supply_unregister(micro_ac_power);
+ power_supply_unregister(micro_batt_power);
batt_err:
cancel_delayed_work_sync(&mb->update);
destroy_workqueue(mb->wq);
info->usb = power_supply_register(&pdev->dev, &usb_desc, &psy_cfg);
if (IS_ERR(info->usb)) {
ret = PTR_ERR(info->usb);
- goto out_usb;
+ goto out_unregister_ac;
}
info->usb->dev.parent = &pdev->dev;
info->battery = power_supply_register(&pdev->dev, &battery_desc, NULL);
if (IS_ERR(info->battery)) {
ret = PTR_ERR(info->battery);
- goto out_battery;
+ goto out_unregister_usb;
}
info->battery->dev.parent = &pdev->dev;
max8925_init_charger(chip, info);
return 0;
-out_battery:
- power_supply_unregister(info->battery);
-out_usb:
+out_unregister_usb:
+ power_supply_unregister(info->usb);
+out_unregister_ac:
power_supply_unregister(info->ac);
out:
return ret;
This driver supports restart for Atmel AT91SAM9 and SAMA5
SoCs
+config POWER_RESET_AT91_SAMA5D2_SHDWC
+ tristate "Atmel AT91 SAMA5D2-Compatible shutdown controller driver"
+ depends on ARCH_AT91 || COMPILE_TEST
+ default SOC_SAMA5
+ help
+ This driver supports the alternate shutdown controller for some Atmel
+ SAMA5 SoCs. It is present for example on SAMA5D2 SoC.
+
config POWER_RESET_AXXIA
bool "LSI Axxia reset driver"
depends on ARCH_AXXIA
obj-$(CONFIG_POWER_RESET_AS3722) += as3722-poweroff.o
obj-$(CONFIG_POWER_RESET_AT91_POWEROFF) += at91-poweroff.o
obj-$(CONFIG_POWER_RESET_AT91_RESET) += at91-reset.o
+obj-$(CONFIG_POWER_RESET_AT91_SAMA5D2_SHDWC) += at91-sama5d2_shdwc.o
obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o
obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o
obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o
--- /dev/null
+/*
+ * Atmel SAMA5D2-Compatible Shutdown Controller (SHDWC) driver.
+ * Found on some SoCs as the sama5d2 (obviously).
+ *
+ * Copyright (C) 2015 Atmel Corporation,
+ * Nicolas Ferre <nicolas.ferre@atmel.com>
+ *
+ * Evolved from driver at91-poweroff.c.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * TODO:
+ * - addition to status of other wake-up inputs [1 - 15]
+ * - Analog Comparator wake-up alarm
+ * - Serial RX wake-up alarm
+ * - low power debouncer
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#define SLOW_CLOCK_FREQ 32768
+
+#define AT91_SHDW_CR 0x00 /* Shut Down Control Register */
+#define AT91_SHDW_SHDW BIT(0) /* Shut Down command */
+#define AT91_SHDW_KEY (0xa5UL << 24) /* KEY Password */
+
+#define AT91_SHDW_MR 0x04 /* Shut Down Mode Register */
+#define AT91_SHDW_WKUPDBC_SHIFT 24
+#define AT91_SHDW_WKUPDBC_MASK GENMASK(31, 16)
+#define AT91_SHDW_WKUPDBC(x) (((x) << AT91_SHDW_WKUPDBC_SHIFT) \
+ & AT91_SHDW_WKUPDBC_MASK)
+
+#define AT91_SHDW_SR 0x08 /* Shut Down Status Register */
+#define AT91_SHDW_WKUPIS_SHIFT 16
+#define AT91_SHDW_WKUPIS_MASK GENMASK(31, 16)
+#define AT91_SHDW_WKUPIS(x) ((1 << (x)) << AT91_SHDW_WKUPIS_SHIFT \
+ & AT91_SHDW_WKUPIS_MASK)
+
+#define AT91_SHDW_WUIR 0x0c /* Shutdown Wake-up Inputs Register */
+#define AT91_SHDW_WKUPEN_MASK GENMASK(15, 0)
+#define AT91_SHDW_WKUPEN(x) ((1 << (x)) & AT91_SHDW_WKUPEN_MASK)
+#define AT91_SHDW_WKUPT_SHIFT 16
+#define AT91_SHDW_WKUPT_MASK GENMASK(31, 16)
+#define AT91_SHDW_WKUPT(x) ((1 << (x)) << AT91_SHDW_WKUPT_SHIFT \
+ & AT91_SHDW_WKUPT_MASK)
+
+#define SHDW_WK_PIN(reg, cfg) ((reg) & AT91_SHDW_WKUPIS((cfg)->wkup_pin_input))
+#define SHDW_RTCWK(reg, cfg) (((reg) >> ((cfg)->sr_rtcwk_shift)) & 0x1)
+#define SHDW_RTCWKEN(cfg) (1 << ((cfg)->mr_rtcwk_shift))
+
+#define DBC_PERIOD_US(x) DIV_ROUND_UP_ULL((1000000 * (x)), \
+ SLOW_CLOCK_FREQ)
+
+struct shdwc_config {
+ u8 wkup_pin_input;
+ u8 mr_rtcwk_shift;
+ u8 sr_rtcwk_shift;
+};
+
+struct shdwc {
+ struct shdwc_config *cfg;
+ void __iomem *at91_shdwc_base;
+};
+
+/*
+ * Hold configuration here, cannot be more than one instance of the driver
+ * since pm_power_off itself is global.
+ */
+static struct shdwc *at91_shdwc;
+static struct clk *sclk;
+
+static const unsigned long long sdwc_dbc_period[] = {
+ 0, 3, 32, 512, 4096, 32768,
+};
+
+static void __init at91_wakeup_status(struct platform_device *pdev)
+{
+ struct shdwc *shdw = platform_get_drvdata(pdev);
+ u32 reg;
+ char *reason = "unknown";
+
+ reg = readl(shdw->at91_shdwc_base + AT91_SHDW_SR);
+
+ dev_dbg(&pdev->dev, "%s: status = %#x\n", __func__, reg);
+
+ /* Simple power-on, just bail out */
+ if (!reg)
+ return;
+
+ if (SHDW_WK_PIN(reg, shdw->cfg))
+ reason = "WKUP pin";
+ else if (SHDW_RTCWK(reg, shdw->cfg))
+ reason = "RTC";
+
+ pr_info("AT91: Wake-Up source: %s\n", reason);
+}
+
+static void at91_poweroff(void)
+{
+ writel(AT91_SHDW_KEY | AT91_SHDW_SHDW,
+ at91_shdwc->at91_shdwc_base + AT91_SHDW_CR);
+}
+
+static u32 at91_shdwc_debouncer_value(struct platform_device *pdev,
+ u32 in_period_us)
+{
+ int i;
+ int max_idx = ARRAY_SIZE(sdwc_dbc_period) - 1;
+ unsigned long long period_us;
+ unsigned long long max_period_us = DBC_PERIOD_US(sdwc_dbc_period[max_idx]);
+
+ if (in_period_us > max_period_us) {
+ dev_warn(&pdev->dev,
+ "debouncer period %u too big, reduced to %llu us\n",
+ in_period_us, max_period_us);
+ return max_idx;
+ }
+
+ for (i = max_idx - 1; i > 0; i--) {
+ period_us = DBC_PERIOD_US(sdwc_dbc_period[i]);
+ dev_dbg(&pdev->dev, "%s: ref[%d] = %llu\n",
+ __func__, i, period_us);
+ if (in_period_us > period_us)
+ break;
+ }
+
+ return i + 1;
+}
+
+static u32 at91_shdwc_get_wakeup_input(struct platform_device *pdev,
+ struct device_node *np)
+{
+ struct device_node *cnp;
+ u32 wk_input_mask;
+ u32 wuir = 0;
+ u32 wk_input;
+
+ for_each_child_of_node(np, cnp) {
+ if (of_property_read_u32(cnp, "reg", &wk_input)) {
+ dev_warn(&pdev->dev, "reg property is missing for %s\n",
+ cnp->full_name);
+ continue;
+ }
+
+ wk_input_mask = 1 << wk_input;
+ if (!(wk_input_mask & AT91_SHDW_WKUPEN_MASK)) {
+ dev_warn(&pdev->dev,
+ "wake-up input %d out of bounds ignore\n",
+ wk_input);
+ continue;
+ }
+ wuir |= wk_input_mask;
+
+ if (of_property_read_bool(cnp, "atmel,wakeup-active-high"))
+ wuir |= AT91_SHDW_WKUPT(wk_input);
+
+ dev_dbg(&pdev->dev, "%s: (child %d) wuir = %#x\n",
+ __func__, wk_input, wuir);
+ }
+
+ return wuir;
+}
+
+static void at91_shdwc_dt_configure(struct platform_device *pdev)
+{
+ struct shdwc *shdw = platform_get_drvdata(pdev);
+ struct device_node *np = pdev->dev.of_node;
+ u32 mode = 0, tmp, input;
+
+ if (!np) {
+ dev_err(&pdev->dev, "device node not found\n");
+ return;
+ }
+
+ if (!of_property_read_u32(np, "debounce-delay-us", &tmp))
+ mode |= AT91_SHDW_WKUPDBC(at91_shdwc_debouncer_value(pdev, tmp));
+
+ if (of_property_read_bool(np, "atmel,wakeup-rtc-timer"))
+ mode |= SHDW_RTCWKEN(shdw->cfg);
+
+ dev_dbg(&pdev->dev, "%s: mode = %#x\n", __func__, mode);
+ writel(mode, shdw->at91_shdwc_base + AT91_SHDW_MR);
+
+ input = at91_shdwc_get_wakeup_input(pdev, np);
+ writel(input, shdw->at91_shdwc_base + AT91_SHDW_WUIR);
+}
+
+static const struct shdwc_config sama5d2_shdwc_config = {
+ .wkup_pin_input = 0,
+ .mr_rtcwk_shift = 17,
+ .sr_rtcwk_shift = 5,
+};
+
+static const struct of_device_id at91_shdwc_of_match[] = {
+ {
+ .compatible = "atmel,sama5d2-shdwc",
+ .data = &sama5d2_shdwc_config,
+ }, {
+ /*sentinel*/
+ }
+};
+MODULE_DEVICE_TABLE(of, at91_shdwc_of_match);
+
+static int __init at91_shdwc_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ const struct of_device_id *match;
+ int ret;
+
+ if (!pdev->dev.of_node)
+ return -ENODEV;
+
+ at91_shdwc = devm_kzalloc(&pdev->dev, sizeof(*at91_shdwc), GFP_KERNEL);
+ if (!at91_shdwc)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, at91_shdwc);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ at91_shdwc->at91_shdwc_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(at91_shdwc->at91_shdwc_base)) {
+ dev_err(&pdev->dev, "Could not map reset controller address\n");
+ return PTR_ERR(at91_shdwc->at91_shdwc_base);
+ }
+
+ match = of_match_node(at91_shdwc_of_match, pdev->dev.of_node);
+ at91_shdwc->cfg = (struct shdwc_config *)(match->data);
+
+ sclk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(sclk))
+ return PTR_ERR(sclk);
+
+ ret = clk_prepare_enable(sclk);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not enable slow clock\n");
+ return ret;
+ }
+
+ at91_wakeup_status(pdev);
+
+ at91_shdwc_dt_configure(pdev);
+
+ pm_power_off = at91_poweroff;
+
+ return 0;
+}
+
+static int __exit at91_shdwc_remove(struct platform_device *pdev)
+{
+ struct shdwc *shdw = platform_get_drvdata(pdev);
+
+ if (pm_power_off == at91_poweroff)
+ pm_power_off = NULL;
+
+ /* Reset values to disable wake-up features */
+ writel(0, shdw->at91_shdwc_base + AT91_SHDW_MR);
+ writel(0, shdw->at91_shdwc_base + AT91_SHDW_WUIR);
+
+ clk_disable_unprepare(sclk);
+
+ return 0;
+}
+
+static struct platform_driver at91_shdwc_driver = {
+ .remove = __exit_p(at91_shdwc_remove),
+ .driver = {
+ .name = "at91-shdwc",
+ .of_match_table = at91_shdwc_of_match,
+ },
+};
+module_platform_driver_probe(at91_shdwc_driver, at91_shdwc_probe);
+
+MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_DESCRIPTION("Atmel shutdown controller driver");
+MODULE_LICENSE("GPL v2");
if (ret & BATTERY_FULL_CHARGED)
val->intval = POWER_SUPPLY_STATUS_FULL;
- else if (ret & BATTERY_FULL_DISCHARGED)
- val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
else if (ret & BATTERY_DISCHARGING)
val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
else
if (ret & BATTERY_FULL_CHARGED)
ret = POWER_SUPPLY_STATUS_FULL;
- else if (ret & BATTERY_FULL_DISCHARGED)
- ret = POWER_SUPPLY_STATUS_NOT_CHARGING;
else if (ret & BATTERY_DISCHARGING)
ret = POWER_SUPPLY_STATUS_DISCHARGING;
else
u8 port0_mode; /* device mode for port 0 */
u8 port1_mode; /* device mode for port 1 */
u32 exec; /* exec vector */
- u32 bootenv; /* fimware boot env */
+ u32 bootenv; /* firmware boot env */
u32 rsvd_b[2];
struct bfi_ioc_fwver_s fwver;
u32 md5sum[BFI_IOC_MD5SUM_SZ];
* Much of the functionality of this driver was determined from reading
* the source code for the Windows driver.
*
- * The FPGA on the board requires fimware, which is available from
+ * The FPGA on the board requires firmware, which is available from
* http://www.comedi.org in the comedi_nonfree_firmware tarball.
*
* Configuration options: not applicable, uses PCI auto config
#include "sdma.h"
#include "trace.h"
-struct cpu_mask_set {
- struct cpumask mask;
- struct cpumask used;
- uint gen;
-};
-
-struct hfi1_affinity {
- struct cpu_mask_set def_intr;
- struct cpu_mask_set rcv_intr;
- struct cpu_mask_set proc;
- /* spin lock to protect affinity struct */
- spinlock_t lock;
-};
-
/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
"SDMA",
set->gen = 0;
}
+/* Initialize non-HT cpu cores mask */
+int init_real_cpu_mask(struct hfi1_devdata *dd)
+{
+ struct hfi1_affinity *info;
+ int possible, curr_cpu, i, ht;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ cpumask_clear(&info->real_cpu_mask);
+
+ /* Start with cpu online mask as the real cpu mask */
+ cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+
+ /*
+ * Remove HT cores from the real cpu mask. Do this in two steps below.
+ */
+ possible = cpumask_weight(&info->real_cpu_mask);
+ ht = cpumask_weight(topology_sibling_cpumask(
+ cpumask_first(&info->real_cpu_mask)));
+ /*
+ * Step 1. Skip over the first N HT siblings and use them as the
+ * "real" cores. Assumes that HT cores are not enumerated in
+ * succession (except in the single core case).
+ */
+ curr_cpu = cpumask_first(&info->real_cpu_mask);
+ for (i = 0; i < possible / ht; i++)
+ curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ /*
+ * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
+ * skip any gaps.
+ */
+ for (; i < possible; i++) {
+ cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
+ curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ }
+
+ dd->affinity = info;
+ return 0;
+}
+
/*
* Interrupt affinity.
*
* to the node relative 1 as necessary.
*
*/
-int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
int node = pcibus_to_node(dd->pcidev->bus);
- struct hfi1_affinity *info;
+ struct hfi1_affinity *info = dd->affinity;
const struct cpumask *local_mask;
- int curr_cpu, possible, i, ht;
+ int curr_cpu, possible, i;
if (node < 0)
node = numa_node_id();
dd->node = node;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
spin_lock_init(&info->lock);
init_cpu_mask_set(&info->def_intr);
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- /* use local mask as default */
- cpumask_copy(&info->def_intr.mask, local_mask);
- /*
- * Remove HT cores from the default mask. Do this in two steps below.
- */
- possible = cpumask_weight(&info->def_intr.mask);
- ht = cpumask_weight(topology_sibling_cpumask(
- cpumask_first(&info->def_intr.mask)));
- /*
- * Step 1. Skip over the first N HT siblings and use them as the
- * "real" cores. Assumes that HT cores are not enumerated in
- * succession (except in the single core case).
- */
- curr_cpu = cpumask_first(&info->def_intr.mask);
- for (i = 0; i < possible / ht; i++)
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- /*
- * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
- * skip any gaps.
- */
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- }
+ /* Use the "real" cpu mask of this node as the default */
+ cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
/* fill in the receive list */
possible = cpumask_weight(&info->def_intr.mask);
}
cpumask_copy(&info->proc.mask, cpu_online_mask);
- dd->affinity = info;
- return 0;
}
void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
AFF_IRQ_LOCAL
};
+struct cpu_mask_set {
+ struct cpumask mask;
+ struct cpumask used;
+ uint gen;
+};
+
+struct hfi1_affinity {
+ struct cpu_mask_set def_intr;
+ struct cpu_mask_set rcv_intr;
+ struct cpu_mask_set proc;
+ struct cpumask real_cpu_mask;
+ /* spin lock to protect affinity struct */
+ spinlock_t lock;
+};
+
struct hfi1_msix_entry;
+/* Initialize non-HT cpu cores mask */
+int init_real_cpu_mask(struct hfi1_devdata *);
/* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+void hfi1_dev_affinity_init(struct hfi1_devdata *);
/* Free driver affinity data */
void hfi1_dev_affinity_free(struct hfi1_devdata *);
/*
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
#define NUM_MAP_REGS 32
/* Bit offset into the GUID which carries HFI id information */
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
+static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *);
static void dc_shutdown(struct hfi1_devdata *);
static void dc_start(struct hfi1_devdata *);
+static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+ unsigned int *np);
/*
* Error interrupt table entry. This is used as input to the interrupt
sci = &dd->send_contexts[sw_index];
/* there is no information for user (PSM) and ack contexts */
- if (sci->type != SC_KERNEL)
+ if ((sci->type != SC_KERNEL) && (sci->type != SC_VL15))
return -1;
sc = sci->sc;
/*
* Handle host requests from the 8051.
- *
- * This is a work-queue function outside of the interrupt.
*/
-void handle_8051_request(struct work_struct *work)
+static void handle_8051_request(struct hfi1_pportdata *ppd)
{
- struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
- dc_host_req_work);
struct hfi1_devdata *dd = ppd->dd;
u64 reg;
u16 data = 0;
- u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
- u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
+ u8 type;
reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
case HREQ_READ_CONFIG:
case HREQ_SET_TX_EQ_ABS:
case HREQ_SET_TX_EQ_REL:
+ case HREQ_ENABLE:
dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
break;
-
- case HREQ_ENABLE:
- lanes = data & 0xF;
- for (i = 0; lanes; lanes >>= 1, i++) {
- if (!(lanes & 1))
- continue;
- if (data & 0x200) {
- /* enable TX CDR */
- if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
- cache[QSFP_CDR_INFO_OFFS] & 0x80)
- cdr_ctrl_byte |= (1 << (i + 4));
- } else {
- /* disable TX CDR */
- if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
- cache[QSFP_CDR_INFO_OFFS] & 0x80)
- cdr_ctrl_byte &= ~(1 << (i + 4));
- }
-
- if (data & 0x800) {
- /* enable RX CDR */
- if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
- cache[QSFP_CDR_INFO_OFFS] & 0x40)
- cdr_ctrl_byte |= (1 << i);
- } else {
- /* disable RX CDR */
- if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
- cache[QSFP_CDR_INFO_OFFS] & 0x40)
- cdr_ctrl_byte &= ~(1 << i);
- }
- }
- one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
- &cdr_ctrl_byte, 1);
- hreq_response(dd, HREQ_SUCCESS, data);
- refresh_qsfp_cache(ppd, &ppd->qsfp_info);
- break;
-
case HREQ_CONFIG_DONE:
hreq_response(dd, HREQ_SUCCESS, 0);
break;
case HREQ_INTERFACE_TEST:
hreq_response(dd, HREQ_SUCCESS, data);
break;
-
default:
dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
ppd->neighbor_fm_security = 0;
}
+static const char * const link_down_reason_strs[] = {
+ [OPA_LINKDOWN_REASON_NONE] = "None",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+ [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
+ [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
+ [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
+ [OPA_LINKDOWN_REASON_BAD_SLID] = "Bad SLID",
+ [OPA_LINKDOWN_REASON_BAD_DLID] = "Bad DLID",
+ [OPA_LINKDOWN_REASON_BAD_L2] = "Bad L2",
+ [OPA_LINKDOWN_REASON_BAD_SC] = "Bad SC",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_8] = "Receive error 8",
+ [OPA_LINKDOWN_REASON_BAD_MID_TAIL] = "Bad mid tail",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_10] = "Receive error 10",
+ [OPA_LINKDOWN_REASON_PREEMPT_ERROR] = "Preempt error",
+ [OPA_LINKDOWN_REASON_PREEMPT_VL15] = "Preempt vl15",
+ [OPA_LINKDOWN_REASON_BAD_VL_MARKER] = "Bad VL marker",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_14] = "Receive error 14",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_15] = "Receive error 15",
+ [OPA_LINKDOWN_REASON_BAD_HEAD_DIST] = "Bad head distance",
+ [OPA_LINKDOWN_REASON_BAD_TAIL_DIST] = "Bad tail distance",
+ [OPA_LINKDOWN_REASON_BAD_CTRL_DIST] = "Bad control distance",
+ [OPA_LINKDOWN_REASON_BAD_CREDIT_ACK] = "Bad credit ack",
+ [OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER] = "Unsupported VL marker",
+ [OPA_LINKDOWN_REASON_BAD_PREEMPT] = "Bad preempt",
+ [OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT] = "Bad control flit",
+ [OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT] = "Exceed multicast limit",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_24] = "Receive error 24",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_25] = "Receive error 25",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_26] = "Receive error 26",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_27] = "Receive error 27",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_28] = "Receive error 28",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_29] = "Receive error 29",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_30] = "Receive error 30",
+ [OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN] =
+ "Excessive buffer overrun",
+ [OPA_LINKDOWN_REASON_UNKNOWN] = "Unknown",
+ [OPA_LINKDOWN_REASON_REBOOT] = "Reboot",
+ [OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN] = "Neighbor unknown",
+ [OPA_LINKDOWN_REASON_FM_BOUNCE] = "FM bounce",
+ [OPA_LINKDOWN_REASON_SPEED_POLICY] = "Speed policy",
+ [OPA_LINKDOWN_REASON_WIDTH_POLICY] = "Width policy",
+ [OPA_LINKDOWN_REASON_DISCONNECTED] = "Disconnected",
+ [OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED] =
+ "Local media not installed",
+ [OPA_LINKDOWN_REASON_NOT_INSTALLED] = "Not installed",
+ [OPA_LINKDOWN_REASON_CHASSIS_CONFIG] = "Chassis config",
+ [OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED] =
+ "End to end not installed",
+ [OPA_LINKDOWN_REASON_POWER_POLICY] = "Power policy",
+ [OPA_LINKDOWN_REASON_LINKSPEED_POLICY] = "Link speed policy",
+ [OPA_LINKDOWN_REASON_LINKWIDTH_POLICY] = "Link width policy",
+ [OPA_LINKDOWN_REASON_SWITCH_MGMT] = "Switch management",
+ [OPA_LINKDOWN_REASON_SMA_DISABLED] = "SMA disabled",
+ [OPA_LINKDOWN_REASON_TRANSIENT] = "Transient"
+};
+
+/* return the neighbor link down reason string */
+static const char *link_down_reason_str(u8 reason)
+{
+ const char *str = NULL;
+
+ if (reason < ARRAY_SIZE(link_down_reason_strs))
+ str = link_down_reason_strs[reason];
+ if (!str)
+ str = "(invalid)";
+
+ return str;
+}
+
/*
* Handle a link down interrupt from the 8051.
*
void handle_link_down(struct work_struct *work)
{
u8 lcl_reason, neigh_reason = 0;
+ u8 link_down_reason;
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
- link_down_work);
+ link_down_work);
+ int was_up;
+ static const char ldr_str[] = "Link down reason: ";
if ((ppd->host_link_state &
(HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
/* Go offline first, then deal with reading/writing through 8051 */
+ was_up = !!(ppd->host_link_state & HLS_UP);
set_link_state(ppd, HLS_DN_OFFLINE);
- lcl_reason = 0;
- read_planned_down_reason_code(ppd->dd, &neigh_reason);
+ if (was_up) {
+ lcl_reason = 0;
+ /* link down reason is only valid if the link was up */
+ read_link_down_reason(ppd->dd, &link_down_reason);
+ switch (link_down_reason) {
+ case LDR_LINK_TRANSFER_ACTIVE_LOW:
+ /* the link went down, no idle message reason */
+ dd_dev_info(ppd->dd, "%sUnexpected link down\n",
+ ldr_str);
+ break;
+ case LDR_RECEIVED_LINKDOWN_IDLE_MSG:
+ /*
+ * The neighbor reason is only valid if an idle message
+ * was received for it.
+ */
+ read_planned_down_reason_code(ppd->dd, &neigh_reason);
+ dd_dev_info(ppd->dd,
+ "%sNeighbor link down message %d, %s\n",
+ ldr_str, neigh_reason,
+ link_down_reason_str(neigh_reason));
+ break;
+ case LDR_RECEIVED_HOST_OFFLINE_REQ:
+ dd_dev_info(ppd->dd,
+ "%sHost requested link to go offline\n",
+ ldr_str);
+ break;
+ default:
+ dd_dev_info(ppd->dd, "%sUnknown reason 0x%x\n",
+ ldr_str, link_down_reason);
+ break;
+ }
- /*
- * If no reason, assume peer-initiated but missed
- * LinkGoingDown idle flits.
- */
- if (neigh_reason == 0)
- lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
+ /*
+ * If no reason, assume peer-initiated but missed
+ * LinkGoingDown idle flits.
+ */
+ if (neigh_reason == 0)
+ lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
+ } else {
+ /* went down while polling or going up */
+ lcl_reason = OPA_LINKDOWN_REASON_TRANSIENT;
+ }
set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
+ /* inform the SMA when the link transitions from up to down */
+ if (was_up && ppd->local_link_down_reason.sma == 0 &&
+ ppd->neigh_link_down_reason.sma == 0) {
+ ppd->local_link_down_reason.sma =
+ ppd->local_link_down_reason.latest;
+ ppd->neigh_link_down_reason.sma =
+ ppd->neigh_link_down_reason.latest;
+ }
+
reset_neighbor_info(ppd);
/* disable the port */
* If there is no cable attached, turn the DC off. Otherwise,
* start the link bring up.
*/
- if (!qsfp_mod_present(ppd)) {
+ if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
dc_shutdown(ppd->dd);
} else {
tune_serdes(ppd);
ppd->link_width_downgrade_rx_active = rx;
}
- if (lwde == 0) {
+ if (ppd->link_width_downgrade_tx_active == 0 ||
+ ppd->link_width_downgrade_rx_active == 0) {
+ /* the 8051 reported a dead link as a downgrade */
+ dd_dev_err(ppd->dd, "Link downgrade is really a link down, ignoring\n");
+ } else if (lwde == 0) {
/* downgrade is disabled */
/* bounce if not at starting active width */
host_msg &= ~(u64)LINKUP_ACHIEVED;
}
if (host_msg & EXT_DEVICE_CFG_REQ) {
- queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
+ handle_8051_request(ppd);
host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
}
if (host_msg & VERIFY_CAP_FRAME) {
*pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
}
+static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr)
+{
+ u32 frame;
+
+ read_8051_config(dd, LINK_DOWN_REASON, GENERAL_CONFIG, &frame);
+ *ldr = (frame & 0xff);
+}
+
static int read_tx_settings(struct hfi1_devdata *dd,
u8 *enable_lane_tx,
u8 *tx_polarity_inversion,
}
/*
- * Call this to start the link. Schedule a retry if the cable is not
- * present or if unable to start polling. Do not do anything if the
- * link is disabled. Returns 0 if link is disabled or moved to polling
+ * Call this to start the link.
+ * Do not do anything if the link is disabled.
+ * Returns 0 if link is disabled, moved to polling, or the driver is not ready.
*/
int start_link(struct hfi1_pportdata *ppd)
{
return 0;
}
- if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
- loopback == LOOPBACK_LCB ||
- ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
- return set_link_state(ppd, HLS_DN_POLL);
-
- dd_dev_info(ppd->dd,
- "%s: stopping link start because no cable is present\n",
- __func__);
- return -EAGAIN;
+ return set_link_state(ppd, HLS_DN_POLL);
}
static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
return 0;
}
-/* This routine will only be scheduled if the QSFP module is present */
+/* This routine will only be scheduled if the QSFP module present is asserted */
void qsfp_event(struct work_struct *work)
{
struct qsfp_data *qd;
& SEND_LEN_CHECK1_LEN_VL15_MASK) <<
SEND_LEN_CHECK1_LEN_VL15_SHIFT;
int i;
+ u32 thres;
for (i = 0; i < ppd->vls_supported; i++) {
if (dd->vld[i].mtu > maxvlmtu)
/* adjust kernel credit return thresholds based on new MTUs */
/* all kernel receive contexts have the same hdrqentsize */
for (i = 0; i < ppd->vls_supported; i++) {
- sc_set_cr_threshold(dd->vld[i].sc,
- sc_mtu_to_threshold(dd->vld[i].sc,
- dd->vld[i].mtu,
- dd->rcd[0]->
- rcvhdrqentsize));
- }
- sc_set_cr_threshold(dd->vld[15].sc,
- sc_mtu_to_threshold(dd->vld[15].sc,
- dd->vld[15].mtu,
+ thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
+ sc_mtu_to_threshold(dd->vld[i].sc,
+ dd->vld[i].mtu,
dd->rcd[0]->rcvhdrqentsize));
+ sc_set_cr_threshold(dd->vld[i].sc, thres);
+ }
+ thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
+ sc_mtu_to_threshold(dd->vld[15].sc,
+ dd->vld[15].mtu,
+ dd->rcd[0]->rcvhdrqentsize));
+ sc_set_cr_threshold(dd->vld[15].sc, thres);
/* Adjust maximum MTU for the port in DC */
dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
struct hfi1_devdata *dd = ppd->dd;
struct ib_event event = {.device = NULL};
int ret1, ret = 0;
- int was_up, is_down;
int orig_new_state, poll_bounce;
mutex_lock(&ppd->hls_lock);
poll_bounce ? "(bounce) " : "",
link_state_reason_name(ppd, state));
- was_up = !!(ppd->host_link_state & HLS_UP);
-
/*
* If we're going to a (HLS_*) link state that implies the logical
* link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
break;
}
- is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
- HLS_DN_DISABLE | HLS_DN_OFFLINE));
-
- if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
- ppd->neigh_link_down_reason.sma == 0) {
- ppd->local_link_down_reason.sma =
- ppd->local_link_down_reason.latest;
- ppd->neigh_link_down_reason.sma =
- ppd->neigh_link_down_reason.latest;
- }
-
goto done;
unexpected:
int total_contexts;
int ret;
unsigned ngroups;
+ int qos_rmt_count;
+ int user_rmt_reduced;
/*
- * Kernel contexts: (to be fixed later):
- * - min or 2 or 1 context/numa
+ * Kernel receive contexts:
+ * - min of 2 or 1 context/numa (excluding control context)
* - Context 0 - control context (VL15/multicast/error)
- * - Context 1 - default context
+ * - Context 1 - first kernel context
+ * - Context 2 - second kernel context
+ * ...
*/
if (n_krcvqs)
/*
- * Don't count context 0 in n_krcvqs since
- * is isn't used for normal verbs traffic.
- *
- * krcvqs will reflect number of kernel
- * receive contexts above 0.
+ * n_krcvqs is the sum of module parameter kernel receive
+ * contexts, krcvqs[]. It does not include the control
+ * context, so add that.
*/
- num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
+ num_kernel_contexts = n_krcvqs + 1;
else
num_kernel_contexts = num_online_nodes() + 1;
num_kernel_contexts =
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
- * User contexts: (to be fixed later)
- * - default to 1 user context per CPU if num_user_contexts is
- * negative
+ * User contexts:
+ * - default to 1 user context per real (non-HT) CPU core if
+ * num_user_contexts is negative
*/
if (num_user_contexts < 0)
- num_user_contexts = num_online_cpus();
+ num_user_contexts =
+ cpumask_weight(&dd->affinity->real_cpu_mask);
total_contexts = num_kernel_contexts + num_user_contexts;
total_contexts = num_kernel_contexts + num_user_contexts;
}
+ /* each user context requires an entry in the RMT */
+ qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
+ if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) {
+ user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+ dd_dev_err(dd,
+ "RMT size is reducing the number of user receive contexts from %d to %d\n",
+ (int)num_user_contexts,
+ user_rmt_reduced);
+ /* recalculate */
+ num_user_contexts = user_rmt_reduced;
+ total_contexts = num_kernel_contexts + num_user_contexts;
+ }
+
/* the first N are kernel contexts, the rest are user contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->num_send_contexts = ret;
dd_dev_info(
dd,
- "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
+ "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
dd->chip_send_contexts,
dd->num_send_contexts,
dd->sc_sizes[SC_KERNEL].count,
dd->sc_sizes[SC_ACK].count,
- dd->sc_sizes[SC_USER].count);
+ dd->sc_sizes[SC_USER].count,
+ dd->sc_sizes[SC_VL15].count);
ret = 0; /* success */
}
int i;
u64 ctxt = first_ctxt;
- for (i = 0; i < 256;) {
+ for (i = 0; i < 256; i++) {
reg |= ctxt << (8 * (i % 8));
- i++;
ctxt++;
if (ctxt > last_ctxt)
ctxt = first_ctxt;
- if (i % 8 == 0) {
+ if (i % 8 == 7) {
write_csr(dd, regno, reg);
reg = 0;
regno += 8;
}
}
- if (i % 8)
- write_csr(dd, regno, reg);
add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
| RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
}
-/**
- * init_qos - init RX qos
- * @dd - device data
- * @first_context
- *
- * This routine initializes Rule 0 and the
- * RSM map table to implement qos.
- *
- * If all of the limit tests succeed,
- * qos is applied based on the array
- * interpretation of krcvqs where
- * entry 0 is VL0.
- *
- * The number of vl bits (n) and the number of qpn
- * bits (m) are computed to feed both the RSM map table
- * and the single rule.
- *
+struct rsm_map_table {
+ u64 map[NUM_MAP_REGS];
+ unsigned int used;
+};
+
+struct rsm_rule_data {
+ u8 offset;
+ u8 pkt_type;
+ u32 field1_off;
+ u32 field2_off;
+ u32 index1_off;
+ u32 index1_width;
+ u32 index2_off;
+ u32 index2_width;
+ u32 mask1;
+ u32 value1;
+ u32 mask2;
+ u32 value2;
+};
+
+/*
+ * Return an initialized RMT map table for users to fill in. OK if it
+ * returns NULL, indicating no table.
*/
-static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
+static struct rsm_map_table *alloc_rsm_map_table(struct hfi1_devdata *dd)
{
+ struct rsm_map_table *rmt;
+ u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
+
+ rmt = kmalloc(sizeof(*rmt), GFP_KERNEL);
+ if (rmt) {
+ memset(rmt->map, rxcontext, sizeof(rmt->map));
+ rmt->used = 0;
+ }
+
+ return rmt;
+}
+
+/*
+ * Write the final RMT map table to the chip and free the table. OK if
+ * table is NULL.
+ */
+static void complete_rsm_map_table(struct hfi1_devdata *dd,
+ struct rsm_map_table *rmt)
+{
+ int i;
+
+ if (rmt) {
+ /* write table to chip */
+ for (i = 0; i < NUM_MAP_REGS; i++)
+ write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rmt->map[i]);
+
+ /* enable RSM */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+ }
+}
+
+/*
+ * Add a receive side mapping rule.
+ */
+static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
+ struct rsm_rule_data *rrd)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index),
+ (u64)rrd->offset << RCV_RSM_CFG_OFFSET_SHIFT |
+ 1ull << rule_index | /* enable bit */
+ (u64)rrd->pkt_type << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index),
+ (u64)rrd->field1_off << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+ (u64)rrd->field2_off << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+ (u64)rrd->index1_off << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+ (u64)rrd->index1_width << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+ (u64)rrd->index2_off << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+ (u64)rrd->index2_width << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index),
+ (u64)rrd->mask1 << RCV_RSM_MATCH_MASK1_SHIFT |
+ (u64)rrd->value1 << RCV_RSM_MATCH_VALUE1_SHIFT |
+ (u64)rrd->mask2 << RCV_RSM_MATCH_MASK2_SHIFT |
+ (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
+}
+
+/* return the number of RSM map table entries that will be used for QOS */
+static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+ unsigned int *np)
+{
+ int i;
+ unsigned int m, n;
u8 max_by_vl = 0;
- unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
- u64 *rsmmap;
- u64 reg;
- u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
- /* validate */
+ /* is QOS active at all? */
if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
num_vls == 1 ||
krcvqsset <= 1)
- goto bail;
- for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
+ goto no_qos;
+
+ /* determine bits for qpn */
+ for (i = 0; i < min_t(unsigned int, num_vls, krcvqsset); i++)
if (krcvqs[i] > max_by_vl)
max_by_vl = krcvqs[i];
if (max_by_vl > 32)
- goto bail;
- qpns_per_vl = __roundup_pow_of_two(max_by_vl);
- /* determine bits vl */
- n = ilog2(num_vls);
- /* determine bits for qpn */
- m = ilog2(qpns_per_vl);
+ goto no_qos;
+ m = ilog2(__roundup_pow_of_two(max_by_vl));
+
+ /* determine bits for vl */
+ n = ilog2(__roundup_pow_of_two(num_vls));
+
+ /* reject if too much is used */
if ((m + n) > 7)
+ goto no_qos;
+
+ if (mp)
+ *mp = m;
+ if (np)
+ *np = n;
+
+ return 1 << (m + n);
+
+no_qos:
+ if (mp)
+ *mp = 0;
+ if (np)
+ *np = 0;
+ return 0;
+}
+
+/**
+ * init_qos - init RX qos
+ * @dd - device data
+ * @rmt - RSM map table
+ *
+ * This routine initializes Rule 0 and the RSM map table to implement
+ * quality of service (qos).
+ *
+ * If all of the limit tests succeed, qos is applied based on the array
+ * interpretation of krcvqs where entry 0 is VL0.
+ *
+ * The number of vl bits (n) and the number of qpn bits (m) are computed to
+ * feed both the RSM map table and the single rule.
+ */
+static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
+{
+ struct rsm_rule_data rrd;
+ unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
+ unsigned int rmt_entries;
+ u64 reg;
+
+ if (!rmt)
goto bail;
- if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
+ rmt_entries = qos_rmt_entries(dd, &m, &n);
+ if (rmt_entries == 0)
goto bail;
- rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
- if (!rsmmap)
+ qpns_per_vl = 1 << m;
+
+ /* enough room in the map table? */
+ rmt_entries = 1 << (m + n);
+ if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES)
goto bail;
- memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
- /* init the local copy of the table */
- for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
+
+ /* add qos entries to the the RSM map table */
+ for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) {
unsigned tctxt;
for (qpn = 0, tctxt = ctxt;
krcvqs[i] && qpn < qpns_per_vl; qpn++) {
unsigned idx, regoff, regidx;
- /* generate index <= 128 */
- idx = (qpn << n) ^ i;
+ /* generate the index the hardware will produce */
+ idx = rmt->used + ((qpn << n) ^ i);
regoff = (idx % 8) * 8;
regidx = idx / 8;
- reg = rsmmap[regidx];
- /* replace 0xff with context number */
+ /* replace default with context number */
+ reg = rmt->map[regidx];
reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
<< regoff);
reg |= (u64)(tctxt++) << regoff;
- rsmmap[regidx] = reg;
+ rmt->map[regidx] = reg;
if (tctxt == ctxt + krcvqs[i])
tctxt = ctxt;
}
ctxt += krcvqs[i];
}
- /* flush cached copies to chip */
- for (i = 0; i < NUM_MAP_REGS; i++)
- write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
- /* add rule0 */
- write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
- RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
- RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
- 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
- write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
- LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
- LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
- LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
- ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
- QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
- ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
- write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
- LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
- LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
- LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
- LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
- /* Enable RSM */
- add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
- kfree(rsmmap);
- /* map everything else to first context */
- init_qpmap_table(dd, FIRST_KERNEL_KCTXT, MIN_KERNEL_KCTXTS - 1);
+
+ rrd.offset = rmt->used;
+ rrd.pkt_type = 2;
+ rrd.field1_off = LRH_BTH_MATCH_OFFSET;
+ rrd.field2_off = LRH_SC_MATCH_OFFSET;
+ rrd.index1_off = LRH_SC_SELECT_OFFSET;
+ rrd.index1_width = n;
+ rrd.index2_off = QPN_SELECT_OFFSET;
+ rrd.index2_width = m + n;
+ rrd.mask1 = LRH_BTH_MASK;
+ rrd.value1 = LRH_BTH_VALUE;
+ rrd.mask2 = LRH_SC_MASK;
+ rrd.value2 = LRH_SC_VALUE;
+
+ /* add rule 0 */
+ add_rsm_rule(dd, 0, &rrd);
+
+ /* mark RSM map entries as used */
+ rmt->used += rmt_entries;
+ /* map everything else to the mcast/err/vl15 context */
+ init_qpmap_table(dd, HFI1_CTRL_CTXT, HFI1_CTRL_CTXT);
dd->qos_shift = n + 1;
return;
bail:
init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
}
+static void init_user_fecn_handling(struct hfi1_devdata *dd,
+ struct rsm_map_table *rmt)
+{
+ struct rsm_rule_data rrd;
+ u64 reg;
+ int i, idx, regoff, regidx;
+ u8 offset;
+
+ /* there needs to be enough room in the map table */
+ if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
+ dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+ return;
+ }
+
+ /*
+ * RSM will extract the destination context as an index into the
+ * map table. The destination contexts are a sequential block
+ * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * Map entries are accessed as offset + extracted value. Adjust
+ * the added offset so this sequence can be placed anywhere in
+ * the table - as long as the entries themselves do not wrap.
+ * There are only enough bits in offset for the table size, so
+ * start with that to allow for a "negative" offset.
+ */
+ offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
+ (int)dd->first_user_ctxt);
+
+ for (i = dd->first_user_ctxt, idx = rmt->used;
+ i < dd->num_rcv_contexts; i++, idx++) {
+ /* replace with identity mapping */
+ regoff = (idx % 8) * 8;
+ regidx = idx / 8;
+ reg = rmt->map[regidx];
+ reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK << regoff);
+ reg |= (u64)i << regoff;
+ rmt->map[regidx] = reg;
+ }
+
+ /*
+ * For RSM intercept of Expected FECN packets:
+ * o packet type 0 - expected
+ * o match on F (bit 95), using select/match 1, and
+ * o match on SH (bit 133), using select/match 2.
+ *
+ * Use index 1 to extract the 8-bit receive context from DestQP
+ * (start at bit 64). Use that as the RSM map table index.
+ */
+ rrd.offset = offset;
+ rrd.pkt_type = 0;
+ rrd.field1_off = 95;
+ rrd.field2_off = 133;
+ rrd.index1_off = 64;
+ rrd.index1_width = 8;
+ rrd.index2_off = 0;
+ rrd.index2_width = 0;
+ rrd.mask1 = 1;
+ rrd.value1 = 1;
+ rrd.mask2 = 1;
+ rrd.value2 = 1;
+
+ /* add rule 1 */
+ add_rsm_rule(dd, 1, &rrd);
+
+ rmt->used += dd->num_user_contexts;
+}
+
static void init_rxe(struct hfi1_devdata *dd)
{
+ struct rsm_map_table *rmt;
+
/* enable all receive errors */
write_csr(dd, RCV_ERR_MASK, ~0ull);
- /* setup QPN map table - start where VL15 context leaves off */
- init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
- MIN_KERNEL_KCTXTS : 0);
+
+ rmt = alloc_rsm_map_table(dd);
+ /* set up QOS, including the QPN map table */
+ init_qos(dd, rmt);
+ init_user_fecn_handling(dd, rmt);
+ complete_rsm_map_table(dd, rmt);
+ kfree(rmt);
+
/*
* make sure RcvCtrl.RcvWcb <= PCIe Device Control
* Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
+ reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK;
write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
done:
return ret;
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
+ /*
+ * The real cpu mask is part of the affinity struct but has to be
+ * initialized earlier than the rest of the affinity struct because it
+ * is needed to calculate the number of user contexts in
+ * set_up_context_variables(). However, hfi1_dev_affinity_init(),
+ * which initializes the rest of the affinity struct members,
+ * depends on set_up_context_variables() for the number of kernel
+ * contexts, so it cannot be called before set_up_context_variables().
+ */
+ ret = init_real_cpu_mask(dd);
+ if (ret)
+ goto bail_cleanup;
+
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
/* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd);
- ret = hfi1_dev_affinity_init(dd);
- if (ret)
- goto bail_cleanup;
+ hfi1_dev_affinity_init(dd);
/* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd);
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
+#define LINK_DOWN_REASON 0x16
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
#define PWRM_BER_CONTROL 0x1
#define PWRM_BANDWIDTH_CONTROL 0x2
+/* 8051 link down reasons */
+#define LDR_LINK_TRANSFER_ACTIVE_LOW 0xa
+#define LDR_RECEIVED_LINKDOWN_IDLE_MSG 0xb
+#define LDR_RECEIVED_HOST_OFFLINE_REQ 0xc
+
/* verify capability fabric CRC size bits */
enum {
CAP_CRC_14B = (1 << 0), /* 14b CRC */
void handle_freeze(struct work_struct *work);
void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
-void handle_8051_request(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK 0x1ull
#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT 0
#define RCV_RSM_CFG_PACKET_TYPE_SHIFT 60
+#define RCV_RSM_CFG_OFFSET_SHIFT 32
#define RCV_RSM_MAP_TABLE (RXE + 0x000000000900)
#define RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 0xFFull
#define RCV_RSM_MATCH (RXE + 0x000000000800)
goto bail;
}
/* can only use kernel contexts */
- if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) {
+ if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
+ dd->send_contexts[dp->sw_index].type != SC_VL15) {
ret = -EINVAL;
goto bail;
}
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
-MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is 8192");
+MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
+ HFI1_DEFAULT_MAX_MTU));
unsigned int hfi1_cu = 1;
module_param_named(cu, hfi1_cu, uint, S_IRUGO);
if (resource & CR_DYN_MASK) {
/* a dynamic resource is in use if either HFI has set the bit */
- all_bits = resource_mask(0, resource) |
+ if (dd->pcidev->device == PCI_DEVICE_ID_INTEL0 &&
+ (resource & (CR_I2C1 | CR_I2C2))) {
+ /* discrete devices must serialize across both chains */
+ all_bits = resource_mask(0, CR_I2C1 | CR_I2C2) |
+ resource_mask(1, CR_I2C1 | CR_I2C2);
+ } else {
+ all_bits = resource_mask(0, resource) |
resource_mask(1, resource);
+ }
my_bit = resource_mask(dd->hfi1_id, resource);
} else {
/* non-dynamic resources are not split between HFIs */
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
/* use this MTU size if none other is given */
-#define HFI1_DEFAULT_ACTIVE_MTU 8192
+#define HFI1_DEFAULT_ACTIVE_MTU 10240
/* use this MTU size as the default maximum */
-#define HFI1_DEFAULT_MAX_MTU 8192
+#define HFI1_DEFAULT_MAX_MTU 10240
/* default partition key */
#define DEFAULT_PKEY 0xffff
struct work_struct link_vc_work;
struct work_struct link_up_work;
struct work_struct link_down_work;
- struct work_struct dc_host_req_work;
struct work_struct sma_message_work;
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+ ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);
}
static inline u16 generate_jkey(kuid_t uid)
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh);
+#define PKEY_CHECK_INVALID -1
+int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
+ u8 sc5, int8_t s_pkey_index);
#define PACKET_EGRESS_TIMEOUT 350
static inline void pause_for_credit_return(struct hfi1_devdata *dd)
#define HFI1_PKT_USER_SC_INTEGRITY \
(SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \
+ | SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK \
| SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \
| SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK)
struct cca_timer *cca_timer;
struct hfi1_pportdata *ppd;
int sl;
- u16 ccti, ccti_timer, ccti_min;
+ u16 ccti_timer, ccti_min;
struct cc_state *cc_state;
unsigned long flags;
+ enum hrtimer_restart ret = HRTIMER_NORESTART;
cca_timer = container_of(t, struct cca_timer, hrtimer);
ppd = cca_timer->ppd;
spin_lock_irqsave(&ppd->cca_timer_lock, flags);
- ccti = cca_timer->ccti;
-
- if (ccti > ccti_min) {
+ if (cca_timer->ccti > ccti_min) {
cca_timer->ccti--;
set_link_ipg(ppd);
}
- spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
-
- rcu_read_unlock();
-
- if (ccti > ccti_min) {
+ if (cca_timer->ccti > ccti_min) {
unsigned long nsec = 1024 * ccti_timer;
/* ccti_timer is in units of 1.024 usec */
hrtimer_forward_now(t, ns_to_ktime(nsec));
- return HRTIMER_RESTART;
+ ret = HRTIMER_RESTART;
}
- return HRTIMER_NORESTART;
+
+ spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
+ rcu_read_unlock();
+ return ret;
}
/*
INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
INIT_WORK(&ppd->link_up_work, handle_link_up);
INIT_WORK(&ppd->link_down_work, handle_link_down);
- INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
INIT_WORK(&ppd->freeze_work, handle_freeze);
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
free_percpu(dd->rcv_limit);
hfi1_dev_affinity_free(dd);
free_percpu(dd->send_schedule);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
}
/*
bail:
if (!list_empty(&dd->list))
list_del_init(&dd->list);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
return ERR_PTR(ret);
}
break;
}
- set_link_state(ppd, link_state);
+ if ((link_state == HLS_DN_POLL ||
+ link_state == HLS_DN_DOWNDEF)) {
+ /*
+ * Going to poll. No matter what the current state,
+ * always move offline first, then tune and start the
+ * link. This correctly handles a FM link bounce and
+ * a link enable. Going offline is a no-op if already
+ * offline.
+ */
+ set_link_state(ppd, HLS_DN_OFFLINE);
+ tune_serdes(ppd);
+ start_link(ppd);
+ } else {
+ set_link_state(ppd, link_state);
+ }
if (link_state == HLS_DN_DISABLE &&
(ppd->offline_disabled_reason >
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
- return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+ return PAGE_ALIGN(node->addr + node->len) - 1;
}
int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
if (!handler)
return;
+ /* Unregister first so we don't get any more notifications. */
+ if (current->mm)
+ mmu_notifier_unregister(&handler->mn, current->mm);
+
spin_lock_irqsave(&mmu_rb_lock, flags);
list_del(&handler->list);
spin_unlock_irqrestore(&mmu_rb_lock, flags);
+ spin_lock_irqsave(&handler->lock, flags);
if (!RB_EMPTY_ROOT(root)) {
struct rb_node *node;
struct mmu_rb_node *rbnode;
handler->ops->remove(root, rbnode, NULL);
}
}
+ spin_unlock_irqrestore(&handler->lock, flags);
- if (current->mm)
- mmu_notifier_unregister(&handler->mn, current->mm);
kfree(handler);
}
return node;
}
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root,
+ unsigned long addr, unsigned long len)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *node;
+ unsigned long flags;
+
+ if (!handler)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock_irqsave(&handler->lock, flags);
+ node = __mmu_rb_search(handler, addr, len);
+ if (node)
+ __mmu_int_rb_remove(node, handler->root);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ return node;
+}
+
void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{
struct mmu_rb_handler *handler = find_mmu_handler(root);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
if (handler->ops->invalidate(root, node)) {
- spin_unlock_irqrestore(&handler->lock, flags);
- __mmu_rb_remove(handler, node, mm);
- spin_lock_irqsave(&handler->lock, flags);
+ __mmu_int_rb_remove(node, root);
+ if (handler->ops->remove)
+ handler->ops->remove(root, node, mm);
}
}
spin_unlock_irqrestore(&handler->lock, flags);
void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
unsigned long);
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long,
+ unsigned long);
#endif /* _HFI1_MMU_RB_H */
/* Send Context Size (SCS) wildcards */
#define SCS_POOL_0 -1
#define SCS_POOL_1 -2
+
/* Send Context Count (SCC) wildcards */
#define SCC_PER_VL -1
#define SCC_PER_CPU -2
-
#define SCC_PER_KRCVQ -3
-#define SCC_ACK_CREDITS 32
+
+/* Send Context Size (SCS) constants */
+#define SCS_ACK_CREDITS 32
+#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */
+
+#define PIO_THRESHOLD_CEILING 4096
#define PIO_WAIT_BATCH_SIZE 5
/* default send context sizes */
static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
[SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */
- .count = SCC_PER_VL },/* one per NUMA */
- [SC_ACK] = { .size = SCC_ACK_CREDITS,
+ .count = SCC_PER_VL }, /* one per NUMA */
+ [SC_ACK] = { .size = SCS_ACK_CREDITS,
.count = SCC_PER_KRCVQ },
[SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */
.count = SCC_PER_CPU }, /* one per CPU */
+ [SC_VL15] = { .size = SCS_VL15_CREDITS,
+ .count = 1 },
};
static const char *sc_type_names[SC_MAX] = {
"kernel",
"ack",
- "user"
+ "user",
+ "vl15"
};
static const char *sc_type_name(int index)
int extra;
int i;
+ /*
+ * When SDMA is enabled, kernel context pio packet size is capped by
+ * "piothreshold". Reduce pio buffer allocation for kernel context by
+ * setting it to a fixed size. The allocation allows 3-deep buffering
+ * of the largest pio packets plus up to 128 bytes header, sufficient
+ * to maintain verbs performance.
+ *
+ * When SDMA is disabled, keep the default pooling allocation.
+ */
+ if (HFI1_CAP_IS_KSET(SDMA)) {
+ u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
+ piothreshold : PIO_THRESHOLD_CEILING;
+ sc_config_sizes[SC_KERNEL].size =
+ 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
+ }
+
/*
* Step 0:
* - copy the centipercents/absolute sizes from the pool config
if (i == SC_ACK) {
count = dd->n_krcv_queues;
} else if (i == SC_KERNEL) {
- count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
+ count = INIT_SC_PER_VL * num_vls;
} else if (count == SCC_PER_CPU) {
count = dd->num_rcv_contexts - dd->n_krcv_queues;
} else if (count < 0) {
* Return value is what to write into the CSR: trigger return when
* unreturned credits pass this count.
*/
-static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
+u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
{
return (sc->credits * percent) / 100;
}
* For Ack contexts, set a threshold for half the credits.
* For User contexts use the given percentage. This has been
* sanitized on driver start-up.
- * For Kernel contexts, use the default MTU plus a header.
+ * For Kernel contexts, use the default MTU plus a header
+ * or half the credits, whichever is smaller. This should
+ * work for both the 3-deep buffering allocation and the
+ * pooling allocation.
*/
if (type == SC_ACK) {
thresh = sc_percent_to_threshold(sc, 50);
thresh = sc_percent_to_threshold(sc,
user_credit_return_threshold);
} else { /* kernel */
- thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
+ thresh = min(sc_percent_to_threshold(sc, 50),
+ sc_mtu_to_threshold(sc, hfi1_max_mtu,
+ hdrqentsize));
}
reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
/* add in early return */
unsigned long flags;
unsigned i, n = 0;
- if (dd->send_contexts[sc->sw_index].type != SC_KERNEL)
+ if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
+ dd->send_contexts[sc->sw_index].type != SC_VL15)
return;
list = &sc->piowait;
/*
u32 ctxt;
struct hfi1_pportdata *ppd = dd->pport;
- dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
+ dd->vld[15].sc = sc_alloc(dd, SC_VL15,
dd->rcd[0]->rcvhdrqentsize, dd->node);
if (!dd->vld[15].sc)
goto nomem;
#define SC_KERNEL 0
#define SC_ACK 1
#define SC_USER 2
-#define SC_MAX 3
+#define SC_VL15 3
+#define SC_MAX 4
/* invalid send context index */
#define INVALID_SCI 0xff
void sc_add_credit_return_intr(struct send_context *sc);
void sc_del_credit_return_intr(struct send_context *sc);
void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold);
+u32 sc_percent_to_threshold(struct send_context *sc, u32 percent);
u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize);
void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint);
void sc_wait(struct hfi1_devdata *dd);
if (ret)
return ret;
- if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
- cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
- else
- cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+ cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
- if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
- ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
- else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+ if (cable_power_class > power_class_max)
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
- /*
- * cable_power_class will never have value 4 as this simply
- * means the high power settings are unused
- */
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
u8 *cache = ppd->qsfp_info.cache;
int ret;
- if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
- cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
- else
- cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+ cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
- if (cable_power_class) {
+ if (cable_power_class > QSFP_POWER_CLASS_1) {
power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
power_ctrl_byte |= 1;
if (ret != 1)
return -EIO;
- if (cable_power_class > 3) {
- /* > power class 4*/
+ if (cable_power_class > QSFP_POWER_CLASS_4) {
power_ctrl_byte |= (1 << 2);
ret = qsfp_write(ppd, ppd->dd->hfi1_id,
QSFP_PWR_CTRL_BYTE_OFFS,
{
u32 rx_preset;
u8 *cache = ppd->qsfp_info.cache;
+ int cable_power_class;
if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
(cache[QSFP_CDR_INFO_OFFS] & 0x40)))
return;
- /* rx_preset preset to zero to catch error */
+ /* RX CDR present, bypass supported */
+ cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+
+ if (cable_power_class <= QSFP_POWER_CLASS_3) {
+ /* Power class <= 3, ignore config & turn RX CDR on */
+ *cdr_ctrl_byte |= 0xF;
+ return;
+ }
+
get_platform_config_field(
ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
static void apply_tx_cdr(struct hfi1_pportdata *ppd,
u32 tx_preset_index,
- u8 *ctr_ctrl_byte)
+ u8 *cdr_ctrl_byte)
{
u32 tx_preset;
u8 *cache = ppd->qsfp_info.cache;
+ int cable_power_class;
if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
(cache[QSFP_CDR_INFO_OFFS] & 0x80)))
return;
+ /* TX CDR present, bypass supported */
+ cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+
+ if (cable_power_class <= QSFP_POWER_CLASS_3) {
+ /* Power class <= 3, ignore config & turn TX CDR on */
+ *cdr_ctrl_byte |= 0xF0;
+ return;
+ }
+
get_platform_config_field(
ppd->dd,
PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
(tx_preset << 2) | (tx_preset << 3));
if (tx_preset)
- *ctr_ctrl_byte |= (tx_preset << 4);
+ *cdr_ctrl_byte |= (tx_preset << 4);
else
/* Preserve current/determined RX CDR status */
- *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+ *cdr_ctrl_byte &= ((tx_preset << 4) | 0xF);
}
static void apply_cdr_settings(
"Applying TX settings");
}
+/* Must be holding the QSFP i2c resource */
static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
u32 *ptr_rx_preset, u32 *ptr_total_atten)
{
u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
u8 *cache = ppd->qsfp_info.cache;
- ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
- if (ret) {
- dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
- __func__, (int)ppd->dd->hfi1_id);
- return ret;
- }
-
ppd->qsfp_info.limiting_active = 1;
ret = set_qsfp_tx(ppd, 0);
if (ret)
- goto bail_unlock;
+ return ret;
ret = qual_power(ppd);
if (ret)
- goto bail_unlock;
+ return ret;
ret = qual_bitrate(ppd);
if (ret)
- goto bail_unlock;
+ return ret;
if (ppd->qsfp_info.reset_needed) {
reset_qsfp(ppd);
ret = set_qsfp_high_power(ppd);
if (ret)
- goto bail_unlock;
+ return ret;
if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
ret = get_platform_config_field(
ptr_tx_preset, 4);
if (ret) {
*ptr_tx_preset = OPA_INVALID_INDEX;
- goto bail_unlock;
+ return ret;
}
} else {
ret = get_platform_config_field(
ptr_tx_preset, 4);
if (ret) {
*ptr_tx_preset = OPA_INVALID_INDEX;
- goto bail_unlock;
+ return ret;
}
}
PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
if (ret) {
*ptr_rx_preset = OPA_INVALID_INDEX;
- goto bail_unlock;
+ return ret;
}
if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
ret = set_qsfp_tx(ppd, 1);
-bail_unlock:
- release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
return ret;
}
total_atten = platform_atten + remote_atten;
tuning_method = OPA_PASSIVE_TUNING;
- } else
+ } else {
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+ goto bail;
+ }
break;
case PORT_TYPE_QSFP:
if (qsfp_mod_present(ppd)) {
+ ret = acquire_chip_resource(ppd->dd,
+ qsfp_resource(ppd->dd),
+ QSFP_WAIT);
+ if (ret) {
+ dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+ __func__, (int)ppd->dd->hfi1_id);
+ goto bail;
+ }
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
if (ppd->qsfp_info.cache_valid) {
* update the cache to reflect the changes
*/
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
- if (ret)
- goto bail;
-
limiting_active =
ppd->qsfp_info.limiting_active;
} else {
dd_dev_err(dd,
"%s: Reading QSFP memory failed\n",
__func__);
- goto bail;
+ ret = -EINVAL; /* a fail indication */
}
- } else
+ release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+ if (ret)
+ goto bail;
+ } else {
ppd->offline_disabled_reason =
HFI1_ODR_MASK(
OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+ goto bail;
+ }
break;
default:
dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
- int val = opa_mtu_enum_to_int((int)mtu);
+ int val;
+ /* Constraining 10KB packets to 8KB packets */
+ if (mtu == (enum ib_mtu)OPA_MTU_10240)
+ mtu = OPA_MTU_8192;
+ val = opa_mtu_enum_to_int((int)mtu);
if (val > 0)
return val;
return ib_mtu_enum_to_int(mtu);
{
int ret;
- if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
/* make sure the TWSI bus is in a sane state */
{
int ret;
- if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
/* make sure the TWSI bus is in a sane state */
int ret;
u8 page;
- if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
/* make sure the TWSI bus is in a sane state */
int ret;
u8 page;
- if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
/* make sure the TWSI bus is in a sane state */
* The calls to qsfp_{read,write} in this function correctly handle the
* address map difference between this mapping and the mapping implemented
* by those functions
+ *
+ * The caller must be holding the QSFP i2c chain resource.
*/
int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
{
if (!qsfp_mod_present(ppd)) {
ret = -ENODEV;
- goto bail_no_release;
+ goto bail;
}
- ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
- if (ret)
- goto bail_no_release;
-
ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
if (ret != QSFP_PAGESIZE) {
dd_dev_info(ppd->dd,
}
}
- release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
-
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.cache_valid = 1;
ppd->qsfp_info.cache_refresh_required = 0;
return 0;
bail:
- release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
-bail_no_release:
memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
return ret;
}
#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
#define QSFP_DEFAULT_HDR_CNT 224
-static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
+#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_HIGH_PWR(pbyte) ((pbyte) & 3)
+/* For use with QSFP_HIGH_PWR macro */
+#define QSFP_HIGH_PWR_UNUSED 0 /* Bits [1:0] = 00 implies low power module */
+
+/*
+ * Takes power class byte [Page 00 Byte 129] in SFF 8636
+ * Returns power class as integer (1 through 7, per SFF 8636 rev 2.4)
+ */
+int get_qsfp_power_class(u8 power_byte)
+{
+ if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
+ /* power classes count from 1, their bit encodings from 0 */
+ return (QSFP_PWR(power_byte) + 1);
+ /*
+ * 00 in the high power classes stands for unused, bringing
+ * balance to the off-by-1 offset above, we add 4 here to
+ * account for the difference between the low and high power
+ * groups
+ */
+ return (QSFP_HIGH_PWR(power_byte) + 4);
+}
int qsfp_mod_present(struct hfi1_pportdata *ppd)
{
return ret;
}
+static const char *pwr_codes[8] = {"N/AW",
+ "1.5W",
+ "2.0W",
+ "2.5W",
+ "3.5W",
+ "4.0W",
+ "4.5W",
+ "5.0W"
+ };
+
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
{
u8 *cache = &ppd->qsfp_info.cache[0];
int bidx = 0;
u8 *atten = &cache[QSFP_ATTEN_OFFS];
u8 *vendor_oui = &cache[QSFP_VOUI_OFFS];
+ u8 power_byte = 0;
sofar = 0;
lenstr[0] = ' ';
if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
+ power_byte = cache[QSFP_MOD_PWR_OFFS];
sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
- pwr_codes +
- (QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]) * 4));
+ pwr_codes[get_qsfp_power_class(power_byte)]);
sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n",
lenstr,
/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
#define QSFP_MOD_ID_OFFS 128
/*
- * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class
- * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
+ * Byte 129 is "Extended Identifier".
+ * For bits [7:6]: 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
+ * For bits [1:0]: 0:Unused, 1:4W, 2:4.5W, 3:5W
*/
#define QSFP_MOD_PWR_OFFS 129
/* Byte 130 is Connector type. Not Intel req'd */
#define QSFP_HIGH_BIAS_WARNING 0x22
#define QSFP_LOW_BIAS_WARNING 0x11
+#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
+#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
+
/*
* struct qsfp_data encapsulates state of QSFP device for one port.
* it will be part of port-specific data if a board supports QSFP.
* and let the qsfp_lock arbitrate access to common resources.
*
*/
-
-#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
-#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
-#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
-#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
-
struct qsfp_data {
/* Helps to find our way */
struct hfi1_pportdata *ppd;
int refresh_qsfp_cache(struct hfi1_pportdata *ppd,
struct qsfp_data *cp);
+int get_qsfp_power_class(u8 power_byte);
int qsfp_mod_present(struct hfi1_pportdata *ppd);
int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr,
u32 len, u8 *data);
/* Ignore reserved NAK codes. */
goto bail_stop;
}
- return ret;
+ /* cannot be reached */
bail_stop:
hfi1_stop_rc_timers(qp);
return ret;
if (sl >= OPA_MAX_SLS)
return;
- cca_timer = &ppd->cca_timer[sl];
-
cc_state = get_cc_state(ppd);
if (!cc_state)
spin_lock_irqsave(&ppd->cca_timer_lock, flags);
+ cca_timer = &ppd->cca_timer[sl];
if (cca_timer->ccti < ccti_limit) {
if (cca_timer->ccti + ccti_incr <= ccti_limit)
cca_timer->ccti += ccti_incr;
set_link_ipg(ppd);
}
- spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
-
ccti = cca_timer->ccti;
if (!hrtimer_active(&cca_timer->hrtimer)) {
HRTIMER_MODE_REL);
}
+ spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
+
if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
- unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
int cpu;
timeout_int = SEND_RESCHED_TIMEOUT;
}
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
do {
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
if (unlikely(time_after(jiffies, timeout))) {
if (workqueue_congested(cpu,
ps.ppd->hfi1_wq)) {
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(
+ &qp->s_lock,
+ ps.flags);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock,
- flags);
+ spin_unlock_irqrestore(
+ &qp->s_lock,
+ ps.flags);
this_cpu_inc(
*ps.ppd->dd->send_schedule);
return;
}
timeout = jiffies + (timeout_int) / 8;
}
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
/*
rcu_read_unlock();
return -EINVAL;
}
- memcpy(buf, &cc_state->cct, count);
+ memcpy(buf, (void *)&cc_state->cct + pos, count);
rcu_read_unlock();
return count;
rcu_read_unlock();
return -EINVAL;
}
- memcpy(buf, &cc_state->cong_setting, count);
+ memcpy(buf, (void *)&cc_state->cong_setting + pos, count);
rcu_read_unlock();
return count;
(lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
qp->ibqp.qp_type == IB_QPT_GSI)))) {
- unsigned long flags;
+ unsigned long tflags = ps->flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
goto bail;
}
qp->s_cur = next_cur;
- local_irq_save(flags);
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, tflags);
ud_loopback(qp, wqe);
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(&qp->s_lock, tflags);
+ ps->flags = tflags;
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages))
- return -ENOMEM;
+ if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
if (pinned <= 0) {
ret = pinned;
u64 offset;
};
+#define SDMA_CACHE_NODE_EVICT BIT(0)
+
struct sdma_mmu_node {
struct mmu_rb_node rb;
struct list_head list;
atomic_t refcount;
struct page **pages;
unsigned npages;
+ unsigned long flags;
};
struct user_sdma_request {
goto free_req;
}
+ /* Checking P_KEY for requests from user-space */
+ if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
+ PKEY_CHECK_INVALID)) {
+ ret = -EINVAL;
+ goto free_req;
+ }
+
/*
* Also should check the BTH.lnh. If it says the next header is GRH then
* the RXE parsing will be off and will land in the middle of the KDETH
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/* Caller must hold pq->evict_lock */
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
u32 cleared = 0;
struct sdma_mmu_node *node, *ptr;
+ struct list_head to_evict = LIST_HEAD_INIT(to_evict);
+ spin_lock(&pq->evict_lock);
list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
/* Make sure that no one is still using the node. */
if (!atomic_read(&node->refcount)) {
- /*
- * Need to use the page count now as the remove callback
- * will free the node.
- */
+ set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
+ list_del_init(&node->list);
+ list_add(&node->list, &to_evict);
cleared += node->npages;
- spin_unlock(&pq->evict_lock);
- hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
- spin_lock(&pq->evict_lock);
if (cleared >= npages)
break;
}
}
+ spin_unlock(&pq->evict_lock);
+
+ list_for_each_entry_safe(node, ptr, &to_evict, list)
+ hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+
return cleared;
}
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
- rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
- (unsigned long)iovec->iov.iov_base,
- iovec->iov.iov_len);
+ rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
+ (unsigned long)iovec->iov.iov_base,
+ iovec->iov.iov_len);
if (rb_node && !IS_ERR(rb_node))
node = container_of(rb_node, struct sdma_mmu_node, rb);
else
return -ENOMEM;
node->rb.addr = (unsigned long)iovec->iov.iov_base;
- node->rb.len = iovec->iov.iov_len;
node->pq = pq;
atomic_set(&node->refcount, 0);
INIT_LIST_HEAD(&node->list);
memcpy(pages, node->pages, node->npages * sizeof(*pages));
npages -= node->npages;
+
+ /*
+ * If rb_node is NULL, it means that this is brand new node
+ * and, therefore not on the eviction list.
+ * If, however, the rb_node is non-NULL, it means that the
+ * node is already in RB tree and, therefore on the eviction
+ * list (nodes are unconditionally inserted in the eviction
+ * list). In that case, we have to remove the node prior to
+ * calling the eviction function in order to prevent it from
+ * freeing this node.
+ */
+ if (rb_node) {
+ spin_lock(&pq->evict_lock);
+ list_del_init(&node->list);
+ spin_unlock(&pq->evict_lock);
+ }
retry:
if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
- spin_lock(&pq->evict_lock);
cleared = sdma_cache_evict(pq, npages);
- spin_unlock(&pq->evict_lock);
if (cleared >= npages)
goto retry;
}
goto bail;
}
kfree(node->pages);
+ node->rb.len = iovec->iov.iov_len;
node->pages = pages;
node->npages += pinned;
npages = node->npages;
spin_lock(&pq->evict_lock);
- if (!rb_node)
- list_add(&node->list, &pq->evict);
- else
- list_move(&node->list, &pq->evict);
+ list_add(&node->list, &pq->evict);
pq->n_locked += pinned;
spin_unlock(&pq->evict_lock);
}
iovec->pages = node->pages;
iovec->npages = npages;
- if (!rb_node) {
- ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
- if (ret) {
- spin_lock(&pq->evict_lock);
+ ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+ if (ret) {
+ spin_lock(&pq->evict_lock);
+ if (!list_empty(&node->list))
list_del(&node->list);
- pq->n_locked -= node->npages;
- spin_unlock(&pq->evict_lock);
- ret = 0;
- goto bail;
- }
- } else {
- atomic_inc(&node->refcount);
+ pq->n_locked -= node->npages;
+ spin_unlock(&pq->evict_lock);
+ goto bail;
}
return 0;
bail:
- if (!rb_node)
- kfree(node);
+ if (rb_node)
+ unpin_vector_pages(current->mm, node->pages, 0, node->npages);
+ kfree(node);
return ret;
}
container_of(mnode, struct sdma_mmu_node, rb);
spin_lock(&node->pq->evict_lock);
- list_del(&node->list);
+ /*
+ * We've been called by the MMU notifier but this node has been
+ * scheduled for eviction. The eviction function will take care
+ * of freeing this node.
+ * We have to take the above lock first because we are racing
+ * against the setting of the bit in the eviction function.
+ */
+ if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
+ spin_unlock(&node->pq->evict_lock);
+ return;
+ }
+
+ if (!list_empty(&node->list))
+ list_del(&node->list);
node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock);
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
goto dropit;
- if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
+ if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP))
return 1;
dropit:
/*
* egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
- * being an entry from the ingress partition key table), return 0
+ * being an entry from the partition key table), return 0
* otherwise. Use the matching criteria for egress partition keys
* specified in the OPAv1 spec., section 9.1l.7.
*/
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
u16 mkey = pkey & PKEY_LOW_15_MASK;
- u16 ment = ent & PKEY_LOW_15_MASK;
+ u16 mentry = ent & PKEY_LOW_15_MASK;
- if (mkey == ment) {
+ if (mkey == mentry) {
/*
* If pkey[15] is set (full partition member),
* is bit 15 in the corresponding table element
return 0;
}
-/*
- * egress_pkey_check - return 0 if hdr's pkey matches according to the
- * criteria in the OPAv1 spec., section 9.11.7.
+/**
+ * egress_pkey_check - check P_KEY of a packet
+ * @ppd: Physical IB port data
+ * @lrh: Local route header
+ * @bth: Base transport header
+ * @sc5: SC for packet
+ * @s_pkey_index: It will be used for look up optimization for kernel contexts
+ * only. If it is negative value, then it means user contexts is calling this
+ * function.
+ *
+ * It checks if hdr's pkey is valid.
+ *
+ * Return: 0 on success, otherwise, 1
*/
-static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
- struct hfi1_ib_header *hdr,
- struct rvt_qp *qp)
+int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
+ u8 sc5, int8_t s_pkey_index)
{
- struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
struct hfi1_devdata *dd;
- int i = 0;
+ int i;
u16 pkey;
- u8 lnh, sc5 = priv->s_sc;
+ int is_user_ctxt_mechanism = (s_pkey_index < 0);
if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
return 0;
- /* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_GRH)
- ohdr = &hdr->u.l.oth;
- else
- ohdr = &hdr->u.oth;
-
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+ pkey = (u16)be32_to_cpu(bth[0]);
/* If SC15, pkey[0:14] must be 0x7fff */
if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
if ((pkey & PKEY_LOW_15_MASK) == 0)
goto bad;
- /* The most likely matching pkey has index qp->s_pkey_index */
- if (unlikely(!egress_pkey_matches_entry(pkey,
- ppd->pkeys
- [qp->s_pkey_index]))) {
- /* no match - try the entire table */
- for (; i < MAX_PKEY_VALUES; i++) {
- if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
- break;
- }
+ /*
+ * For the kernel contexts only, if a qp is passed into the function,
+ * the most likely matching pkey has index qp->s_pkey_index
+ */
+ if (!is_user_ctxt_mechanism &&
+ egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
+ return 0;
}
- if (i < MAX_PKEY_VALUES)
- return 0;
+ for (i = 0; i < MAX_PKEY_VALUES; i++) {
+ if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
+ return 0;
+ }
bad:
- incr_cntr64(&ppd->port_xmit_constraint_errors);
- dd = ppd->dd;
- if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) {
- u16 slid = be16_to_cpu(hdr->lrh[3]);
-
- dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK;
- dd->err_info_xmit_constraint.slid = slid;
- dd->err_info_xmit_constraint.pkey = pkey;
+ /*
+ * For the user-context mechanism, the P_KEY check would only happen
+ * once per SDMA request, not once per packet. Therefore, there's no
+ * need to increment the counter for the user-context mechanism.
+ */
+ if (!is_user_ctxt_mechanism) {
+ incr_cntr64(&ppd->port_xmit_constraint_errors);
+ dd = ppd->dd;
+ if (!(dd->err_info_xmit_constraint.status &
+ OPA_EI_STATUS_SMASK)) {
+ u16 slid = be16_to_cpu(lrh[3]);
+
+ dd->err_info_xmit_constraint.status |=
+ OPA_EI_STATUS_SMASK;
+ dd->err_info_xmit_constraint.slid = slid;
+ dd->err_info_xmit_constraint.pkey = pkey;
+ }
}
return 1;
}
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_other_headers *ohdr;
+ struct hfi1_ib_header *hdr;
send_routine sr;
int ret;
+ u8 lnh;
+
+ hdr = &ps->s_txreq->phdr.hdr;
+ /* locate the pkey within the headers */
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh == HFI1_LRH_GRH)
+ ohdr = &hdr->u.l.oth;
+ else
+ ohdr = &hdr->u.oth;
sr = get_send_routine(qp, ps->s_txreq);
- ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
+ ret = egress_pkey_check(dd->pport,
+ hdr->lrh,
+ ohdr->bth,
+ priv->s_sc,
+ qp->s_pkey_index);
if (unlikely(ret)) {
/*
* The value we are returning here does not get propagated to
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
+ unsigned long flags;
};
#define HFI1_PSN_CREDIT 16
#endif
#define PSN_MODIFY_MASK 0xFFFFFF
-/* Number of bits to pay attention to in the opcode for checking qp type */
-#define OPCODE_QP_MASK 0xE0
-
/*
* Compare the lower 24 bits of the msn values.
* Returns an integer <, ==, or > than zero.
transport_handle_queue_full(cmd, cmd->se_dev);
}
-static inline void transport_free_sgl(struct scatterlist *sgl, int nents)
+void target_free_sgl(struct scatterlist *sgl, int nents)
{
struct scatterlist *sg;
int count;
kfree(sgl);
}
+EXPORT_SYMBOL(target_free_sgl);
static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
{
static inline void transport_free_pages(struct se_cmd *cmd)
{
if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) {
- transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents);
+ target_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents);
cmd->t_prot_sg = NULL;
cmd->t_prot_nents = 0;
}
* SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE
*/
if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
- transport_free_sgl(cmd->t_bidi_data_sg,
+ target_free_sgl(cmd->t_bidi_data_sg,
cmd->t_bidi_data_nents);
cmd->t_bidi_data_sg = NULL;
cmd->t_bidi_data_nents = 0;
}
transport_reset_sgl_orig(cmd);
- transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents);
+ target_free_sgl(cmd->t_data_sg, cmd->t_data_nents);
cmd->t_data_sg = NULL;
cmd->t_data_nents = 0;
- transport_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents);
+ target_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents);
cmd->t_bidi_data_sg = NULL;
cmd->t_bidi_data_nents = 0;
}
int
target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length,
- bool zero_page)
+ bool zero_page, bool chainable)
{
struct scatterlist *sg;
struct page *page;
gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0;
- unsigned int nent;
+ unsigned int nalloc, nent;
int i = 0;
- nent = DIV_ROUND_UP(length, PAGE_SIZE);
- sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL);
+ nalloc = nent = DIV_ROUND_UP(length, PAGE_SIZE);
+ if (chainable)
+ nalloc++;
+ sg = kmalloc_array(nalloc, sizeof(struct scatterlist), GFP_KERNEL);
if (!sg)
return -ENOMEM;
- sg_init_table(sg, nent);
+ sg_init_table(sg, nalloc);
while (length) {
u32 page_len = min_t(u32, length, PAGE_SIZE);
kfree(sg);
return -ENOMEM;
}
+EXPORT_SYMBOL(target_alloc_sgl);
/*
* Allocate any required resources to execute the command. For writes we
if (cmd->prot_op != TARGET_PROT_NORMAL &&
!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) {
ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents,
- cmd->prot_length, true);
+ cmd->prot_length, true, false);
if (ret < 0)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
&cmd->t_bidi_data_nents,
- bidi_length, zero_flag);
+ bidi_length, zero_flag, false);
if (ret < 0)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
- cmd->data_length, zero_flag);
+ cmd->data_length, zero_flag, false);
if (ret < 0)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
} else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
&cmd->t_bidi_data_nents,
- caw_length, zero_flag);
+ caw_length, zero_flag, false);
if (ret < 0)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
if (alloc_mem) {
rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
- cmd->data_length, false);
+ cmd->data_length, false, false);
if (rc < 0) {
ret = rc;
goto out;
goto unlock_exit;
}
table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
- if (table_group_tmp->ops != table_group->ops) {
+ if (table_group_tmp->ops->create_table !=
+ table_group->ops->create_table) {
pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
iommu_group_id(iommu_group),
iommu_group_id(tcegrp->grp));
config HAVE_FB_ATMEL
bool
-config SH_MIPI_DSI
- tristate
- depends on (SUPERH || ARCH_SHMOBILE) && HAVE_CLK
-
config SH_LCD_MIPI_DSI
bool
select FB_SYS_FOPS
select FB_DEFERRED_IO
select FB_BACKLIGHT
- select SH_MIPI_DSI if SH_LCD_MIPI_DSI
---help---
Frame buffer driver for the on-chip SH-Mobile LCD controller.
obj-$(CONFIG_FB_UDL) += udlfb.o
obj-$(CONFIG_FB_SMSCUFX) += smscufx.o
obj-$(CONFIG_FB_XILINX) += xilinxfb.o
-obj-$(CONFIG_SH_MIPI_DSI) += sh_mipi_dsi.o
obj-$(CONFIG_FB_SH_MOBILE_MERAM) += sh_mobile_meram.o
obj-$(CONFIG_FB_SH_MOBILE_LCDC) += sh_mobile_lcdcfb.o
obj-$(CONFIG_FB_OMAP) += omap/
#include <video/of_display_timing.h>
#include <video/videomode.h>
-#include <asm/sizes.h>
-
#define to_clcd(info) container_of(info, struct clcd_fb, fb)
/* This is limited to 16 characters when displayed by X startup */
static int __init
fbmem_init(void)
{
- proc_create("fb", 0, NULL, &fb_proc_fops);
+ int ret;
+
+ if (!proc_create("fb", 0, NULL, &fb_proc_fops))
+ return -ENOMEM;
- if (register_chrdev(FB_MAJOR,"fb",&fb_fops))
+ ret = register_chrdev(FB_MAJOR, "fb", &fb_fops);
+ if (ret) {
printk("unable to get major %d for fb devs\n", FB_MAJOR);
+ goto err_chrdev;
+ }
fb_class = class_create(THIS_MODULE, "graphics");
if (IS_ERR(fb_class)) {
- printk(KERN_WARNING "Unable to create fb class; errno = %ld\n", PTR_ERR(fb_class));
+ ret = PTR_ERR(fb_class);
+ pr_warn("Unable to create fb class; errno = %d\n", ret);
fb_class = NULL;
+ goto err_class;
}
return 0;
+
+err_class:
+ unregister_chrdev(FB_MAJOR, "fb");
+err_chrdev:
+ remove_proc_entry("fb", NULL);
+ return ret;
}
#ifdef MODULE
goto err_release_fb;
}
- printk(KERN_INFO "efifb: framebuffer at 0x%lx, mapped to 0x%p, "
- "using %dk, total %dk\n",
- efifb_fix.smem_start, info->screen_base,
- size_remap/1024, size_total/1024);
+ printk(KERN_INFO "efifb: framebuffer at 0x%lx, using %dk, total %dk\n",
+ efifb_fix.smem_start, size_remap/1024, size_total/1024);
printk(KERN_INFO "efifb: mode is %dx%dx%d, linelength=%d, pages=%d\n",
efifb_defined.xres, efifb_defined.yres,
efifb_defined.bits_per_pixel, efifb_fix.line_length,
return 0;
}
-static void imxfb_enable_controller(struct imxfb_info *fbi)
+static int imxfb_enable_controller(struct imxfb_info *fbi)
{
+ int ret;
if (fbi->enabled)
- return;
+ return 0;
pr_debug("Enabling LCD controller\n");
*/
writel(RMCR_LCDC_EN_MX1, fbi->regs + LCDC_RMCR);
- clk_prepare_enable(fbi->clk_ipg);
- clk_prepare_enable(fbi->clk_ahb);
- clk_prepare_enable(fbi->clk_per);
+ ret = clk_prepare_enable(fbi->clk_ipg);
+ if (ret)
+ goto err_enable_ipg;
+
+ ret = clk_prepare_enable(fbi->clk_ahb);
+ if (ret)
+ goto err_enable_ahb;
+
+ ret = clk_prepare_enable(fbi->clk_per);
+ if (ret)
+ goto err_enable_per;
+
fbi->enabled = true;
+ return 0;
+
+err_enable_per:
+ clk_disable_unprepare(fbi->clk_ahb);
+err_enable_ahb:
+ clk_disable_unprepare(fbi->clk_ipg);
+err_enable_ipg:
+ writel(0, fbi->regs + LCDC_RMCR);
+
+ return ret;
}
static void imxfb_disable_controller(struct imxfb_info *fbi)
pr_debug("Disabling LCD controller\n");
clk_disable_unprepare(fbi->clk_per);
- clk_disable_unprepare(fbi->clk_ipg);
clk_disable_unprepare(fbi->clk_ahb);
+ clk_disable_unprepare(fbi->clk_ipg);
fbi->enabled = false;
writel(0, fbi->regs + LCDC_RMCR);
break;
case FB_BLANK_UNBLANK:
- imxfb_enable_controller(fbi);
- break;
+ return imxfb_enable_controller(fbi);
}
return 0;
}
{
struct imxfb_info *fbi = dev_get_drvdata(&lcddev->dev);
- if (!IS_ERR(fbi->lcd_pwr))
- return regulator_is_enabled(fbi->lcd_pwr);
+ if (!IS_ERR(fbi->lcd_pwr) &&
+ !regulator_is_enabled(fbi->lcd_pwr))
+ return FB_BLANK_POWERDOWN;
- return 1;
+ return FB_BLANK_UNBLANK;
}
static int imxfb_lcd_set_power(struct lcd_device *lcddev, int power)
struct imxfb_info *fbi = dev_get_drvdata(&lcddev->dev);
if (!IS_ERR(fbi->lcd_pwr)) {
- if (power)
+ if (power == FB_BLANK_UNBLANK)
return regulator_enable(fbi->lcd_pwr);
else
return regulator_disable(fbi->lcd_pwr);
return PTR_ERR(vdds_dsi);
}
- if (regulator_can_change_voltage(vdds_dsi)) {
- r = regulator_set_voltage(vdds_dsi, 1800000, 1800000);
- if (r) {
- devm_regulator_put(vdds_dsi);
- DSSERR("can't set the DSI regulator voltage\n");
- return r;
- }
+ r = regulator_set_voltage(vdds_dsi, 1800000, 1800000);
+ if (r) {
+ devm_regulator_put(vdds_dsi);
+ DSSERR("can't set the DSI regulator voltage\n");
+ return r;
}
dsi->vdds_dsi_reg = vdds_dsi;
return PTR_ERR(reg);
}
- if (regulator_can_change_voltage(reg)) {
- r = regulator_set_voltage(reg, 1800000, 1800000);
- if (r) {
- devm_regulator_put(reg);
- DSSWARN("can't set the regulator voltage\n");
- return r;
- }
+ r = regulator_set_voltage(reg, 1800000, 1800000);
+ if (r) {
+ devm_regulator_put(reg);
+ DSSWARN("can't set the regulator voltage\n");
+ return r;
}
hdmi.vdda_reg = reg;
return PTR_ERR(reg);
}
- if (regulator_can_change_voltage(reg)) {
- r = regulator_set_voltage(reg, 1800000, 1800000);
- if (r) {
- devm_regulator_put(reg);
- DSSWARN("can't set the regulator voltage\n");
- return r;
- }
+ r = regulator_set_voltage(reg, 1800000, 1800000);
+ if (r) {
+ devm_regulator_put(reg);
+ DSSWARN("can't set the regulator voltage\n");
+ return r;
}
hdmi.vdda_reg = reg;
+++ /dev/null
-/*
- * Renesas SH-mobile MIPI DSI support
- *
- * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- */
-
-#include <linux/bitmap.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/module.h>
-
-#include <video/mipi_display.h>
-#include <video/sh_mipi_dsi.h>
-#include <video/sh_mobile_lcdc.h>
-
-#include "sh_mobile_lcdcfb.h"
-
-#define SYSCTRL 0x0000
-#define SYSCONF 0x0004
-#define TIMSET 0x0008
-#define RESREQSET0 0x0018
-#define RESREQSET1 0x001c
-#define HSTTOVSET 0x0020
-#define LPRTOVSET 0x0024
-#define TATOVSET 0x0028
-#define PRTOVSET 0x002c
-#define DSICTRL 0x0030
-#define DSIINTE 0x0060
-#define PHYCTRL 0x0070
-
-/* relative to linkbase */
-#define DTCTR 0x0000
-#define VMCTR1 0x0020
-#define VMCTR2 0x0024
-#define VMLEN1 0x0028
-#define VMLEN2 0x002c
-#define CMTSRTREQ 0x0070
-#define CMTSRTCTR 0x00d0
-
-/* E.g., sh7372 has 2 MIPI-DSIs - one for each LCDC */
-#define MAX_SH_MIPI_DSI 2
-
-struct sh_mipi {
- struct sh_mobile_lcdc_entity entity;
-
- void __iomem *base;
- void __iomem *linkbase;
- struct clk *dsit_clk;
- struct platform_device *pdev;
-};
-
-#define to_sh_mipi(e) container_of(e, struct sh_mipi, entity)
-
-static struct sh_mipi *mipi_dsi[MAX_SH_MIPI_DSI];
-
-/* Protect the above array */
-static DEFINE_MUTEX(array_lock);
-
-static struct sh_mipi *sh_mipi_by_handle(int handle)
-{
- if (handle >= ARRAY_SIZE(mipi_dsi) || handle < 0)
- return NULL;
-
- return mipi_dsi[handle];
-}
-
-static int sh_mipi_send_short(struct sh_mipi *mipi, u8 dsi_cmd,
- u8 cmd, u8 param)
-{
- u32 data = (dsi_cmd << 24) | (cmd << 16) | (param << 8);
- int cnt = 100;
-
- /* transmit a short packet to LCD panel */
- iowrite32(1 | data, mipi->linkbase + CMTSRTCTR);
- iowrite32(1, mipi->linkbase + CMTSRTREQ);
-
- while ((ioread32(mipi->linkbase + CMTSRTREQ) & 1) && --cnt)
- udelay(1);
-
- return cnt ? 0 : -ETIMEDOUT;
-}
-
-#define LCD_CHAN2MIPI(c) ((c) < LCDC_CHAN_MAINLCD || (c) > LCDC_CHAN_SUBLCD ? \
- -EINVAL : (c) - 1)
-
-static int sh_mipi_dcs(int handle, u8 cmd)
-{
- struct sh_mipi *mipi = sh_mipi_by_handle(LCD_CHAN2MIPI(handle));
- if (!mipi)
- return -ENODEV;
- return sh_mipi_send_short(mipi, MIPI_DSI_DCS_SHORT_WRITE, cmd, 0);
-}
-
-static int sh_mipi_dcs_param(int handle, u8 cmd, u8 param)
-{
- struct sh_mipi *mipi = sh_mipi_by_handle(LCD_CHAN2MIPI(handle));
- if (!mipi)
- return -ENODEV;
- return sh_mipi_send_short(mipi, MIPI_DSI_DCS_SHORT_WRITE_PARAM, cmd,
- param);
-}
-
-static void sh_mipi_dsi_enable(struct sh_mipi *mipi, bool enable)
-{
- /*
- * enable LCDC data tx, transition to LPS after completion of each HS
- * packet
- */
- iowrite32(0x00000002 | enable, mipi->linkbase + DTCTR);
-}
-
-static void sh_mipi_shutdown(struct platform_device *pdev)
-{
- struct sh_mipi *mipi = to_sh_mipi(platform_get_drvdata(pdev));
-
- sh_mipi_dsi_enable(mipi, false);
-}
-
-static int sh_mipi_setup(struct sh_mipi *mipi, const struct fb_videomode *mode)
-{
- void __iomem *base = mipi->base;
- struct sh_mipi_dsi_info *pdata = mipi->pdev->dev.platform_data;
- u32 pctype, datatype, pixfmt, linelength, vmctr2;
- u32 tmp, top, bottom, delay, div;
- int bpp;
-
- /*
- * Select data format. MIPI DSI is not hot-pluggable, so, we just use
- * the default videomode. If this ever becomes a problem, We'll have to
- * move this to mipi_display_on() above and use info->var.xres
- */
- switch (pdata->data_format) {
- case MIPI_RGB888:
- pctype = 0;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24;
- pixfmt = MIPI_DCS_PIXEL_FMT_24BIT;
- linelength = mode->xres * 3;
- break;
- case MIPI_RGB565:
- pctype = 1;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16;
- pixfmt = MIPI_DCS_PIXEL_FMT_16BIT;
- linelength = mode->xres * 2;
- break;
- case MIPI_RGB666_LP:
- pctype = 2;
- datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18;
- pixfmt = MIPI_DCS_PIXEL_FMT_24BIT;
- linelength = mode->xres * 3;
- break;
- case MIPI_RGB666:
- pctype = 3;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18;
- pixfmt = MIPI_DCS_PIXEL_FMT_18BIT;
- linelength = (mode->xres * 18 + 7) / 8;
- break;
- case MIPI_BGR888:
- pctype = 8;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24;
- pixfmt = MIPI_DCS_PIXEL_FMT_24BIT;
- linelength = mode->xres * 3;
- break;
- case MIPI_BGR565:
- pctype = 9;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16;
- pixfmt = MIPI_DCS_PIXEL_FMT_16BIT;
- linelength = mode->xres * 2;
- break;
- case MIPI_BGR666_LP:
- pctype = 0xa;
- datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18;
- pixfmt = MIPI_DCS_PIXEL_FMT_24BIT;
- linelength = mode->xres * 3;
- break;
- case MIPI_BGR666:
- pctype = 0xb;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18;
- pixfmt = MIPI_DCS_PIXEL_FMT_18BIT;
- linelength = (mode->xres * 18 + 7) / 8;
- break;
- case MIPI_YUYV:
- pctype = 4;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16;
- pixfmt = MIPI_DCS_PIXEL_FMT_16BIT;
- linelength = mode->xres * 2;
- break;
- case MIPI_UYVY:
- pctype = 5;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16;
- pixfmt = MIPI_DCS_PIXEL_FMT_16BIT;
- linelength = mode->xres * 2;
- break;
- case MIPI_YUV420_L:
- pctype = 6;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12;
- pixfmt = MIPI_DCS_PIXEL_FMT_12BIT;
- linelength = (mode->xres * 12 + 7) / 8;
- break;
- case MIPI_YUV420:
- pctype = 7;
- datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12;
- pixfmt = MIPI_DCS_PIXEL_FMT_12BIT;
- /* Length of U/V line */
- linelength = (mode->xres + 1) / 2;
- break;
- default:
- return -EINVAL;
- }
-
- if (!pdata->lane)
- return -EINVAL;
-
- /* reset DSI link */
- iowrite32(0x00000001, base + SYSCTRL);
- /* Hold reset for 100 cycles of the slowest of bus, HS byte and LP clock */
- udelay(50);
- iowrite32(0x00000000, base + SYSCTRL);
-
- /* setup DSI link */
-
- /*
- * T_wakeup = 0x7000
- * T_hs-trail = 3
- * T_hs-prepare = 3
- * T_clk-trail = 3
- * T_clk-prepare = 2
- */
- iowrite32(0x70003332, base + TIMSET);
- /* no responses requested */
- iowrite32(0x00000000, base + RESREQSET0);
- /* request response to packets of type 0x28 */
- iowrite32(0x00000100, base + RESREQSET1);
- /* High-speed transmission timeout, default 0xffffffff */
- iowrite32(0x0fffffff, base + HSTTOVSET);
- /* LP reception timeout, default 0xffffffff */
- iowrite32(0x0fffffff, base + LPRTOVSET);
- /* Turn-around timeout, default 0xffffffff */
- iowrite32(0x0fffffff, base + TATOVSET);
- /* Peripheral reset timeout, default 0xffffffff */
- iowrite32(0x0fffffff, base + PRTOVSET);
- /* Interrupts not used, disable all */
- iowrite32(0, base + DSIINTE);
- /* DSI-Tx bias on */
- iowrite32(0x00000001, base + PHYCTRL);
- udelay(200);
- /* Deassert resets, power on */
- iowrite32(0x03070001 | pdata->phyctrl, base + PHYCTRL);
-
- /*
- * Default = ULPS enable |
- * Contention detection enabled |
- * EoT packet transmission enable |
- * CRC check enable |
- * ECC check enable
- */
- bitmap_fill((unsigned long *)&tmp, pdata->lane);
- tmp |= 0x00003700;
- iowrite32(tmp, base + SYSCONF);
-
- /* setup l-bridge */
-
- /*
- * Enable transmission of all packets,
- * transmit LPS after each HS packet completion
- */
- iowrite32(0x00000006, mipi->linkbase + DTCTR);
- /* VSYNC width = 2 (<< 17) */
- iowrite32((mode->vsync_len << pdata->vsynw_offset) |
- (pdata->clksrc << 16) | (pctype << 12) | datatype,
- mipi->linkbase + VMCTR1);
-
- /*
- * Non-burst mode with sync pulses: VSE and HSE are output,
- * HSA period allowed, no commands in LP
- */
- vmctr2 = 0;
- if (pdata->flags & SH_MIPI_DSI_VSEE)
- vmctr2 |= 1 << 23;
- if (pdata->flags & SH_MIPI_DSI_HSEE)
- vmctr2 |= 1 << 22;
- if (pdata->flags & SH_MIPI_DSI_HSAE)
- vmctr2 |= 1 << 21;
- if (pdata->flags & SH_MIPI_DSI_BL2E)
- vmctr2 |= 1 << 17;
- if (pdata->flags & SH_MIPI_DSI_HSABM)
- vmctr2 |= 1 << 5;
- if (pdata->flags & SH_MIPI_DSI_HBPBM)
- vmctr2 |= 1 << 4;
- if (pdata->flags & SH_MIPI_DSI_HFPBM)
- vmctr2 |= 1 << 3;
- iowrite32(vmctr2, mipi->linkbase + VMCTR2);
-
- /*
- * VMLEN1 = RGBLEN | HSALEN
- *
- * see
- * Video mode - Blanking Packet setting
- */
- top = linelength << 16; /* RGBLEN */
- bottom = 0x00000001;
- if (pdata->flags & SH_MIPI_DSI_HSABM) /* HSALEN */
- bottom = (pdata->lane * mode->hsync_len) - 10;
- iowrite32(top | bottom , mipi->linkbase + VMLEN1);
-
- /*
- * VMLEN2 = HBPLEN | HFPLEN
- *
- * see
- * Video mode - Blanking Packet setting
- */
- top = 0x00010000;
- bottom = 0x00000001;
- delay = 0;
-
- div = 1; /* HSbyteCLK is calculation base
- * HS4divCLK = HSbyteCLK/2
- * HS6divCLK is not supported for now */
- if (pdata->flags & SH_MIPI_DSI_HS4divCLK)
- div = 2;
-
- if (pdata->flags & SH_MIPI_DSI_HFPBM) { /* HBPLEN */
- top = mode->hsync_len + mode->left_margin;
- top = ((pdata->lane * top / div) - 10) << 16;
- }
- if (pdata->flags & SH_MIPI_DSI_HBPBM) { /* HFPLEN */
- bottom = mode->right_margin;
- bottom = (pdata->lane * bottom / div) - 12;
- }
-
- bpp = linelength / mode->xres; /* byte / pixel */
- if ((pdata->lane / div) > bpp) {
- tmp = mode->xres / bpp; /* output cycle */
- tmp = mode->xres - tmp; /* (input - output) cycle */
- delay = (pdata->lane * tmp);
- }
-
- iowrite32(top | (bottom + delay) , mipi->linkbase + VMLEN2);
-
- msleep(5);
-
- /* setup LCD panel */
-
- /* cf. drivers/video/omap/lcd_mipid.c */
- sh_mipi_dcs(pdata->channel, MIPI_DCS_EXIT_SLEEP_MODE);
- msleep(120);
- /*
- * [7] - Page Address Mode
- * [6] - Column Address Mode
- * [5] - Page / Column Address Mode
- * [4] - Display Device Line Refresh Order
- * [3] - RGB/BGR Order
- * [2] - Display Data Latch Data Order
- * [1] - Flip Horizontal
- * [0] - Flip Vertical
- */
- sh_mipi_dcs_param(pdata->channel, MIPI_DCS_SET_ADDRESS_MODE, 0x00);
- /* cf. set_data_lines() */
- sh_mipi_dcs_param(pdata->channel, MIPI_DCS_SET_PIXEL_FORMAT,
- pixfmt << 4);
- sh_mipi_dcs(pdata->channel, MIPI_DCS_SET_DISPLAY_ON);
-
- /* Enable timeout counters */
- iowrite32(0x00000f00, base + DSICTRL);
-
- return 0;
-}
-
-static int mipi_display_on(struct sh_mobile_lcdc_entity *entity)
-{
- struct sh_mipi *mipi = to_sh_mipi(entity);
- struct sh_mipi_dsi_info *pdata = mipi->pdev->dev.platform_data;
- int ret;
-
- pm_runtime_get_sync(&mipi->pdev->dev);
-
- ret = pdata->set_dot_clock(mipi->pdev, mipi->base, 1);
- if (ret < 0)
- goto mipi_display_on_fail1;
-
- ret = sh_mipi_setup(mipi, &entity->def_mode);
- if (ret < 0)
- goto mipi_display_on_fail2;
-
- sh_mipi_dsi_enable(mipi, true);
-
- return SH_MOBILE_LCDC_DISPLAY_CONNECTED;
-
-mipi_display_on_fail1:
- pm_runtime_put_sync(&mipi->pdev->dev);
-mipi_display_on_fail2:
- pdata->set_dot_clock(mipi->pdev, mipi->base, 0);
-
- return ret;
-}
-
-static void mipi_display_off(struct sh_mobile_lcdc_entity *entity)
-{
- struct sh_mipi *mipi = to_sh_mipi(entity);
- struct sh_mipi_dsi_info *pdata = mipi->pdev->dev.platform_data;
-
- sh_mipi_dsi_enable(mipi, false);
-
- pdata->set_dot_clock(mipi->pdev, mipi->base, 0);
-
- pm_runtime_put_sync(&mipi->pdev->dev);
-}
-
-static const struct sh_mobile_lcdc_entity_ops mipi_ops = {
- .display_on = mipi_display_on,
- .display_off = mipi_display_off,
-};
-
-static int __init sh_mipi_probe(struct platform_device *pdev)
-{
- struct sh_mipi *mipi;
- struct sh_mipi_dsi_info *pdata = pdev->dev.platform_data;
- struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- struct resource *res2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- unsigned long rate, f_current;
- int idx = pdev->id, ret;
-
- if (!res || !res2 || idx >= ARRAY_SIZE(mipi_dsi) || !pdata)
- return -ENODEV;
-
- if (!pdata->set_dot_clock)
- return -EINVAL;
-
- mutex_lock(&array_lock);
- if (idx < 0)
- for (idx = 0; idx < ARRAY_SIZE(mipi_dsi) && mipi_dsi[idx]; idx++)
- ;
-
- if (idx == ARRAY_SIZE(mipi_dsi)) {
- ret = -EBUSY;
- goto efindslot;
- }
-
- mipi = kzalloc(sizeof(*mipi), GFP_KERNEL);
- if (!mipi) {
- ret = -ENOMEM;
- goto ealloc;
- }
-
- mipi->entity.owner = THIS_MODULE;
- mipi->entity.ops = &mipi_ops;
-
- if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
- dev_err(&pdev->dev, "MIPI register region already claimed\n");
- ret = -EBUSY;
- goto ereqreg;
- }
-
- mipi->base = ioremap(res->start, resource_size(res));
- if (!mipi->base) {
- ret = -ENOMEM;
- goto emap;
- }
-
- if (!request_mem_region(res2->start, resource_size(res2), pdev->name)) {
- dev_err(&pdev->dev, "MIPI register region 2 already claimed\n");
- ret = -EBUSY;
- goto ereqreg2;
- }
-
- mipi->linkbase = ioremap(res2->start, resource_size(res2));
- if (!mipi->linkbase) {
- ret = -ENOMEM;
- goto emap2;
- }
-
- mipi->pdev = pdev;
-
- mipi->dsit_clk = clk_get(&pdev->dev, "dsit_clk");
- if (IS_ERR(mipi->dsit_clk)) {
- ret = PTR_ERR(mipi->dsit_clk);
- goto eclktget;
- }
-
- f_current = clk_get_rate(mipi->dsit_clk);
- /* 80MHz required by the datasheet */
- rate = clk_round_rate(mipi->dsit_clk, 80000000);
- if (rate > 0 && rate != f_current)
- ret = clk_set_rate(mipi->dsit_clk, rate);
- else
- ret = rate;
- if (ret < 0)
- goto esettrate;
-
- dev_dbg(&pdev->dev, "DSI-T clk %lu -> %lu\n", f_current, rate);
-
- ret = clk_enable(mipi->dsit_clk);
- if (ret < 0)
- goto eclkton;
-
- mipi_dsi[idx] = mipi;
-
- pm_runtime_enable(&pdev->dev);
- pm_runtime_resume(&pdev->dev);
-
- mutex_unlock(&array_lock);
- platform_set_drvdata(pdev, &mipi->entity);
-
- return 0;
-
-eclkton:
-esettrate:
- clk_put(mipi->dsit_clk);
-eclktget:
- iounmap(mipi->linkbase);
-emap2:
- release_mem_region(res2->start, resource_size(res2));
-ereqreg2:
- iounmap(mipi->base);
-emap:
- release_mem_region(res->start, resource_size(res));
-ereqreg:
- kfree(mipi);
-ealloc:
-efindslot:
- mutex_unlock(&array_lock);
-
- return ret;
-}
-
-static int sh_mipi_remove(struct platform_device *pdev)
-{
- struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- struct resource *res2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- struct sh_mipi *mipi = to_sh_mipi(platform_get_drvdata(pdev));
- int i, ret;
-
- mutex_lock(&array_lock);
-
- for (i = 0; i < ARRAY_SIZE(mipi_dsi) && mipi_dsi[i] != mipi; i++)
- ;
-
- if (i == ARRAY_SIZE(mipi_dsi)) {
- ret = -EINVAL;
- } else {
- ret = 0;
- mipi_dsi[i] = NULL;
- }
-
- mutex_unlock(&array_lock);
-
- if (ret < 0)
- return ret;
-
- pm_runtime_disable(&pdev->dev);
- clk_disable(mipi->dsit_clk);
- clk_put(mipi->dsit_clk);
-
- iounmap(mipi->linkbase);
- if (res2)
- release_mem_region(res2->start, resource_size(res2));
- iounmap(mipi->base);
- if (res)
- release_mem_region(res->start, resource_size(res));
- kfree(mipi);
-
- return 0;
-}
-
-static struct platform_driver sh_mipi_driver = {
- .remove = sh_mipi_remove,
- .shutdown = sh_mipi_shutdown,
- .driver = {
- .name = "sh-mipi-dsi",
- },
-};
-
-module_platform_driver_probe(sh_mipi_driver, sh_mipi_probe);
-
-MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
-MODULE_DESCRIPTION("SuperH / ARM-shmobile MIPI DSI driver");
-MODULE_LICENSE("GPL v2");
return ret;
ret = ssd1307fb_write_cmd(par->client,
- (par->device_info->need_chargepump & 0x1 << 2) & 0x14);
+ BIT(4) | (par->device_info->need_chargepump ? BIT(2) : 0));
if (ret < 0)
return ret;
viapar->shared->vq_vram_addr = viapar->fbmem_free;
viapar->fbmem_used += VQ_SIZE;
-#if defined(CONFIG_VIDEO_VIA_CAMERA) || defined(CONFIG_VIDEO_VIA_CAMERA_MODULE)
+#if IS_ENABLED(CONFIG_VIDEO_VIA_CAMERA)
/*
* Set aside a chunk of framebuffer memory for the camera
* driver. Someday this driver probably needs a proper allocator
* most viafb systems will not need to have this extra code for a while.
* As soon as another user comes long, the ifdef can be removed.
*/
-#if defined(CONFIG_VIDEO_VIA_CAMERA) || defined(CONFIG_VIDEO_VIA_CAMERA_MODULE)
+#if IS_ENABLED(CONFIG_VIDEO_VIA_CAMERA)
/*
* Access to the DMA engine. This currently provides what the camera
* driver needs (i.e. outgoing only) but is easily expandable if need
{
.name = "viafb-i2c",
},
-#if defined(CONFIG_VIDEO_VIA_CAMERA) || defined(CONFIG_VIDEO_VIA_CAMERA_MODULE)
+#if IS_ENABLED(CONFIG_VIDEO_VIA_CAMERA)
{
.name = "viafb-camera",
},
*/
static void free_more_memory(void)
{
- struct zone *zone;
+ struct zoneref *z;
int nid;
wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
yield();
for_each_online_node(nid) {
- (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
- gfp_zone(GFP_NOFS), NULL,
- &zone);
- if (zone)
+
+ z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
+ gfp_zone(GFP_NOFS), NULL);
+ if (z->zone)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS, NULL);
}
return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
}
-static inline struct timespec ep_set_mstimeout(long ms)
+static inline struct timespec64 ep_set_mstimeout(long ms)
{
- struct timespec now, ts = {
+ struct timespec64 now, ts = {
.tv_sec = ms / MSEC_PER_SEC,
.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
};
- ktime_get_ts(&now);
- return timespec_add_safe(now, ts);
+ ktime_get_ts64(&now);
+ return timespec64_add_safe(now, ts);
}
/**
ktime_t expires, *to = NULL;
if (timeout > 0) {
- struct timespec end_time = ep_set_mstimeout(timeout);
+ struct timespec64 end_time = ep_set_mstimeout(timeout);
slack = select_estimate_accuracy(&end_time);
to = &expires;
- *to = timespec_to_ktime(end_time);
+ *to = timespec64_to_ktime(end_time);
} else if (timeout == 0) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
list_del_init(&bd->bd_list);
else
- gfs2_remove_from_journal(bh, current->journal_info, 0);
+ gfs2_remove_from_journal(bh, REMOVE_JDATA);
}
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
rv = gfs2_glock_nq(&gh);
if (rv)
- return rv;
+ goto out_uninit;
rv = gfs2_ok_for_dio(ip, offset);
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
gfs2_get_block_direct, NULL, NULL, 0);
out:
gfs2_glock_dq(&gh);
+out_uninit:
gfs2_holder_uninit(&gh);
return rv;
}
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
error = gfs2_glock_nq(&gh);
if (error)
- return error;
+ goto out_uninit;
fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
error = -EFAULT;
gfs2_glock_dq(&gh);
+out_uninit:
gfs2_holder_uninit(&gh);
return error;
}
return ret;
}
+static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct inode *inode = in->f_mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ inode_lock(inode);
+
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ if (ret) {
+ inode_unlock(inode);
+ return ret;
+ }
+
+ gfs2_glock_dq_uninit(&gh);
+ inode_unlock(inode);
+
+ return generic_file_splice_read(in, ppos, pipe, len, flags);
+}
+
+
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = generic_file_splice_read,
+ .splice_read = gfs2_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = gfs2_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
*
*/
-static inline void do_error(struct gfs2_glock *gl, const int ret)
+static void do_error(struct gfs2_glock *gl, const int ret)
{
struct gfs2_holder *gh, *tmp;
if (sdp->sd_lockstruct.ls_ops->lm_lock) {
/* lock_dlm */
ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
- if (ret) {
+ if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
+ target == LM_ST_UNLOCKED &&
+ test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
+ finish_xmote(gl, target);
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
+ }
+ else if (ret) {
pr_err("lm_lock ret %d\n", ret);
GLOCK_BUG_ON(gl, 1);
}
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct gfs2_holder *gh;
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
return 0;
- if (!list_empty(&gl->gl_holders)) {
- gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
- if (gh->gh_list.next != &gl->gl_holders)
- return 0;
- }
-
return 1;
}
int error;
inode = iget_locked(sb, (unsigned long)no_addr);
- ip = GFS2_I(inode);
- ip->i_no_addr = no_addr;
-
if (!inode)
return ERR_PTR(-ENOMEM);
+ ip = GFS2_I(inode);
+ ip->i_no_addr = no_addr;
+
if (inode->i_state & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
ip->i_no_formal_ino = no_formal_ino;
return 0;
}
-void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
+void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
+ struct gfs2_trans *tr = current->journal_info;
int was_pinned = 0;
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
atomic_dec(&sdp->sd_log_pinned);
list_del_init(&bd->bd_list);
- if (meta)
+ if (meta == REMOVE_META)
tr->tr_num_buf_rm++;
else
tr->tr_num_databuf_rm++;
if (bh) {
lock_buffer(bh);
gfs2_log_lock(sdp);
- gfs2_remove_from_journal(bh, current->journal_info, 1);
+ gfs2_remove_from_journal(bh, REMOVE_META);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
int create);
-extern void gfs2_remove_from_journal(struct buffer_head *bh,
- struct gfs2_trans *tr, int meta);
+enum {
+ REMOVE_JDATA = 0,
+ REMOVE_META = 1,
+};
+
+extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta);
extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head **bhp);
};
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
- const struct gfs2_inode *ip, bool nowrap,
- const struct gfs2_alloc_parms *ap);
+ const struct gfs2_inode *ip, bool nowrap);
/**
if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
return;
- ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
+ ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
if (ret == 0) {
rs->rs_rbm = rbm;
rs->rs_free = extlen;
* @ip: If set, check for reservations
* @nowrap: Stop looking at the end of the rgrp, rather than wrapping
* around until we've reached the starting point.
- * @ap: the allocation parameters
*
* Side effects:
* - If looking for free blocks, we set GBF_FULL on each bitmap which
*/
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
- const struct gfs2_inode *ip, bool nowrap,
- const struct gfs2_alloc_parms *ap)
+ const struct gfs2_inode *ip, bool nowrap)
{
struct buffer_head *bh;
int initial_bii;
while (1) {
down_write(&sdp->sd_log_flush_lock);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
- true, NULL);
+ true);
up_write(&sdp->sd_log_flush_lock);
if (error == -ENOSPC)
break;
int error;
gfs2_set_alloc_start(&rbm, ip, dinode);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
if (error == -ENOSPC) {
gfs2_set_alloc_start(&rbm, ip, dinode);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
- NULL);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
}
/* Since all blocks are reserved in advance, this shouldn't happen */
fs_err(sdp, "telling LM to unmount\n");
lm->lm_unmount(sdp);
}
+ set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
fs_err(sdp, "withdrawn\n");
dump_stack();
}
fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks,
&mnt->mnt_root->d_lock);
}
+/* prepare for freeing all marks associated with given group */
+extern void fsnotify_detach_group_marks(struct fsnotify_group *group);
+/*
+ * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list
+ */
+extern void fsnotify_mark_destroy_list(void);
+
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
- /* clear all inode marks for this group */
- fsnotify_clear_marks_by_group(group);
+ /* clear all inode marks for this group, attach them to destroy_list */
+ fsnotify_detach_group_marks(group);
- synchronize_srcu(&fsnotify_mark_srcu);
+ /*
+ * Wait for fsnotify_mark_srcu period to end and free all marks in
+ * destroy_list
+ */
+ fsnotify_mark_destroy_list();
- /* clear the notification queue of all events */
+ /*
+ * Since we have waited for fsnotify_mark_srcu in
+ * fsnotify_mark_destroy_list() there can be no outstanding event
+ * notification against this group. So clearing the notification queue
+ * of all events is reliable now.
+ */
fsnotify_flush_notify(group);
/*
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
-static void fsnotify_mark_destroy(struct work_struct *work);
-static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
+static void fsnotify_mark_destroy_workfn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
}
/*
- * Free fsnotify mark. The freeing is actually happening from a kthread which
- * first waits for srcu period end. Caller must have a reference to the mark
- * or be protected by fsnotify_mark_srcu.
+ * Prepare mark for freeing and add it to the list of marks prepared for
+ * freeing. The actual freeing must happen after SRCU period ends and the
+ * caller is responsible for this.
+ *
+ * The function returns true if the mark was added to the list of marks for
+ * freeing. The function returns false if someone else has already called
+ * __fsnotify_free_mark() for the mark.
*/
-void fsnotify_free_mark(struct fsnotify_mark *mark)
+static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
{
struct fsnotify_group *group = mark->group;
/* something else already called this function on this mark */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
spin_unlock(&mark->lock);
- return;
+ return false;
}
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
spin_unlock(&mark->lock);
- spin_lock(&destroy_lock);
- list_add(&mark->g_list, &destroy_list);
- spin_unlock(&destroy_lock);
- queue_delayed_work(system_unbound_wq, &reaper_work,
- FSNOTIFY_REAPER_DELAY);
-
/*
* Some groups like to know that marks are being freed. This is a
* callback to the group function to let it know that this mark
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(mark, group);
+
+ spin_lock(&destroy_lock);
+ list_add(&mark->g_list, &destroy_list);
+ spin_unlock(&destroy_lock);
+
+ return true;
+}
+
+/*
+ * Free fsnotify mark. The freeing is actually happening from a workqueue which
+ * first waits for srcu period end. Caller must have a reference to the mark
+ * or be protected by fsnotify_mark_srcu.
+ */
+void fsnotify_free_mark(struct fsnotify_mark *mark)
+{
+ if (__fsnotify_free_mark(mark)) {
+ queue_delayed_work(system_unbound_wq, &reaper_work,
+ FSNOTIFY_REAPER_DELAY);
+ }
}
void fsnotify_destroy_mark(struct fsnotify_mark *mark,
}
/*
- * Given a group, destroy all of the marks associated with that group.
+ * Given a group, prepare for freeing all the marks associated with that group.
+ * The marks are attached to the list of marks prepared for destruction, the
+ * caller is responsible for freeing marks in that list after SRCU period has
+ * ended.
*/
-void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+void fsnotify_detach_group_marks(struct fsnotify_group *group)
{
- fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
+ struct fsnotify_mark *mark;
+
+ while (1) {
+ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+ if (list_empty(&group->marks_list)) {
+ mutex_unlock(&group->mark_mutex);
+ break;
+ }
+ mark = list_first_entry(&group->marks_list,
+ struct fsnotify_mark, g_list);
+ fsnotify_get_mark(mark);
+ fsnotify_detach_mark(mark);
+ mutex_unlock(&group->mark_mutex);
+ __fsnotify_free_mark(mark);
+ fsnotify_put_mark(mark);
+ }
}
void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
mark->free_mark = free_mark;
}
-static void fsnotify_mark_destroy(struct work_struct *work)
+/*
+ * Destroy all marks in destroy_list, waits for SRCU period to finish before
+ * actually freeing marks.
+ */
+void fsnotify_mark_destroy_list(void)
{
struct fsnotify_mark *mark, *next;
struct list_head private_destroy_list;
fsnotify_put_mark(mark);
}
}
+
+static void fsnotify_mark_destroy_workfn(struct work_struct *work)
+{
+ fsnotify_mark_destroy_list();
+}
{
int ret;
u32 left_cpos, rec_range, trunc_range;
- int wants_rotate = 0, is_rightmost_tree_rec = 0;
+ int is_rightmost_tree_rec = 0;
struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
struct ocfs2_path *left_path = NULL;
struct ocfs2_extent_list *el = path_leaf_el(path);
memset(rec, 0, sizeof(*rec));
ocfs2_cleanup_merge(el, index);
- wants_rotate = 1;
next_free = le16_to_cpu(el->l_next_free_rec);
if (is_rightmost_tree_rec && next_free > 1) {
static int o2hb_read_block_input(struct o2hb_region *reg,
const char *page,
- size_t count,
unsigned long *ret_bytes,
unsigned int *ret_bits)
{
if (reg->hr_bdev)
return -EINVAL;
- status = o2hb_read_block_input(reg, page, count,
- &block_bytes, &block_bits);
+ status = o2hb_read_block_input(reg, page, &block_bytes,
+ &block_bits);
if (status)
return status;
/*00*/ __u8 es_valid;
__u8 es_reserved1[3];
__le32 es_node_num;
-/*10*/
+/*08*/
};
/*
spin_unlock(&osb->osb_lock);
status = ocfs2_update_disk_slot(osb, si, slot_num);
- if (status < 0) {
+ if (status < 0)
mlog_errno(status);
- goto bail;
- }
-bail:
ocfs2_free_slot_info(osb);
}
-
/*
- * Caveats on high order pages: page->_count will only be set
+ * Caveats on high order pages: page->_refcount will only be set
* -1 on the head page; SLUB/SLQB do the same for PG_slab;
* SLOB won't set PG_slab at all on compound pages.
*/
#define MAX_SLACK (100 * NSEC_PER_MSEC)
-static long __estimate_accuracy(struct timespec *tv)
+static long __estimate_accuracy(struct timespec64 *tv)
{
long slack;
int divfactor = 1000;
return slack;
}
-u64 select_estimate_accuracy(struct timespec *tv)
+u64 select_estimate_accuracy(struct timespec64 *tv)
{
u64 ret;
- struct timespec now;
+ struct timespec64 now;
/*
* Realtime tasks get a slack of 0 for obvious reasons.
if (rt_task(current))
return 0;
- ktime_get_ts(&now);
- now = timespec_sub(*tv, now);
+ ktime_get_ts64(&now);
+ now = timespec64_sub(*tv, now);
ret = __estimate_accuracy(&now);
if (ret < current->timer_slack_ns)
return current->timer_slack_ns;
/**
* poll_select_set_timeout - helper function to setup the timeout value
- * @to: pointer to timespec variable for the final timeout
+ * @to: pointer to timespec64 variable for the final timeout
* @sec: seconds (from user space)
* @nsec: nanoseconds (from user space)
*
*
* Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
*/
-int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
+int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
{
- struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
+ struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
- if (!timespec_valid(&ts))
+ if (!timespec64_valid(&ts))
return -EINVAL;
/* Optimize for the zero timeout value here */
if (!sec && !nsec) {
to->tv_sec = to->tv_nsec = 0;
} else {
- ktime_get_ts(to);
- *to = timespec_add_safe(*to, ts);
+ ktime_get_ts64(to);
+ *to = timespec64_add_safe(*to, ts);
}
return 0;
}
-static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+static int poll_select_copy_remaining(struct timespec64 *end_time,
+ void __user *p,
int timeval, int ret)
{
+ struct timespec64 rts64;
struct timespec rts;
struct timeval rtv;
if (!end_time->tv_sec && !end_time->tv_nsec)
return ret;
- ktime_get_ts(&rts);
- rts = timespec_sub(*end_time, rts);
- if (rts.tv_sec < 0)
- rts.tv_sec = rts.tv_nsec = 0;
+ ktime_get_ts64(&rts64);
+ rts64 = timespec64_sub(*end_time, rts64);
+ if (rts64.tv_sec < 0)
+ rts64.tv_sec = rts64.tv_nsec = 0;
+
+ rts = timespec64_to_timespec(rts64);
if (timeval) {
if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
memset(&rtv, 0, sizeof(rtv));
- rtv.tv_sec = rts.tv_sec;
- rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ rtv.tv_sec = rts64.tv_sec;
+ rtv.tv_usec = rts64.tv_nsec / NSEC_PER_USEC;
if (!copy_to_user(p, &rtv, sizeof(rtv)))
return ret;
wait->_key |= POLLOUT_SET;
}
-int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
+int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
ktime_t expire, *to = NULL;
struct poll_wqueues table;
* pointer to the expiry value.
*/
if (end_time && !to) {
- expire = timespec_to_ktime(*end_time);
+ expire = timespec64_to_ktime(*end_time);
to = &expire;
}
* I'm trying ERESTARTNOHAND which restart only when you want to.
*/
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
- fd_set __user *exp, struct timespec *end_time)
+ fd_set __user *exp, struct timespec64 *end_time)
{
fd_set_bits fds;
void *bits;
SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
fd_set __user *, exp, struct timeval __user *, tvp)
{
- struct timespec end_time, *to = NULL;
+ struct timespec64 end_time, *to = NULL;
struct timeval tv;
int ret;
const sigset_t __user *sigmask, size_t sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts, end_time, *to = NULL;
+ struct timespec ts;
+ struct timespec64 ts64, end_time, *to = NULL;
int ret;
if (tsp) {
if (copy_from_user(&ts, tsp, sizeof(ts)))
return -EFAULT;
+ ts64 = timespec_to_timespec64(ts);
to = &end_time;
- if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ if (poll_select_set_timeout(to, ts64.tv_sec, ts64.tv_nsec))
return -EINVAL;
}
}
static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
- struct timespec *end_time)
+ struct timespec64 *end_time)
{
poll_table* pt = &wait->pt;
ktime_t expire, *to = NULL;
* pointer to the expiry value.
*/
if (end_time && !to) {
- expire = timespec_to_ktime(*end_time);
+ expire = timespec64_to_ktime(*end_time);
to = &expire;
}
sizeof(struct pollfd))
int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
- struct timespec *end_time)
+ struct timespec64 *end_time)
{
struct poll_wqueues table;
int err = -EFAULT, fdcount, len, size;
{
struct pollfd __user *ufds = restart_block->poll.ufds;
int nfds = restart_block->poll.nfds;
- struct timespec *to = NULL, end_time;
+ struct timespec64 *to = NULL, end_time;
int ret;
if (restart_block->poll.has_timeout) {
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
int, timeout_msecs)
{
- struct timespec end_time, *to = NULL;
+ struct timespec64 end_time, *to = NULL;
int ret;
if (timeout_msecs >= 0) {
size_t, sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts, end_time, *to = NULL;
+ struct timespec ts;
+ struct timespec64 end_time, *to = NULL;
int ret;
if (tsp) {
#define io_remap_pfn_range remap_pfn_range
#endif
+#ifndef has_transparent_hugepage
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define has_transparent_hugepage() 1
+#else
+#define has_transparent_hugepage() 0
+#endif
+#endif
+
#endif /* _ASM_GENERIC_PGTABLE_H */
#define ARIZONA_ACCDET_MODE_HPM 4
#define ARIZONA_ACCDET_MODE_ADC 7
+#define ARIZONA_GPSW_OPEN 0
+#define ARIZONA_GPSW_CLOSED 1
+#define ARIZONA_GPSW_CLAMP_ENABLED 2
+#define ARIZONA_GPSW_CLAMP_DISABLED 3
+
#endif
--- /dev/null
+/*
+ * This header provides macros for MAXIM MAX77620 device bindings.
+ *
+ * Copyright (c) 2016, NVIDIA Corporation.
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ */
+
+#ifndef _DT_BINDINGS_MFD_MAX77620_H
+#define _DT_BINDINGS_MFD_MAX77620_H
+
+/* MAX77620 interrupts */
+#define MAX77620_IRQ_TOP_GLBL 0 /* Low-Battery */
+#define MAX77620_IRQ_TOP_SD 1 /* SD power fail */
+#define MAX77620_IRQ_TOP_LDO 2 /* LDO power fail */
+#define MAX77620_IRQ_TOP_GPIO 3 /* GPIO internal int to MAX77620 */
+#define MAX77620_IRQ_TOP_RTC 4 /* RTC */
+#define MAX77620_IRQ_TOP_32K 5 /* 32kHz oscillator */
+#define MAX77620_IRQ_TOP_ONOFF 6 /* ON/OFF oscillator */
+#define MAX77620_IRQ_LBT_MBATLOW 7 /* Thermal alarm status, > 120C */
+#define MAX77620_IRQ_LBT_TJALRM1 8 /* Thermal alarm status, > 120C */
+#define MAX77620_IRQ_LBT_TJALRM2 9 /* Thermal alarm status, > 140C */
+
+/* FPS event source */
+#define MAX77620_FPS_EVENT_SRC_EN0 0
+#define MAX77620_FPS_EVENT_SRC_EN1 1
+#define MAX77620_FPS_EVENT_SRC_SW 2
+
+/* Device state when FPS event LOW */
+#define MAX77620_FPS_INACTIVE_STATE_SLEEP 0
+#define MAX77620_FPS_INACTIVE_STATE_LOW_POWER 1
+
+/* FPS source */
+#define MAX77620_FPS_SRC_0 0
+#define MAX77620_FPS_SRC_1 1
+#define MAX77620_FPS_SRC_2 2
+#define MAX77620_FPS_SRC_NONE 3
+#define MAX77620_FPS_SRC_DEF 4
+
+#endif
unsigned long goal);
extern void *__alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
extern void *__alloc_bootmem_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
void *__alloc_bootmem_node_high(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal,
- unsigned long limit);
+ unsigned long limit) __malloc;
extern void *__alloc_bootmem_low(unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
void *__alloc_bootmem_low_nopanic(unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
- unsigned long goal);
+ unsigned long goal) __malloc;
#ifdef CONFIG_NO_BOOTMEM
/* We are using top down, so it is safe to use 0 here */
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended);
+ unsigned int alloc_flags, const struct alloc_context *ac,
+ enum migrate_mode mode, int *contended);
extern void compact_pgdat(pg_data_t *pgdat, int order);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern unsigned long compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int classzone_idx);
+ unsigned int alloc_flags, int classzone_idx);
extern void defer_compaction(struct zone *zone, int order);
extern bool compaction_deferred(struct zone *zone, int order);
#if GCC_VERSION >= 30400
#define __must_check __attribute__((warn_unused_result))
+#define __malloc __attribute__((__malloc__))
#endif
#if GCC_VERSION >= 40000
#define __deprecated_for_modules
#endif
+#ifndef __malloc
+#define __malloc
+#endif
+
/*
* Allow us to avoid 'defined but not used' warnings on functions and data,
* as well as force them to be emitted to the assembly file.
#ifdef CONFIG_CPUSETS
-extern struct static_key cpusets_enabled_key;
+extern struct static_key_false cpusets_enabled_key;
static inline bool cpusets_enabled(void)
{
- return static_key_false(&cpusets_enabled_key);
+ return static_branch_unlikely(&cpusets_enabled_key);
}
static inline int nr_cpusets(void)
{
/* jump label reference count + the top-level cpuset */
- return static_key_count(&cpusets_enabled_key) + 1;
+ return static_key_count(&cpusets_enabled_key.key) + 1;
}
static inline void cpuset_inc(void)
{
- static_key_slow_inc(&cpusets_enabled_key);
+ static_branch_inc(&cpusets_enabled_key);
}
static inline void cpuset_dec(void)
{
- static_key_slow_dec(&cpusets_enabled_key);
+ static_branch_dec(&cpusets_enabled_key);
}
extern int cpuset_init(void);
void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
-extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask);
-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
- return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
+ if (cpusets_enabled())
+ return __cpuset_node_allowed(node, gfp_mask);
+ return true;
}
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+ return __cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+ if (cpusets_enabled())
+ return __cpuset_zone_allowed(z, gfp_mask);
+ return true;
}
extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
return 1;
}
-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
- return 1;
+ return true;
}
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return 1;
+ return true;
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+ return true;
}
static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
* @name: name of the object typee
* @debug_hint: function returning address, which have associated
* kernel symbol, to allow identify the object
+ * @is_static_object return true if the obj is static, otherwise return false
* @fixup_init: fixup function, which is called when the init check
- * fails
+ * fails. All fixup functions must return true if fixup
+ * was successful, otherwise return false
* @fixup_activate: fixup function, which is called when the activate check
* fails
* @fixup_destroy: fixup function, which is called when the destroy check
*/
struct debug_obj_descr {
const char *name;
- void *(*debug_hint) (void *addr);
- int (*fixup_init) (void *addr, enum debug_obj_state state);
- int (*fixup_activate) (void *addr, enum debug_obj_state state);
- int (*fixup_destroy) (void *addr, enum debug_obj_state state);
- int (*fixup_free) (void *addr, enum debug_obj_state state);
- int (*fixup_assert_init)(void *addr, enum debug_obj_state state);
+ void *(*debug_hint)(void *addr);
+ bool (*is_static_object)(void *addr);
+ bool (*fixup_init)(void *addr, enum debug_obj_state state);
+ bool (*fixup_activate)(void *addr, enum debug_obj_state state);
+ bool (*fixup_destroy)(void *addr, enum debug_obj_state state);
+ bool (*fixup_free)(void *addr, enum debug_obj_state state);
+ bool (*fixup_assert_init)(void *addr, enum debug_obj_state state);
};
#ifdef CONFIG_DEBUG_OBJECTS
#ifdef CONFIG_DEBUG_DEVRES
extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
- int nid, const char *name);
+ int nid, const char *name) __malloc;
#define devres_alloc(release, size, gfp) \
__devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release)
#define devres_alloc_node(release, size, gfp, nid) \
__devres_alloc_node(release, size, gfp, nid, #release)
#else
extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
- int nid);
+ int nid) __malloc;
static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
{
return devres_alloc_node(release, size, gfp, NUMA_NO_NODE);
extern int devres_release_group(struct device *dev, void *id);
/* managed devm_k.alloc/kfree for device drivers */
-extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp);
+extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc;
extern __printf(3, 0)
char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt,
- va_list ap);
+ va_list ap) __malloc;
extern __printf(3, 4)
-char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...);
+char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) __malloc;
static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
{
return devm_kmalloc(dev, size, gfp | __GFP_ZERO);
return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
}
extern void devm_kfree(struct device *dev, void *p);
-extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp);
+extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
gfp_t gfp);
extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group);
/* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/
extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags);
-/* run all the marks in a group, and flag them to be freed */
-extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct super_block *sb);
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
-extern bool move_huge_pmd(struct vm_area_struct *vma,
- struct vm_area_struct *new_vma,
- unsigned long old_addr,
+extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
/* arch callback */
int __init alloc_bootmem_huge_page(struct hstate *h);
+void __init hugetlb_bad_size(void);
void __init hugetlb_add_hstate(unsigned order);
struct hstate *size_to_hstate(unsigned long size);
#include <linux/mm.h>
-static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
+static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
return !!(vma->vm_flags & VM_HUGETLB);
}
#else
-static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
+static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
- return 0;
+ return false;
}
#endif
int scnprintf(char *buf, size_t size, const char *fmt, ...);
extern __printf(3, 0)
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
-extern __printf(2, 3)
+extern __printf(2, 3) __malloc
char *kasprintf(gfp_t gfp, const char *fmt, ...);
-extern __printf(2, 0)
+extern __printf(2, 0) __malloc
char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
extern __printf(2, 0)
const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
return 0;
}
-static inline void
-mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
- int increment)
-{
-}
-
static inline unsigned long
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask)
#ifdef CONFIG_MEMORY_HOTREMOVE
-extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
extern void try_offline_node(int nid);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern void remove_memory(int nid, u64 start, u64 size);
#else
-static inline int is_mem_section_removable(unsigned long pfn,
+static inline bool is_mem_section_removable(unsigned long pfn,
unsigned long nr_pages)
{
- return 0;
+ return false;
}
static inline void try_offline_node(int nid) {}
extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
+static inline bool vma_migratable(struct vm_area_struct *vma)
{
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
- return 0;
+ return false;
#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
if (vma->vm_flags & VM_HUGETLB)
- return 0;
+ return false;
#endif
/*
if (vma->vm_file &&
gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
< policy_zone)
- return 0;
- return 1;
+ return false;
+ return true;
}
extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
{
}
+static inline struct mempolicy *
+mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
+{
+ return NULL;
+}
+
#define vma_policy(vma) NULL
static inline int
#define _LINUX_MEMPOOL_H
#include <linux/wait.h>
+#include <linux/compiler.h>
struct kmem_cache;
extern int mempool_resize(mempool_t *pool, int new_min_nr);
extern void mempool_destroy(mempool_t *pool);
-extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask);
+extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
extern void mempool_free(void *element, mempool_t *pool);
/*
AXP221_ID,
AXP223_ID,
AXP288_ID,
+ AXP809_ID,
NR_AXP20X_VARIANTS,
};
AXP22X_REG_ID_MAX,
};
+enum {
+ AXP809_DCDC1 = 0,
+ AXP809_DCDC2,
+ AXP809_DCDC3,
+ AXP809_DCDC4,
+ AXP809_DCDC5,
+ AXP809_DC1SW,
+ AXP809_DC5LDO,
+ AXP809_ALDO1,
+ AXP809_ALDO2,
+ AXP809_ALDO3,
+ AXP809_ELDO1,
+ AXP809_ELDO2,
+ AXP809_ELDO3,
+ AXP809_DLDO1,
+ AXP809_DLDO2,
+ AXP809_RTC_LDO,
+ AXP809_LDO_IO0,
+ AXP809_LDO_IO1,
+ AXP809_SW,
+ AXP809_REG_ID_MAX,
+};
+
/* IRQs */
enum {
AXP152_IRQ_LDO0IN_CONNECT = 1,
AXP288_IRQ_BC_USB_CHNG,
};
+enum axp809_irqs {
+ AXP809_IRQ_ACIN_OVER_V = 1,
+ AXP809_IRQ_ACIN_PLUGIN,
+ AXP809_IRQ_ACIN_REMOVAL,
+ AXP809_IRQ_VBUS_OVER_V,
+ AXP809_IRQ_VBUS_PLUGIN,
+ AXP809_IRQ_VBUS_REMOVAL,
+ AXP809_IRQ_VBUS_V_LOW,
+ AXP809_IRQ_BATT_PLUGIN,
+ AXP809_IRQ_BATT_REMOVAL,
+ AXP809_IRQ_BATT_ENT_ACT_MODE,
+ AXP809_IRQ_BATT_EXIT_ACT_MODE,
+ AXP809_IRQ_CHARG,
+ AXP809_IRQ_CHARG_DONE,
+ AXP809_IRQ_BATT_CHG_TEMP_HIGH,
+ AXP809_IRQ_BATT_CHG_TEMP_HIGH_END,
+ AXP809_IRQ_BATT_CHG_TEMP_LOW,
+ AXP809_IRQ_BATT_CHG_TEMP_LOW_END,
+ AXP809_IRQ_BATT_ACT_TEMP_HIGH,
+ AXP809_IRQ_BATT_ACT_TEMP_HIGH_END,
+ AXP809_IRQ_BATT_ACT_TEMP_LOW,
+ AXP809_IRQ_BATT_ACT_TEMP_LOW_END,
+ AXP809_IRQ_DIE_TEMP_HIGH,
+ AXP809_IRQ_LOW_PWR_LVL1,
+ AXP809_IRQ_LOW_PWR_LVL2,
+ AXP809_IRQ_TIMER,
+ AXP809_IRQ_PEK_RIS_EDGE,
+ AXP809_IRQ_PEK_FAL_EDGE,
+ AXP809_IRQ_PEK_SHORT,
+ AXP809_IRQ_PEK_LONG,
+ AXP809_IRQ_PEK_OVER_OFF,
+ AXP809_IRQ_GPIO1_INPUT,
+ AXP809_IRQ_GPIO0_INPUT,
+};
+
#define AXP288_TS_ADC_H 0x58
#define AXP288_TS_ADC_L 0x59
#define AXP288_GP_ADC_H 0x5a
extern void mfd_remove_devices(struct device *parent);
+extern int devm_mfd_add_devices(struct device *dev, int id,
+ const struct mfd_cell *cells, int n_devs,
+ struct resource *mem_base,
+ int irq_base, struct irq_domain *irq_domain);
#endif
--- /dev/null
+/*
+ * Device driver for regulators in hi655x IC
+ *
+ * Copyright (c) 2016 Hisilicon.
+ *
+ * Authors:
+ * Chen Feng <puck.chen@hisilicon.com>
+ * Fei Wang <w.f@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __HI655X_PMIC_H
+#define __HI655X_PMIC_H
+
+/* Hi655x registers are mapped to memory bus in 4 bytes stride */
+#define HI655X_STRIDE 4
+#define HI655X_BUS_ADDR(x) ((x) << 2)
+
+#define HI655X_BITS 8
+
+#define HI655X_NR_IRQ 32
+
+#define HI655X_IRQ_STAT_BASE (0x003 << 2)
+#define HI655X_IRQ_MASK_BASE (0x007 << 2)
+#define HI655X_ANA_IRQM_BASE (0x1b5 << 2)
+#define HI655X_IRQ_ARRAY 4
+#define HI655X_IRQ_MASK 0xFF
+#define HI655X_IRQ_CLR 0xFF
+#define HI655X_VER_REG 0x00
+
+#define PMU_VER_START 0x10
+#define PMU_VER_END 0x38
+
+#define RESERVE_INT BIT(7)
+#define PWRON_D20R_INT BIT(6)
+#define PWRON_D20F_INT BIT(5)
+#define PWRON_D4SR_INT BIT(4)
+#define VSYS_6P0_D200UR_INT BIT(3)
+#define VSYS_UV_D3R_INT BIT(2)
+#define VSYS_2P5_R_INT BIT(1)
+#define OTMP_D1R_INT BIT(0)
+
+struct hi655x_pmic {
+ struct resource *res;
+ struct device *dev;
+ struct regmap *regmap;
+ int gpio;
+ unsigned int ver;
+ struct regmap_irq_chip_data *irq_data;
+};
+
+#endif
--- /dev/null
+/*
+ * Defining registers address and its bit definitions of MAX77620 and MAX20024
+ *
+ * Copyright (C) 2016 NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#ifndef _MFD_MAX77620_H_
+#define _MFD_MAX77620_H_
+
+#include <linux/types.h>
+
+/* GLOBAL, PMIC, GPIO, FPS, ONOFFC, CID Registers */
+#define MAX77620_REG_CNFGGLBL1 0x00
+#define MAX77620_REG_CNFGGLBL2 0x01
+#define MAX77620_REG_CNFGGLBL3 0x02
+#define MAX77620_REG_CNFG1_32K 0x03
+#define MAX77620_REG_CNFGBBC 0x04
+#define MAX77620_REG_IRQTOP 0x05
+#define MAX77620_REG_INTLBT 0x06
+#define MAX77620_REG_IRQSD 0x07
+#define MAX77620_REG_IRQ_LVL2_L0_7 0x08
+#define MAX77620_REG_IRQ_LVL2_L8 0x09
+#define MAX77620_REG_IRQ_LVL2_GPIO 0x0A
+#define MAX77620_REG_ONOFFIRQ 0x0B
+#define MAX77620_REG_NVERC 0x0C
+#define MAX77620_REG_IRQTOPM 0x0D
+#define MAX77620_REG_INTENLBT 0x0E
+#define MAX77620_REG_IRQMASKSD 0x0F
+#define MAX77620_REG_IRQ_MSK_L0_7 0x10
+#define MAX77620_REG_IRQ_MSK_L8 0x11
+#define MAX77620_REG_ONOFFIRQM 0x12
+#define MAX77620_REG_STATLBT 0x13
+#define MAX77620_REG_STATSD 0x14
+#define MAX77620_REG_ONOFFSTAT 0x15
+
+/* SD and LDO Registers */
+#define MAX77620_REG_SD0 0x16
+#define MAX77620_REG_SD1 0x17
+#define MAX77620_REG_SD2 0x18
+#define MAX77620_REG_SD3 0x19
+#define MAX77620_REG_SD4 0x1A
+#define MAX77620_REG_DVSSD0 0x1B
+#define MAX77620_REG_DVSSD1 0x1C
+#define MAX77620_REG_SD0_CFG 0x1D
+#define MAX77620_REG_SD1_CFG 0x1E
+#define MAX77620_REG_SD2_CFG 0x1F
+#define MAX77620_REG_SD3_CFG 0x20
+#define MAX77620_REG_SD4_CFG 0x21
+#define MAX77620_REG_SD_CFG2 0x22
+#define MAX77620_REG_LDO0_CFG 0x23
+#define MAX77620_REG_LDO0_CFG2 0x24
+#define MAX77620_REG_LDO1_CFG 0x25
+#define MAX77620_REG_LDO1_CFG2 0x26
+#define MAX77620_REG_LDO2_CFG 0x27
+#define MAX77620_REG_LDO2_CFG2 0x28
+#define MAX77620_REG_LDO3_CFG 0x29
+#define MAX77620_REG_LDO3_CFG2 0x2A
+#define MAX77620_REG_LDO4_CFG 0x2B
+#define MAX77620_REG_LDO4_CFG2 0x2C
+#define MAX77620_REG_LDO5_CFG 0x2D
+#define MAX77620_REG_LDO5_CFG2 0x2E
+#define MAX77620_REG_LDO6_CFG 0x2F
+#define MAX77620_REG_LDO6_CFG2 0x30
+#define MAX77620_REG_LDO7_CFG 0x31
+#define MAX77620_REG_LDO7_CFG2 0x32
+#define MAX77620_REG_LDO8_CFG 0x33
+#define MAX77620_REG_LDO8_CFG2 0x34
+#define MAX77620_REG_LDO_CFG3 0x35
+
+#define MAX77620_LDO_SLEW_RATE_MASK 0x1
+
+/* LDO Configuration 3 */
+#define MAX77620_TRACK4_MASK BIT(5)
+#define MAX77620_TRACK4_SHIFT 5
+
+/* Voltage */
+#define MAX77620_SDX_VOLT_MASK 0xFF
+#define MAX77620_SD0_VOLT_MASK 0x3F
+#define MAX77620_SD1_VOLT_MASK 0x7F
+#define MAX77620_LDO_VOLT_MASK 0x3F
+
+#define MAX77620_REG_GPIO0 0x36
+#define MAX77620_REG_GPIO1 0x37
+#define MAX77620_REG_GPIO2 0x38
+#define MAX77620_REG_GPIO3 0x39
+#define MAX77620_REG_GPIO4 0x3A
+#define MAX77620_REG_GPIO5 0x3B
+#define MAX77620_REG_GPIO6 0x3C
+#define MAX77620_REG_GPIO7 0x3D
+#define MAX77620_REG_PUE_GPIO 0x3E
+#define MAX77620_REG_PDE_GPIO 0x3F
+#define MAX77620_REG_AME_GPIO 0x40
+#define MAX77620_REG_ONOFFCNFG1 0x41
+#define MAX77620_REG_ONOFFCNFG2 0x42
+
+/* FPS Registers */
+#define MAX77620_REG_FPS_CFG0 0x43
+#define MAX77620_REG_FPS_CFG1 0x44
+#define MAX77620_REG_FPS_CFG2 0x45
+#define MAX77620_REG_FPS_LDO0 0x46
+#define MAX77620_REG_FPS_LDO1 0x47
+#define MAX77620_REG_FPS_LDO2 0x48
+#define MAX77620_REG_FPS_LDO3 0x49
+#define MAX77620_REG_FPS_LDO4 0x4A
+#define MAX77620_REG_FPS_LDO5 0x4B
+#define MAX77620_REG_FPS_LDO6 0x4C
+#define MAX77620_REG_FPS_LDO7 0x4D
+#define MAX77620_REG_FPS_LDO8 0x4E
+#define MAX77620_REG_FPS_SD0 0x4F
+#define MAX77620_REG_FPS_SD1 0x50
+#define MAX77620_REG_FPS_SD2 0x51
+#define MAX77620_REG_FPS_SD3 0x52
+#define MAX77620_REG_FPS_SD4 0x53
+#define MAX77620_REG_FPS_NONE 0
+
+#define MAX77620_FPS_SRC_MASK 0xC0
+#define MAX77620_FPS_SRC_SHIFT 6
+#define MAX77620_FPS_PU_PERIOD_MASK 0x38
+#define MAX77620_FPS_PU_PERIOD_SHIFT 3
+#define MAX77620_FPS_PD_PERIOD_MASK 0x07
+#define MAX77620_FPS_PD_PERIOD_SHIFT 0
+#define MAX77620_FPS_TIME_PERIOD_MASK 0x38
+#define MAX77620_FPS_TIME_PERIOD_SHIFT 3
+#define MAX77620_FPS_EN_SRC_MASK 0x06
+#define MAX77620_FPS_EN_SRC_SHIFT 1
+#define MAX77620_FPS_ENFPS_SW_MASK 0x01
+#define MAX77620_FPS_ENFPS_SW 0x01
+
+/* Minimum and maximum FPS period time (in microseconds) are
+ * different for MAX77620 and Max20024.
+ */
+#define MAX77620_FPS_PERIOD_MIN_US 40
+#define MAX20024_FPS_PERIOD_MIN_US 20
+
+#define MAX77620_FPS_PERIOD_MAX_US 2560
+#define MAX20024_FPS_PERIOD_MAX_US 5120
+
+#define MAX77620_REG_FPS_GPIO1 0x54
+#define MAX77620_REG_FPS_GPIO2 0x55
+#define MAX77620_REG_FPS_GPIO3 0x56
+#define MAX77620_REG_FPS_RSO 0x57
+#define MAX77620_REG_CID0 0x58
+#define MAX77620_REG_CID1 0x59
+#define MAX77620_REG_CID2 0x5A
+#define MAX77620_REG_CID3 0x5B
+#define MAX77620_REG_CID4 0x5C
+#define MAX77620_REG_CID5 0x5D
+
+#define MAX77620_REG_DVSSD4 0x5E
+#define MAX20024_REG_MAX_ADD 0x70
+
+#define MAX77620_CID_DIDM_MASK 0xF0
+#define MAX77620_CID_DIDM_SHIFT 4
+
+/* CNCG2SD */
+#define MAX77620_SD_CNF2_ROVS_EN_SD1 BIT(1)
+#define MAX77620_SD_CNF2_ROVS_EN_SD0 BIT(2)
+
+/* Device Identification Metal */
+#define MAX77620_CID5_DIDM(n) (((n) >> 4) & 0xF)
+/* Device Indentification OTP */
+#define MAX77620_CID5_DIDO(n) ((n) & 0xF)
+
+/* SD CNFG1 */
+#define MAX77620_SD_SR_MASK 0xC0
+#define MAX77620_SD_SR_SHIFT 6
+#define MAX77620_SD_POWER_MODE_MASK 0x30
+#define MAX77620_SD_POWER_MODE_SHIFT 4
+#define MAX77620_SD_CFG1_ADE_MASK BIT(3)
+#define MAX77620_SD_CFG1_ADE_DISABLE 0
+#define MAX77620_SD_CFG1_ADE_ENABLE BIT(3)
+#define MAX77620_SD_FPWM_MASK 0x04
+#define MAX77620_SD_FPWM_SHIFT 2
+#define MAX77620_SD_FSRADE_MASK 0x01
+#define MAX77620_SD_FSRADE_SHIFT 0
+#define MAX77620_SD_CFG1_FPWM_SD_MASK BIT(2)
+#define MAX77620_SD_CFG1_FPWM_SD_SKIP 0
+#define MAX77620_SD_CFG1_FPWM_SD_FPWM BIT(2)
+#define MAX77620_SD_CFG1_FSRADE_SD_MASK BIT(0)
+#define MAX77620_SD_CFG1_FSRADE_SD_DISABLE 0
+#define MAX77620_SD_CFG1_FSRADE_SD_ENABLE BIT(0)
+
+/* LDO_CNFG2 */
+#define MAX77620_LDO_POWER_MODE_MASK 0xC0
+#define MAX77620_LDO_POWER_MODE_SHIFT 6
+#define MAX77620_LDO_CFG2_ADE_MASK BIT(1)
+#define MAX77620_LDO_CFG2_ADE_DISABLE 0
+#define MAX77620_LDO_CFG2_ADE_ENABLE BIT(1)
+#define MAX77620_LDO_CFG2_SS_MASK BIT(0)
+#define MAX77620_LDO_CFG2_SS_FAST BIT(0)
+#define MAX77620_LDO_CFG2_SS_SLOW 0
+
+#define MAX77620_IRQ_TOP_GLBL_MASK BIT(7)
+#define MAX77620_IRQ_TOP_SD_MASK BIT(6)
+#define MAX77620_IRQ_TOP_LDO_MASK BIT(5)
+#define MAX77620_IRQ_TOP_GPIO_MASK BIT(4)
+#define MAX77620_IRQ_TOP_RTC_MASK BIT(3)
+#define MAX77620_IRQ_TOP_32K_MASK BIT(2)
+#define MAX77620_IRQ_TOP_ONOFF_MASK BIT(1)
+
+#define MAX77620_IRQ_LBM_MASK BIT(3)
+#define MAX77620_IRQ_TJALRM1_MASK BIT(2)
+#define MAX77620_IRQ_TJALRM2_MASK BIT(1)
+
+#define MAX77620_PWR_I2C_ADDR 0x3c
+#define MAX77620_RTC_I2C_ADDR 0x68
+
+#define MAX77620_CNFG_GPIO_DRV_MASK BIT(0)
+#define MAX77620_CNFG_GPIO_DRV_PUSHPULL BIT(0)
+#define MAX77620_CNFG_GPIO_DRV_OPENDRAIN 0
+#define MAX77620_CNFG_GPIO_DIR_MASK BIT(1)
+#define MAX77620_CNFG_GPIO_DIR_INPUT BIT(1)
+#define MAX77620_CNFG_GPIO_DIR_OUTPUT 0
+#define MAX77620_CNFG_GPIO_INPUT_VAL_MASK BIT(2)
+#define MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK BIT(3)
+#define MAX77620_CNFG_GPIO_OUTPUT_VAL_HIGH BIT(3)
+#define MAX77620_CNFG_GPIO_OUTPUT_VAL_LOW 0
+#define MAX77620_CNFG_GPIO_INT_MASK (0x3 << 4)
+#define MAX77620_CNFG_GPIO_INT_FALLING BIT(4)
+#define MAX77620_CNFG_GPIO_INT_RISING BIT(5)
+#define MAX77620_CNFG_GPIO_DBNC_MASK (0x3 << 6)
+#define MAX77620_CNFG_GPIO_DBNC_None (0x0 << 6)
+#define MAX77620_CNFG_GPIO_DBNC_8ms (0x1 << 6)
+#define MAX77620_CNFG_GPIO_DBNC_16ms (0x2 << 6)
+#define MAX77620_CNFG_GPIO_DBNC_32ms (0x3 << 6)
+
+#define MAX77620_IRQ_LVL2_GPIO_EDGE0 BIT(0)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE1 BIT(1)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE2 BIT(2)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE3 BIT(3)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE4 BIT(4)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE5 BIT(5)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE6 BIT(6)
+#define MAX77620_IRQ_LVL2_GPIO_EDGE7 BIT(7)
+
+#define MAX77620_CNFG1_32K_OUT0_EN BIT(2)
+
+#define MAX77620_ONOFFCNFG1_SFT_RST BIT(7)
+#define MAX77620_ONOFFCNFG1_MRT_MASK 0x38
+#define MAX77620_ONOFFCNFG1_MRT_SHIFT 0x3
+#define MAX77620_ONOFFCNFG1_SLPEN BIT(2)
+#define MAX77620_ONOFFCNFG1_PWR_OFF BIT(1)
+#define MAX20024_ONOFFCNFG1_CLRSE 0x18
+
+#define MAX77620_ONOFFCNFG2_SFT_RST_WK BIT(7)
+#define MAX77620_ONOFFCNFG2_WD_RST_WK BIT(6)
+#define MAX77620_ONOFFCNFG2_SLP_LPM_MSK BIT(5)
+#define MAX77620_ONOFFCNFG2_WK_ALARM1 BIT(2)
+#define MAX77620_ONOFFCNFG2_WK_EN0 BIT(0)
+
+#define MAX77620_GLBLM_MASK BIT(0)
+
+#define MAX77620_WDTC_MASK 0x3
+#define MAX77620_WDTOFFC BIT(4)
+#define MAX77620_WDTSLPC BIT(3)
+#define MAX77620_WDTEN BIT(2)
+
+#define MAX77620_TWD_MASK 0x3
+#define MAX77620_TWD_2s 0x0
+#define MAX77620_TWD_16s 0x1
+#define MAX77620_TWD_64s 0x2
+#define MAX77620_TWD_128s 0x3
+
+#define MAX77620_CNFGGLBL1_LBDAC_EN BIT(7)
+#define MAX77620_CNFGGLBL1_MPPLD BIT(6)
+#define MAX77620_CNFGGLBL1_LBHYST (BIT(5) | BIT(4))
+#define MAX77620_CNFGGLBL1_LBDAC 0x0E
+#define MAX77620_CNFGGLBL1_LBRSTEN BIT(0)
+
+/* CNFG BBC registers */
+#define MAX77620_CNFGBBC_ENABLE BIT(0)
+#define MAX77620_CNFGBBC_CURRENT_MASK 0x06
+#define MAX77620_CNFGBBC_CURRENT_SHIFT 1
+#define MAX77620_CNFGBBC_VOLTAGE_MASK 0x18
+#define MAX77620_CNFGBBC_VOLTAGE_SHIFT 3
+#define MAX77620_CNFGBBC_LOW_CURRENT_DISABLE BIT(5)
+#define MAX77620_CNFGBBC_RESISTOR_MASK 0xC0
+#define MAX77620_CNFGBBC_RESISTOR_SHIFT 6
+
+#define MAX77620_FPS_COUNT 3
+
+/* Interrupts */
+enum {
+ MAX77620_IRQ_TOP_GLBL, /* Low-Battery */
+ MAX77620_IRQ_TOP_SD, /* SD power fail */
+ MAX77620_IRQ_TOP_LDO, /* LDO power fail */
+ MAX77620_IRQ_TOP_GPIO, /* TOP GPIO internal int to MAX77620 */
+ MAX77620_IRQ_TOP_RTC, /* RTC */
+ MAX77620_IRQ_TOP_32K, /* 32kHz oscillator */
+ MAX77620_IRQ_TOP_ONOFF, /* ON/OFF oscillator */
+ MAX77620_IRQ_LBT_MBATLOW, /* Thermal alarm status, > 120C */
+ MAX77620_IRQ_LBT_TJALRM1, /* Thermal alarm status, > 120C */
+ MAX77620_IRQ_LBT_TJALRM2, /* Thermal alarm status, > 140C */
+};
+
+/* GPIOs */
+enum {
+ MAX77620_GPIO0,
+ MAX77620_GPIO1,
+ MAX77620_GPIO2,
+ MAX77620_GPIO3,
+ MAX77620_GPIO4,
+ MAX77620_GPIO5,
+ MAX77620_GPIO6,
+ MAX77620_GPIO7,
+ MAX77620_GPIO_NR,
+};
+
+/* FPS Source */
+enum max77620_fps_src {
+ MAX77620_FPS_SRC_0,
+ MAX77620_FPS_SRC_1,
+ MAX77620_FPS_SRC_2,
+ MAX77620_FPS_SRC_NONE,
+ MAX77620_FPS_SRC_DEF,
+};
+
+enum max77620_chip_id {
+ MAX77620,
+ MAX20024,
+};
+
+struct max77620_chip {
+ struct device *dev;
+ struct regmap *rmap;
+
+ int chip_irq;
+ int irq_base;
+
+ /* chip id */
+ enum max77620_chip_id chip_id;
+
+ bool sleep_enable;
+ bool enable_global_lpm;
+ int shutdown_fps_period[MAX77620_FPS_COUNT];
+ int suspend_fps_period[MAX77620_FPS_COUNT];
+
+ struct regmap_irq_chip_data *top_irq_data;
+ struct regmap_irq_chip_data *gpio_irq_data;
+};
+
+#endif /* _MFD_MAX77620_H_ */
#define __LINUX_MFD_SYSCON_H__
#include <linux/err.h>
+#include <linux/errno.h>
struct device_node;
#define WM8400_LINE_CMP_VTHD_SHIFT 0 /* LINE_CMP_VTHD - [3:0] */
#define WM8400_LINE_CMP_VTHD_WIDTH 4 /* LINE_CMP_VTHD - [3:0] */
-u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg);
int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data);
static inline int wm8400_set_bits(struct wm8400 *wm8400, u8 reg,
unsigned arm_sn;
struct mlx5_rsc_debug *dbg;
int pid;
+ struct {
+ struct list_head list;
+ void (*comp)(struct mlx5_core_cq *);
+ void *priv;
+ } tasklet_ctx;
};
#include <linux/vmalloc.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
+#include <linux/interrupt.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
u8 page_shift;
};
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ /* lock on completion tasklet list */
+ spinlock_t lock;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
__be32 __iomem *doorbell;
struct list_head list;
int index;
struct mlx5_rsc_debug *dbg;
+ struct mlx5_eq_tasklet tasklet_ctx;
};
struct mlx5_core_psv {
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
* is no special casing required.
*/
-static inline int is_vmalloc_addr(const void *x)
+static inline bool is_vmalloc_addr(const void *x)
{
#ifdef CONFIG_MMU
unsigned long addr = (unsigned long)x;
return addr >= VMALLOC_START && addr < VMALLOC_END;
#else
- return 0;
+ return false;
#endif
}
#ifdef CONFIG_MMU
page = compound_head(page);
/*
* Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_count.
+ * requires to already have an elevated page->_refcount.
*/
VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
page_ref_inc(page);
static inline void page_cpupid_reset_last(struct page *page)
{
- int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
-
- page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
- page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
+ page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
}
#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
#else /* !CONFIG_NUMA_BALANCING */
return page->index;
}
-/*
- * Return true if this page is mapped into pagetables.
- * For compound page it returns true if any subpage of compound page is mapped.
- */
-static inline bool page_mapped(struct page *page)
-{
- int i;
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) >= 0;
- page = compound_head(page);
- if (atomic_read(compound_mapcount_ptr(page)) >= 0)
- return true;
- if (PageHuge(page))
- return false;
- for (i = 0; i < hpage_nr_pages(page); i++) {
- if (atomic_read(&page[i]._mapcount) >= 0)
- return true;
- }
- return false;
-}
+bool page_mapped(struct page *page);
/*
* Return true only if the page has been allocated with
return !PageSwapBacked(page);
}
+static __always_inline void __update_lru_size(struct lruvec *lruvec,
+ enum lru_list lru, int nr_pages)
+{
+ __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
+}
+
+static __always_inline void update_lru_size(struct lruvec *lruvec,
+ enum lru_list lru, int nr_pages)
+{
+#ifdef CONFIG_MEMCG
+ mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+#else
+ __update_lru_size(lruvec, lru, nr_pages);
+#endif
+}
+
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
- int nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+ update_lru_size(lruvec, lru, hpage_nr_pages(page));
list_add(&page->lru, &lruvec->lists[lru]);
- __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
}
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
- int nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
list_del(&page->lru);
- __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages);
+ update_lru_size(lruvec, lru, -hpage_nr_pages(page));
}
/**
unsigned long counters;
#else
/*
- * Keep _count separate from slub cmpxchg_double data.
- * As the rest of the double word is protected by
- * slab_lock but _count is not.
+ * Keep _refcount separate from slub cmpxchg_double
+ * data. As the rest of the double word is protected by
+ * slab_lock but _refcount is not.
*/
unsigned counters;
#endif
};
int units; /* SLOB */
};
- atomic_t _count; /* Usage count, see below. */
+ /*
+ * Usage count, *USE WRAPPER FUNCTION*
+ * when manual accounting. See page_ref.h
+ */
+ atomic_t _refcount;
};
unsigned int active; /* SLAB */
};
__u32 offset;
#endif
/* we maintain a pagecount bias, so that we dont dirty cache line
- * containing page->_count every time we allocate a fragment.
+ * containing page->_refcount every time we allocate a fragment.
*/
unsigned int pagecnt_bias;
bool pfmemalloc;
get_pfnblock_flags_mask(page, page_to_pfn(page), \
PB_migrate_end, MIGRATETYPE_MASK)
-static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
-{
- BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
- return get_pfnblock_flags_mask(page, pfn, PB_migrate_end,
- MIGRATETYPE_MASK);
-}
-
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool zone_watermark_ok(struct zone *z, unsigned int order,
- unsigned long mark, int classzone_idx, int alloc_flags);
+ unsigned long mark, int classzone_idx,
+ unsigned int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx);
enum memmap_context {
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
- int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
- return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
- (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
- zone_movable_is_highmem());
+ return is_highmem_idx(zone_idx(zone));
#else
return 0;
#endif
#endif /* CONFIG_NUMA */
}
+struct zoneref *__next_zones_zonelist(struct zoneref *z,
+ enum zone_type highest_zoneidx,
+ nodemask_t *nodes);
+
/**
* next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
* @z - The cursor used as a starting point for the search
* being examined. It should be advanced by one before calling
* next_zones_zonelist again.
*/
-struct zoneref *next_zones_zonelist(struct zoneref *z,
+static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
enum zone_type highest_zoneidx,
- nodemask_t *nodes);
+ nodemask_t *nodes)
+{
+ if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
+ return z;
+ return __next_zones_zonelist(z, highest_zoneidx, nodes);
+}
/**
* first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
*/
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
enum zone_type highest_zoneidx,
- nodemask_t *nodes,
- struct zone **zone)
+ nodemask_t *nodes)
{
- struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs,
+ return next_zones_zonelist(zonelist->_zonerefs,
highest_zoneidx, nodes);
- *zone = zonelist_zone(z);
- return z;
}
/**
* within a given nodemask
*/
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
- for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
+ for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \
+ zone; \
+ z = next_zones_zonelist(++z, highidx, nodemask), \
+ zone = zonelist_zone(z))
+
+#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+ for (zone = z->zone; \
zone; \
z = next_zones_zonelist(++z, highidx, nodemask), \
- zone = zonelist_zone(z)) \
+ zone = zonelist_zone(z))
+
/**
* for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
*
* int first_node(mask) Number lowest set bit, or MAX_NUMNODES
* int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
+ * int next_node_in(node, mask) Next node past 'node', or wrap to first,
+ * or MAX_NUMNODES
* int first_unset_node(mask) First node not set in mask, or
- * MAX_NUMNODES.
+ * MAX_NUMNODES
*
* nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set
* NODE_MASK_ALL Initializer - all bits set
return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
}
+/*
+ * Find the next present node in src, starting after node n, wrapping around to
+ * the first node in src if needed. Returns MAX_NUMNODES if src is empty.
+ */
+#define next_node_in(n, src) __next_node_in((n), &(src))
+int __next_node_in(int node, const nodemask_t *srcp);
+
static inline void init_nodemask_of_node(nodemask_t *mask, int node)
{
nodes_clear(*mask);
uint32_t args[MAX_PHANDLE_ARGS];
};
+struct of_phandle_iterator {
+ /* Common iterator information */
+ const char *cells_name;
+ int cell_count;
+ const struct device_node *parent;
+
+ /* List size information */
+ const __be32 *list_end;
+ const __be32 *phandle_end;
+
+ /* Current position state */
+ const __be32 *cur;
+ uint32_t cur_count;
+ phandle phandle;
+ struct device_node *node;
+};
+
struct of_reconfig_data {
struct device_node *dn;
struct property *prop;
extern int of_count_phandle_with_args(const struct device_node *np,
const char *list_name, const char *cells_name);
+/* phandle iterator functions */
+extern int of_phandle_iterator_init(struct of_phandle_iterator *it,
+ const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count);
+
+extern int of_phandle_iterator_next(struct of_phandle_iterator *it);
+extern int of_phandle_iterator_args(struct of_phandle_iterator *it,
+ uint32_t *args,
+ int size);
+
extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
extern int of_alias_get_id(struct device_node *np, const char *stem);
extern int of_alias_get_highest_id(const char *stem);
return -ENOSYS;
}
+static inline int of_phandle_iterator_init(struct of_phandle_iterator *it,
+ const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count)
+{
+ return -ENOSYS;
+}
+
+static inline int of_phandle_iterator_next(struct of_phandle_iterator *it)
+{
+ return -ENOSYS;
+}
+
+static inline int of_phandle_iterator_args(struct of_phandle_iterator *it,
+ uint32_t *args,
+ int size)
+{
+ return 0;
+}
+
static inline int of_alias_get_id(struct device_node *np, const char *stem)
{
return -ENOSYS;
return of_property_read_u32(np, propname, (u32*) out_value);
}
+#define of_for_each_phandle(it, err, np, ln, cn, cc) \
+ for (of_phandle_iterator_init((it), (np), (ln), (cn), (cc)), \
+ err = of_phandle_iterator_next(it); \
+ err == 0; \
+ err = of_phandle_iterator_next(it))
+
#define of_property_for_each_u32(np, propname, prop, p, u) \
for (prop = of_find_property(np, propname, NULL), \
p = of_prop_next_u32(prop, NULL, &u); \
unsigned long node);
extern int of_fdt_match(const void *blob, unsigned long node,
const char *const *compat);
-extern void of_fdt_unflatten_tree(const unsigned long *blob,
- struct device_node **mynodes);
+extern void *of_fdt_unflatten_tree(const unsigned long *blob,
+ struct device_node *dad,
+ struct device_node **mynodes);
/* TBD: Temporary export of fdt globals - remove when code fully merged */
extern int __initdata dt_root_addr_cells;
#define __LINUX_OF_GRAPH_H
#include <linux/types.h>
+#include <linux/errno.h>
/**
* struct of_endpoint - the OF graph endpoint data structure
extern void mark_oom_victim(struct task_struct *tsk);
+#ifdef CONFIG_MMU
+extern void try_oom_reaper(struct task_struct *tsk);
+#else
+static inline void try_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif
+
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
extern void padata_do_serial(struct padata_priv *padata);
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
cpumask_var_t cpumask);
-extern int padata_set_cpumasks(struct padata_instance *pinst,
- cpumask_var_t pcpumask,
- cpumask_var_t cbcpumask);
-extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
-extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
extern int padata_start(struct padata_instance *pinst);
extern void padata_stop(struct padata_instance *pinst);
extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
#define PAGE_MAPPING_KSM 2
#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
+static __always_inline int PageAnonHead(struct page *page)
+{
+ return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
static __always_inline int PageAnon(struct page *page)
{
page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+ return PageAnonHead(page);
}
#ifdef CONFIG_KSM
static inline int page_ref_count(struct page *page)
{
- return atomic_read(&page->_count);
+ return atomic_read(&page->_refcount);
}
static inline int page_count(struct page *page)
{
- return atomic_read(&compound_head(page)->_count);
+ return atomic_read(&compound_head(page)->_refcount);
}
static inline void set_page_count(struct page *page, int v)
{
- atomic_set(&page->_count, v);
+ atomic_set(&page->_refcount, v);
if (page_ref_tracepoint_active(__tracepoint_page_ref_set))
__page_ref_set(page, v);
}
static inline void page_ref_add(struct page *page, int nr)
{
- atomic_add(nr, &page->_count);
+ atomic_add(nr, &page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
__page_ref_mod(page, nr);
}
static inline void page_ref_sub(struct page *page, int nr)
{
- atomic_sub(nr, &page->_count);
+ atomic_sub(nr, &page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
__page_ref_mod(page, -nr);
}
static inline void page_ref_inc(struct page *page)
{
- atomic_inc(&page->_count);
+ atomic_inc(&page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
__page_ref_mod(page, 1);
}
static inline void page_ref_dec(struct page *page)
{
- atomic_dec(&page->_count);
+ atomic_dec(&page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
__page_ref_mod(page, -1);
}
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
- int ret = atomic_sub_and_test(nr, &page->_count);
+ int ret = atomic_sub_and_test(nr, &page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
__page_ref_mod_and_test(page, -nr, ret);
static inline int page_ref_dec_and_test(struct page *page)
{
- int ret = atomic_dec_and_test(&page->_count);
+ int ret = atomic_dec_and_test(&page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
__page_ref_mod_and_test(page, -1, ret);
static inline int page_ref_dec_return(struct page *page)
{
- int ret = atomic_dec_return(&page->_count);
+ int ret = atomic_dec_return(&page->_refcount);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
__page_ref_mod_and_return(page, -1, ret);
static inline int page_ref_add_unless(struct page *page, int nr, int u)
{
- int ret = atomic_add_unless(&page->_count, nr, u);
+ int ret = atomic_add_unless(&page->_refcount, nr, u);
if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
static inline int page_ref_freeze(struct page *page, int count)
{
- int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count);
+ int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze))
__page_ref_freeze(page, count, ret);
VM_BUG_ON_PAGE(page_count(page) != 0, page);
VM_BUG_ON(count == 0);
- atomic_set(&page->_count, count);
+ atomic_set(&page->_refcount, count);
if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
__page_ref_unfreeze(page, count);
}
/*
* speculatively take a reference to a page.
- * If the page is free (_count == 0), then _count is untouched, and 0
- * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
+ * If the page is free (_refcount == 0), then _refcount is untouched, and 0
+ * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
*
* This function must be called inside the same rcu_read_lock() section as has
* been used to lookup the page in the pagecache radix-tree (or page table):
- * this allows allocators to use a synchronize_rcu() to stabilize _count.
+ * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
*
* Unless an RCU grace period has passed, the count of all pages coming out
* of the allocator must be considered unstable. page_count may return higher
* 2. conditionally increment refcount
* 3. check the page is still in pagecache (if no, goto 1)
*
- * Remove-side that cares about stability of _count (eg. reclaim) has the
+ * Remove-side that cares about stability of _refcount (eg. reclaim) has the
* following (with tree_lock held for write):
* A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
* B. remove page from pagecache
extern void poll_freewait(struct poll_wqueues *pwq);
extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
ktime_t *expires, unsigned long slack);
-extern u64 select_estimate_accuracy(struct timespec *tv);
+extern u64 select_estimate_accuracy(struct timespec64 *tv);
static inline int poll_schedule(struct poll_wqueues *pwq, int state)
#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
-extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time);
+extern int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time);
extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
- struct timespec *end_time);
+ struct timespec64 *end_time);
extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
- fd_set __user *exp, struct timespec *end_time);
+ fd_set __user *exp, struct timespec64 *end_time);
-extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec);
+extern int poll_select_set_timeout(struct timespec64 *to, time64_t sec,
+ long nsec);
#endif /* _LINUX_POLL_H */
}
#endif /* !CONFIG_SLOB */
-void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment;
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment;
+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *, void *);
/*
}
#ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment;
+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc;
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc;
#else
static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
#endif
#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment;
+extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc;
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags,
- int node, size_t size) __assume_slab_alignment;
+ int node, size_t size) __assume_slab_alignment __malloc;
#else
static __always_inline void *
kmem_cache_alloc_node_trace(struct kmem_cache *s,
}
#endif /* CONFIG_TRACING */
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc;
#ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc;
#else
static __always_inline void *
kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
struct kasan_cache kasan_info;
#endif
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+ void *random_seq;
+#endif
+
struct kmem_cache_node *node[MAX_NUMNODES];
};
extern void kfree_const(const void *x);
-extern char *kstrdup(const char *s, gfp_t gfp);
+extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
extern const char *kstrdup_const(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
# define timespec64_equal timespec_equal
# define timespec64_compare timespec_compare
# define set_normalized_timespec64 set_normalized_timespec
-# define timespec64_add_safe timespec_add_safe
# define timespec64_add timespec_add
# define timespec64_sub timespec_sub
# define timespec64_valid timespec_valid
extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec);
-/*
- * timespec64_add_safe assumes both values are positive and checks for
- * overflow. It will return TIME_T_MAX if the returned value would be
- * smaller then either of the arguments.
- */
-extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
- const struct timespec64 rhs);
-
-
static inline struct timespec64 timespec64_add(struct timespec64 lhs,
struct timespec64 rhs)
{
#endif
+/*
+ * timespec64_add_safe assumes both values are positive and checks for
+ * overflow. It will return TIME64_MAX in case of overflow.
+ */
+extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
+ const struct timespec64 rhs);
+
#endif /* _LINUX_TIME64_H */
#ifdef CONFIG_NUMA
extern unsigned long node_page_state(int node, enum zone_stat_item item);
-extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
#else
#define node_page_state(node, item) global_page_state(item)
-#define zone_statistics(_zl, _z, gfp) do { } while (0)
#endif /* CONFIG_NUMA */
void cpu_vm_stats_fold(int cpu);
void refresh_zone_stat_thresholds(void);
+struct ctl_table;
+int vmstat_refresh(struct ctl_table *, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
int calculate_pressure_threshold(struct zone *zone);
*/
void cxl_set_master(struct cxl_context *ctx);
+/*
+ * Sets the context to use real mode memory accesses to operate with
+ * translation disabled. Note that this only makes sense for kernel contexts
+ * under bare metal, and will not work with virtualisation. May only be
+ * performed on stopped contexts.
+ */
+int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode);
+
/*
* Map and unmap the AFU Problem Space area. The amount and location mapped
* depends on if this context is a master or slave.
IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33),
+ IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34),
};
enum ib_signature_prot_cap {
u32 max_send_sge;
u32 max_recv_sge;
u32 max_inline_data;
+
+ /*
+ * Maximum number of rdma_rw_ctx structures in flight at a time.
+ * ib_create_qp() will calculate the right amount of neededed WRs
+ * and MRs based on this.
+ */
+ u32 max_rdma_ctxs;
};
enum ib_sig_type {
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
+ IB_QP_CREATE_SCATTER_FCS = 1 << 8,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
- u8 port_num; /* special QP types only */
+
+ /*
+ * Only needed for special QP types, or when using the RW API.
+ */
+ u8 port_num;
};
struct ib_qp_open_attr {
struct ib_pd *pd;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
+ spinlock_t mr_lock;
+ int mrs_used;
+ struct list_head rdma_mrs;
+ struct list_head sig_mrs;
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct list_head xrcd_list;
+
/* count times opened, mcast attaches, flow attaches */
atomic_t usecnt;
struct list_head open_list;
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_uobject *uobject;
u32 lkey;
u32 rkey;
u64 iova;
u32 length;
unsigned int page_size;
+ bool need_inval;
+ union {
+ struct ib_uobject *uobject; /* user */
+ struct list_head qp_entry; /* FR */
+ };
};
struct ib_mw {
u32 max_num_sg);
int (*map_mr_sg)(struct ib_mr *mr,
struct scatterlist *sg,
- int sg_nents);
+ int sg_nents,
+ unsigned int *sg_offset);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
enum ib_mw_type type,
struct ib_udata *udata);
device->add_gid && device->del_gid;
}
+/*
+ * Check if the device supports READ W/ INVALIDATE.
+ */
+static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
+{
+ /*
+ * iWarp drivers must support READ W/ INVALIDATE. No other protocol
+ * has support for it yet.
+ */
+ return rdma_protocol_iwarp(dev, port_num);
+}
+
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr);
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
-int ib_map_mr_sg(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int page_size);
+int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset, unsigned int page_size);
static inline int
-ib_map_mr_sg_zbva(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int page_size)
+ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset, unsigned int page_size)
{
int n;
- n = ib_map_mr_sg(mr, sg, sg_nents, page_size);
+ n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size);
mr->iova = 0;
return n;
}
-int ib_sg_to_pages(struct ib_mr *mr,
- struct scatterlist *sgl,
- int sg_nents,
- int (*set_page)(struct ib_mr *, u64));
+int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
+ unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
--- /dev/null
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#ifndef _RDMA_MR_POOL_H
+#define _RDMA_MR_POOL_H 1
+
+#include <rdma/ib_verbs.h>
+
+struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list);
+void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
+
+int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
+ enum ib_mr_type type, u32 max_num_sg);
+void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
+
+#endif /* _RDMA_MR_POOL_H */
}
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
+void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
/*
* Wait flags that would prevent any packet type from being sent.
*/
-#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
- RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
+#define RVT_S_ANY_WAIT_IO \
+ (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
+ RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
/*
* Wait flags that would prevent send work requests from making progress.
--- /dev/null
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#ifndef _RDMA_RW_H
+#define _RDMA_RW_H
+
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/mr_pool.h>
+
+struct rdma_rw_ctx {
+ /* number of RDMA READ/WRITE WRs (not counting MR WRs) */
+ u32 nr_ops;
+
+ /* tag for the union below: */
+ u8 type;
+
+ union {
+ /* for mapping a single SGE: */
+ struct {
+ struct ib_sge sge;
+ struct ib_rdma_wr wr;
+ } single;
+
+ /* for mapping of multiple SGEs: */
+ struct {
+ struct ib_sge *sges;
+ struct ib_rdma_wr *wrs;
+ } map;
+
+ /* for registering multiple WRs: */
+ struct rdma_rw_reg_ctx {
+ struct ib_sge sge;
+ struct ib_rdma_wr wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr inv_wr;
+ struct ib_mr *mr;
+ } *reg;
+
+ struct {
+ struct rdma_rw_reg_ctx data;
+ struct rdma_rw_reg_ctx prot;
+ struct ib_send_wr sig_inv_wr;
+ struct ib_mr *sig_mr;
+ struct ib_sge sig_sge;
+ struct ib_sig_handover_wr sig_wr;
+ } *sig;
+ };
+};
+
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir);
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt,
+ enum dma_data_direction dir);
+
+int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
+ enum dma_data_direction dir);
+void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ enum dma_data_direction dir);
+
+struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+
+void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
+int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
+void rdma_rw_cleanup_mrs(struct ib_qp *qp);
+
+#endif /* _RDMA_RW_H */
void *transport_kmap_data_sg(struct se_cmd *);
void transport_kunmap_data_sg(struct se_cmd *);
/* core helpers also used by xcopy during internal command setup */
-int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool);
sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *,
struct scatterlist *, u32, struct scatterlist *, u32);
int core_tpg_register(struct se_wwn *, struct se_portal_group *, int);
int core_tpg_deregister(struct se_portal_group *);
+int target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
+ u32 length, bool zero_page, bool chainable);
+void target_free_sgl(struct scatterlist *sgl, int nents);
+
/*
* The LIO target core uses DMA_TO_DEVICE to mean that data is going
* to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean
struct ib_uverbs_odp_caps odp_caps;
__u64 timestamp_mask;
__u64 hca_core_clock; /* in KHZ */
+ __u64 device_cap_flags_ex;
};
struct ib_uverbs_query_port {
+++ /dev/null
-/*
- * Public SH-mobile MIPI DSI header
- *
- * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef VIDEO_SH_MIPI_DSI_H
-#define VIDEO_SH_MIPI_DSI_H
-
-enum sh_mipi_dsi_data_fmt {
- MIPI_RGB888,
- MIPI_RGB565,
- MIPI_RGB666_LP,
- MIPI_RGB666,
- MIPI_BGR888,
- MIPI_BGR565,
- MIPI_BGR666_LP,
- MIPI_BGR666,
- MIPI_YUYV,
- MIPI_UYVY,
- MIPI_YUV420_L,
- MIPI_YUV420,
-};
-
-#define SH_MIPI_DSI_HSABM (1 << 0)
-#define SH_MIPI_DSI_HBPBM (1 << 1)
-#define SH_MIPI_DSI_HFPBM (1 << 2)
-#define SH_MIPI_DSI_BL2E (1 << 3)
-#define SH_MIPI_DSI_VSEE (1 << 4)
-#define SH_MIPI_DSI_HSEE (1 << 5)
-#define SH_MIPI_DSI_HSAE (1 << 6)
-
-#define SH_MIPI_DSI_HSbyteCLK (1 << 24)
-#define SH_MIPI_DSI_HS6divCLK (1 << 25)
-#define SH_MIPI_DSI_HS4divCLK (1 << 26)
-
-#define SH_MIPI_DSI_SYNC_PULSES_MODE (SH_MIPI_DSI_VSEE | \
- SH_MIPI_DSI_HSEE | \
- SH_MIPI_DSI_HSAE)
-#define SH_MIPI_DSI_SYNC_EVENTS_MODE (0)
-#define SH_MIPI_DSI_SYNC_BURST_MODE (SH_MIPI_DSI_BL2E)
-
-struct sh_mipi_dsi_info {
- enum sh_mipi_dsi_data_fmt data_format;
- int channel;
- int lane;
- unsigned long flags;
- u32 clksrc;
- u32 phyctrl; /* for extra setting */
- unsigned int vsynw_offset;
- int (*set_dot_clock)(struct platform_device *pdev,
- void __iomem *base,
- int enable);
-};
-
-#endif
endchoice
+config SLAB_FREELIST_RANDOM
+ default n
+ depends on SLAB
+ bool "SLAB freelist randomization"
+ help
+ Randomizes the freelist order used on creating new SLABs. This
+ security feature reduces the predictability of the kernel slab
+ allocator against heap overflows.
+
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
#include <linux/cgroup.h>
#include <linux/wait.h>
-struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
/* See "Frequency meter" comments, below. */
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
-int __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
unsigned long flags;
if (in_interrupt())
- return 1;
+ return true;
if (node_isset(node, current->mems_allowed))
- return 1;
+ return true;
/*
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)))
- return 1;
+ return true;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
- return 0;
+ return false;
if (current->flags & PF_EXITING) /* Let dying task have memory */
- return 1;
+ return true;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
spin_lock_irqsave(&callback_lock, flags);
static int cpuset_spread_node(int *rotor)
{
- int node;
-
- node = next_node(*rotor, current->mems_allowed);
- if (node == MAX_NUMNODES)
- node = first_node(current->mems_allowed);
- *rotor = node;
- return node;
+ return *rotor = next_node_in(*rotor, current->mems_allowed);
}
int cpuset_mem_spread_node(void)
VMCOREINFO_STRUCT_SIZE(list_head);
VMCOREINFO_SIZE(nodemask_t);
VMCOREINFO_OFFSET(page, flags);
- VMCOREINFO_OFFSET(page, _count);
+ VMCOREINFO_OFFSET(page, _refcount);
VMCOREINFO_OFFSET(page, mapping);
VMCOREINFO_OFFSET(page, lru);
VMCOREINFO_OFFSET(page, _mapcount);
}
#endif
+#ifndef arch_memremap_wb
+static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
+{
+ return (__force void *)ioremap_cache(offset, size);
+}
+#endif
+
static void *try_ram_remap(resource_size_t offset, size_t size)
{
unsigned long pfn = PHYS_PFN(offset);
/* In the simple case just return the existing linear address */
if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
return __va(offset);
- return NULL; /* fallback to ioremap_cache */
+ return NULL; /* fallback to arch_memremap_wb */
}
/**
if (is_ram == REGION_INTERSECTS)
addr = try_ram_remap(offset, size);
if (!addr)
- addr = ioremap_cache(offset, size);
+ addr = arch_memremap_wb(offset, size);
}
/*
return 0;
}
-/**
- * padata_set_cpumasks - Set both parallel and serial cpumasks. The first
- * one is used by parallel workers and the second one
- * by the wokers doing serialization.
- *
- * @pinst: padata instance
- * @pcpumask: the cpumask to use for parallel workers
- * @cbcpumask: the cpumsak to use for serial workers
- */
-int padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t pcpumask,
- cpumask_var_t cbcpumask)
-{
- int err;
-
- mutex_lock(&pinst->lock);
- get_online_cpus();
-
- err = __padata_set_cpumasks(pinst, pcpumask, cbcpumask);
-
- put_online_cpus();
- mutex_unlock(&pinst->lock);
-
- return err;
-
-}
-EXPORT_SYMBOL(padata_set_cpumasks);
-
/**
* padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
* equivalent to @cpumask.
}
EXPORT_SYMBOL(padata_set_cpumask);
+/**
+ * padata_start - start the parallel processing
+ *
+ * @pinst: padata instance to start
+ */
+int padata_start(struct padata_instance *pinst)
+{
+ int err = 0;
+
+ mutex_lock(&pinst->lock);
+
+ if (pinst->flags & PADATA_INVALID)
+ err = -EINVAL;
+
+ __padata_start(pinst);
+
+ mutex_unlock(&pinst->lock);
+
+ return err;
+}
+EXPORT_SYMBOL(padata_start);
+
+/**
+ * padata_stop - stop the parallel processing
+ *
+ * @pinst: padata instance to stop
+ */
+void padata_stop(struct padata_instance *pinst)
+{
+ mutex_lock(&pinst->lock);
+ __padata_stop(pinst);
+ mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_stop);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
struct parallel_data *pd;
return 0;
}
- /**
- * padata_add_cpu - add a cpu to one or both(parallel and serial)
- * padata cpumasks.
- *
- * @pinst: padata instance
- * @cpu: cpu to add
- * @mask: bitmask of flags specifying to which cpumask @cpu shuld be added.
- * The @mask may be any combination of the following flags:
- * PADATA_CPU_SERIAL - serial cpumask
- * PADATA_CPU_PARALLEL - parallel cpumask
- */
-
-int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
-{
- int err;
-
- if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
- return -EINVAL;
-
- mutex_lock(&pinst->lock);
-
- get_online_cpus();
- if (mask & PADATA_CPU_SERIAL)
- cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
- if (mask & PADATA_CPU_PARALLEL)
- cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
-
- err = __padata_add_cpu(pinst, cpu);
- put_online_cpus();
-
- mutex_unlock(&pinst->lock);
-
- return err;
-}
-EXPORT_SYMBOL(padata_add_cpu);
-
static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
struct parallel_data *pd = NULL;
}
EXPORT_SYMBOL(padata_remove_cpu);
-/**
- * padata_start - start the parallel processing
- *
- * @pinst: padata instance to start
- */
-int padata_start(struct padata_instance *pinst)
-{
- int err = 0;
-
- mutex_lock(&pinst->lock);
-
- if (pinst->flags & PADATA_INVALID)
- err =-EINVAL;
-
- __padata_start(pinst);
-
- mutex_unlock(&pinst->lock);
-
- return err;
-}
-EXPORT_SYMBOL(padata_start);
-
-/**
- * padata_stop - stop the parallel processing
- *
- * @pinst: padata instance to stop
- */
-void padata_stop(struct padata_instance *pinst)
-{
- mutex_lock(&pinst->lock);
- __padata_stop(pinst);
- mutex_unlock(&pinst->lock);
-}
-EXPORT_SYMBOL(padata_stop);
-
-#ifdef CONFIG_HOTPLUG_CPU
-
static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
{
return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
err:
return NULL;
}
-EXPORT_SYMBOL(padata_alloc);
/**
* padata_free - free a padata instance
debug_object_free(head, &rcuhead_debug_descr);
}
-/*
- * fixup_activate is called when:
- * - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
- * Activation is performed internally by call_rcu().
- */
-static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
+static bool rcuhead_is_static_object(void *addr)
{
- struct rcu_head *head = addr;
-
- switch (state) {
-
- case ODEBUG_STATE_NOTAVAILABLE:
- /*
- * This is not really a fixup. We just make sure that it is
- * tracked in the object tracker.
- */
- debug_object_init(head, &rcuhead_debug_descr);
- debug_object_activate(head, &rcuhead_debug_descr);
- return 0;
- default:
- return 1;
- }
+ return true;
}
/**
struct debug_obj_descr rcuhead_debug_descr = {
.name = "rcu_head",
- .fixup_activate = rcuhead_fixup_activate,
+ .is_static_object = rcuhead_is_static_object,
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "stat_refresh",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0600,
+ .proc_handler = vmstat_refresh,
+ },
#endif
#ifdef CONFIG_MMU
{
* fixup_init is called when:
* - an active object is initialized
*/
-static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
+static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
{
struct hrtimer *timer = addr;
case ODEBUG_STATE_ACTIVE:
hrtimer_cancel(timer);
debug_object_init(timer, &hrtimer_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
/*
* fixup_activate is called when:
* - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
+ * - an unknown non-static object is activated
*/
-static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
+static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
{
switch (state) {
-
- case ODEBUG_STATE_NOTAVAILABLE:
- WARN_ON_ONCE(1);
- return 0;
-
case ODEBUG_STATE_ACTIVE:
WARN_ON(1);
default:
- return 0;
+ return false;
}
}
* fixup_free is called when:
* - an active object is freed
*/
-static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
+static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
{
struct hrtimer *timer = addr;
case ODEBUG_STATE_ACTIVE:
hrtimer_cancel(timer);
debug_object_free(timer, &hrtimer_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
return res;
}
+
+/*
+ * Add two timespec64 values and do a safety check for overflow.
+ * It's assumed that both values are valid (>= 0).
+ * And, each timespec64 is in normalized form.
+ */
+struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
+ const struct timespec64 rhs)
+{
+ struct timespec64 res;
+
+ set_normalized_timespec64(&res, lhs.tv_sec + rhs.tv_sec,
+ lhs.tv_nsec + rhs.tv_nsec);
+
+ if (unlikely(res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)) {
+ res.tv_sec = TIME64_MAX;
+ res.tv_nsec = 0;
+ }
+
+ return res;
+}
return ((struct timer_list *) addr)->function;
}
+static bool timer_is_static_object(void *addr)
+{
+ struct timer_list *timer = addr;
+
+ return (timer->entry.pprev == NULL &&
+ timer->entry.next == TIMER_ENTRY_STATIC);
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
*/
-static int timer_fixup_init(void *addr, enum debug_obj_state state)
+static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
struct timer_list *timer = addr;
case ODEBUG_STATE_ACTIVE:
del_timer_sync(timer);
debug_object_init(timer, &timer_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
/*
* fixup_activate is called when:
* - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
+ * - an unknown non-static object is activated
*/
-static int timer_fixup_activate(void *addr, enum debug_obj_state state)
+static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
struct timer_list *timer = addr;
switch (state) {
-
case ODEBUG_STATE_NOTAVAILABLE:
- /*
- * This is not really a fixup. The timer was
- * statically initialized. We just make sure that it
- * is tracked in the object tracker.
- */
- if (timer->entry.pprev == NULL &&
- timer->entry.next == TIMER_ENTRY_STATIC) {
- debug_object_init(timer, &timer_debug_descr);
- debug_object_activate(timer, &timer_debug_descr);
- return 0;
- } else {
- setup_timer(timer, stub_timer, 0);
- return 1;
- }
- return 0;
+ setup_timer(timer, stub_timer, 0);
+ return true;
case ODEBUG_STATE_ACTIVE:
WARN_ON(1);
default:
- return 0;
+ return false;
}
}
* fixup_free is called when:
* - an active object is freed
*/
-static int timer_fixup_free(void *addr, enum debug_obj_state state)
+static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
struct timer_list *timer = addr;
case ODEBUG_STATE_ACTIVE:
del_timer_sync(timer);
debug_object_free(timer, &timer_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
* fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
-static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
+static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
struct timer_list *timer = addr;
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- if (timer->entry.next == TIMER_ENTRY_STATIC) {
- /*
- * This is not really a fixup. The timer was
- * statically initialized. We just make sure that it
- * is tracked in the object tracker.
- */
- debug_object_init(timer, &timer_debug_descr);
- return 0;
- } else {
- setup_timer(timer, stub_timer, 0);
- return 1;
- }
+ setup_timer(timer, stub_timer, 0);
+ return true;
default:
- return 0;
+ return false;
}
}
static struct debug_obj_descr timer_debug_descr = {
.name = "timer_list",
.debug_hint = timer_debug_hint,
+ .is_static_object = timer_is_static_object,
.fixup_init = timer_fixup_init,
.fixup_activate = timer_fixup_activate,
.fixup_free = timer_fixup_free,
int type;
};
+/*
+ * If symbols in an architecture don't correspond exactly to the user-visible
+ * name of what they represent, it is possible to define this function to
+ * perform the necessary adjustments.
+*/
+char * __weak arch_ftrace_match_adjust(char *str, const char *search)
+{
+ return str;
+}
+
static int ftrace_match(char *str, struct ftrace_glob *g)
{
int matched = 0;
int slen;
+ str = arch_ftrace_match_adjust(str, g->search);
+
switch (g->type) {
case MATCH_FULL:
if (strcmp(str, g->search) == 0)
return ((struct work_struct *) addr)->func;
}
-/*
- * fixup_init is called when:
- * - an active object is initialized
- */
-static int work_fixup_init(void *addr, enum debug_obj_state state)
+static bool work_is_static_object(void *addr)
{
struct work_struct *work = addr;
- switch (state) {
- case ODEBUG_STATE_ACTIVE:
- cancel_work_sync(work);
- debug_object_init(work, &work_debug_descr);
- return 1;
- default:
- return 0;
- }
+ return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}
/*
- * fixup_activate is called when:
- * - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
+ * fixup_init is called when:
+ * - an active object is initialized
*/
-static int work_fixup_activate(void *addr, enum debug_obj_state state)
+static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
struct work_struct *work = addr;
switch (state) {
-
- case ODEBUG_STATE_NOTAVAILABLE:
- /*
- * This is not really a fixup. The work struct was
- * statically initialized. We just make sure that it
- * is tracked in the object tracker.
- */
- if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
- debug_object_init(work, &work_debug_descr);
- debug_object_activate(work, &work_debug_descr);
- return 0;
- }
- WARN_ON_ONCE(1);
- return 0;
-
case ODEBUG_STATE_ACTIVE:
- WARN_ON(1);
-
+ cancel_work_sync(work);
+ debug_object_init(work, &work_debug_descr);
+ return true;
default:
- return 0;
+ return false;
}
}
* fixup_free is called when:
* - an active object is freed
*/
-static int work_fixup_free(void *addr, enum debug_obj_state state)
+static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
struct work_struct *work = addr;
case ODEBUG_STATE_ACTIVE:
cancel_work_sync(work);
debug_object_free(work, &work_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
static struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
.debug_hint = work_debug_hint,
+ .is_static_object = work_is_static_object,
.fixup_init = work_fixup_init,
- .fixup_activate = work_fixup_activate,
.fixup_free = work_fixup_free,
};
sha1.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o seq_buf.o nmi_backtrace.o
+ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o
* Try to repair the damage, so we have a better chance to get useful
* debug output.
*/
-static int
-debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
+static bool
+debug_object_fixup(bool (*fixup)(void *addr, enum debug_obj_state state),
void * addr, enum debug_obj_state state)
{
- int fixed = 0;
-
- if (fixup)
- fixed = fixup(addr, state);
- debug_objects_fixups += fixed;
- return fixed;
+ if (fixup && fixup(addr, state)) {
+ debug_objects_fixups++;
+ return true;
+ }
+ return false;
}
static void debug_object_is_on_stack(void *addr, int onstack)
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
ret = debug_object_fixup(descr->fixup_activate, addr, state);
- return ret ? -EINVAL : 0;
+ return ret ? 0 : -EINVAL;
case ODEBUG_STATE_DESTROYED:
debug_print_object(obj, "activate");
raw_spin_unlock_irqrestore(&db->lock, flags);
/*
- * This happens when a static object is activated. We
- * let the type specific code decide whether this is
- * true or not.
+ * We are here when a static object is activated. We
+ * let the type specific code confirm whether this is
+ * true or not. if true, we just make sure that the
+ * static object is tracked in the object tracker. If
+ * not, this must be a bug, so we try to fix it up.
*/
- if (debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE)) {
+ if (descr->is_static_object && descr->is_static_object(addr)) {
+ /* track this static object */
+ debug_object_init(addr, descr);
+ debug_object_activate(addr, descr);
+ } else {
debug_print_object(&o, "activate");
- return -EINVAL;
+ ret = debug_object_fixup(descr->fixup_activate, addr,
+ ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
return 0;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
/*
- * Maybe the object is static. Let the type specific
- * code decide what to do.
+ * Maybe the object is static, and we let the type specific
+ * code confirm. Track this static object if true, else invoke
+ * fixup.
*/
- if (debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE))
+ if (descr->is_static_object && descr->is_static_object(addr)) {
+ /* Track this static object */
+ debug_object_init(addr, descr);
+ } else {
debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr,
+ ODEBUG_STATE_NOTAVAILABLE);
+ }
return;
}
static __initdata struct debug_obj_descr descr_type_test;
+static bool __init is_static_object(void *addr)
+{
+ struct self_test *obj = addr;
+
+ return obj->static_init;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
*/
-static int __init fixup_init(void *addr, enum debug_obj_state state)
+static bool __init fixup_init(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_init(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
/*
* fixup_activate is called when:
* - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
+ * - an unknown non-static object is activated
*/
-static int __init fixup_activate(void *addr, enum debug_obj_state state)
+static bool __init fixup_activate(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- if (obj->static_init == 1) {
- debug_object_init(obj, &descr_type_test);
- debug_object_activate(obj, &descr_type_test);
- return 0;
- }
- return 1;
-
+ return true;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_activate(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
* fixup_destroy is called when:
* - an active object is destroyed
*/
-static int __init fixup_destroy(void *addr, enum debug_obj_state state)
+static bool __init fixup_destroy(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_destroy(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
* fixup_free is called when:
* - an active object is freed
*/
-static int __init fixup_free(void *addr, enum debug_obj_state state)
+static bool __init fixup_free(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_free(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
static __initdata struct debug_obj_descr descr_type_test = {
.name = "selftest",
+ .is_static_object = is_static_object,
.fixup_init = fixup_init,
.fixup_activate = fixup_activate,
.fixup_destroy = fixup_destroy,
--- /dev/null
+#include <linux/nodemask.h>
+#include <linux/module.h>
+#include <linux/random.h>
+
+int __next_node_in(int node, const nodemask_t *srcp)
+{
+ int ret = __next_node(node, srcp);
+
+ if (ret == MAX_NUMNODES)
+ ret = __first_node(srcp);
+ return ret;
+}
+EXPORT_SYMBOL(__next_node_in);
+
+#ifdef CONFIG_NUMA
+/*
+ * Return the bit number of a random bit set in the nodemask.
+ * (returns NUMA_NO_NODE if nodemask is empty)
+ */
+int node_random(const nodemask_t *maskp)
+{
+ int w, bit = NUMA_NO_NODE;
+
+ w = nodes_weight(*maskp);
+ if (w)
+ bit = bitmap_ord_to_pos(maskp->bits,
+ get_random_int() % w, MAX_NUMNODES);
+ return bit;
+}
+#endif
static struct debug_obj_descr percpu_counter_debug_descr;
-static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
+static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
struct percpu_counter *fbc = addr;
case ODEBUG_STATE_ACTIVE:
percpu_counter_destroy(fbc);
debug_object_free(fbc, &percpu_counter_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
def_bool y
depends on SPARSEMEM && MEMORY_HOTPLUG
+config MEMORY_HOTPLUG_DEFAULT_ONLINE
+ bool "Online the newly added memory blocks by default"
+ default n
+ depends on MEMORY_HOTPLUG
+ help
+ This option sets the default policy setting for memory hotplug
+ onlining policy (/sys/devices/system/memory/auto_online_blocks) which
+ determines what happens to newly added memory regions. Policy setting
+ can always be changed at runtime.
+ See Documentation/memory-hotplug.txt for more information.
+
+ Say Y here if you want all hot-plugged memory blocks to appear in
+ 'online' state by default.
+ Say N here if you want the default policy to keep all hot-plugged
+ memory blocks in 'offline' state.
+
config MEMORY_HOTREMOVE
bool "Allow for memory hot remove"
select MEMORY_ISOLATION
config PHYS_ADDR_T_64BIT
def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
-config ZONE_DMA_FLAG
- int
- default "0" if !ZONE_DMA
- default "1"
-
config BOUNCE
bool "Enable bounce buffers"
default y
#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>
+#define block_start_pfn(pfn, order) round_down(pfn, 1UL << (order))
+#define block_end_pfn(pfn, order) ALIGN((pfn) + 1, 1UL << (order))
+#define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order)
+#define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order)
+
static unsigned long release_freepages(struct list_head *freelist)
{
struct page *page, *next;
zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
zone->compact_cached_free_pfn =
- round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages);
+ pageblock_start_pfn(zone_end_pfn(zone) - 1);
}
/*
LIST_HEAD(freelist);
pfn = start_pfn;
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(pfn);
if (block_start_pfn < cc->zone->zone_start_pfn)
block_start_pfn = cc->zone->zone_start_pfn;
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(pfn);
for (; pfn < end_pfn; pfn += isolated,
block_start_pfn = block_end_pfn,
* scanning range to right one.
*/
if (pfn >= block_end_pfn) {
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_start_pfn = pageblock_start_pfn(pfn);
+ block_end_pfn = pageblock_end_pfn(pfn);
block_end_pfn = min(block_end_pfn, end_pfn);
}
{
struct zone *zone = cc->zone;
unsigned long nr_scanned = 0, nr_isolated = 0;
- struct list_head *migratelist = &cc->migratepages;
struct lruvec *lruvec;
unsigned long flags = 0;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
unsigned long start_pfn = low_pfn;
+ bool skip_on_failure = false;
+ unsigned long next_skip_pfn = 0;
/*
* Ensure that there are not too many pages isolated from the LRU
if (compact_should_abort(cc))
return 0;
+ if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
+ skip_on_failure = true;
+ next_skip_pfn = block_end_pfn(low_pfn, cc->order);
+ }
+
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
bool is_lru;
+ if (skip_on_failure && low_pfn >= next_skip_pfn) {
+ /*
+ * We have isolated all migration candidates in the
+ * previous order-aligned block, and did not skip it due
+ * to failure. We should migrate the pages now and
+ * hopefully succeed compaction.
+ */
+ if (nr_isolated)
+ break;
+
+ /*
+ * We failed to isolate in the previous order-aligned
+ * block. Set the new boundary to the end of the
+ * current block. Note we can't simply increase
+ * next_skip_pfn by 1 << order, as low_pfn might have
+ * been incremented by a higher number due to skipping
+ * a compound or a high-order buddy page in the
+ * previous loop iteration.
+ */
+ next_skip_pfn = block_end_pfn(low_pfn, cc->order);
+ }
+
/*
* Periodically drop the lock (if held) regardless of its
* contention, to give chance to IRQs. Abort async compaction
break;
if (!pfn_valid_within(low_pfn))
- continue;
+ goto isolate_fail;
nr_scanned++;
page = pfn_to_page(low_pfn);
if (likely(comp_order < MAX_ORDER))
low_pfn += (1UL << comp_order) - 1;
- continue;
+ goto isolate_fail;
}
if (!is_lru)
- continue;
+ goto isolate_fail;
/*
* Migration will fail if an anonymous page is pinned in memory,
*/
if (!page_mapping(page) &&
page_count(page) > page_mapcount(page))
- continue;
+ goto isolate_fail;
/* If we already hold the lock, we can skip some rechecking */
if (!locked) {
/* Recheck PageLRU and PageCompound under lock */
if (!PageLRU(page))
- continue;
+ goto isolate_fail;
/*
* Page become compound since the non-locked check,
*/
if (unlikely(PageCompound(page))) {
low_pfn += (1UL << compound_order(page)) - 1;
- continue;
+ goto isolate_fail;
}
}
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
- continue;
+ goto isolate_fail;
VM_BUG_ON_PAGE(PageCompound(page), page);
del_page_from_lru_list(page, lruvec, page_lru(page));
isolate_success:
- list_add(&page->lru, migratelist);
+ list_add(&page->lru, &cc->migratepages);
cc->nr_migratepages++;
nr_isolated++;
+ /*
+ * Record where we could have freed pages by migration and not
+ * yet flushed them to buddy allocator.
+ * - this is the lowest page that was isolated and likely be
+ * then freed by migration.
+ */
+ if (!cc->last_migrated_pfn)
+ cc->last_migrated_pfn = low_pfn;
+
/* Avoid isolating too much */
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
++low_pfn;
break;
}
+
+ continue;
+isolate_fail:
+ if (!skip_on_failure)
+ continue;
+
+ /*
+ * We have isolated some pages, but then failed. Release them
+ * instead of migrating, as we cannot form the cc->order buddy
+ * page anyway.
+ */
+ if (nr_isolated) {
+ if (locked) {
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+ locked = false;
+ }
+ acct_isolated(zone, cc);
+ putback_movable_pages(&cc->migratepages);
+ cc->nr_migratepages = 0;
+ cc->last_migrated_pfn = 0;
+ nr_isolated = 0;
+ }
+
+ if (low_pfn < next_skip_pfn) {
+ low_pfn = next_skip_pfn - 1;
+ /*
+ * The check near the loop beginning would have updated
+ * next_skip_pfn too, but this is a bit simpler.
+ */
+ next_skip_pfn += 1UL << cc->order;
+ }
}
/*
/* Scan block by block. First and last block may be incomplete */
pfn = start_pfn;
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(pfn);
if (block_start_pfn < cc->zone->zone_start_pfn)
block_start_pfn = cc->zone->zone_start_pfn;
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(pfn);
for (; pfn < end_pfn; pfn = block_end_pfn,
block_start_pfn = block_end_pfn,
* is using.
*/
isolate_start_pfn = cc->free_pfn;
- block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
+ block_start_pfn = pageblock_start_pfn(cc->free_pfn);
block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
zone_end_pfn(zone));
- low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
+ low_pfn = pageblock_end_pfn(cc->migrate_pfn);
/*
* Isolate free pages until enough are available to migrate the
unsigned long block_start_pfn;
unsigned long block_end_pfn;
unsigned long low_pfn;
- unsigned long isolate_start_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
* initialized by compact_zone()
*/
low_pfn = cc->migrate_pfn;
- block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(low_pfn);
if (block_start_pfn < zone->zone_start_pfn)
block_start_pfn = zone->zone_start_pfn;
/* Only scan within a pageblock boundary */
- block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(low_pfn);
/*
* Iterate over whole pageblocks until we find the first suitable.
continue;
/* Perform the isolation */
- isolate_start_pfn = low_pfn;
low_pfn = isolate_migratepages_block(cc, low_pfn,
block_end_pfn, isolate_mode);
return ISOLATE_ABORT;
}
- /*
- * Record where we could have freed pages by migration and not
- * yet flushed them to buddy allocator.
- * - this is the lowest page that could have been isolated and
- * then freed by migration.
- */
- if (cc->nr_migratepages && !cc->last_migrated_pfn)
- cc->last_migrated_pfn = isolate_start_pfn;
-
/*
* Either we isolated something and proceed with migration. Or
* we failed and compact_zone should decide if we should
* COMPACT_CONTINUE - If compaction should run now
*/
static unsigned long __compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int classzone_idx)
+ unsigned int alloc_flags,
+ int classzone_idx)
{
int fragindex;
unsigned long watermark;
}
unsigned long compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int classzone_idx)
+ unsigned int alloc_flags,
+ int classzone_idx)
{
unsigned long ret;
cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
- cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages);
+ cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
zone->compact_cached_free_pfn = cc->free_pfn;
}
if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
ret = COMPACT_CONTENDED;
goto out;
}
+ /*
+ * We failed to migrate at least one page in the current
+ * order-aligned block, so skip the rest of it.
+ */
+ if (cc->direct_compaction &&
+ (cc->mode == MIGRATE_ASYNC)) {
+ cc->migrate_pfn = block_end_pfn(
+ cc->migrate_pfn - 1, cc->order);
+ /* Draining pcplists is useless in this case */
+ cc->last_migrated_pfn = 0;
+
+ }
}
check_drain:
if (cc->order > 0 && cc->last_migrated_pfn) {
int cpu;
unsigned long current_block_start =
- cc->migrate_pfn & ~((1UL << cc->order) - 1);
+ block_start_pfn(cc->migrate_pfn, cc->order);
if (cc->last_migrated_pfn < current_block_start) {
cpu = get_cpu();
cc->nr_freepages = 0;
VM_BUG_ON(free_pfn == 0);
/* The cached pfn is always the first in a pageblock */
- free_pfn &= ~(pageblock_nr_pages-1);
+ free_pfn = pageblock_start_pfn(free_pfn);
/*
* Only go back, not forward. The cached pfn might have been
* already reset to zone end in compact_finished()
static unsigned long compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum migrate_mode mode, int *contended,
- int alloc_flags, int classzone_idx)
+ unsigned int alloc_flags, int classzone_idx)
{
unsigned long ret;
struct compact_control cc = {
* This is the main entry point for direct page compaction.
*/
unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended)
+ unsigned int alloc_flags, const struct alloc_context *ac,
+ enum migrate_mode mode, int *contended)
{
int may_enter_fs = gfp_mask & __GFP_FS;
int may_perform_io = gfp_mask & __GFP_IO;
status = compact_zone_order(zone, order, gfp_mask, mode,
&zone_contended, alloc_flags,
- ac->classzone_idx);
+ ac_classzone_idx(ac));
rc = max(status, rc);
/*
* It takes at least one zone that wasn't lock contended
/* If a normal allocation would succeed, stop compacting */
if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
- ac->classzone_idx, alloc_flags)) {
+ ac_classzone_idx(ac), alloc_flags)) {
/*
* We think the allocation will succeed in this zone,
* but it is not certain, hence the false. The caller
* some other bad page check should catch it later.
*/
page_mapcount_reset(page);
- atomic_sub(mapcount, &page->_count);
+ page_ref_sub(page, mapcount);
}
}
unsigned int nr_free_highpages (void)
{
- pg_data_t *pgdat;
+ struct zone *zone;
unsigned int pages = 0;
- for_each_online_pgdat(pgdat) {
- pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
- NR_FREE_PAGES);
- if (zone_movable_is_highmem())
- pages += zone_page_state(
- &pgdat->node_zones[ZONE_MOVABLE],
- NR_FREE_PAGES);
+ for_each_populated_zone(zone) {
+ if (is_highmem(zone))
+ pages += zone_page_state(zone, NR_FREE_PAGES);
}
return pages;
return 1;
}
-bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
- unsigned long old_addr,
+bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
-
struct mm_struct *mm = vma->vm_mm;
if ((old_addr & ~HPAGE_PMD_MASK) ||
(new_addr & ~HPAGE_PMD_MASK) ||
- old_end - old_addr < HPAGE_PMD_SIZE ||
- (new_vma->vm_flags & VM_NOHUGEPAGE))
+ old_end - old_addr < HPAGE_PMD_SIZE)
return false;
/*
VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
/*
- * tail_page->_count is zero and not changing from under us. But
+ * tail_page->_refcount is zero and not changing from under us. But
* get_page_unless_zero() may be running from under us on the
* tail_page. If we used atomic_set() below instead of atomic_inc(), we
* would then run atomic_set() concurrently with
if (mlocked)
lru_add_drain();
- /* Prevent deferred_split_scan() touching ->_count */
+ /* Prevent deferred_split_scan() touching ->_refcount */
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
count = page_count(head);
mapcount = total_mapcount(head);
static struct hstate * __initdata parsed_hstate;
static unsigned long __initdata default_hstate_max_huge_pages;
static unsigned long __initdata default_hstate_size;
+static bool __initdata parsed_valid_hugepagesz = true;
/*
* Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages,
}
}
- if (spool->min_hpages != -1) { /* minimum size accounting */
+ /* minimum size accounting */
+ if (spool->min_hpages != -1 && spool->rsv_hpages) {
if (delta > spool->rsv_hpages) {
/*
* Asking for more reserves than those already taken on
if (spool->max_hpages != -1) /* maximum size accounting */
spool->used_hpages -= delta;
- if (spool->min_hpages != -1) { /* minimum size accounting */
+ /* minimum size accounting */
+ if (spool->min_hpages != -1 && spool->used_hpages < spool->min_hpages) {
if (spool->rsv_hpages + delta <= spool->min_hpages)
ret = 0;
else
*/
static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
{
- nid = next_node(nid, *nodes_allowed);
- if (nid == MAX_NUMNODES)
- nid = first_node(*nodes_allowed);
+ nid = next_node_in(nid, *nodes_allowed);
VM_BUG_ON(nid >= MAX_NUMNODES);
return nid;
return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
}
-static bool pfn_range_valid_gigantic(unsigned long start_pfn,
- unsigned long nr_pages)
+static bool pfn_range_valid_gigantic(struct zone *z,
+ unsigned long start_pfn, unsigned long nr_pages)
{
unsigned long i, end_pfn = start_pfn + nr_pages;
struct page *page;
page = pfn_to_page(i);
+ if (page_zone(page) != z)
+ return false;
+
if (PageReserved(page))
return false;
pfn = ALIGN(z->zone_start_pfn, nr_pages);
while (zone_spans_last_pfn(z, pfn, nr_pages)) {
- if (pfn_range_valid_gigantic(pfn, nr_pages)) {
+ if (pfn_range_valid_gigantic(z, pfn, nr_pages)) {
/*
* We release the zone lock here because
* alloc_contig_range() will also lock the zone
subsys_initcall(hugetlb_init);
/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_bad_size(void)
+{
+ parsed_valid_hugepagesz = false;
+}
+
void __init hugetlb_add_hstate(unsigned int order)
{
struct hstate *h;
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&h->hugepage_freelists[i]);
INIT_LIST_HEAD(&h->hugepage_activelist);
- h->next_nid_to_alloc = first_node(node_states[N_MEMORY]);
- h->next_nid_to_free = first_node(node_states[N_MEMORY]);
+ h->next_nid_to_alloc = first_memory_node;
+ h->next_nid_to_free = first_memory_node;
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024);
unsigned long *mhp;
static unsigned long *last_mhp;
+ if (!parsed_valid_hugepagesz) {
+ pr_warn("hugepages = %s preceded by "
+ "an unsupported hugepagesz, ignoring\n", s);
+ parsed_valid_hugepagesz = true;
+ return 1;
+ }
/*
* !hugetlb_max_hstate means we haven't parsed a hugepagesz= parameter yet,
* so this hugepages= parameter goes to the "default hstate".
*/
- if (!hugetlb_max_hstate)
+ else if (!hugetlb_max_hstate)
mhp = &default_hstate_max_huge_pages;
else
mhp = &parsed_hstate->max_huge_pages;
}
/*
- * Turn a non-refcounted page (->_count == 0) into refcounted with
+ * Turn a non-refcounted page (->_refcount == 0) into refcounted with
* a count of one.
*/
static inline void set_page_refcounted(struct page *page)
struct alloc_context {
struct zonelist *zonelist;
nodemask_t *nodemask;
- struct zone *preferred_zone;
- int classzone_idx;
+ struct zoneref *preferred_zoneref;
int migratetype;
enum zone_type high_zoneidx;
bool spread_dirty_pages;
};
+#define ac_classzone_idx(ac) zonelist_zone_idx(ac->preferred_zoneref)
+
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
bool direct_compaction; /* False from kcompactd or /proc/... */
int order; /* order a direct compactor needs */
const gfp_t gfp_mask; /* gfp mask of a direct compactor */
- const int alloc_flags; /* alloc flags of a direct compactor */
+ const unsigned int alloc_flags; /* alloc flags of a direct compactor */
const int classzone_idx; /* zone index of a direct compactor */
struct zone *zone;
int contended; /* Signal need_sched() or lock
* @lru: index of lru list the page is sitting on
* @nr_pages: positive when adding or negative when removing
*
- * This function must be called when a page is added to or removed from an
- * lru list.
+ * This function must be called under lru_lock, just before a page is added
+ * to or just after a page is removed from an lru list (that ordering being
+ * so as to allow it to check that lru_size 0 is consistent with list_empty).
*/
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int nr_pages)
{
struct mem_cgroup_per_zone *mz;
unsigned long *lru_size;
+ long size;
+ bool empty;
+
+ __update_lru_size(lruvec, lru, nr_pages);
if (mem_cgroup_disabled())
return;
mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
lru_size = mz->lru_size + lru;
- *lru_size += nr_pages;
- VM_BUG_ON((long)(*lru_size) < 0);
+ empty = list_empty(lruvec->lists + lru);
+
+ if (nr_pages < 0)
+ *lru_size += nr_pages;
+
+ size = *lru_size;
+ if (WARN_ONCE(size < 0 || empty != !size,
+ "%s(%p, %d, %d): lru_size %ld but %sempty\n",
+ __func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) {
+ VM_BUG_ON(1);
+ *lru_size = 0;
+ }
+
+ if (nr_pages > 0)
+ *lru_size += nr_pages;
}
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
*/
if (fatal_signal_pending(current) || task_will_free_mem(current)) {
mark_oom_victim(current);
+ try_oom_reaper(current);
goto unlock;
}
mem_cgroup_may_update_nodemask(memcg);
node = memcg->last_scanned_node;
- node = next_node(node, memcg->scan_nodes);
- if (node == MAX_NUMNODES)
- node = first_node(memcg->scan_nodes);
+ node = next_node_in(node, memcg->scan_nodes);
/*
- * We call this when we hit limit, not when pages are added to LRU.
- * No LRU may hold pages because all pages are UNEVICTABLE or
- * memcg is too small and all pages are not on LRU. In that case,
- * we use curret node.
+ * mem_cgroup_may_update_nodemask might have seen no reclaimmable pages
+ * last time it really checked all the LRUs due to rate limiting.
+ * Fallback to the current node in that case for simplicity.
*/
if (unlikely(node == MAX_NUMNODES))
node = numa_node_id();
#define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map)
#define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map)
+#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
bool memhp_auto_online;
+#else
+bool memhp_auto_online = true;
+#endif
EXPORT_SYMBOL_GPL(memhp_auto_online);
+static int __init setup_memhp_default_state(char *str)
+{
+ if (!strcmp(str, "online"))
+ memhp_auto_online = true;
+ else if (!strcmp(str, "offline"))
+ memhp_auto_online = false;
+
+ return 1;
+}
+__setup("memhp_default_state=", setup_memhp_default_state);
+
void get_online_mems(void)
{
might_sleep();
}
/* Checks if this range of memory is likely to be hot-removable. */
-int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
+bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
struct page *page = pfn_to_page(start_pfn);
struct page *end_page = page + nr_pages;
/* Check the starting page of each pageblock within the range */
for (; page < end_page; page = next_active_pageblock(page)) {
if (!is_pageblock_removable_nolock(page))
- return 0;
+ return false;
cond_resched();
}
/* All pageblocks in the memory block are likely to be hot-removable */
- return 1;
+ return true;
}
/*
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
-#include <linux/random.h>
#include "internal.h"
BUG();
if (!node_isset(current->il_next, tmp)) {
- current->il_next = next_node(current->il_next, tmp);
- if (current->il_next >= MAX_NUMNODES)
- current->il_next = first_node(tmp);
+ current->il_next = next_node_in(current->il_next, tmp);
if (current->il_next >= MAX_NUMNODES)
current->il_next = numa_node_id();
}
struct task_struct *me = current;
nid = me->il_next;
- next = next_node(nid, policy->v.nodes);
- if (next >= MAX_NUMNODES)
- next = first_node(policy->v.nodes);
+ next = next_node_in(nid, policy->v.nodes);
if (next < MAX_NUMNODES)
me->il_next = next;
return nid;
return interleave_nodes(policy);
case MPOL_BIND: {
+ struct zoneref *z;
+
/*
* Follow bind policy behavior and start allocation at the
* first node.
*/
struct zonelist *zonelist;
- struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
zonelist = &NODE_DATA(node)->node_zonelists[0];
- (void)first_zones_zonelist(zonelist, highest_zoneidx,
- &policy->v.nodes,
- &zone);
- return zone ? zone->node : node;
+ z = first_zones_zonelist(zonelist, highest_zoneidx,
+ &policy->v.nodes);
+ return z->zone ? z->zone->node : node;
}
default:
}
}
-/* Do static interleaving for a VMA with known offset. */
+/*
+ * Do static interleaving for a VMA with known offset @n. Returns the n'th
+ * node in pol->v.nodes (starting from n=0), wrapping around if n exceeds the
+ * number of present nodes.
+ */
static unsigned offset_il_node(struct mempolicy *pol,
- struct vm_area_struct *vma, unsigned long off)
+ struct vm_area_struct *vma, unsigned long n)
{
unsigned nnodes = nodes_weight(pol->v.nodes);
unsigned target;
- int c;
- int nid = NUMA_NO_NODE;
+ int i;
+ int nid;
if (!nnodes)
return numa_node_id();
- target = (unsigned int)off % nnodes;
- c = 0;
- do {
+ target = (unsigned int)n % nnodes;
+ nid = first_node(pol->v.nodes);
+ for (i = 0; i < target; i++)
nid = next_node(nid, pol->v.nodes);
- c++;
- } while (c <= target);
return nid;
}
return interleave_nodes(pol);
}
-/*
- * Return the bit number of a random bit set in the nodemask.
- * (returns NUMA_NO_NODE if nodemask is empty)
- */
-int node_random(const nodemask_t *maskp)
-{
- int w, bit = NUMA_NO_NODE;
-
- w = nodes_weight(*maskp);
- if (w)
- bit = bitmap_ord_to_pos(maskp->bits,
- get_random_int() % w, MAX_NUMNODES);
- return bit;
-}
-
#ifdef CONFIG_HUGETLBFS
/*
* huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol;
- struct zone *zone;
+ struct zoneref *z;
int curnid = page_to_nid(page);
unsigned long pgoff;
int thiscpu = raw_smp_processor_id();
break;
case MPOL_BIND:
+
/*
* allows binding to multiple nodes.
* use current page if in policy nodemask,
*/
if (node_isset(curnid, pol->v.nodes))
goto out;
- (void)first_zones_zonelist(
+ z = first_zones_zonelist(
node_zonelist(numa_node_id(), GFP_HIGHUSER),
gfp_zone(GFP_HIGHUSER),
- &pol->v.nodes, &zone);
- polnid = zone->node;
+ &pol->v.nodes);
+ polnid = z->zone->node;
break;
default:
newpage->index = page->index;
newpage->mapping = page->mapping;
if (PageSwapBacked(page))
- SetPageSwapBacked(newpage);
+ __SetPageSwapBacked(newpage);
return MIGRATEPAGE_SUCCESS;
}
newpage->index = page->index;
newpage->mapping = page->mapping;
if (PageSwapBacked(page))
- SetPageSwapBacked(newpage);
+ __SetPageSwapBacked(newpage);
get_page(newpage); /* add cache reference */
if (PageSwapCache(page)) {
/* Prepare a page as a migration target */
__SetPageLocked(new_page);
- SetPageSwapBacked(new_page);
+ __SetPageSwapBacked(new_page);
/* anon mapping, we can simply copy page->mapping to the new page: */
new_page->mapping = page->mapping;
#define arch_mmap_check(addr, len, flags) (0)
#endif
-#ifndef arch_rebalance_pgtables
-#define arch_rebalance_pgtables(addr, len) (addr)
-#endif
-
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
if (offset_in_page(addr))
return -EINVAL;
- addr = arch_rebalance_pgtables(addr, len);
error = security_mmap_addr(addr);
return error ? error : addr;
}
}
/* Returns the next zone at or below highest_zoneidx in a zonelist */
-struct zoneref *next_zones_zonelist(struct zoneref *z,
+struct zoneref *__next_zones_zonelist(struct zoneref *z,
enum zone_type highest_zoneidx,
nodemask_t *nodes)
{
return pmd;
}
+static void take_rmap_locks(struct vm_area_struct *vma)
+{
+ if (vma->vm_file)
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (vma->anon_vma)
+ anon_vma_lock_write(vma->anon_vma);
+}
+
+static void drop_rmap_locks(struct vm_area_struct *vma)
+{
+ if (vma->anon_vma)
+ anon_vma_unlock_write(vma->anon_vma);
+ if (vma->vm_file)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+}
+
static pte_t move_soft_dirty_pte(pte_t pte)
{
/*
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
- struct address_space *mapping = NULL;
- struct anon_vma *anon_vma = NULL;
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
* serialize access to individual ptes, but only rmap traversal
* order guarantees that we won't miss both the old and new ptes).
*/
- if (need_rmap_locks) {
- if (vma->vm_file) {
- mapping = vma->vm_file->f_mapping;
- i_mmap_lock_write(mapping);
- }
- if (vma->anon_vma) {
- anon_vma = vma->anon_vma;
- anon_vma_lock_write(anon_vma);
- }
- }
+ if (need_rmap_locks)
+ take_rmap_locks(vma);
/*
* We don't have to worry about the ordering of src and dst
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
- if (anon_vma)
- anon_vma_unlock_write(anon_vma);
- if (mapping)
- i_mmap_unlock_write(mapping);
+ if (need_rmap_locks)
+ drop_rmap_locks(vma);
}
#define LATENCY_LIMIT (64 * PAGE_SIZE)
if (pmd_trans_huge(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE) {
bool moved;
- VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma,
- vma);
/* See comment in move_ptes() */
if (need_rmap_locks)
- anon_vma_lock_write(vma->anon_vma);
- moved = move_huge_pmd(vma, new_vma, old_addr,
- new_addr, old_end,
- old_pmd, new_pmd);
+ take_rmap_locks(vma);
+ moved = move_huge_pmd(vma, old_addr, new_addr,
+ old_end, old_pmd, new_pmd);
if (need_rmap_locks)
- anon_vma_unlock_write(vma->anon_vma);
+ drop_rmap_locks(vma);
if (moved) {
need_flush = true;
continue;
#define K(x) ((x) << (PAGE_SHIFT-10))
+/*
+ * task->mm can be NULL if the task is the exited group leader. So to
+ * determine whether the task is using a particular mm, we examine all the
+ * task's threads: if one of those is using this mm then this task was also
+ * using it.
+ */
+static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(p, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm)
+ return t_mm == mm;
+ }
+ return false;
+}
+
+
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
up_read(&mm->mmap_sem);
/*
- * Clear TIF_MEMDIE because the task shouldn't be sitting on a
- * reasonably reclaimable memory anymore. OOM killer can continue
- * by selecting other victim if unmapping hasn't led to any
- * improvements. This also means that selecting this task doesn't
- * make any sense.
+ * This task can be safely ignored because we cannot do much more
+ * to release its memory.
*/
tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN;
- exit_oom_victim(tsk);
out:
mmput(mm);
return ret;
debug_show_all_locks();
}
+ /*
+ * Clear TIF_MEMDIE because the task shouldn't be sitting on a
+ * reasonably reclaimable memory anymore or it is not a good candidate
+ * for the oom victim right now because it cannot release its memory
+ * itself nor by the oom reaper.
+ */
+ tsk->oom_reaper_list = NULL;
+ exit_oom_victim(tsk);
+
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
}
wake_up(&oom_reaper_wait);
}
+/* Check if we can reap the given task. This has to be called with stable
+ * tsk->mm
+ */
+void try_oom_reaper(struct task_struct *tsk)
+{
+ struct mm_struct *mm = tsk->mm;
+ struct task_struct *p;
+
+ if (!mm)
+ return;
+
+ /*
+ * There might be other threads/processes which are either not
+ * dying or even not killable.
+ */
+ if (atomic_read(&mm->mm_users) > 1) {
+ rcu_read_lock();
+ for_each_process(p) {
+ bool exiting;
+
+ if (!process_shares_mm(p, mm))
+ continue;
+ if (same_thread_group(p, tsk))
+ continue;
+ if (fatal_signal_pending(p))
+ continue;
+
+ /*
+ * If the task is exiting make sure the whole thread group
+ * is exiting and cannot acces mm anymore.
+ */
+ spin_lock_irq(&p->sighand->siglock);
+ exiting = signal_group_exit(p->signal);
+ spin_unlock_irq(&p->sighand->siglock);
+ if (exiting)
+ continue;
+
+ /* Give up */
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ }
+
+ wake_oom_reaper(tsk);
+}
+
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
oom_killer_disabled = false;
}
-/*
- * task->mm can be NULL if the task is the exited group leader. So to
- * determine whether the task is using a particular mm, we examine all the
- * task's threads: if one of those is using this mm then this task was also
- * using it.
- */
-static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
-{
- struct task_struct *t;
-
- for_each_thread(p, t) {
- struct mm_struct *t_mm = READ_ONCE(t->mm);
- if (t_mm)
- return t_mm == mm;
- }
- return false;
-}
-
/*
* Must be called while holding a reference to p, which will be released upon
* returning.
task_lock(p);
if (p->mm && task_will_free_mem(p)) {
mark_oom_victim(p);
+ try_oom_reaper(p);
task_unlock(p);
put_task_struct(p);
return;
if (current->mm &&
(fatal_signal_pending(current) || task_will_free_mem(current))) {
mark_oom_victim(current);
+ try_oom_reaper(current);
return true;
}
+ /*
+ * The OOM killer does not compensate for IO-less reclaim.
+ * pagefault_out_of_memory lost its gfp context so we have to
+ * make sure exclude 0 mask - all other users should have at least
+ * ___GFP_DIRECT_RECLAIM to get here.
+ */
+ if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL)))
+ return true;
+
/*
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
#ifdef CONFIG_HIGHMEM
int node;
unsigned long x = 0;
+ int i;
for_each_node_state(node, N_HIGH_MEMORY) {
- struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *z = &NODE_DATA(node)->node_zones[i];
- x += zone_dirtyable_memory(z);
+ if (is_highmem(z))
+ x += zone_dirtyable_memory(z);
+ }
}
/*
* Unreclaimable memory (kernel memory or anonymous memory
}
#endif
+/* Return a pointer to the bitmap storing bits affecting a block of pages */
+static inline unsigned long *get_pageblock_bitmap(struct page *page,
+ unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+ return __pfn_to_section(pfn)->pageblock_flags;
+#else
+ return page_zone(page)->pageblock_flags;
+#endif /* CONFIG_SPARSEMEM */
+}
+
+static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+ pfn &= (PAGES_PER_SECTION-1);
+ return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
+#else
+ pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
+ return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
+#endif /* CONFIG_SPARSEMEM */
+}
+
+/**
+ * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ * @end_bitidx: The last bit of interest to retrieve
+ * @mask: mask of bits that the caller is interested in
+ *
+ * Return: pageblock_bits flags
+ */
+static __always_inline unsigned long __get_pfnblock_flags_mask(struct page *page,
+ unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask)
+{
+ unsigned long *bitmap;
+ unsigned long bitidx, word_bitidx;
+ unsigned long word;
+
+ bitmap = get_pageblock_bitmap(page, pfn);
+ bitidx = pfn_to_bitidx(page, pfn);
+ word_bitidx = bitidx / BITS_PER_LONG;
+ bitidx &= (BITS_PER_LONG-1);
+
+ word = bitmap[word_bitidx];
+ bitidx += end_bitidx;
+ return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
+}
+
+unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask)
+{
+ return __get_pfnblock_flags_mask(page, pfn, end_bitidx, mask);
+}
+
+static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
+{
+ return __get_pfnblock_flags_mask(page, pfn, PB_migrate_end, MIGRATETYPE_MASK);
+}
+
+/**
+ * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * @page: The page within the block of interest
+ * @flags: The flags to set
+ * @pfn: The target page frame number
+ * @end_bitidx: The last bit of interest
+ * @mask: mask of bits that the caller is interested in
+ */
+void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
+ unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask)
+{
+ unsigned long *bitmap;
+ unsigned long bitidx, word_bitidx;
+ unsigned long old_word, word;
+
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+
+ bitmap = get_pageblock_bitmap(page, pfn);
+ bitidx = pfn_to_bitidx(page, pfn);
+ word_bitidx = bitidx / BITS_PER_LONG;
+ bitidx &= (BITS_PER_LONG-1);
+
+ VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
+
+ bitidx += end_bitidx;
+ mask <<= (BITS_PER_LONG - bitidx - 1);
+ flags <<= (BITS_PER_LONG - bitidx - 1);
+
+ word = READ_ONCE(bitmap[word_bitidx]);
+ for (;;) {
+ old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
+ if (word == old_word)
+ break;
+ word = old_word;
+ }
+}
void set_pageblock_migratetype(struct page *page, int migratetype)
{
zone->free_area[order].nr_free++;
}
-static inline int free_pages_check(struct page *page)
+/*
+ * A bad page could be due to a number of fields. Instead of multiple branches,
+ * try and check multiple fields with one check. The caller must do a detailed
+ * check if necessary.
+ */
+static inline bool page_expected_state(struct page *page,
+ unsigned long check_flags)
{
- const char *bad_reason = NULL;
- unsigned long bad_flags = 0;
+ if (unlikely(atomic_read(&page->_mapcount) != -1))
+ return false;
+
+ if (unlikely((unsigned long)page->mapping |
+ page_ref_count(page) |
+#ifdef CONFIG_MEMCG
+ (unsigned long)page->mem_cgroup |
+#endif
+ (page->flags & check_flags)))
+ return false;
+
+ return true;
+}
+
+static void free_pages_check_bad(struct page *page)
+{
+ const char *bad_reason;
+ unsigned long bad_flags;
+
+ bad_reason = NULL;
+ bad_flags = 0;
if (unlikely(atomic_read(&page->_mapcount) != -1))
bad_reason = "nonzero mapcount";
if (unlikely(page->mapping != NULL))
bad_reason = "non-NULL mapping";
if (unlikely(page_ref_count(page) != 0))
- bad_reason = "nonzero _count";
+ bad_reason = "nonzero _refcount";
if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
if (unlikely(page->mem_cgroup))
bad_reason = "page still charged to cgroup";
#endif
- if (unlikely(bad_reason)) {
- bad_page(page, bad_reason, bad_flags);
- return 1;
+ bad_page(page, bad_reason, bad_flags);
+}
+
+static inline int free_pages_check(struct page *page)
+{
+ if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
+ return 0;
+
+ /* Something has gone sideways, find it */
+ free_pages_check_bad(page);
+ return 1;
+}
+
+static int free_tail_pages_check(struct page *head_page, struct page *page)
+{
+ int ret = 1;
+
+ /*
+ * We rely page->lru.next never has bit 0 set, unless the page
+ * is PageTail(). Let's make sure that's true even for poisoned ->lru.
+ */
+ BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
+
+ if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
+ ret = 0;
+ goto out;
+ }
+ switch (page - head_page) {
+ case 1:
+ /* the first tail page: ->mapping is compound_mapcount() */
+ if (unlikely(compound_mapcount(page))) {
+ bad_page(page, "nonzero compound_mapcount", 0);
+ goto out;
+ }
+ break;
+ case 2:
+ /*
+ * the second tail page: ->mapping is
+ * page_deferred_list().next -- ignore value.
+ */
+ break;
+ default:
+ if (page->mapping != TAIL_MAPPING) {
+ bad_page(page, "corrupted mapping in tail page", 0);
+ goto out;
+ }
+ break;
+ }
+ if (unlikely(!PageTail(page))) {
+ bad_page(page, "PageTail not set", 0);
+ goto out;
}
+ if (unlikely(compound_head(page) != head_page)) {
+ bad_page(page, "compound_head not consistent", 0);
+ goto out;
+ }
+ ret = 0;
+out:
+ page->mapping = NULL;
+ clear_compound_head(page);
+ return ret;
+}
+
+static __always_inline bool free_pages_prepare(struct page *page,
+ unsigned int order, bool check_free)
+{
+ int bad = 0;
+
+ VM_BUG_ON_PAGE(PageTail(page), page);
+
+ trace_mm_page_free(page, order);
+ kmemcheck_free_shadow(page, order);
+ kasan_free_pages(page, order);
+
+ /*
+ * Check tail pages before head page information is cleared to
+ * avoid checking PageCompound for order-0 pages.
+ */
+ if (unlikely(order)) {
+ bool compound = PageCompound(page);
+ int i;
+
+ VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
+
+ for (i = 1; i < (1 << order); i++) {
+ if (compound)
+ bad += free_tail_pages_check(page, page + i);
+ if (unlikely(free_pages_check(page + i))) {
+ bad++;
+ continue;
+ }
+ (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+ }
+ }
+ if (PageAnonHead(page))
+ page->mapping = NULL;
+ if (check_free)
+ bad += free_pages_check(page);
+ if (bad)
+ return false;
+
page_cpupid_reset_last(page);
- if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
- page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
- return 0;
+ page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+ reset_page_owner(page, order);
+
+ if (!PageHighMem(page)) {
+ debug_check_no_locks_freed(page_address(page),
+ PAGE_SIZE << order);
+ debug_check_no_obj_freed(page_address(page),
+ PAGE_SIZE << order);
+ }
+ arch_free_page(page, order);
+ kernel_poison_pages(page, 1 << order, 0);
+ kernel_map_pages(page, 1 << order, 0);
+
+ return true;
+}
+
+#ifdef CONFIG_DEBUG_VM
+static inline bool free_pcp_prepare(struct page *page)
+{
+ return free_pages_prepare(page, 0, true);
+}
+
+static inline bool bulkfree_pcp_prepare(struct page *page)
+{
+ return false;
+}
+#else
+static bool free_pcp_prepare(struct page *page)
+{
+ return free_pages_prepare(page, 0, false);
}
+static bool bulkfree_pcp_prepare(struct page *page)
+{
+ return free_pages_check(page);
+}
+#endif /* CONFIG_DEBUG_VM */
+
/*
* Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone, and of same order.
{
int migratetype = 0;
int batch_free = 0;
- int to_free = count;
unsigned long nr_scanned;
+ bool isolated_pageblocks;
spin_lock(&zone->lock);
+ isolated_pageblocks = has_isolate_pageblock(zone);
nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
if (nr_scanned)
__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
- while (to_free) {
+ while (count) {
struct page *page;
struct list_head *list;
/* This is the only non-empty list. Free them all. */
if (batch_free == MIGRATE_PCPTYPES)
- batch_free = to_free;
+ batch_free = count;
do {
int mt; /* migratetype of the to-be-freed page */
/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
/* Pageblock could have been isolated meanwhile */
- if (unlikely(has_isolate_pageblock(zone)))
+ if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
+ if (bulkfree_pcp_prepare(page))
+ continue;
+
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
- } while (--to_free && --batch_free && !list_empty(list));
+ } while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
}
spin_unlock(&zone->lock);
}
-static int free_tail_pages_check(struct page *head_page, struct page *page)
-{
- int ret = 1;
-
- /*
- * We rely page->lru.next never has bit 0 set, unless the page
- * is PageTail(). Let's make sure that's true even for poisoned ->lru.
- */
- BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
-
- if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
- ret = 0;
- goto out;
- }
- switch (page - head_page) {
- case 1:
- /* the first tail page: ->mapping is compound_mapcount() */
- if (unlikely(compound_mapcount(page))) {
- bad_page(page, "nonzero compound_mapcount", 0);
- goto out;
- }
- break;
- case 2:
- /*
- * the second tail page: ->mapping is
- * page_deferred_list().next -- ignore value.
- */
- break;
- default:
- if (page->mapping != TAIL_MAPPING) {
- bad_page(page, "corrupted mapping in tail page", 0);
- goto out;
- }
- break;
- }
- if (unlikely(!PageTail(page))) {
- bad_page(page, "PageTail not set", 0);
- goto out;
- }
- if (unlikely(compound_head(page) != head_page)) {
- bad_page(page, "compound_head not consistent", 0);
- goto out;
- }
- ret = 0;
-out:
- page->mapping = NULL;
- clear_compound_head(page);
- return ret;
-}
-
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long end_pfn = PFN_UP(end);
- for (; start_pfn < end_pfn; start_pfn++) {
- if (pfn_valid(start_pfn)) {
- struct page *page = pfn_to_page(start_pfn);
-
- init_reserved_page(start_pfn);
-
- /* Avoid false-positive PageTail() */
- INIT_LIST_HEAD(&page->lru);
-
- SetPageReserved(page);
- }
- }
-}
-
-static bool free_pages_prepare(struct page *page, unsigned int order)
-{
- bool compound = PageCompound(page);
- int i, bad = 0;
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
-
- trace_mm_page_free(page, order);
- kmemcheck_free_shadow(page, order);
- kasan_free_pages(page, order);
-
- if (PageAnon(page))
- page->mapping = NULL;
- bad += free_pages_check(page);
- for (i = 1; i < (1 << order); i++) {
- if (compound)
- bad += free_tail_pages_check(page, page + i);
- bad += free_pages_check(page + i);
- }
- if (bad)
- return false;
+ for (; start_pfn < end_pfn; start_pfn++) {
+ if (pfn_valid(start_pfn)) {
+ struct page *page = pfn_to_page(start_pfn);
- reset_page_owner(page, order);
+ init_reserved_page(start_pfn);
- if (!PageHighMem(page)) {
- debug_check_no_locks_freed(page_address(page),
- PAGE_SIZE << order);
- debug_check_no_obj_freed(page_address(page),
- PAGE_SIZE << order);
- }
- arch_free_page(page, order);
- kernel_poison_pages(page, 1 << order, 0);
- kernel_map_pages(page, 1 << order, 0);
+ /* Avoid false-positive PageTail() */
+ INIT_LIST_HEAD(&page->lru);
- return true;
+ SetPageReserved(page);
+ }
+ }
}
static void __free_pages_ok(struct page *page, unsigned int order)
int migratetype;
unsigned long pfn = page_to_pfn(page);
- if (!free_pages_prepare(page, order))
+ if (!free_pages_prepare(page, order, true))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_restore(flags);
}
-static void __init __free_pages_boot_core(struct page *page,
- unsigned long pfn, unsigned int order)
+static void __init __free_pages_boot_core(struct page *page, unsigned int order)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
{
if (early_page_uninitialised(pfn))
return;
- return __free_pages_boot_core(page, pfn, order);
+ return __free_pages_boot_core(page, order);
}
/*
if (nr_pages == MAX_ORDER_NR_PAGES &&
(pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- __free_pages_boot_core(page, pfn, MAX_ORDER-1);
+ __free_pages_boot_core(page, MAX_ORDER-1);
return;
}
- for (i = 0; i < nr_pages; i++, page++, pfn++)
- __free_pages_boot_core(page, pfn, 0);
+ for (i = 0; i < nr_pages; i++, page++)
+ __free_pages_boot_core(page, 0);
}
/* Completion tracking for deferred_init_memmap() threads */
}
}
-/*
- * This page is about to be returned from the page allocator
- */
-static inline int check_new_page(struct page *page)
+static void check_new_page_bad(struct page *page)
{
const char *bad_reason = NULL;
unsigned long bad_flags = 0;
if (unlikely(page->mem_cgroup))
bad_reason = "page still charged to cgroup";
#endif
- if (unlikely(bad_reason)) {
- bad_page(page, bad_reason, bad_flags);
- return 1;
- }
- return 0;
+ bad_page(page, bad_reason, bad_flags);
+}
+
+/*
+ * This page is about to be returned from the page allocator
+ */
+static inline int check_new_page(struct page *page)
+{
+ if (likely(page_expected_state(page,
+ PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON)))
+ return 0;
+
+ check_new_page_bad(page);
+ return 1;
}
static inline bool free_pages_prezeroed(bool poisoned)
page_poisoning_enabled() && poisoned;
}
-static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
- int alloc_flags)
+#ifdef CONFIG_DEBUG_VM
+static bool check_pcp_refill(struct page *page)
+{
+ return false;
+}
+
+static bool check_new_pcp(struct page *page)
+{
+ return check_new_page(page);
+}
+#else
+static bool check_pcp_refill(struct page *page)
+{
+ return check_new_page(page);
+}
+static bool check_new_pcp(struct page *page)
+{
+ return false;
+}
+#endif /* CONFIG_DEBUG_VM */
+
+static bool check_new_pages(struct page *page, unsigned int order)
+{
+ int i;
+ for (i = 0; i < (1 << order); i++) {
+ struct page *p = page + i;
+
+ if (unlikely(check_new_page(p)))
+ return true;
+ }
+
+ return false;
+}
+
+static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
+ unsigned int alloc_flags)
{
int i;
bool poisoned = true;
for (i = 0; i < (1 << order); i++) {
struct page *p = page + i;
- if (unlikely(check_new_page(p)))
- return 1;
if (poisoned)
poisoned &= page_is_poisoned(p);
}
set_page_pfmemalloc(page);
else
clear_page_pfmemalloc(page);
-
- return 0;
}
/*
if (unlikely(page == NULL))
break;
+ if (unlikely(check_pcp_refill(page)))
+ continue;
+
/*
* Split buddy pages returned by expand() are received here
* in physical page order. The page is added to the callers and
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
+
+ if (page_zone(page) != zone)
+ continue;
+
if (!swsusp_page_is_forbidden(page))
swsusp_unset_page_free(page);
}
unsigned long pfn = page_to_pfn(page);
int migratetype;
- if (!free_pages_prepare(page, 0))
+ if (!free_pcp_prepare(page))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
return nr_pages;
}
+/*
+ * Update NUMA hit/miss statistics
+ *
+ * Must be called with interrupts disabled.
+ *
+ * When __GFP_OTHER_NODE is set assume the node of the preferred
+ * zone is the local node. This is useful for daemons who allocate
+ * memory on behalf of other processes.
+ */
+static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
+ gfp_t flags)
+{
+#ifdef CONFIG_NUMA
+ int local_nid = numa_node_id();
+ enum zone_stat_item local_stat = NUMA_LOCAL;
+
+ if (unlikely(flags & __GFP_OTHER_NODE)) {
+ local_stat = NUMA_OTHER;
+ local_nid = preferred_zone->node;
+ }
+
+ if (z->node == local_nid) {
+ __inc_zone_state(z, NUMA_HIT);
+ __inc_zone_state(z, local_stat);
+ } else {
+ __inc_zone_state(z, NUMA_MISS);
+ __inc_zone_state(preferred_zone, NUMA_FOREIGN);
+ }
+#endif
+}
+
/*
* Allocate a page from the given zone. Use pcplists for order-0 allocations.
*/
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int alloc_flags, int migratetype)
+ gfp_t gfp_flags, unsigned int alloc_flags,
+ int migratetype)
{
unsigned long flags;
struct page *page;
struct list_head *list;
local_irq_save(flags);
- pcp = &this_cpu_ptr(zone->pageset)->pcp;
- list = &pcp->lists[migratetype];
- if (list_empty(list)) {
- pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, list,
- migratetype, cold);
- if (unlikely(list_empty(list)))
- goto failed;
- }
+ do {
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ list = &pcp->lists[migratetype];
+ if (list_empty(list)) {
+ pcp->count += rmqueue_bulk(zone, 0,
+ pcp->batch, list,
+ migratetype, cold);
+ if (unlikely(list_empty(list)))
+ goto failed;
+ }
- if (cold)
- page = list_last_entry(list, struct page, lru);
- else
- page = list_first_entry(list, struct page, lru);
+ if (cold)
+ page = list_last_entry(list, struct page, lru);
+ else
+ page = list_first_entry(list, struct page, lru);
+ } while (page && check_new_pcp(page));
+ __dec_zone_state(zone, NR_ALLOC_BATCH);
list_del(&page->lru);
pcp->count--;
} else {
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
spin_lock_irqsave(&zone->lock, flags);
- page = NULL;
- if (alloc_flags & ALLOC_HARDER) {
- page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
- if (page)
- trace_mm_page_alloc_zone_locked(page, order, migratetype);
- }
- if (!page)
- page = __rmqueue(zone, order, migratetype);
+ do {
+ page = NULL;
+ if (alloc_flags & ALLOC_HARDER) {
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (page)
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ }
+ if (!page)
+ page = __rmqueue(zone, order, migratetype);
+ } while (page && check_new_pages(page, order));
spin_unlock(&zone->lock);
if (!page)
goto failed;
+ __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__mod_zone_freepage_state(zone, -(1 << order),
get_pcppage_migratetype(page));
}
- __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
!test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
* to check in the allocation paths if no pages are free.
*/
static bool __zone_watermark_ok(struct zone *z, unsigned int order,
- unsigned long mark, int classzone_idx, int alloc_flags,
+ unsigned long mark, int classzone_idx,
+ unsigned int alloc_flags,
long free_pages)
{
long min = mark;
int o;
- const int alloc_harder = (alloc_flags & ALLOC_HARDER);
+ const bool alloc_harder = (alloc_flags & ALLOC_HARDER);
/* free_pages may go negative - that's OK */
free_pages -= (1 << order) - 1;
}
bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
- int classzone_idx, int alloc_flags)
+ int classzone_idx, unsigned int alloc_flags)
{
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
zone_page_state(z, NR_FREE_PAGES));
}
+static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
+ unsigned long mark, int classzone_idx, unsigned int alloc_flags)
+{
+ long free_pages = zone_page_state(z, NR_FREE_PAGES);
+ long cma_pages = 0;
+
+#ifdef CONFIG_CMA
+ /* If allocation can't use CMA areas don't use free CMA pages */
+ if (!(alloc_flags & ALLOC_CMA))
+ cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
+
+ /*
+ * Fast check for order-0 only. If this fails then the reserves
+ * need to be calculated. There is a corner case where the check
+ * passes but only the high-order atomic reserve are free. If
+ * the caller is !atomic then it'll uselessly search the free
+ * list. That corner case is then slower but it is harmless.
+ */
+ if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+ return true;
+
+ return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+ free_pages);
+}
+
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx)
{
get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
const struct alloc_context *ac)
{
- struct zonelist *zonelist = ac->zonelist;
- struct zoneref *z;
- struct page *page = NULL;
+ struct zoneref *z = ac->preferred_zoneref;
struct zone *zone;
- int nr_fair_skipped = 0;
- bool zonelist_rescan;
+ bool fair_skipped = false;
+ bool apply_fair = (alloc_flags & ALLOC_FAIR);
zonelist_scan:
- zonelist_rescan = false;
-
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed() comment in kernel/cpuset.c.
*/
- for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+ for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
+ struct page *page;
unsigned long mark;
if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(zone, gfp_mask))
+ !__cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
* page was allocated in should have no effect on the
* time the page has in memory before being reclaimed.
*/
- if (alloc_flags & ALLOC_FAIR) {
- if (!zone_local(ac->preferred_zone, zone))
- break;
+ if (apply_fair) {
if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
- nr_fair_skipped++;
+ fair_skipped = true;
continue;
}
+ if (!zone_local(ac->preferred_zoneref->zone, zone)) {
+ if (fair_skipped)
+ goto reset_fair;
+ apply_fair = false;
+ }
}
/*
* When allocating a page cache page for writing, we
continue;
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
- if (!zone_watermark_ok(zone, order, mark,
- ac->classzone_idx, alloc_flags)) {
+ if (!zone_watermark_fast(zone, order, mark,
+ ac_classzone_idx(ac), alloc_flags)) {
int ret;
/* Checked here to keep the fast path fast */
goto try_this_zone;
if (zone_reclaim_mode == 0 ||
- !zone_allows_reclaim(ac->preferred_zone, zone))
+ !zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
continue;
ret = zone_reclaim(zone, gfp_mask, order);
default:
/* did we reclaim enough */
if (zone_watermark_ok(zone, order, mark,
- ac->classzone_idx, alloc_flags))
+ ac_classzone_idx(ac), alloc_flags))
goto try_this_zone;
continue;
}
try_this_zone:
- page = buffered_rmqueue(ac->preferred_zone, zone, order,
+ page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
- if (prep_new_page(page, order, gfp_mask, alloc_flags))
- goto try_this_zone;
+ prep_new_page(page, order, gfp_mask, alloc_flags);
/*
* If this is a high-order atomic allocation then check
* include remote zones now, before entering the slowpath and waking
* kswapd: prefer spilling to a remote zone over swapping locally.
*/
- if (alloc_flags & ALLOC_FAIR) {
- alloc_flags &= ~ALLOC_FAIR;
- if (nr_fair_skipped) {
- zonelist_rescan = true;
- reset_alloc_batches(ac->preferred_zone);
- }
- if (nr_online_nodes > 1)
- zonelist_rescan = true;
- }
-
- if (zonelist_rescan)
+ if (fair_skipped) {
+reset_fair:
+ apply_fair = false;
+ fair_skipped = false;
+ reset_alloc_batches(ac->preferred_zoneref->zone);
goto zonelist_scan;
+ }
return NULL;
}
/* The OOM killer does not needlessly kill tasks for lowmem */
if (ac->high_zoneidx < ZONE_NORMAL)
goto out;
- /* The OOM killer does not compensate for IO-less reclaim */
- if (!(gfp_mask & __GFP_FS)) {
- /*
- * XXX: Page reclaim didn't yield anything,
- * and the OOM killer can't be invoked, but
- * keep looping as per tradition.
- *
- * But do not keep looping if oom_killer_disable()
- * was already called, for the system is trying to
- * enter a quiescent state during suspend.
- */
- *did_some_progress = !oom_killer_disabled;
- goto out;
- }
if (pm_suspended_storage())
goto out;
+ /*
+ * XXX: GFP_NOFS allocations should rather fail than rely on
+ * other request to make a forward progress.
+ * We are in an unfortunate situation where out_of_memory cannot
+ * do much for this context but let's try it to at least get
+ * access to memory reserved if the current task is killed (see
+ * out_of_memory). Once filesystems are ready to handle allocation
+ * failures more gracefully we should just bail out here.
+ */
+
/* The OOM killer may not free memory on a specific node */
if (gfp_mask & __GFP_THISNODE)
goto out;
/* Try memory compaction for high-order allocations before reclaim */
static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
+ unsigned int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
bool *deferred_compaction)
{
#else
static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
+ unsigned int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
bool *deferred_compaction)
{
/* The really slow allocator path where we enter direct reclaim */
static inline struct page *
__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
+ unsigned int alloc_flags, const struct alloc_context *ac,
unsigned long *did_some_progress)
{
struct page *page = NULL;
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->high_zoneidx, ac->nodemask)
- wakeup_kswapd(zone, order, zone_idx(ac->preferred_zone));
+ wakeup_kswapd(zone, order, ac_classzone_idx(ac));
}
-static inline int
+static inline unsigned int
gfp_to_alloc_flags(gfp_t gfp_mask)
{
- int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+ unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
struct page *page = NULL;
- int alloc_flags;
+ unsigned int alloc_flags;
unsigned long pages_reclaimed = 0;
unsigned long did_some_progress;
enum migrate_mode migration_mode = MIGRATE_ASYNC;
*/
alloc_flags = gfp_to_alloc_flags(gfp_mask);
- /*
- * Find the true preferred zone if the allocation is unconstrained by
- * cpusets.
- */
- if (!(alloc_flags & ALLOC_CPUSET) && !ac->nodemask) {
- struct zoneref *preferred_zoneref;
- preferred_zoneref = first_zones_zonelist(ac->zonelist,
- ac->high_zoneidx, NULL, &ac->preferred_zone);
- ac->classzone_idx = zonelist_zone_idx(preferred_zoneref);
- }
-
/* This is the last chance, in general, before the goto nopage. */
page = get_page_from_freelist(gfp_mask, order,
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
/* Wait for some write requests to complete then retry */
- wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
+ wait_iff_congested(ac->preferred_zoneref->zone, BLK_RW_ASYNC, HZ/50);
goto retry;
}
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
- struct zoneref *preferred_zoneref;
- struct page *page = NULL;
+ struct page *page;
unsigned int cpuset_mems_cookie;
- int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
+ unsigned int alloc_flags = ALLOC_WMARK_LOW|ALLOC_FAIR;
+ gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = {
.high_zoneidx = gfp_zone(gfp_mask),
+ .zonelist = zonelist,
.nodemask = nodemask,
.migratetype = gfpflags_to_migratetype(gfp_mask),
};
+ if (cpusets_enabled()) {
+ alloc_mask |= __GFP_HARDWALL;
+ alloc_flags |= ALLOC_CPUSET;
+ if (!ac.nodemask)
+ ac.nodemask = &cpuset_current_mems_allowed;
+ }
+
gfp_mask &= gfp_allowed_mask;
lockdep_trace_alloc(gfp_mask);
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
- /* We set it here, as __alloc_pages_slowpath might have changed it */
- ac.zonelist = zonelist;
-
/* Dirty zone balancing only done in the fast path */
ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
/* The preferred zone is used for statistics later */
- preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx,
- ac.nodemask ? : &cpuset_current_mems_allowed,
- &ac.preferred_zone);
- if (!ac.preferred_zone)
- goto out;
- ac.classzone_idx = zonelist_zone_idx(preferred_zoneref);
+ ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
+ ac.high_zoneidx, ac.nodemask);
+ if (!ac.preferred_zoneref) {
+ page = NULL;
+ goto no_zone;
+ }
/* First allocation attempt */
- alloc_mask = gfp_mask|__GFP_HARDWALL;
page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
- if (unlikely(!page)) {
- /*
- * Runtime PM, block IO and its error handling path
- * can deadlock because I/O on the device might not
- * complete.
- */
- alloc_mask = memalloc_noio_flags(gfp_mask);
- ac.spread_dirty_pages = false;
-
- page = __alloc_pages_slowpath(alloc_mask, order, &ac);
- }
+ if (likely(page))
+ goto out;
- if (kmemcheck_enabled && page)
- kmemcheck_pagealloc_alloc(page, order, gfp_mask);
+ /*
+ * Runtime PM, block IO and its error handling path can deadlock
+ * because I/O on the device might not complete.
+ */
+ alloc_mask = memalloc_noio_flags(gfp_mask);
+ ac.spread_dirty_pages = false;
- trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
+ /*
+ * Restore the original nodemask if it was potentially replaced with
+ * &cpuset_current_mems_allowed to optimize the fast-path attempt.
+ */
+ if (cpusets_enabled())
+ ac.nodemask = nodemask;
+ page = __alloc_pages_slowpath(alloc_mask, order, &ac);
-out:
+no_zone:
/*
* When updating a task's mems_allowed, it is possible to race with
* parallel threads in such a way that an allocation can fail while
* the mask is being updated. If a page allocation is about to fail,
* check if the cpuset changed during allocation and if so, retry.
*/
- if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
+ if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
+ alloc_mask = gfp_mask;
goto retry_cpuset;
+ }
+
+out:
+ if (kmemcheck_enabled && page)
+ kmemcheck_pagealloc_alloc(page, order, gfp_mask);
+
+ trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
return page;
}
{
int zone_type; /* needs to be signed */
unsigned long managed_pages = 0;
+ unsigned long managed_highpages = 0;
+ unsigned long free_highpages = 0;
pg_data_t *pgdat = NODE_DATA(nid);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
val->sharedram = node_page_state(nid, NR_SHMEM);
val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
- val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
- val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
- NR_FREE_PAGES);
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
+ struct zone *zone = &pgdat->node_zones[zone_type];
+
+ if (is_highmem(zone)) {
+ managed_highpages += zone->managed_pages;
+ free_highpages += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ }
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
#else
- val->totalhigh = 0;
- val->freehigh = 0;
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
#endif
val->mem_unit = PAGE_SIZE;
}
*/
int local_memory_node(int node)
{
- struct zone *zone;
+ struct zoneref *z;
- (void)first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
+ z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
gfp_zone(GFP_KERNEL),
- NULL,
- &zone);
- return zone->node;
+ NULL);
+ return z->zone->node;
}
#endif
return table;
}
-/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
- unsigned long pfn)
-{
-#ifdef CONFIG_SPARSEMEM
- return __pfn_to_section(pfn)->pageblock_flags;
-#else
- return zone->pageblock_flags;
-#endif /* CONFIG_SPARSEMEM */
-}
-
-static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
-{
-#ifdef CONFIG_SPARSEMEM
- pfn &= (PAGES_PER_SECTION-1);
- return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
-#else
- pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
- return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
-#endif /* CONFIG_SPARSEMEM */
-}
-
-/**
- * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
- * @page: The page within the block of interest
- * @pfn: The target page frame number
- * @end_bitidx: The last bit of interest to retrieve
- * @mask: mask of bits that the caller is interested in
- *
- * Return: pageblock_bits flags
- */
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
- unsigned long end_bitidx,
- unsigned long mask)
-{
- struct zone *zone;
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long word;
-
- zone = page_zone(page);
- bitmap = get_pageblock_bitmap(zone, pfn);
- bitidx = pfn_to_bitidx(zone, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
-
- word = bitmap[word_bitidx];
- bitidx += end_bitidx;
- return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
-}
-
-/**
- * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
- * @page: The page within the block of interest
- * @flags: The flags to set
- * @pfn: The target page frame number
- * @end_bitidx: The last bit of interest
- * @mask: mask of bits that the caller is interested in
- */
-void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
- unsigned long pfn,
- unsigned long end_bitidx,
- unsigned long mask)
-{
- struct zone *zone;
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long old_word, word;
-
- BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
-
- zone = page_zone(page);
- bitmap = get_pageblock_bitmap(zone, pfn);
- bitidx = pfn_to_bitidx(zone, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
-
- VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
-
- bitidx += end_bitidx;
- mask <<= (BITS_PER_LONG - bitidx - 1);
- flags <<= (BITS_PER_LONG - bitidx - 1);
-
- word = READ_ONCE(bitmap[word_bitidx]);
- for (;;) {
- old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
- if (word == old_word)
- break;
- word = old_word;
- }
-}
-
/*
* This function checks whether pageblock includes unmovable pages or not.
* If @count is not zero, it is okay to include less @count unmovable pages
* We can't use page_count without pin a page
* because another CPU can free compound page.
* This check already skips compound tails of THP
- * because their page->_count is zero at all time.
+ * because their page->_refcount is zero at all time.
*/
if (!page_ref_count(page)) {
if (PageBuddy(page))
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
- * All pages in the range must be isolated before calling this.
+ * All pages in the range must be in a single zone and isolated
+ * before calling this.
*/
void
__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
return pfn;
}
+/* Caller should ensure that requested range is in a single zone */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
bool skip_hwpoisoned_pages)
{
* accordance with memory policy of the user process if possible. For
* now as a simple work-around, we use the next node for destination.
*/
- if (PageHuge(page)) {
- int node = next_online_node(page_to_nid(page));
- if (node == MAX_NUMNODES)
- node = first_online_node;
+ if (PageHuge(page))
return alloc_huge_page_node(page_hstate(compound_head(page)),
- node);
- }
+ next_node_in(page_to_nid(page),
+ node_online_map));
if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;
goto err;
/* Print information relevant to grouping pages by mobility */
- pageblock_mt = get_pfnblock_migratetype(page, pfn);
+ pageblock_mt = get_pageblock_migratetype(page);
page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
ret += snprintf(kbuf + ret, count - ret,
"PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
page = pfn_to_page(pfn);
+ if (page_zone(page) != zone)
+ continue;
+
/*
* We are safe to check buddy flag and order, because
* this is init stage and only single thread runs.
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
- BUG_ON(anon_vma->degree);
+ VM_WARN_ON(anon_vma->degree);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
int nr = compound ? hpage_nr_pages(page) : 1;
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
- SetPageSwapBacked(page);
+ __SetPageSwapBacked(page);
if (compound) {
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
- SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+ struct page **pagep, enum sgp_type sgp,
+ gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
static inline int shmem_getpage(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp, int *fault_type)
+ struct page **pagep, enum sgp_type sgp)
{
return shmem_getpage_gfp(inode, index, pagep, sgp,
- mapping_gfp_mask(inode->i_mapping), fault_type);
+ mapping_gfp_mask(inode->i_mapping), NULL, NULL);
}
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
/*
* ... whereas tmpfs objects are accounted incrementally as
- * pages are allocated, in order to allow huge sparse files.
+ * pages are allocated, in order to allow large sparse files.
* shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
* so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
*/
if (partial_start) {
struct page *page = NULL;
- shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+ shmem_getpage(inode, start - 1, &page, SGP_READ);
if (page) {
unsigned int top = PAGE_SIZE;
if (start > end) {
}
if (partial_end) {
struct page *page = NULL;
- shmem_getpage(inode, end, &page, SGP_READ, NULL);
+ shmem_getpage(inode, end, &page, SGP_READ);
if (page) {
zero_user_segment(page, 0, partial_end);
set_page_dirty(page);
return 0;
}
-#ifdef CONFIG_NUMA
-#ifdef CONFIG_TMPFS
+#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
char buffer[64];
}
return mpol;
}
-#endif /* CONFIG_TMPFS */
+#else /* !CONFIG_NUMA || !CONFIG_TMPFS */
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
+{
+}
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ return NULL;
+}
+#endif /* CONFIG_NUMA && CONFIG_TMPFS */
+#ifndef CONFIG_NUMA
+#define vm_policy vm_private_data
+#endif
static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
pvma.vm_ops = NULL;
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
- page = alloc_page_vma(gfp, &pvma, 0);
+ page = alloc_pages_vma(gfp, 0, &pvma, 0, numa_node_id(), false);
+ if (page) {
+ __SetPageLocked(page);
+ __SetPageSwapBacked(page);
+ }
/* Drop reference taken by mpol_shared_policy_lookup() */
mpol_cond_put(pvma.vm_policy);
return page;
}
-#else /* !CONFIG_NUMA */
-#ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
-{
-}
-#endif /* CONFIG_TMPFS */
-
-static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t index)
-{
- return swapin_readahead(swap, gfp, NULL, 0);
-}
-
-static inline struct page *shmem_alloc_page(gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t index)
-{
- return alloc_page(gfp);
-}
-#endif /* CONFIG_NUMA */
-
-#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
-static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
-{
- return NULL;
-}
-#endif
/*
* When a page is moved from swapcache to shmem filecache (either by the
copy_highpage(newpage, oldpage);
flush_dcache_page(newpage);
- __SetPageLocked(newpage);
SetPageUptodate(newpage);
- SetPageSwapBacked(newpage);
set_page_private(newpage, swap_index);
SetPageSwapCache(newpage);
*
* If we allocate a new one we do not mark it dirty. That's up to the
* vm. If we swap it in we mark it dirty since we also free the swap
- * entry since a page cannot live in both the swap and page cache
+ * entry since a page cannot live in both the swap and page cache.
+ *
+ * fault_mm and fault_type are only supplied by shmem_fault:
+ * otherwise they are NULL.
*/
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
+ struct page **pagep, enum sgp_type sgp, gfp_t gfp,
+ struct mm_struct *fault_mm, int *fault_type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
+ struct mm_struct *charge_mm;
struct mem_cgroup *memcg;
struct page *page;
swp_entry_t swap;
page = NULL;
}
- if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
+ if (sgp <= SGP_CACHE &&
((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
goto unlock;
*/
info = SHMEM_I(inode);
sbinfo = SHMEM_SB(inode->i_sb);
+ charge_mm = fault_mm ? : current->mm;
if (swap.val) {
/* Look it up and read it in.. */
page = lookup_swap_cache(swap);
if (!page) {
- /* here we actually do the io */
- if (fault_type)
+ /* Or update major stats only when swapin succeeds?? */
+ if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
+ }
+ /* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
if (!page) {
error = -ENOMEM;
goto failed;
}
- error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+ error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
false);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
error = -ENOMEM;
goto decused;
}
-
- __SetPageSwapBacked(page);
- __SetPageLocked(page);
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
- error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+ error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
false);
if (error)
goto decused;
flush_dcache_page(page);
SetPageUptodate(page);
}
- if (sgp == SGP_DIRTY)
- set_page_dirty(page);
}
/* Perhaps the file has been truncated since we checked */
- if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
+ if (sgp <= SGP_CACHE &&
((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
if (alloced) {
ClearPageDirty(page);
static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
+ gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
int error;
int ret = VM_FAULT_LOCKED;
spin_unlock(&inode->i_lock);
}
- error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
+ error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
+ gfp, vma->vm_mm, &ret);
if (error)
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
-
- if (ret & VM_FAULT_MAJOR) {
- count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
- }
return ret;
}
return -EPERM;
}
- return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+ return shmem_getpage(inode, index, pagep, SGP_WRITE);
}
static int
* and even mark them dirty, so it cannot exceed the max_blocks limit.
*/
if (!iter_is_iovec(to))
- sgp = SGP_DIRTY;
+ sgp = SGP_CACHE;
index = *ppos >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;
break;
}
- error = shmem_getpage(inode, index, &page, sgp, NULL);
+ error = shmem_getpage(inode, index, &page, sgp);
if (error) {
if (error == -EINVAL)
error = 0;
break;
}
- if (page)
+ if (page) {
+ if (sgp == SGP_CACHE)
+ set_page_dirty(page);
unlock_page(page);
+ }
/*
* We must evaluate after, since reads (unlike writes)
error = 0;
while (spd.nr_pages < nr_pages) {
- error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+ error = shmem_getpage(inode, index, &page, SGP_CACHE);
if (error)
break;
unlock_page(page);
page = spd.pages[page_nr];
if (!PageUptodate(page) || page->mapping != mapping) {
- error = shmem_getpage(inode, index, &page,
- SGP_CACHE, NULL);
+ error = shmem_getpage(inode, index, &page, SGP_CACHE);
if (error)
break;
unlock_page(page);
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
else
- error = shmem_getpage(inode, index, &page, SGP_FALLOC,
- NULL);
+ error = shmem_getpage(inode, index, &page, SGP_FALLOC);
if (error) {
/* Remove the !PageUptodate pages we added */
shmem_undo_range(inode,
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
- error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
+ error = shmem_getpage(inode, 0, &page, SGP_WRITE);
if (error) {
iput(inode);
return error;
return ERR_PTR(-ECHILD);
}
} else {
- error = shmem_getpage(inode, 0, &page, SGP_READ, NULL);
+ error = shmem_getpage(inode, 0, &page, SGP_READ);
if (error)
return ERR_PTR(error);
unlock_page(page);
int error;
BUG_ON(mapping->a_ops != &shmem_aops);
- error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+ error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+ gfp, NULL, NULL);
if (error)
page = ERR_PTR(error);
else
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
+static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
+ void **list);
+static inline void fixup_slab_list(struct kmem_cache *cachep,
+ struct kmem_cache_node *n, struct page *page,
+ void **list);
static int slab_early_init = 1;
#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
.name = "kmem_cache",
};
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
static void init_reap_node(int cpu)
{
- int node;
-
- node = next_node(cpu_to_mem(cpu), node_online_map);
- if (node == MAX_NUMNODES)
- node = first_node(node_online_map);
-
- per_cpu(slab_reap_node, cpu) = node;
+ per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
+ node_online_map);
}
static void next_reap_node(void)
{
int node = __this_cpu_read(slab_reap_node);
- node = next_node(node, node_online_map);
- if (unlikely(node >= MAX_NUMNODES))
- node = first_node(node_online_map);
+ node = next_node_in(node, node_online_map);
__this_cpu_write(slab_reap_node, node);
}
static inline struct alien_cache **alloc_alien_cache(int node,
int limit, gfp_t gfp)
{
- return (struct alien_cache **)BAD_ALIEN_MAGIC;
+ return NULL;
}
static inline void free_alien_cache(struct alien_cache **ac_ptr)
}
#endif
+static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+{
+ struct kmem_cache_node *n;
+
+ /*
+ * Set up the kmem_cache_node for cpu before we can
+ * begin anything. Make sure some other cpu on this
+ * node has not already allocated this
+ */
+ n = get_node(cachep, node);
+ if (n) {
+ spin_lock_irq(&n->list_lock);
+ n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
+ cachep->num;
+ spin_unlock_irq(&n->list_lock);
+
+ return 0;
+ }
+
+ n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
+ if (!n)
+ return -ENOMEM;
+
+ kmem_cache_node_init(n);
+ n->next_reap = jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
+
+ n->free_limit =
+ (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
+
+ /*
+ * The kmem_cache_nodes don't come and go as CPUs
+ * come and go. slab_mutex is sufficient
+ * protection here.
+ */
+ cachep->node[node] = n;
+
+ return 0;
+}
+
/*
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
*/
static int init_cache_node_node(int node)
{
+ int ret;
struct kmem_cache *cachep;
- struct kmem_cache_node *n;
- const size_t memsize = sizeof(struct kmem_cache_node);
list_for_each_entry(cachep, &slab_caches, list) {
- /*
- * Set up the kmem_cache_node for cpu before we can
- * begin anything. Make sure some other cpu on this
- * node has not already allocated this
- */
- n = get_node(cachep, node);
- if (!n) {
- n = kmalloc_node(memsize, GFP_KERNEL, node);
- if (!n)
- return -ENOMEM;
- kmem_cache_node_init(n);
- n->next_reap = jiffies + REAPTIMEOUT_NODE +
- ((unsigned long)cachep) % REAPTIMEOUT_NODE;
-
- /*
- * The kmem_cache_nodes don't come and go as CPUs
- * come and go. slab_mutex is sufficient
- * protection here.
- */
- cachep->node[node] = n;
- }
-
- spin_lock_irq(&n->list_lock);
- n->free_limit =
- (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
- spin_unlock_irq(&n->list_lock);
+ ret = init_cache_node(cachep, node, GFP_KERNEL);
+ if (ret)
+ return ret;
}
+
return 0;
}
-static inline int slabs_tofree(struct kmem_cache *cachep,
- struct kmem_cache_node *n)
+static int setup_kmem_cache_node(struct kmem_cache *cachep,
+ int node, gfp_t gfp, bool force_change)
{
- return (n->free_objects + cachep->num - 1) / cachep->num;
+ int ret = -ENOMEM;
+ struct kmem_cache_node *n;
+ struct array_cache *old_shared = NULL;
+ struct array_cache *new_shared = NULL;
+ struct alien_cache **new_alien = NULL;
+ LIST_HEAD(list);
+
+ if (use_alien_caches) {
+ new_alien = alloc_alien_cache(node, cachep->limit, gfp);
+ if (!new_alien)
+ goto fail;
+ }
+
+ if (cachep->shared) {
+ new_shared = alloc_arraycache(node,
+ cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
+ if (!new_shared)
+ goto fail;
+ }
+
+ ret = init_cache_node(cachep, node, gfp);
+ if (ret)
+ goto fail;
+
+ n = get_node(cachep, node);
+ spin_lock_irq(&n->list_lock);
+ if (n->shared && force_change) {
+ free_block(cachep, n->shared->entry,
+ n->shared->avail, node, &list);
+ n->shared->avail = 0;
+ }
+
+ if (!n->shared || force_change) {
+ old_shared = n->shared;
+ n->shared = new_shared;
+ new_shared = NULL;
+ }
+
+ if (!n->alien) {
+ n->alien = new_alien;
+ new_alien = NULL;
+ }
+
+ spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
+
+ /*
+ * To protect lockless access to n->shared during irq disabled context.
+ * If n->shared isn't NULL in irq disabled context, accessing to it is
+ * guaranteed to be valid until irq is re-enabled, because it will be
+ * freed after synchronize_sched().
+ */
+ if (force_change)
+ synchronize_sched();
+
+fail:
+ kfree(old_shared);
+ kfree(new_shared);
+ free_alien_cache(new_alien);
+
+ return ret;
}
static void cpuup_canceled(long cpu)
n = get_node(cachep, node);
if (!n)
continue;
- drain_freelist(cachep, n, slabs_tofree(cachep, n));
+ drain_freelist(cachep, n, INT_MAX);
}
}
static int cpuup_prepare(long cpu)
{
struct kmem_cache *cachep;
- struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu);
int err;
* array caches
*/
list_for_each_entry(cachep, &slab_caches, list) {
- struct array_cache *shared = NULL;
- struct alien_cache **alien = NULL;
-
- if (cachep->shared) {
- shared = alloc_arraycache(node,
- cachep->shared * cachep->batchcount,
- 0xbaadf00d, GFP_KERNEL);
- if (!shared)
- goto bad;
- }
- if (use_alien_caches) {
- alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
- if (!alien) {
- kfree(shared);
- goto bad;
- }
- }
- n = get_node(cachep, node);
- BUG_ON(!n);
-
- spin_lock_irq(&n->list_lock);
- if (!n->shared) {
- /*
- * We are serialised from CPU_DEAD or
- * CPU_UP_CANCELLED by the cpucontrol lock
- */
- n->shared = shared;
- shared = NULL;
- }
-#ifdef CONFIG_NUMA
- if (!n->alien) {
- n->alien = alien;
- alien = NULL;
- }
-#endif
- spin_unlock_irq(&n->list_lock);
- kfree(shared);
- free_alien_cache(alien);
+ err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
+ if (err)
+ goto bad;
}
return 0;
if (!n)
continue;
- drain_freelist(cachep, n, slabs_tofree(cachep, n));
+ drain_freelist(cachep, n, INT_MAX);
if (!list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial)) {
}
}
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+static void freelist_randomize(struct rnd_state *state, freelist_idx_t *list,
+ size_t count)
+{
+ size_t i;
+ unsigned int rand;
+
+ for (i = 0; i < count; i++)
+ list[i] = i;
+
+ /* Fisher-Yates shuffle */
+ for (i = count - 1; i > 0; i--) {
+ rand = prandom_u32_state(state);
+ rand %= (i + 1);
+ swap(list[i], list[rand]);
+ }
+}
+
+/* Create a random sequence per cache */
+static int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
+{
+ unsigned int seed, count = cachep->num;
+ struct rnd_state state;
+
+ if (count < 2)
+ return 0;
+
+ /* If it fails, we will just use the global lists */
+ cachep->random_seq = kcalloc(count, sizeof(freelist_idx_t), gfp);
+ if (!cachep->random_seq)
+ return -ENOMEM;
+
+ /* Get best entropy at this stage */
+ get_random_bytes_arch(&seed, sizeof(seed));
+ prandom_seed_state(&state, seed);
+
+ freelist_randomize(&state, cachep->random_seq, count);
+ return 0;
+}
+
+/* Destroy the per-cache random freelist sequence */
+static void cache_random_seq_destroy(struct kmem_cache *cachep)
+{
+ kfree(cachep->random_seq);
+ cachep->random_seq = NULL;
+}
+#else
+static inline int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
+{
+ return 0;
+}
+static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
+#endif /* CONFIG_SLAB_FREELIST_RANDOM */
+
+
/*
* Initialisation. Called after the page allocator have been initialised and
* before smp_init().
sizeof(struct rcu_head));
kmem_cache = &kmem_cache_boot;
- if (num_possible_nodes() == 1)
+ if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
use_alien_caches = 0;
for (i = 0; i < NUM_INIT_LISTS; i++)
/*
* Needed to avoid possible looping condition
- * in cache_grow()
+ * in cache_grow_begin()
*/
if (OFF_SLAB(freelist_cache))
continue;
cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
cachep->flags = flags;
cachep->allocflags = __GFP_COMP;
- if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
+ if (flags & SLAB_CACHE_DMA)
cachep->allocflags |= GFP_DMA;
cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
BUG_ON(irqs_disabled());
}
+static void check_mutex_acquired(void)
+{
+ BUG_ON(!mutex_is_locked(&slab_mutex));
+}
+
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
#else
#define check_irq_off() do { } while(0)
#define check_irq_on() do { } while(0)
+#define check_mutex_acquired() do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
-static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
- struct array_cache *ac,
- int force, int node);
+static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
+ int node, bool free_all, struct list_head *list)
+{
+ int tofree;
+
+ if (!ac || !ac->avail)
+ return;
+
+ tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
+ if (tofree > ac->avail)
+ tofree = (ac->avail + 1) / 2;
+
+ free_block(cachep, ac->entry, tofree, node, list);
+ ac->avail -= tofree;
+ memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
+}
static void do_drain(void *arg)
{
{
struct kmem_cache_node *n;
int node;
+ LIST_HEAD(list);
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
if (n->alien)
drain_alien_cache(cachep, n->alien);
- for_each_kmem_cache_node(cachep, node, n)
- drain_array(cachep, n, n->shared, 1, node);
+ for_each_kmem_cache_node(cachep, node, n) {
+ spin_lock_irq(&n->list_lock);
+ drain_array_locked(cachep, n->shared, node, true, &list);
+ spin_unlock_irq(&n->list_lock);
+
+ slabs_destroy(cachep, &list);
+ }
}
/*
check_irq_on();
for_each_kmem_cache_node(cachep, node, n) {
- drain_freelist(cachep, n, slabs_tofree(cachep, n));
+ drain_freelist(cachep, n, INT_MAX);
ret += !list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial);
int i;
struct kmem_cache_node *n;
+ cache_random_seq_destroy(cachep);
+
free_percpu(cachep->cpu_cache);
/* NUMA: free the node structures */
#endif
}
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+/* Hold information during a freelist initialization */
+union freelist_init_state {
+ struct {
+ unsigned int pos;
+ freelist_idx_t *list;
+ unsigned int count;
+ unsigned int rand;
+ };
+ struct rnd_state rnd_state;
+};
+
+/*
+ * Initialize the state based on the randomization methode available.
+ * return true if the pre-computed list is available, false otherwize.
+ */
+static bool freelist_state_initialize(union freelist_init_state *state,
+ struct kmem_cache *cachep,
+ unsigned int count)
+{
+ bool ret;
+ unsigned int rand;
+
+ /* Use best entropy available to define a random shift */
+ get_random_bytes_arch(&rand, sizeof(rand));
+
+ /* Use a random state if the pre-computed list is not available */
+ if (!cachep->random_seq) {
+ prandom_seed_state(&state->rnd_state, rand);
+ ret = false;
+ } else {
+ state->list = cachep->random_seq;
+ state->count = count;
+ state->pos = 0;
+ state->rand = rand;
+ ret = true;
+ }
+ return ret;
+}
+
+/* Get the next entry on the list and randomize it using a random shift */
+static freelist_idx_t next_random_slot(union freelist_init_state *state)
+{
+ return (state->list[state->pos++] + state->rand) % state->count;
+}
+
+/*
+ * Shuffle the freelist initialization state based on pre-computed lists.
+ * return true if the list was successfully shuffled, false otherwise.
+ */
+static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
+{
+ unsigned int objfreelist = 0, i, count = cachep->num;
+ union freelist_init_state state;
+ bool precomputed;
+
+ if (count < 2)
+ return false;
+
+ precomputed = freelist_state_initialize(&state, cachep, count);
+
+ /* Take a random entry as the objfreelist */
+ if (OBJFREELIST_SLAB(cachep)) {
+ if (!precomputed)
+ objfreelist = count - 1;
+ else
+ objfreelist = next_random_slot(&state);
+ page->freelist = index_to_obj(cachep, page, objfreelist) +
+ obj_offset(cachep);
+ count--;
+ }
+
+ /*
+ * On early boot, generate the list dynamically.
+ * Later use a pre-computed list for speed.
+ */
+ if (!precomputed) {
+ freelist_randomize(&state.rnd_state, page->freelist, count);
+ } else {
+ for (i = 0; i < count; i++)
+ set_free_obj(page, i, next_random_slot(&state));
+ }
+
+ if (OBJFREELIST_SLAB(cachep))
+ set_free_obj(page, cachep->num - 1, objfreelist);
+
+ return true;
+}
+#else
+static inline bool shuffle_freelist(struct kmem_cache *cachep,
+ struct page *page)
+{
+ return false;
+}
+#endif /* CONFIG_SLAB_FREELIST_RANDOM */
+
static void cache_init_objs(struct kmem_cache *cachep,
struct page *page)
{
int i;
void *objp;
+ bool shuffled;
cache_init_objs_debug(cachep, page);
- if (OBJFREELIST_SLAB(cachep)) {
+ /* Try to randomize the freelist if enabled */
+ shuffled = shuffle_freelist(cachep, page);
+
+ if (!shuffled && OBJFREELIST_SLAB(cachep)) {
page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
obj_offset(cachep);
}
kasan_poison_object_data(cachep, objp);
}
- set_free_obj(page, i, i);
- }
-}
-
-static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
-{
- if (CONFIG_ZONE_DMA_FLAG) {
- if (flags & GFP_DMA)
- BUG_ON(!(cachep->allocflags & GFP_DMA));
- else
- BUG_ON(cachep->allocflags & GFP_DMA);
+ if (!shuffled)
+ set_free_obj(page, i, i);
}
}
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
-static int cache_grow(struct kmem_cache *cachep,
- gfp_t flags, int nodeid, struct page *page)
+static struct page *cache_grow_begin(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
{
void *freelist;
size_t offset;
gfp_t local_flags;
+ int page_node;
struct kmem_cache_node *n;
+ struct page *page;
/*
* Be lazy and only check for valid flags here, keeping it out of the
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
- /* Take the node list lock to change the colour_next on this node */
check_irq_off();
- n = get_node(cachep, nodeid);
- spin_lock(&n->list_lock);
-
- /* Get colour for the slab, and cal the next value. */
- offset = n->colour_next;
- n->colour_next++;
- if (n->colour_next >= cachep->colour)
- n->colour_next = 0;
- spin_unlock(&n->list_lock);
-
- offset *= cachep->colour_off;
-
if (gfpflags_allow_blocking(local_flags))
local_irq_enable();
- /*
- * The test for missing atomic flag is performed here, rather than
- * the more obvious place, simply to reduce the critical path length
- * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
- * will eventually be caught here (where it matters).
- */
- kmem_flagcheck(cachep, flags);
-
/*
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
- if (!page)
- page = kmem_getpages(cachep, local_flags, nodeid);
+ page = kmem_getpages(cachep, local_flags, nodeid);
if (!page)
goto failed;
+ page_node = page_to_nid(page);
+ n = get_node(cachep, page_node);
+
+ /* Get colour for the slab, and cal the next value. */
+ n->colour_next++;
+ if (n->colour_next >= cachep->colour)
+ n->colour_next = 0;
+
+ offset = n->colour_next;
+ if (offset >= cachep->colour)
+ offset = 0;
+
+ offset *= cachep->colour_off;
+
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
- local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
+ local_flags & ~GFP_CONSTRAINT_MASK, page_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
- check_irq_off();
- spin_lock(&n->list_lock);
- /* Make slab active. */
- list_add_tail(&page->lru, &(n->slabs_free));
- STATS_INC_GROWN(cachep);
- n->free_objects += cachep->num;
- spin_unlock(&n->list_lock);
- return 1;
+ return page;
+
opps1:
kmem_freepages(cachep, page);
failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
- return 0;
+ return NULL;
+}
+
+static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+{
+ struct kmem_cache_node *n;
+ void *list = NULL;
+
+ check_irq_off();
+
+ if (!page)
+ return;
+
+ INIT_LIST_HEAD(&page->lru);
+ n = get_node(cachep, page_to_nid(page));
+
+ spin_lock(&n->list_lock);
+ if (!page->active)
+ list_add_tail(&page->lru, &(n->slabs_free));
+ else
+ fixup_slab_list(cachep, n, page, &list);
+ STATS_INC_GROWN(cachep);
+ n->free_objects += cachep->num - page->active;
+ spin_unlock(&n->list_lock);
+
+ fixup_objfreelist_debug(cachep, &list);
}
#if DEBUG
return obj;
}
+/*
+ * Slab list should be fixed up by fixup_slab_list() for existing slab
+ * or cache_grow_end() for new slab
+ */
+static __always_inline int alloc_block(struct kmem_cache *cachep,
+ struct array_cache *ac, struct page *page, int batchcount)
+{
+ /*
+ * There must be at least one object available for
+ * allocation.
+ */
+ BUG_ON(page->active >= cachep->num);
+
+ while (page->active < cachep->num && batchcount--) {
+ STATS_INC_ALLOCED(cachep);
+ STATS_INC_ACTIVE(cachep);
+ STATS_SET_HIGH(cachep);
+
+ ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+ }
+
+ return batchcount;
+}
+
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
int batchcount;
struct kmem_cache_node *n;
- struct array_cache *ac;
+ struct array_cache *ac, *shared;
int node;
void *list = NULL;
+ struct page *page;
check_irq_off();
node = numa_mem_id();
-retry:
ac = cpu_cache_get(cachep);
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
n = get_node(cachep, node);
BUG_ON(ac->avail > 0 || !n);
+ shared = READ_ONCE(n->shared);
+ if (!n->free_objects && (!shared || !shared->avail))
+ goto direct_grow;
+
spin_lock(&n->list_lock);
+ shared = READ_ONCE(n->shared);
/* See if we can refill from the shared array */
- if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
- n->shared->touched = 1;
+ if (shared && transfer_objects(ac, shared, batchcount)) {
+ shared->touched = 1;
goto alloc_done;
}
while (batchcount > 0) {
- struct page *page;
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
check_spinlock_acquired(cachep);
- /*
- * The slab was either on partial or free list so
- * there must be at least one object available for
- * allocation.
- */
- BUG_ON(page->active >= cachep->num);
-
- while (page->active < cachep->num && batchcount--) {
- STATS_INC_ALLOCED(cachep);
- STATS_INC_ACTIVE(cachep);
- STATS_SET_HIGH(cachep);
-
- ac->entry[ac->avail++] = slab_get_obj(cachep, page);
- }
-
+ batchcount = alloc_block(cachep, ac, page, batchcount);
fixup_slab_list(cachep, n, page, &list);
}
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
+direct_grow:
if (unlikely(!ac->avail)) {
- int x;
-
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
return obj;
}
- x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
+ page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
- /* cache_grow can reenable interrupts, then ac could change. */
+ /*
+ * cache_grow_begin() can reenable interrupts,
+ * then ac could change.
+ */
ac = cpu_cache_get(cachep);
- node = numa_mem_id();
+ if (!ac->avail && page)
+ alloc_block(cachep, ac, page, batchcount);
+ cache_grow_end(cachep, page);
- /* no objects in sight? abort */
- if (!x && ac->avail == 0)
+ if (!ac->avail)
return NULL;
-
- if (!ac->avail) /* objects refilled by interrupt? */
- goto retry;
}
ac->touched = 1;
gfp_t flags)
{
might_sleep_if(gfpflags_allow_blocking(flags));
-#if DEBUG
- kmem_flagcheck(cachep, flags);
-#endif
}
#if DEBUG
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
struct zonelist *zonelist;
- gfp_t local_flags;
struct zoneref *z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
void *obj = NULL;
+ struct page *page;
int nid;
unsigned int cpuset_mems_cookie;
if (flags & __GFP_THISNODE)
return NULL;
- local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
- struct page *page;
-
- if (gfpflags_allow_blocking(local_flags))
- local_irq_enable();
- kmem_flagcheck(cache, flags);
- page = kmem_getpages(cache, local_flags, numa_mem_id());
- if (gfpflags_allow_blocking(local_flags))
- local_irq_disable();
+ page = cache_grow_begin(cache, flags, numa_mem_id());
+ cache_grow_end(cache, page);
if (page) {
+ nid = page_to_nid(page);
+ obj = ____cache_alloc_node(cache,
+ gfp_exact_node(flags), nid);
+
/*
- * Insert into the appropriate per node queues
+ * Another processor may allocate the objects in
+ * the slab since we are not holding any locks.
*/
- nid = page_to_nid(page);
- if (cache_grow(cache, flags, nid, page)) {
- obj = ____cache_alloc_node(cache,
- gfp_exact_node(flags), nid);
- if (!obj)
- /*
- * Another processor may allocate the
- * objects in the slab since we are
- * not holding any locks.
- */
- goto retry;
- } else {
- /* cache_grow already freed obj */
- obj = NULL;
- }
+ if (!obj)
+ goto retry;
}
}
{
struct page *page;
struct kmem_cache_node *n;
- void *obj;
+ void *obj = NULL;
void *list = NULL;
- int x;
VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
n = get_node(cachep, nodeid);
BUG_ON(!n);
-retry:
check_irq_off();
spin_lock(&n->list_lock);
page = get_first_slab(n, false);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
- goto done;
+ return obj;
must_grow:
spin_unlock(&n->list_lock);
- x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
- if (x)
- goto retry;
-
- return fallback_alloc(cachep, flags);
+ page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
+ if (page) {
+ /* This slab isn't counted yet so don't update free_objects */
+ obj = slab_get_obj(cachep, page);
+ }
+ cache_grow_end(cachep, page);
-done:
- return obj;
+ return obj ? obj : fallback_alloc(cachep, flags);
}
static __always_inline void *
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
+ struct page *page;
+
+ n->free_objects += nr_objects;
for (i = 0; i < nr_objects; i++) {
void *objp;
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp);
STATS_DEC_ACTIVE(cachep);
- n->free_objects++;
/* fixup slab chains */
- if (page->active == 0) {
- if (n->free_objects > n->free_limit) {
- n->free_objects -= cachep->num;
- list_add_tail(&page->lru, list);
- } else {
- list_add(&page->lru, &n->slabs_free);
- }
- } else {
+ if (page->active == 0)
+ list_add(&page->lru, &n->slabs_free);
+ else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
list_add_tail(&page->lru, &n->slabs_partial);
}
}
+
+ while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
+ n->free_objects -= cachep->num;
+
+ page = list_last_entry(&n->slabs_free, struct page, lru);
+ list_del(&page->lru);
+ list_add(&page->lru, list);
+ }
}
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
/*
* This initializes kmem_cache_node or resizes various caches for all nodes.
*/
-static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
+static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
{
+ int ret;
int node;
struct kmem_cache_node *n;
- struct array_cache *new_shared;
- struct alien_cache **new_alien = NULL;
for_each_online_node(node) {
-
- if (use_alien_caches) {
- new_alien = alloc_alien_cache(node, cachep->limit, gfp);
- if (!new_alien)
- goto fail;
- }
-
- new_shared = NULL;
- if (cachep->shared) {
- new_shared = alloc_arraycache(node,
- cachep->shared*cachep->batchcount,
- 0xbaadf00d, gfp);
- if (!new_shared) {
- free_alien_cache(new_alien);
- goto fail;
- }
- }
-
- n = get_node(cachep, node);
- if (n) {
- struct array_cache *shared = n->shared;
- LIST_HEAD(list);
-
- spin_lock_irq(&n->list_lock);
-
- if (shared)
- free_block(cachep, shared->entry,
- shared->avail, node, &list);
-
- n->shared = new_shared;
- if (!n->alien) {
- n->alien = new_alien;
- new_alien = NULL;
- }
- n->free_limit = (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
- spin_unlock_irq(&n->list_lock);
- slabs_destroy(cachep, &list);
- kfree(shared);
- free_alien_cache(new_alien);
- continue;
- }
- n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
- if (!n) {
- free_alien_cache(new_alien);
- kfree(new_shared);
+ ret = setup_kmem_cache_node(cachep, node, gfp, true);
+ if (ret)
goto fail;
- }
- kmem_cache_node_init(n);
- n->next_reap = jiffies + REAPTIMEOUT_NODE +
- ((unsigned long)cachep) % REAPTIMEOUT_NODE;
- n->shared = new_shared;
- n->alien = new_alien;
- n->free_limit = (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
- cachep->node[node] = n;
}
+
return 0;
fail:
cachep->shared = shared;
if (!prev)
- goto alloc_node;
+ goto setup_node;
for_each_online_cpu(cpu) {
LIST_HEAD(list);
}
free_percpu(prev);
-alloc_node:
- return alloc_kmem_cache_node(cachep, gfp);
+setup_node:
+ return setup_kmem_cache_nodes(cachep, gfp);
}
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
int shared = 0;
int batchcount = 0;
+ err = cache_random_seq_create(cachep, gfp);
+ if (err)
+ goto end;
+
if (!is_root_cache(cachep)) {
struct kmem_cache *root = memcg_root_cache(cachep);
limit = root->limit;
batchcount = (limit + 1) / 2;
skip_setup:
err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+end:
if (err)
pr_err("enable_cpucache failed for %s, error %d\n",
cachep->name, -err);
* if drain_array() is used on the shared array.
*/
static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
- struct array_cache *ac, int force, int node)
+ struct array_cache *ac, int node)
{
LIST_HEAD(list);
- int tofree;
+
+ /* ac from n->shared can be freed if we don't hold the slab_mutex. */
+ check_mutex_acquired();
if (!ac || !ac->avail)
return;
- if (ac->touched && !force) {
+
+ if (ac->touched) {
ac->touched = 0;
- } else {
- spin_lock_irq(&n->list_lock);
- if (ac->avail) {
- tofree = force ? ac->avail : (ac->limit + 4) / 5;
- if (tofree > ac->avail)
- tofree = (ac->avail + 1) / 2;
- free_block(cachep, ac->entry, tofree, node, &list);
- ac->avail -= tofree;
- memmove(ac->entry, &(ac->entry[tofree]),
- sizeof(void *) * ac->avail);
- }
- spin_unlock_irq(&n->list_lock);
- slabs_destroy(cachep, &list);
+ return;
}
+
+ spin_lock_irq(&n->list_lock);
+ drain_array_locked(cachep, ac, node, false, &list);
+ spin_unlock_irq(&n->list_lock);
+
+ slabs_destroy(cachep, &list);
}
/**
reap_alien(searchp, n);
- drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
+ drain_array(searchp, n, cpu_cache_get(searchp), node);
/*
* These are racy checks but it does not matter
n->next_reap = jiffies + REAPTIMEOUT_NODE;
- drain_array(searchp, n, n->shared, 0, node);
+ drain_array(searchp, n, n->shared, node);
if (n->free_touched)
n->free_touched = 0;
tmp.counters = counters_new;
/*
* page->counters can cover frozen/inuse/objects as well
- * as page->_count. If we assign to ->counters directly
- * we run the risk of losing updates to page->_count, so
+ * as page->_refcount. If we assign to ->counters directly
+ * we run the risk of losing updates to page->_refcount, so
* be careful and only assign to the fields we need.
*/
page->frozen = tmp.frozen;
* may return off node objects because partial slabs are obtained
* from other nodes and filled up.
*
- * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
- * defrag_ratio = 1000) then every (well almost) allocation will
- * first attempt to defrag slab caches on other nodes. This means
- * scanning over all nodes to look for partial slabs which may be
- * expensive if we do it every time we are trying to find a slab
+ * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
+ * (which makes defrag_ratio = 1000) then every (well almost)
+ * allocation will first attempt to defrag slab caches on other nodes.
+ * This means scanning over all nodes to look for partial slabs which
+ * may be expensive if we do it every time we are trying to find a slab
* with available objects.
*/
if (!s->remote_node_defrag_ratio ||
* s->cpu_partial is checked locklessly (see put_cpu_partial),
* so we have to make sure the change is visible.
*/
- kick_all_cpus_sync();
+ synchronize_sched();
}
flush_all(s);
/* May fail (-ENOMEM) if radix-tree node allocation failed. */
__SetPageLocked(new_page);
- SetPageSwapBacked(new_page);
+ __SetPageSwapBacked(new_page);
err = __add_to_swap_cache(new_page, entry);
if (likely(!err)) {
radix_tree_preload_end();
return new_page;
}
radix_tree_preload_end();
- ClearPageSwapBacked(new_page);
__ClearPageLocked(new_page);
/*
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
return __page_rmapping(page);
}
+/*
+ * Return true if this page is mapped into pagetables.
+ * For compound page it returns true if any subpage of compound page is mapped.
+ */
+bool page_mapped(struct page *page)
+{
+ int i;
+
+ if (likely(!PageCompound(page)))
+ return atomic_read(&page->_mapcount) >= 0;
+ page = compound_head(page);
+ if (atomic_read(compound_mapcount_ptr(page)) >= 0)
+ return true;
+ if (PageHuge(page))
+ return false;
+ for (i = 0; i < hpage_nr_pages(page); i++) {
+ if (atomic_read(&page[i]._mapcount) >= 0)
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(page_mapped);
+
struct anon_vma *page_anon_vma(struct page *page)
{
unsigned long mapping;
*
* Reversing the order of the tests ensures such a situation cannot
* escape unnoticed. The smp_rmb is needed to ensure the page->flags
- * load is not satisfied before that of page->_count.
+ * load is not satisfied before that of page->_refcount.
*
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
!list_empty(src); scan++) {
struct page *page;
- int nr_pages;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
switch (__isolate_lru_page(page, mode)) {
case 0:
- nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
+ nr_taken += hpage_nr_pages(page);
list_move(&page->lru, dst);
- nr_taken += nr_pages;
break;
case -EBUSY:
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
&nr_scanned, sc, isolate_mode, lru);
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+ update_lru_size(lruvec, lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+ reclaim_stat->recent_scanned[file] += nr_taken;
if (global_reclaim(sc)) {
__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
spin_lock_irq(&zone->lru_lock);
- reclaim_stat->recent_scanned[file] += nr_taken;
-
if (global_reclaim(sc)) {
if (current_is_kswapd())
__count_zone_vm_events(PGSTEAL_KSWAPD, zone,
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
*
- * The downside is that we have to touch page->_count against each page.
+ * The downside is that we have to touch page->_refcount against each page.
* But we had to alter page->flags anyway.
*/
SetPageLRU(page);
nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+ update_lru_size(lruvec, lru, nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
pgmoved += nr_pages;
list_add(&page->lru, pages_to_free);
}
}
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+
if (!is_active_lru(lru))
__count_vm_events(PGDEACTIVATE, pgmoved);
}
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
- if (global_reclaim(sc))
- __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
+ update_lru_size(lruvec, lru, -nr_taken);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
+ if (global_reclaim(sc))
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
- __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
spin_unlock_irq(&zone->lru_lock);
while (!list_empty(&l_hold)) {
#endif
#ifdef CONFIG_NUMA
-/*
- * zonelist = the list of zones passed to the allocator
- * z = the zone from which the allocation occurred.
- *
- * Must be called with interrupts disabled.
- *
- * When __GFP_OTHER_NODE is set assume the node of the preferred
- * zone is the local node. This is useful for daemons who allocate
- * memory on behalf of other processes.
- */
-void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
-{
- if (z->zone_pgdat == preferred_zone->zone_pgdat) {
- __inc_zone_state(z, NUMA_HIT);
- } else {
- __inc_zone_state(z, NUMA_MISS);
- __inc_zone_state(preferred_zone, NUMA_FOREIGN);
- }
- if (z->node == ((flags & __GFP_OTHER_NODE) ?
- preferred_zone->node : numa_node_id()))
- __inc_zone_state(z, NUMA_LOCAL);
- else
- __inc_zone_state(z, NUMA_OTHER);
-}
-
/*
* Determine the per node value of a stat item.
*/
unsigned long node_page_state(int node, enum zone_stat_item item)
{
struct zone *zones = NODE_DATA(node)->node_zones;
+ int i;
+ unsigned long count = 0;
- return
-#ifdef CONFIG_ZONE_DMA
- zone_page_state(&zones[ZONE_DMA], item) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
- zone_page_state(&zones[ZONE_DMA32], item) +
-#endif
-#ifdef CONFIG_HIGHMEM
- zone_page_state(&zones[ZONE_HIGHMEM], item) +
-#endif
- zone_page_state(&zones[ZONE_NORMAL], item) +
- zone_page_state(&zones[ZONE_MOVABLE], item);
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ count += zone_page_state(zones + i, item);
+
+ return count;
}
#endif
if (!memmap_valid_within(pfn, page, zone))
continue;
+ if (page_zone(page) != zone)
+ continue;
+
mtype = get_pageblock_migratetype(page);
if (mtype < MIGRATE_TYPES)
block_end_pfn = min(block_end_pfn, end_pfn);
page = pfn_to_page(pfn);
- pageblock_mt = get_pfnblock_migratetype(page, pfn);
+ pageblock_mt = get_pageblock_migratetype(page);
for (; pfn < block_end_pfn; pfn++) {
if (!pfn_valid_within(pfn))
continue;
page = pfn_to_page(pfn);
+
+ if (page_zone(page) != zone)
+ continue;
+
if (PageBuddy(page)) {
pfn += (1UL << page_order(page)) - 1;
continue;
int sysctl_stat_interval __read_mostly = HZ;
static cpumask_var_t cpu_stat_off;
+#ifdef CONFIG_PROC_FS
+static void refresh_vm_stats(struct work_struct *work)
+{
+ refresh_cpu_vm_stats(true);
+}
+
+int vmstat_refresh(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ long val;
+ int err;
+ int i;
+
+ /*
+ * The regular update, every sysctl_stat_interval, may come later
+ * than expected: leaving a significant amount in per_cpu buckets.
+ * This is particularly misleading when checking a quantity of HUGE
+ * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
+ * which can equally be echo'ed to or cat'ted from (by root),
+ * can be used to update the stats just before reading them.
+ *
+ * Oh, and since global_page_state() etc. are so careful to hide
+ * transiently negative values, report an error here if any of
+ * the stats is negative, so we know to go looking for imbalance.
+ */
+ err = schedule_on_each_cpu(refresh_vm_stats);
+ if (err)
+ return err;
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+ val = atomic_long_read(&vm_stat[i]);
+ if (val < 0) {
+ switch (i) {
+ case NR_ALLOC_BATCH:
+ case NR_PAGES_SCANNED:
+ /*
+ * These are often seen to go negative in
+ * recent kernels, but not to go permanently
+ * negative. Whilst it would be nicer not to
+ * have exceptions, rooting them out would be
+ * another task, of rather low priority.
+ */
+ break;
+ default:
+ pr_warn("%s: %s %ld\n",
+ __func__, vmstat_text[i], val);
+ err = -EINVAL;
+ break;
+ }
+ }
+ }
+ if (err)
+ return err;
+ if (write)
+ *ppos += *lenp;
+ else
+ *lenp = 0;
+ return 0;
+}
+#endif /* CONFIG_PROC_FS */
+
static void vmstat_update(struct work_struct *w)
{
if (refresh_cpu_vm_stats(true)) {
cpu_relax();
}
- ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE);
+ ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE);
if (unlikely(ret != ibmr->sg_len))
return ret < 0 ? ret : -EINVAL;
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
struct msghdr msg_sys;
- struct timespec end_time;
+ struct timespec64 end_time;
+ struct timespec64 timeout64;
if (timeout &&
poll_select_set_timeout(&end_time, timeout->tv_sec,
flags |= MSG_DONTWAIT;
if (timeout) {
- ktime_get_ts(timeout);
- *timeout = timespec_sub(end_time, *timeout);
+ ktime_get_ts64(&timeout64);
+ *timeout = timespec64_to_timespec(
+ timespec64_sub(end_time, timeout64));
if (timeout->tv_sec < 0) {
timeout->tv_sec = timeout->tv_nsec = 0;
break;
return -ENOMEM;
}
- n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
if (unlikely(n != frmr->sg_nents)) {
pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
__func__, frmr->fr_mr, n, frmr->sg_nents);
}
atomic_inc(&xprt->sc_dma_used);
- n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
if (unlikely(n != frmr->sg_nents)) {
pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
frmr->mr, n, frmr->sg_nents);
struct skb_shared_info *sh = skb_shinfo(skb);
int page_offset;
- atomic_inc(&page->_count);
+ page_ref_inc(page);
page_offset = ptr - page_address(page);
skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size);
}
# ---------------------------------------------------------------------------
DTC ?= $(objtree)/scripts/dtc/dtc
+# Disable noisy checks by default
+ifeq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),)
+DTC_FLAGS += -Wno-unit_address_vs_reg
+endif
+
# Generate an assembly file to wrap the output of the device tree compiler
quiet_cmd_dt_S_dtb= DTB $@
cmd_dt_S_dtb= \
new = getsizes(sys.argv[2])
grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0
delta, common = [], {}
+otot, ntot = 0, 0
for a in old:
if a in new:
common[a] = 1
for name in old:
+ otot += old[name]
if name not in common:
remove += 1
down += old[name]
delta.append((-old[name], name))
for name in new:
+ ntot += new[name]
if name not in common:
add += 1
up += new[name]
print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta"))
for d, n in delta:
if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d))
+
+print("Total: Before=%d, After=%d, chg %f%%" % \
+ (otot, ntot, (ntot - otot)*100/otot))
# (c) 2014, Sasha Levin <sasha.levin@oracle.com>
#set -x
-if [[ $# != 2 ]]; then
+if [[ $# < 2 ]]; then
echo "Usage:"
- echo " $0 [vmlinux] [base path]"
+ echo " $0 [vmlinux] [base path] [modules path]"
exit 1
fi
vmlinux=$1
basepath=$2
+modpath=$3
declare -A cache
+declare -A modcache
parse_symbol() {
# The structure of symbol at this point is:
# For example:
# do_basic_setup+0x9c/0xbf
+ if [[ $module == "" ]] ; then
+ local objfile=$vmlinux
+ elif [[ "${modcache[$module]+isset}" == "isset" ]]; then
+ local objfile=${modcache[$module]}
+ else
+ [[ $modpath == "" ]] && return
+ local objfile=$(find "$modpath" -name $module.ko -print -quit)
+ [[ $objfile == "" ]] && return
+ modcache[$module]=$objfile
+ fi
+
# Remove the englobing parenthesis
symbol=${symbol#\(}
symbol=${symbol%\)}
# Use 'nm vmlinux' to figure out the base address of said symbol.
# It's actually faster to call it every time than to load it
# all into bash.
- if [[ "${cache[$name]+isset}" == "isset" ]]; then
- local base_addr=${cache[$name]}
+ if [[ "${cache[$module,$name]+isset}" == "isset" ]]; then
+ local base_addr=${cache[$module,$name]}
else
- local base_addr=$(nm "$vmlinux" | grep -i ' t ' | awk "/ $name\$/ {print \$1}" | head -n1)
- cache["$name"]="$base_addr"
+ local base_addr=$(nm "$objfile" | grep -i ' t ' | awk "/ $name\$/ {print \$1}" | head -n1)
+ cache[$module,$name]="$base_addr"
fi
# Let's start doing the math to get the exact address into the
# symbol. First, strip out the symbol total length.
local address=$(printf "%x\n" "$expr")
# Pass it to addr2line to get filename and line number
- # Could get more than one result
- if [[ "${cache[$address]+isset}" == "isset" ]]; then
- local code=${cache[$address]}
+ # Could get more than one result
+ if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then
+ local code=${cache[$module,$address]}
else
- local code=$(addr2line -i -e "$vmlinux" "$address")
- cache[$address]=$code
+ local code=$(addr2line -i -e "$objfile" "$address")
+ cache[$module,$address]=$code
fi
# addr2line doesn't return a proper error code if it fails, so
fi
done
- # The symbol is the last element, process it
- symbol=${words[$last]}
+ if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
+ module=${words[$last]}
+ module=${module#\[}
+ module=${module%\]}
+ symbol=${words[$last-1]}
+ unset words[$last-1]
+ else
+ # The symbol is the last element, process it
+ symbol=${words[$last]}
+ module=
+ fi
+
unset words[$last]
parse_symbol # modifies $symbol
# Add up the line number to the symbol
- echo "${words[@]}" "$symbol"
+ echo "${words[@]}" "$symbol $module"
}
while read line; do
handle_line "$line"
# Is it a code line?
elif [[ $line == *Code:* ]]; then
- decode_code "$line"
- else
+ decode_code "$line"
+ else
# Nothing special in this line, show it as is
echo "$line"
fi
}
NODE_ERROR(node_name_format, NULL, &node_name_chars);
+static void check_unit_address_vs_reg(struct check *c, struct node *dt,
+ struct node *node)
+{
+ const char *unitname = get_unitname(node);
+ struct property *prop = get_property(node, "reg");
+
+ if (!prop) {
+ prop = get_property(node, "ranges");
+ if (prop && !prop->val.len)
+ prop = NULL;
+ }
+
+ if (prop) {
+ if (!unitname[0])
+ FAIL(c, "Node %s has a reg or ranges property, but no unit name",
+ node->fullpath);
+ } else {
+ if (unitname[0])
+ FAIL(c, "Node %s has a unit name, but no reg property",
+ node->fullpath);
+ }
+}
+NODE_WARNING(unit_address_vs_reg, NULL);
+
static void check_property_name_chars(struct check *c, struct node *dt,
struct node *node, struct property *prop)
{
&addr_size_cells, ®_format, &ranges_format,
+ &unit_address_vs_reg,
+
&avoid_default_addr_size,
&obsolete_chosen_interrupt_controller,
if (version >= 3) {
uint32_t size_str = fdt32_to_cpu(fdt->size_dt_strings);
- if (off_str+size_str > totalsize)
+ if ((off_str+size_str < off_str) || (off_str+size_str > totalsize))
die("String table extends past total size\n");
inbuf_init(&strbuf, blob + off_str, blob + off_str + size_str);
} else {
if (version >= 17) {
size_dt = fdt32_to_cpu(fdt->size_dt_struct);
- if (off_dt+size_dt > totalsize)
+ if ((off_dt+size_dt < off_dt) || (off_dt+size_dt > totalsize))
die("Structure block extends past total size\n");
}
prop = fdt_getprop(fdt, nodeoffset, "compatible", &len);
if (!prop)
return len;
- if (fdt_stringlist_contains(prop, len, compatible))
- return 0;
- else
- return 1;
+
+ return !fdt_stringlist_contains(prop, len, compatible);
}
int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
-#define DTC_VERSION "DTC 1.4.1-gb06e55c8"
+#define DTC_VERSION "DTC 1.4.1-g53bf130b"
static int all_symbols = 0;
static int absolute_percpu = 0;
static char symbol_prefix_char = '\0';
-static unsigned long long kernel_start_addr = 0;
static int base_relative = 0;
int token_profit[0x10000];
static char *special_suffixes[] = {
"_veneer", /* arm */
+ "_from_arm", /* arm */
+ "_from_thumb", /* arm */
NULL };
int i;
char *sym_name = (char *)s->sym + 1;
-
- if (s->addr < kernel_start_addr)
- return 0;
-
/* skip prefix char */
if (symbol_prefix_char && *sym_name == symbol_prefix_char)
sym_name++;
if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
p++;
symbol_prefix_char = *p;
- } else if (strncmp(argv[i], "--page-offset=", 14) == 0) {
- const char *p = &argv[i][14];
- kernel_start_addr = strtoull(p, NULL, 16);
} else if (strcmp(argv[i], "--base-relative") == 0)
base_relative = 1;
else
kallsymopt="${kallsymopt} --all-symbols"
fi
- if [ -n "${CONFIG_ARM}" ] && [ -z "${CONFIG_XIP_KERNEL}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then
- kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET"
- fi
-
if [ -n "${CONFIG_KALLSYMS_ABSOLUTE_PERCPU}" ]; then
kallsymopt="${kallsymopt} --absolute-percpu"
fi
fetaure||feature
fetaures||features
fileystem||filesystem
+fimware||firmware
finanize||finalize
findn||find
finilizes||finalizes
--- /dev/null
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_POWERPC_MAX
+#ifdef __powerpc64__
+ #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+#else
+ #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+#endif
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static const char *reg_names[] = {
+ [PERF_REG_POWERPC_R0] = "r0",
+ [PERF_REG_POWERPC_R1] = "r1",
+ [PERF_REG_POWERPC_R2] = "r2",
+ [PERF_REG_POWERPC_R3] = "r3",
+ [PERF_REG_POWERPC_R4] = "r4",
+ [PERF_REG_POWERPC_R5] = "r5",
+ [PERF_REG_POWERPC_R6] = "r6",
+ [PERF_REG_POWERPC_R7] = "r7",
+ [PERF_REG_POWERPC_R8] = "r8",
+ [PERF_REG_POWERPC_R9] = "r9",
+ [PERF_REG_POWERPC_R10] = "r10",
+ [PERF_REG_POWERPC_R11] = "r11",
+ [PERF_REG_POWERPC_R12] = "r12",
+ [PERF_REG_POWERPC_R13] = "r13",
+ [PERF_REG_POWERPC_R14] = "r14",
+ [PERF_REG_POWERPC_R15] = "r15",
+ [PERF_REG_POWERPC_R16] = "r16",
+ [PERF_REG_POWERPC_R17] = "r17",
+ [PERF_REG_POWERPC_R18] = "r18",
+ [PERF_REG_POWERPC_R19] = "r19",
+ [PERF_REG_POWERPC_R20] = "r20",
+ [PERF_REG_POWERPC_R21] = "r21",
+ [PERF_REG_POWERPC_R22] = "r22",
+ [PERF_REG_POWERPC_R23] = "r23",
+ [PERF_REG_POWERPC_R24] = "r24",
+ [PERF_REG_POWERPC_R25] = "r25",
+ [PERF_REG_POWERPC_R26] = "r26",
+ [PERF_REG_POWERPC_R27] = "r27",
+ [PERF_REG_POWERPC_R28] = "r28",
+ [PERF_REG_POWERPC_R29] = "r29",
+ [PERF_REG_POWERPC_R30] = "r30",
+ [PERF_REG_POWERPC_R31] = "r31",
+ [PERF_REG_POWERPC_NIP] = "nip",
+ [PERF_REG_POWERPC_MSR] = "msr",
+ [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+ [PERF_REG_POWERPC_CTR] = "ctr",
+ [PERF_REG_POWERPC_LINK] = "link",
+ [PERF_REG_POWERPC_XER] = "xer",
+ [PERF_REG_POWERPC_CCR] = "ccr",
+ [PERF_REG_POWERPC_SOFTE] = "softe",
+ [PERF_REG_POWERPC_TRAP] = "trap",
+ [PERF_REG_POWERPC_DAR] = "dar",
+ [PERF_REG_POWERPC_DSISR] = "dsisr"
+};
+
+static inline const char *perf_reg_name(int id)
+{
+ return reg_names[id];
+}
+#endif /* ARCH_PERF_REGS_H */
libperf-y += header.o
libperf-y += sym-handling.o
libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
--- /dev/null
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(r0, PERF_REG_POWERPC_R0),
+ SMPL_REG(r1, PERF_REG_POWERPC_R1),
+ SMPL_REG(r2, PERF_REG_POWERPC_R2),
+ SMPL_REG(r3, PERF_REG_POWERPC_R3),
+ SMPL_REG(r4, PERF_REG_POWERPC_R4),
+ SMPL_REG(r5, PERF_REG_POWERPC_R5),
+ SMPL_REG(r6, PERF_REG_POWERPC_R6),
+ SMPL_REG(r7, PERF_REG_POWERPC_R7),
+ SMPL_REG(r8, PERF_REG_POWERPC_R8),
+ SMPL_REG(r9, PERF_REG_POWERPC_R9),
+ SMPL_REG(r10, PERF_REG_POWERPC_R10),
+ SMPL_REG(r11, PERF_REG_POWERPC_R11),
+ SMPL_REG(r12, PERF_REG_POWERPC_R12),
+ SMPL_REG(r13, PERF_REG_POWERPC_R13),
+ SMPL_REG(r14, PERF_REG_POWERPC_R14),
+ SMPL_REG(r15, PERF_REG_POWERPC_R15),
+ SMPL_REG(r16, PERF_REG_POWERPC_R16),
+ SMPL_REG(r17, PERF_REG_POWERPC_R17),
+ SMPL_REG(r18, PERF_REG_POWERPC_R18),
+ SMPL_REG(r19, PERF_REG_POWERPC_R19),
+ SMPL_REG(r20, PERF_REG_POWERPC_R20),
+ SMPL_REG(r21, PERF_REG_POWERPC_R21),
+ SMPL_REG(r22, PERF_REG_POWERPC_R22),
+ SMPL_REG(r23, PERF_REG_POWERPC_R23),
+ SMPL_REG(r24, PERF_REG_POWERPC_R24),
+ SMPL_REG(r25, PERF_REG_POWERPC_R25),
+ SMPL_REG(r26, PERF_REG_POWERPC_R26),
+ SMPL_REG(r27, PERF_REG_POWERPC_R27),
+ SMPL_REG(r28, PERF_REG_POWERPC_R28),
+ SMPL_REG(r29, PERF_REG_POWERPC_R29),
+ SMPL_REG(r30, PERF_REG_POWERPC_R30),
+ SMPL_REG(r31, PERF_REG_POWERPC_R31),
+ SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+ SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+ SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+ SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+ SMPL_REG(link, PERF_REG_POWERPC_LINK),
+ SMPL_REG(xer, PERF_REG_POWERPC_XER),
+ SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+ SMPL_REG(softe, PERF_REG_POWERPC_SOFTE),
+ SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+ SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+ SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+ SMPL_REG_END
+};
--- /dev/null
+/*
+ * Copyright 2016 Chandan Kumar, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <errno.h>
+#include <libunwind.h>
+#include <asm/perf_regs.h>
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_PPC64_R0:
+ return PERF_REG_POWERPC_R0;
+ case UNW_PPC64_R1:
+ return PERF_REG_POWERPC_R1;
+ case UNW_PPC64_R2:
+ return PERF_REG_POWERPC_R2;
+ case UNW_PPC64_R3:
+ return PERF_REG_POWERPC_R3;
+ case UNW_PPC64_R4:
+ return PERF_REG_POWERPC_R4;
+ case UNW_PPC64_R5:
+ return PERF_REG_POWERPC_R5;
+ case UNW_PPC64_R6:
+ return PERF_REG_POWERPC_R6;
+ case UNW_PPC64_R7:
+ return PERF_REG_POWERPC_R7;
+ case UNW_PPC64_R8:
+ return PERF_REG_POWERPC_R8;
+ case UNW_PPC64_R9:
+ return PERF_REG_POWERPC_R9;
+ case UNW_PPC64_R10:
+ return PERF_REG_POWERPC_R10;
+ case UNW_PPC64_R11:
+ return PERF_REG_POWERPC_R11;
+ case UNW_PPC64_R12:
+ return PERF_REG_POWERPC_R12;
+ case UNW_PPC64_R13:
+ return PERF_REG_POWERPC_R13;
+ case UNW_PPC64_R14:
+ return PERF_REG_POWERPC_R14;
+ case UNW_PPC64_R15:
+ return PERF_REG_POWERPC_R15;
+ case UNW_PPC64_R16:
+ return PERF_REG_POWERPC_R16;
+ case UNW_PPC64_R17:
+ return PERF_REG_POWERPC_R17;
+ case UNW_PPC64_R18:
+ return PERF_REG_POWERPC_R18;
+ case UNW_PPC64_R19:
+ return PERF_REG_POWERPC_R19;
+ case UNW_PPC64_R20:
+ return PERF_REG_POWERPC_R20;
+ case UNW_PPC64_R21:
+ return PERF_REG_POWERPC_R21;
+ case UNW_PPC64_R22:
+ return PERF_REG_POWERPC_R22;
+ case UNW_PPC64_R23:
+ return PERF_REG_POWERPC_R23;
+ case UNW_PPC64_R24:
+ return PERF_REG_POWERPC_R24;
+ case UNW_PPC64_R25:
+ return PERF_REG_POWERPC_R25;
+ case UNW_PPC64_R26:
+ return PERF_REG_POWERPC_R26;
+ case UNW_PPC64_R27:
+ return PERF_REG_POWERPC_R27;
+ case UNW_PPC64_R28:
+ return PERF_REG_POWERPC_R28;
+ case UNW_PPC64_R29:
+ return PERF_REG_POWERPC_R29;
+ case UNW_PPC64_R30:
+ return PERF_REG_POWERPC_R30;
+ case UNW_PPC64_R31:
+ return PERF_REG_POWERPC_R31;
+ case UNW_PPC64_LR:
+ return PERF_REG_POWERPC_LINK;
+ case UNW_PPC64_CTR:
+ return PERF_REG_POWERPC_CTR;
+ case UNW_PPC64_XER:
+ return PERF_REG_POWERPC_XER;
+ case UNW_PPC64_NIP:
+ return PERF_REG_POWERPC_NIP;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+ return -EINVAL;
+}
NO_PERF_REGS := 1
+# Additional ARCH settings for ppc
+ifeq ($(ARCH),powerpc)
+ NO_PERF_REGS := 0
+ LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
+endif
+
# Additional ARCH settings for x86
ifeq ($(ARCH),x86)
$(call detected,CONFIG_X86)
int i, idx = 0;
u64 mask = regs->mask;
- if (regs->cache_mask & (1 << id))
+ if (regs->cache_mask & (1ULL << id))
goto out;
- if (!(mask & (1 << id)))
+ if (!(mask & (1ULL << id)))
return -EINVAL;
for (i = 0; i < id; i++) {
- if (mask & (1 << i))
+ if (mask & (1ULL << i))
idx++;
}
- regs->cache_mask |= (1 << id);
+ regs->cache_mask |= (1ULL << id);
regs->cache_regs[id] = regs->regs[idx];
out:
SUB_DIRS = benchmarks \
copyloops \
+ context_switch \
dscr \
mm \
pmu \
--- /dev/null
+TEST_PROGS := cp_abort
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c ../utils.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS)
--- /dev/null
+/*
+ * Adapted from Anton Blanchard's context switch microbenchmark.
+ *
+ * Copyright 2009, Anton Blanchard, IBM Corporation.
+ * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This program tests the copy paste abort functionality of a P9
+ * (or later) by setting up two processes on the same CPU, one
+ * which executes the copy instruction and the other which
+ * executes paste.
+ *
+ * The paste instruction should never succeed, as the cp_abort
+ * instruction is called by the kernel during a context switch.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include "utils.h"
+#include <sched.h>
+
+#define READ_FD 0
+#define WRITE_FD 1
+
+#define NUM_LOOPS 1000
+
+/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define PASTE(RA, RB, L, RC) \
+ .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
+
+int paste(void *i)
+{
+ int cr;
+
+ asm volatile(str(PASTE(0, %1, 1, 1))";"
+ "mfcr %0;"
+ : "=r" (cr)
+ : "b" (i)
+ : "memory"
+ );
+ return cr;
+}
+
+/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define COPY(RA, RB, L) \
+ .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
+
+void copy(void *i)
+{
+ asm volatile(str(COPY(0, %0, 1))";"
+ :
+ : "b" (i)
+ : "memory"
+ );
+}
+
+int test_cp_abort(void)
+{
+ /* 128 bytes for a full cache line */
+ char buf[128] __cacheline_aligned;
+ cpu_set_t cpuset;
+ int fd1[2], fd2[2], pid;
+ char c;
+
+ /* only run this test on a P9 or later */
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+ /*
+ * Run both processes on the same CPU, so that copy is more likely
+ * to leak into a paste.
+ */
+ CPU_ZERO(&cpuset);
+ CPU_SET(pick_online_cpu(), &cpuset);
+ FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset));
+
+ FAIL_IF(pipe(fd1) || pipe(fd2));
+
+ pid = fork();
+ FAIL_IF(pid < 0);
+
+ if (!pid) {
+ for (int i = 0; i < NUM_LOOPS; i++) {
+ FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1);
+ FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1);
+ /* A paste succeeds if CR0 EQ bit is set */
+ FAIL_IF(paste(buf) & 0x20000000);
+ }
+ } else {
+ for (int i = 0; i < NUM_LOOPS; i++) {
+ FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1);
+ copy(buf);
+ FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));
+ }
+ }
+ return 0;
+
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_cp_abort, "cp_abort");
+}
want_fault |= (subpage == ((page + 1) % 16));
if (faulted != want_fault) {
- printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n",
+ printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n",
addr, page, subpage, write,
want_fault ? "fault" : "pass",
faulted ? "fault" : "pass");
if (faulted) {
if (dar != addr) {
- printf("Fault expected at 0x%p and happened at 0x%p !\n",
+ printf("Fault expected at %p and happened at %p !\n",
addr, dar);
}
faulted = 0;
mallocblock = (void *)align;
- printf("allocated malloc block of 0x%lx bytes at 0x%p\n",
+ printf("allocated malloc block of 0x%lx bytes at %p\n",
mallocsize, mallocblock);
printf("testing malloc block...\n");
perror("failed to map file");
return 1;
}
- printf("allocated %s for 0x%lx bytes at 0x%p\n",
+ printf("allocated %s for 0x%lx bytes at %p\n",
file_name, filesize, fileblock);
printf("testing file map...\n");
int main(int argc, char *argv[])
{
- test_harness(test_anon, "subpage_prot_anon");
+ int rc;
+
+ rc = test_harness(test_anon, "subpage_prot_anon");
+ if (rc)
+ return rc;
if (argc > 1)
file_name = argv[1];
else
file_name = "tempfile";
- test_harness(test_file, "subpage_prot_file");
-
- return 0;
+ return test_harness(test_file, "subpage_prot_file");
}
#include <sys/ioctl.h>
#include "trace.h"
-#include "reg.h"
#include "ebb.h"
+++ /dev/null
-/*
- * Copyright 2014, Michael Ellerman, IBM Corp.
- * Licensed under GPLv2.
- */
-
-#ifndef _SELFTESTS_POWERPC_REG_H
-#define _SELFTESTS_POWERPC_REG_H
-
-#define __stringify_1(x) #x
-#define __stringify(x) __stringify_1(x)
-
-#define mfspr(rn) ({unsigned long rval; \
- asm volatile("mfspr %0," __stringify(rn) \
- : "=r" (rval)); rval; })
-#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \
- : "r" ((unsigned long)(v)) \
- : "memory")
-
-#define mb() asm volatile("sync" : : : "memory");
-
-#define SPRN_MMCR2 769
-#define SPRN_MMCRA 770
-#define SPRN_MMCR0 779
-#define MMCR0_PMAO 0x00000080
-#define MMCR0_PMAE 0x04000000
-#define MMCR0_FC 0x80000000
-#define SPRN_EBBHR 804
-#define SPRN_EBBRR 805
-#define SPRN_BESCR 806 /* Branch event status & control register */
-#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */
-#define SPRN_BESCRSU 801 /* Branch event status & control set upper */
-#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */
-#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */
-
-#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */
-#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */
-
-#define SPRN_PMC1 771
-#define SPRN_PMC2 772
-#define SPRN_PMC3 773
-#define SPRN_PMC4 774
-#define SPRN_PMC5 775
-#define SPRN_PMC6 776
-
-#define SPRN_SIAR 780
-#define SPRN_SDAR 781
-#define SPRN_SIER 768
-
-#endif /* _SELFTESTS_POWERPC_REG_H */
#include <stdlib.h>
#include "ebb.h"
-#include "reg.h"
/*
--- /dev/null
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," _str(rn) \
+ : "=r" (rval)); rval; })
+#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
+#define mb() asm volatile("sync" : : : "memory");
+
+#define SPRN_MMCR2 769
+#define SPRN_MMCRA 770
+#define SPRN_MMCR0 779
+#define MMCR0_PMAO 0x00000080
+#define MMCR0_PMAE 0x04000000
+#define MMCR0_FC 0x80000000
+#define SPRN_EBBHR 804
+#define SPRN_EBBRR 805
+#define SPRN_BESCR 806 /* Branch event status & control register */
+#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */
+#define SPRN_BESCRSU 801 /* Branch event status & control set upper */
+#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */
+#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */
+
+#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */
+#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1 771
+#define SPRN_PMC2 772
+#define SPRN_PMC3 773
+#define SPRN_PMC4 774
+#define SPRN_PMC5 775
+#define SPRN_PMC6 776
+
+#define SPRN_SIAR 780
+#define SPRN_SDAR 781
+#define SPRN_SIER 768
+
+#define SPRN_TEXASR 0x82
+#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
+#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define TEXASR_FS 0x08000000
+#define SPRN_TAR 0x32f
+
+#endif /* _SELFTESTS_POWERPC_REG_H */
tm-signal-msr-resv
tm-signal-stack
tm-vmxcopy
+tm-fork
+tm-tar
+tm-tmspr
-TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy
+TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr
all: $(TEST_PROGS)
tm-syscall: tm-syscall-asm.S
tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include
+tm-tmspr: CFLAGS += -pthread
include ../../lib.mk
--- /dev/null
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Edited: Rashmica Gupta, Nov 2015
+ *
+ * This test does a fork syscall inside a transaction. Basic sniff test
+ * to see if we can enter the kernel during a transaction.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int test_fork(void)
+{
+ SKIP_IF(!have_htm());
+
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f; "
+ "li 0, 2;" /* fork syscall */
+ "sc ;"
+ "tend.;"
+ "1: ;"
+ : : : "memory", "r0");
+ /* If we reach here, we've passed. Otherwise we've probably crashed
+ * the kernel */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_fork, "tm_fork");
+}
#include "utils.h"
#include "tm.h"
-#define TBEGIN ".long 0x7C00051D ;"
-#define TEND ".long 0x7C00055D ;"
-#define TCHECK ".long 0x7C00059C ;"
-#define TSUSPEND ".long 0x7C0005DD ;"
-#define TRESUME ".long 0x7C2005DD ;"
-#define SPRN_TEXASR 0x82
#define SPRN_DSCR 0x03
int test_body(void)
"mtspr %[sprn_dscr], 3;"
/* start and suspend a transaction */
- TBEGIN
+ "tbegin.;"
"beq 1f;"
- TSUSPEND
+ "tsuspend.;"
/* hard loop until the transaction becomes doomed */
"2: ;"
- TCHECK
+ "tcheck 0;"
"bc 4, 0, 2b;"
/* record DSCR and TEXASR */
"mfspr 3, %[sprn_texasr];"
"std 3, %[texasr];"
- TRESUME
- TEND
+ "tresume.;"
+ "tend.;"
"li %[rv], 0;"
"1: ;"
: [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
exit(1);
asm volatile("li 1, 0 ;" /* stack ptr == NULL */
"1:"
- ".long 0x7C00051D ;" /* tbegin */
+ "tbegin.;"
"beq 1b ;" /* retry forever */
- ".long 0x7C0005DD ; ;" /* tsuspend */
+ "tsuspend.;"
"ld 2, 0(1) ;" /* trigger segv" */
: : : "memory");
--- /dev/null
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Michael Neuling 19/7/2013
+ * Edited: Rashmica Gupta 01/12/2015
+ *
+ * Do some transactions, see if the tar is corrupted.
+ * If the transaction is aborted, the TAR should be rolled back to the
+ * checkpointed value before the transaction began. The value written to
+ * TAR in suspended mode should only remain in TAR if the transaction
+ * completes.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int num_loops = 10000;
+
+int test_tar(void)
+{
+ int i;
+
+ SKIP_IF(!have_htm());
+
+ for (i = 0; i < num_loops; i++)
+ {
+ uint64_t result = 0;
+ asm __volatile__(
+ "li 7, 1;"
+ "mtspr %[tar], 7;" /* tar = 1 */
+ "tbegin.;"
+ "beq 3f;"
+ "li 4, 0x7000;" /* Loop lots, to use time */
+ "2:;" /* Start loop */
+ "li 7, 2;"
+ "mtspr %[tar], 7;" /* tar = 2 */
+ "tsuspend.;"
+ "li 7, 3;"
+ "mtspr %[tar], 7;" /* tar = 3 */
+ "tresume.;"
+ "subi 4, 4, 1;"
+ "cmpdi 4, 0;"
+ "bne 2b;"
+ "tend.;"
+
+ /* Transaction sucess! TAR should be 3 */
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 4;" // res = 3|4 = 7
+ "b 4f;"
+
+ /* Abort handler. TAR should be rolled back to 1 */
+ "3:;"
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 8;" // res = 1|8 = 9
+ "4:;"
+
+ : [res]"=r"(result)
+ : [tar]"i"(SPRN_TAR)
+ : "memory", "r0", "r4", "r7");
+
+ /* If result is anything else other than 7 or 9, the tar
+ * value must have been corrupted. */
+ if ((result != 7) && (result != 9))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ /* A low number of iterations (eg 100) can cause a false pass */
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\n\t%s [<num loops>]\n",
+ argv[0]);
+ return 1;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+
+ printf("Starting, %d loops\n", num_loops);
+
+ return test_harness(test_tar, "tm_tar");
+}
--- /dev/null
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 3/4/2014
+ * Modified: Rashmica Gupta 8/12/2015
+ *
+ * Check if any of the Transaction Memory SPRs get corrupted.
+ * - TFIAR - stores address of location of transaction failure
+ * - TFHAR - stores address of software failure handler (if transaction
+ * fails)
+ * - TEXASR - lots of info about the transacion(s)
+ *
+ * (1) create more threads than cpus
+ * (2) in each thread:
+ * (a) set TFIAR and TFHAR a unique value
+ * (b) loop for awhile, continually checking to see if
+ * either register has been corrupted.
+ *
+ * (3) Loop:
+ * (a) begin transaction
+ * (b) abort transaction
+ * (c) check TEXASR to see if FS has been corrupted
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int num_loops = 10000;
+int passed = 1;
+
+void tfiar_tfhar(void *in)
+{
+ int i, cpu;
+ unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ cpu = (unsigned long)in >> 1;
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ /* TFIAR: Last bit has to be high so userspace can read register */
+ tfiar = ((unsigned long)in) + 1;
+ tfiar += 2;
+ mtspr(SPRN_TFIAR, tfiar);
+
+ /* TFHAR: Last two bits are reserved */
+ tfhar = ((unsigned long)in);
+ tfhar &= ~0x3UL;
+ tfhar += 4;
+ mtspr(SPRN_TFHAR, tfhar);
+
+ for (i = 0; i < num_loops; i++) {
+ tfhar_rd = mfspr(SPRN_TFHAR);
+ tfiar_rd = mfspr(SPRN_TFIAR);
+ if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+void texasr(void *in)
+{
+ unsigned long i;
+ uint64_t result = 0;
+
+ for (i = 0; i < num_loops; i++) {
+ asm __volatile__(
+ "tbegin.;"
+ "beq 3f ;"
+ "tabort. 0 ;"
+ "tend.;"
+
+ /* Abort handler */
+ "3: ;"
+ ::: "memory");
+
+ /* Check the TEXASR */
+ result = mfspr(SPRN_TEXASR);
+ if ((result & TEXASR_FS) == 0) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+int test_tmspr()
+{
+ pthread_t thread;
+ int thread_num;
+ unsigned long i;
+
+ SKIP_IF(!have_htm());
+
+ /* To cause some context switching */
+ thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+
+ /* Test TFIAR and TFHAR */
+ for (i = 0 ; i < thread_num ; i += 2){
+ if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+ return EXIT_FAILURE;
+ }
+ if (pthread_join(thread, NULL) != 0)
+ return EXIT_FAILURE;
+
+ /* Test TEXASR */
+ for (i = 0 ; i < thread_num ; i++){
+ if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+ return EXIT_FAILURE;
+ }
+ if (pthread_join(thread, NULL) != 0)
+ return EXIT_FAILURE;
+
+ if (passed)
+ return 0;
+ else
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\t [<num loops>]\n");
+ return 0;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+ return test_harness(test_tmspr, "tm_tmspr");
+}
#ifndef _SELFTESTS_POWERPC_UTILS_H
#define _SELFTESTS_POWERPC_UTILS_H
+#define __cacheline_aligned __attribute__((aligned(128)))
+
#include <stdint.h>
#include <stdbool.h>
#include <linux/auxvec.h>
+#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
typedef unsigned long long u64;
#define _str(s) #s
#define str(s) _str(s)
+/* POWER9 feature */
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
#endif /* _SELFTESTS_POWERPC_UTILS_H */