Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Mark Brown <broonie@sirena.org.uk>
+Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Matthieu CASTET <castet.matthieu@free.fr>
Description: information about CPUs heterogeneity.
cpu_capacity: capacity of cpu#.
+
+What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date: January 2018
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Information about CPU vulnerabilities
+
+ The files are named after the code names of CPU
+ vulnerabilities. The output of those files reflects the
+ state of the CPUs in the system. Possible output values:
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+ "Mitigation: $M" CPU is affected and mitigation $M is in effect
acpi_sleep= [HW,ACPI] Sleep options
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
- old_ordering, nonvs, sci_force_enable }
+ old_ordering, nonvs, sci_force_enable, nobl }
See Documentation/power/video.txt for information on
s3_bios and s3_mode.
s3_beep is for debugging; it makes the PC's speaker beep
sci_force_enable causes the kernel to set SCI_EN directly
on resume from S1/S3 (which is against the ACPI spec,
but some broken systems don't work without it).
+ nobl causes the internal blacklist of systems known to
+ behave incorrectly in some ways with respect to system
+ suspend and resume to be ignored (use wisely).
acpi_use_timer_override [HW,ACPI]
Use timer override. For some broken Nvidia NF5 boards
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
- crossrelease_fullstack
- [KNL] Allow to record full stack trace in cross-release
-
cryptomgr.notests
[KNL] Disable crypto self-tests
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
+ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+ (indirect branch prediction) vulnerability. System may
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
steal time is computed, but won't influence scheduler
behaviour
- nopti [X86-64] Disable kernel page table isolation
-
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
pcie_scan_all Scan all possible PCIe devices. Otherwise we
only look for one device below a PCIe downstream
port.
+ big_root_window Try to add a big 64bit memory window to the PCIe
+ root complex on AMD CPUs. Some GFX hardware
+ can resize a BAR to allow access to all VRAM.
+ Adding the window is slightly risky (it may
+ conflict with unreported devices), so this
+ taints the kernel.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
- pti= [X86_64]
- Control user/kernel address space isolation:
- on - enable
- off - disable
- auto - default setting
+ pti= [X86_64] Control Page Table Isolation of user and
+ kernel address spaces. Disabling this feature
+ removes hardening, but improves performance of
+ system calls and interrupts.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable to issues that PTI mitigates
+
+ Not specifying this option is equivalent to pti=auto.
+
+ nopti [X86_64]
+ Equivalent to pti=off
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+ Selecting 'on' will, and 'auto' may, choose a
+ mitigation method at run time according to the
+ CPU, the available microcode, the setting of the
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+ retpoline,generic - google's original retpoline
+ retpoline,amd - AMD-specific minimal thunk
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
Example:
compatible = "marvell,armada-3720-db", "marvell,armada3720", "marvell,armada3710";
+
+
+Power management
+----------------
+
+For power management (particularly DVFS and AVS), the North Bridge
+Power Management component is needed:
+
+Required properties:
+- compatible : should contain "marvell,armada-3700-nb-pm", "syscon";
+- reg : the register start and length for the North Bridge
+ Power Management
+
+Example:
+
+nb_pm: syscon@14000 {
+ compatible = "marvell,armada-3700-nb-pm", "syscon";
+ reg = <0x14000 0x60>;
+}
--- /dev/null
+Texas Instruments OMAP compatible OPP supply description
+
+OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which
+contain data that can be used to adjust voltages programmed for some of their
+supplies for more efficient operation. This binding provides the information
+needed to read these values and use them to program the main regulator during
+an OPP transitions.
+
+Also, some supplies may have an associated vbb-supply which is an Adaptive Body
+Bias regulator which much be transitioned in a specific sequence with regards
+to the vdd-supply and clk when making an OPP transition. By supplying two
+regulators to the device that will undergo OPP transitions we can make use
+of the multi regulator binding that is part of the OPP core described here [1]
+to describe both regulators needed by the platform.
+
+[1] Documentation/devicetree/bindings/opp/opp.txt
+
+Required Properties for Device Node:
+- vdd-supply: phandle to regulator controlling VDD supply
+- vbb-supply: phandle to regulator controlling Body Bias supply
+ (Usually Adaptive Body Bias regulator)
+
+Required Properties for opp-supply node:
+- compatible: Should be one of:
+ "ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB
+ "ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0)VDD
+ along with VBB
+ "ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD
+ but no VBB.
+- reg: Address and length of the efuse register set for the device (mandatory
+ only for "ti,omap5-opp-supply")
+- ti,efuse-settings: An array of u32 tuple items providing information about
+ optimized efuse configuration. Each item consists of the following:
+ volt: voltage in uV - reference voltage (OPP voltage)
+ efuse_offseet: efuse offset from reg where the optimized voltage is stored.
+- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply.
+
+Example:
+
+/* Device Node (CPU) */
+cpus {
+ cpu0: cpu@0 {
+ device_type = "cpu";
+
+ ...
+
+ vdd-supply = <&vcc>;
+ vbb-supply = <&abb_mpu>;
+ };
+};
+
+/* OMAP OPP Supply with Class0 registers */
+opp_supply_mpu: opp_supply@4a003b20 {
+ compatible = "ti,omap5-opp-supply";
+ reg = <0x4a003b20 0x8>;
+ ti,efuse-settings = <
+ /* uV offset */
+ 1060000 0x0
+ 1160000 0x4
+ 1210000 0x8
+ >;
+ ti,absolute-max-voltage-uv = <1500000>;
+};
runtime suspend at the beginning of the ``suspend_late`` phase of system-wide
suspend (or in the ``poweroff_late`` phase of hibernation), when runtime PM
has been disabled for it, under the assumption that its state should not change
-after that point until the system-wide transition is over. If that happens, the
-driver's system-wide resume callbacks, if present, may still be invoked during
-the subsequent system-wide resume transition and the device's runtime power
-management status may be set to "active" before enabling runtime PM for it,
-so the driver must be prepared to cope with the invocation of its system-wide
-resume callbacks back-to-back with its ``->runtime_suspend`` one (without the
-intervening ``->runtime_resume`` and so on) and the final state of the device
-must reflect the "active" status for runtime PM in that case.
+after that point until the system-wide transition is over (the PM core itself
+does that for devices whose "noirq", "late" and "early" system-wide PM callbacks
+are executed directly by it). If that happens, the driver's system-wide resume
+callbacks, if present, may still be invoked during the subsequent system-wide
+resume transition and the device's runtime power management status may be set
+to "active" before enabling runtime PM for it, so the driver must be prepared to
+cope with the invocation of its system-wide resume callbacks back-to-back with
+its ``->runtime_suspend`` one (without the intervening ``->runtime_resume`` and
+so on) and the final state of the device must reflect the "active" runtime PM
+status in that case.
During system-wide resume from a sleep state it's easiest to put devices into
the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`.
-Refer to that document for more information regarding this particular issue as
+[Refer to that document for more information regarding this particular issue as
well as for information on the device runtime power management framework in
-general.
+general.]
+
+However, it often is desirable to leave devices in suspend after system
+transitions to the working state, especially if those devices had been in
+runtime suspend before the preceding system-wide suspend (or analogous)
+transition. Device drivers can use the ``DPM_FLAG_LEAVE_SUSPENDED`` flag to
+indicate to the PM core (and middle-layer code) that they prefer the specific
+devices handled by them to be left suspended and they have no problems with
+skipping their system-wide resume callbacks for this reason. Whether or not the
+devices will actually be left in suspend may depend on their state before the
+given system suspend-resume cycle and on the type of the system transition under
+way. In particular, devices are not left suspended if that transition is a
+restore from hibernation, as device states are not guaranteed to be reflected
+by the information stored in the hibernation image in that case.
+
+The middle-layer code involved in the handling of the device is expected to
+indicate to the PM core if the device may be left in suspend by setting its
+:c:member:`power.may_skip_resume` status bit which is checked by the PM core
+during the "noirq" phase of the preceding system-wide suspend (or analogous)
+transition. The middle layer is then responsible for handling the device as
+appropriate in its "noirq" resume callback, which is executed regardless of
+whether or not the device is left suspended, but the other resume callbacks
+(except for ``->complete``) will be skipped automatically by the PM core if the
+device really can be left in suspend.
+
+For devices whose "noirq", "late" and "early" driver callbacks are invoked
+directly by the PM core, all of the system-wide resume callbacks are skipped if
+``DPM_FLAG_LEAVE_SUSPENDED`` is set and the device is in runtime suspend during
+the ``suspend_noirq`` (or analogous) phase or the transition under way is a
+proper system suspend (rather than anything related to hibernation) and the
+device's wakeup settings are suitable for runtime PM (that is, it cannot
+generate wakeup signals at all or it is allowed to wake up the system from
+sleep).
cleaner or garbage collector) are required. Details on the tools are
described in the man pages included in the package.
-Project web page: http://nilfs.sourceforge.net/
-Download page: http://nilfs.sourceforge.net/en/download.html
+Project web page: https://nilfs.sourceforge.io/
+Download page: https://nilfs.sourceforge.io/en/download.html
List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
Caveats
GuC-specific firmware loader
----------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
- :doc: GuC-specific firmware loader
-
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c
:internal:
GuC-based command submission
<expr> ::= <symbol> (1)
<symbol> '=' <symbol> (2)
<symbol> '!=' <symbol> (3)
- '(' <expr> ')' (4)
- '!' <expr> (5)
- <expr> '&&' <expr> (6)
- <expr> '||' <expr> (7)
+ <symbol1> '<' <symbol2> (4)
+ <symbol1> '>' <symbol2> (4)
+ <symbol1> '<=' <symbol2> (4)
+ <symbol1> '>=' <symbol2> (4)
+ '(' <expr> ')' (5)
+ '!' <expr> (6)
+ <expr> '&&' <expr> (7)
+ <expr> '||' <expr> (8)
Expressions are listed in decreasing order of precedence.
otherwise 'n'.
(3) If the values of both symbols are equal, it returns 'n',
otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+ or greater-or-equal than value of <symbol2>, it returns 'y',
+ otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
respectively for calculations). A menu entry becomes visible when its
batman-adv
kapi
z8530book
+ msg_zerocopy
.. only:: subproject
=======
* :ref:`genindex`
-
if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
error(1, errno, "setsockopt zerocopy");
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be set
+to the listening socket and it will be inherited by the accepted sockets.
Transmission
------------
the function will set the power.direct_complete flag for it (to make the PM core
skip the subsequent "thaw" callbacks for it) and return.
+Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the
+device to be left in suspend after system-wide transitions to the working state.
+This flag is checked by the PM core, but the PCI bus type informs the PM core
+which devices may be left in suspend from its perspective (that happens during
+the "noirq" phase of system-wide suspend and analogous transitions) and next it
+uses the dev_pm_may_skip_resume() helper to decide whether or not to return from
+pci_pm_resume_noirq() early, as the PM core will skip the remaining resume
+callbacks for the device during the transition under way and will set its
+runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for
+it.
+
3.2. Device Runtime Power Management
------------------------------------
In addition to providing device power management callbacks PCI device drivers
clip_cpus: cpumask of cpus where the frequency constraints will happen.
1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
- struct device_node *np, const struct cpumask *clip_cpus)
+ struct cpufreq_policy *policy)
This interface function registers the cpufreq cooling device with
the name "thermal-cpufreq-%x" linking it with a device tree node, in
order to bind it via the thermal DT code. This api can support multiple
instances of cpufreq cooling devices.
- np: pointer to the cooling device device tree node
- clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ policy: CPUFreq policy.
-1.1.3 struct thermal_cooling_device *cpufreq_power_cooling_register(
- const struct cpumask *clip_cpus, u32 capacitance,
- get_static_t plat_static_func)
-
-Similar to cpufreq_cooling_register, this function registers a cpufreq
-cooling device. Using this function, the cooling device will
-implement the power extensions by using a simple cpu power model. The
-cpus must have registered their OPPs using the OPP library.
-
-The additional parameters are needed for the power model (See 2. Power
-models). "capacitance" is the dynamic power coefficient (See 2.1
-Dynamic power). "plat_static_func" is a function to calculate the
-static power consumed by these cpus (See 2.2 Static power).
-
-1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register(
- struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance,
- get_static_t plat_static_func)
-
-Similar to cpufreq_power_cooling_register, this function register a
-cpufreq cooling device with power extensions using the device tree
-information supplied by the np parameter.
-
-1.1.5 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
This interface function unregisters the "thermal-cpufreq-%x" cooling device.
2. Power models
The power API registration functions provide a simple power model for
-CPUs. The current power is calculated as dynamic + (optionally)
-static power. This power model requires that the operating-points of
+CPUs. The current power is calculated as dynamic power (static power isn't
+supported currently). This power model requires that the operating-points of
the CPUs are registered using the kernel's opp library and the
`cpufreq_frequency_table` is assigned to the `struct device` of the
cpu. If you are using CONFIG_CPUFREQ_DT then the
`cpufreq_frequency_table` should already be assigned to the cpu
device.
-The `plat_static_func` parameter of `cpufreq_power_cooling_register()`
-and `of_cpufreq_power_cooling_register()` is optional. If you don't
-provide it, only dynamic power will be considered.
-
-2.1 Dynamic power
-
The dynamic power consumption of a processor depends on many factors.
For a given processor implementation the primary factors are:
from 100 to 500. For reference, the approximate values for the SoC in
ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
140 for the Cortex-A53 cluster.
-
-
-2.2 Static power
-
-Static leakage power consumption depends on a number of factors. For a
-given circuit implementation the primary factors are:
-
-- Time the circuit spends in each 'power state'
-- Temperature
-- Operating voltage
-- Process grade
-
-The time the circuit spends in each 'power state' for a given
-evaluation period at first order means OFF or ON. However,
-'retention' states can also be supported that reduce power during
-inactive periods without loss of context.
-
-Note: The visibility of state entries to the OS can vary, according to
-platform specifics, and this can then impact the accuracy of a model
-based on OS state information alone. It might be possible in some
-cases to extract more accurate information from system resources.
-
-The temperature, operating voltage and process 'grade' (slow to fast)
-of the circuit are all significant factors in static leakage power
-consumption. All of these have complex relationships to static power.
-
-Circuit implementation specific factors include the chosen silicon
-process as well as the type, number and size of transistors in both
-the logic gates and any RAM elements included.
-
-The static power consumption modelling must take into account the
-power managed regions that are implemented. Taking the example of an
-ARM processor cluster, the modelling would take into account whether
-each CPU can be powered OFF separately or if only a single power
-region is implemented for the complete cluster.
-
-In one view, there are others, a static power consumption model can
-then start from a set of reference values for each power managed
-region (e.g. CPU, Cluster/L2) in each state (e.g. ON, OFF) at an
-arbitrary process grade, voltage and temperature point. These values
-are then scaled for all of the following: the time in each state, the
-process grade, the current temperature and the operating voltage.
-However, since both implementation specific and complex relationships
-dominate the estimate, the appropriate interface to the model from the
-cpu cooling device is to provide a function callback that calculates
-the static power in this platform. When registering the cpu cooling
-device pass a function pointer that follows the `get_static_t`
-prototype:
-
- int plat_get_static(cpumask_t *cpumask, int interval,
- unsigned long voltage, u32 &power);
-
-`cpumask` is the cpumask of the cpus involved in the calculation.
-`voltage` is the voltage at which they are operating. The function
-should calculate the average static power for the last `interval`
-milliseconds. It returns 0 on success, -E* on error. If it
-succeeds, it should store the static power in `power`. Reading the
-temperature of the cpus described by `cpumask` is left for
-plat_get_static() to do as the platform knows best which thermal
-sensor is closest to the cpu.
-
-If `plat_static_func` is NULL, static power is considered to be
-negligible for this platform and only dynamic power is considered.
-
-The platform specific callback can then use any combination of tables
-and/or equations to permute the estimated value. Process grade
-information is not passed to the model since access to such data, from
-on-chip measurement capability or manufacture time data, is platform
-specific.
-
-Note: the significance of static power for CPUs in comparison to
-dynamic power is highly dependent on implementation. Given the
-potential complexity in implementation, the importance and accuracy of
-its inclusion when using cpu cooling devices should be assessed on a
-case by case basis.
-
in each line. The rules stated above are best illustrated with an example:
# mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
# ln -s header/h class/fs
# ln -s header/h class/ss
# mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
--- /dev/null
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. A "trampoline" must be used for SYSCALL entry. This
+ trampoline depends on a smaller set of resources than the
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+ d. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
F: arch/arm/configs/mvebu_*_defconfig
F: arch/arm/mach-mvebu/
F: arch/arm64/boot/dts/marvell/armada*
+F: drivers/cpufreq/armada-37xx-cpufreq.c
F: drivers/cpufreq/mvebu-cpufreq.c
F: drivers/irqchip/irq-armada-370-xp.c
F: drivers/irqchip/irq-mvebu-*
EFI TEST DRIVER
L: linux-efi@vger.kernel.org
M: Ivan Hu <ivan.hu@canonical.com>
-M: Matt Fleming <matt@codeblueprint.co.uk>
+M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
S: Maintained
F: drivers/firmware/efi/test/
EFI VARIABLE FILESYSTEM
M: Matthew Garrett <matthew.garrett@nebula.com>
M: Jeremy Kerr <jk@ozlabs.org>
-M: Matt Fleming <matt@codeblueprint.co.uk>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
+M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
L: linux-efi@vger.kernel.org
S: Maintained
F: fs/efivarfs/
F: security/integrity/evm/
EXTENSIBLE FIRMWARE INTERFACE (EFI)
-M: Matt Fleming <matt@codeblueprint.co.uk>
M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
L: linux-efi@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
NILFS2 FILESYSTEM
M: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
L: linux-nilfs@vger.kernel.org
-W: http://nilfs.sourceforge.net/
-W: http://nilfs.osdn.jp/
+W: https://nilfs.sourceforge.io/
+W: https://nilfs.osdn.jp/
T: git git://github.com/konis/nilfs2.git
S: Supported
F: Documentation/filesystems/nilfs2.txt
F: drivers/irqchip/irq-or1k-*
OPENVSWITCH
-M: Pravin Shelar <pshelar@nicira.com>
+M: Pravin B Shelar <pshelar@ovn.org>
L: netdev@vger.kernel.org
L: dev@openvswitch.org
W: http://openvswitch.org
F: include/linux/pm_*
F: include/linux/powercap.h
F: drivers/powercap/
+F: kernel/configs/nopm.config
POWER STATE COORDINATION INTERFACE (PSCI)
M: Mark Rutland <mark.rutland@arm.com>
VERSION = 4
PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc8
NAME = Fearless Coyote
# *DOCUMENTATION*
endif
KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
endif
ifeq ($(config-targets),1)
endif
KBUILD_CFLAGS += $(stackp-flag)
+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not on of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
reg = <0x80 0x10>, <0x100 0x10>;
#clock-cells = <0>;
clocks = <&input_clk>;
+
+ /*
+ * Set initial core pll output frequency to 90MHz.
+ * It will be applied at the core pll driver probing
+ * on early boot.
+ */
+ assigned-clocks = <&core_clk>;
+ assigned-clock-rates = <90000000>;
};
core_intc: archs-intc@cpu {
reg = <0x80 0x10>, <0x100 0x10>;
#clock-cells = <0>;
clocks = <&input_clk>;
+
+ /*
+ * Set initial core pll output frequency to 100MHz.
+ * It will be applied at the core pll driver probing
+ * on early boot.
+ */
+ assigned-clocks = <&core_clk>;
+ assigned-clock-rates = <100000000>;
};
core_intc: archs-intc@cpu {
reg = <0x00 0x10>, <0x14B8 0x4>;
#clock-cells = <0>;
clocks = <&input_clk>;
+
+ /*
+ * Set initial core pll output frequency to 1GHz.
+ * It will be applied at the core pll driver probing
+ * on early boot.
+ */
+ assigned-clocks = <&core_clk>;
+ assigned-clock-rates = <1000000000>;
};
serial: serial@5000 {
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_UDL=y
CONFIG_FB=y
-CONFIG_FB_UDL=y
CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
return 0;
__asm__ __volatile__(
+ " mov lp_count, %5 \n"
" lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
" breq.d %3, 0, 3f \n"
" .word 1b, 4b \n"
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
- : "g"(-EFAULT), "l"(count)
- : "memory");
+ : "g"(-EFAULT), "r"(count)
+ : "lp_count", "lp_start", "lp_end", "memory");
return res;
}
unsigned int exec_ctrl;
READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
- cpu->extn.dual_enb = exec_ctrl & 1;
+ cpu->extn.dual_enb = !(exec_ctrl & 1);
/* dual issue always present for this core */
cpu->extn.dual = 1;
*/
static int __print_sym(unsigned int address, void *unused)
{
- __print_symbol(" %s\n", address);
+ printk(" %pS\n", (void *)address);
return 0;
}
DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
+DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
/*
* Entry Point for Misaligned Data access Exception, for emulating in software
* Thus TRAP_S <n> can be used for specific purpose
* -1 used for software breakpointing (gdb)
* -2 used by kprobes
+ * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as
+ * -fno-isolate-erroneous-paths-dereference
*/
void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
{
kgdb_trap(regs);
break;
+ case 5:
+ do_trap5_error(address, regs);
+ break;
default:
break;
}
insterror_is_error(address, regs);
}
+
+/*
+ * abort() call generated by older gcc for __builtin_trap()
+ */
+void abort(void)
+{
+ __asm__ __volatile__("trap_s 5\n");
+}
else
pr_cont("Bus Error, check PRM\n");
#endif
+ } else if (vec == ECR_V_TRAP) {
+ if (regs->ecr_param == 5)
+ pr_cont("gcc generated __builtin_trap\n");
} else {
pr_cont("Check Programmer's Manual\n");
}
* Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
* of fudging the freq in DT
*/
+#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000
+
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
if (num_cores > 2) {
- u32 freq = 50, orig;
- /*
- * TODO: use cpu node "cpu-freq" param instead of platform-specific
- * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
- */
+ u32 freq;
int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
const struct fdt_property *prop;
prop = fdt_get_property(initial_boot_params, off,
- "clock-frequency", NULL);
- orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
+ "assigned-clock-rates", NULL);
+ freq = be32_to_cpu(*(u32 *)(prop->data));
/* Patching .dtb in-place with new core clock value */
- if (freq != orig ) {
- freq = cpu_to_be32(freq * 1000000);
+ if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
+ freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
fdt_setprop_inplace(initial_boot_params, off,
- "clock-frequency", &freq, sizeof(freq));
+ "assigned-clock-rates", &freq, sizeof(freq));
}
}
#endif
#define CREG_PAE (CREG_BASE + 0x180)
#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
-#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8)
-#define CREG_CORE_IF_CLK_DIV_2 0x1
-#define CGU_BASE ARC_PERIPHERAL_BASE
-#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4)
-#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0)
-#define CGU_PLL_STATUS_LOCK BIT(0)
-#define CGU_PLL_STATUS_ERR BIT(1)
-#define CGU_PLL_CTRL_1GHZ 0x3A10
-#define HSDK_PLL_LOCK_TIMEOUT 500
-
-#define HSDK_PLL_LOCKED() \
- !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
-
-#define HSDK_PLL_ERR() \
- !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
-
-static void __init hsdk_set_cpu_freq_1ghz(void)
-{
- u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
-
- /*
- * As we set cpu clock which exceeds 500MHz, the divider for the interface
- * clock must be programmed to div-by-2.
- */
- iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
-
- /* Set cpu clock to 1GHz */
- iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
-
- while (!HSDK_PLL_LOCKED() && timeout--)
- cpu_relax();
-
- if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
- pr_err("Failed to setup CPU frequency to 1GHz!");
-}
-
#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000)
#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108)
#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30)
* minimum possible div-by-2.
*/
iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
-
- /*
- * Setup CPU frequency to 1GHz.
- * TODO: remove it after smart hsdk pll driver will be introduced.
- */
- hsdk_set_cpu_freq_1ghz();
}
static const char *hsdk_compat[] __initconst = {
compatible = "aspeed,ast2400-vuart";
reg = <0x1e787000 0x40>;
reg-shift = <2>;
- interrupts = <10>;
+ interrupts = <8>;
clocks = <&clk_uart>;
no-loopback-test;
status = "disabled";
jc42@18 {
compatible = "nxp,se97b", "jedec,jc-42.4-temp";
reg = <0x18>;
+ smbus-timeout-disable;
};
dpot: mcp4651-104@28 {
*/
battery {
pinctrl-names = "default";
- pintctrl-0 = <&battery_pins>;
+ pinctrl-0 = <&battery_pins>;
compatible = "lego,ev3-battery";
io-channels = <&adc 4>, <&adc 3>;
io-channel-names = "voltage", "current";
batt_volt_en {
gpio-hog;
gpios = <6 GPIO_ACTIVE_HIGH>;
- output-low;
+ output-high;
};
};
status = "okay";
};
+&mixer {
+ status = "okay";
+};
+
/* eMMC flash */
&mmc_0 {
status = "okay";
clock-latency = <61036>; /* two CLK32 periods */
operating-points = <
/* kHz uV */
+ 696000 1275000
528000 1175000
396000 1025000
198000 950000
>;
fsl,soc-operating-points = <
/* KHz uV */
+ 696000 1275000
528000 1175000
396000 1175000
198000 1175000
reg = <0x2a>;
VDDA-supply = <®_3p3v>;
VDDIO-supply = <®_3p3v>;
- clocks = <&sys_mclk 1>;
+ clocks = <&sys_mclk>;
};
};
};
reg = <0x0a>;
VDDA-supply = <®_3p3v>;
VDDIO-supply = <®_3p3v>;
- clocks = <&sys_mclk 1>;
+ clocks = <&sys_mclk>;
};
};
};
};
+&cpu0 {
+ cpu0-supply = <&vdd_arm>;
+};
+
&i2c1 {
status = "okay";
clock-frequency = <400000>;
iep_mmu: iommu@ff900800 {
compatible = "rockchip,iommu";
reg = <0x0 0xff900800 0x0 0x40>;
- interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>;
+ interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "iep_mmu";
#iommu-cells = <0>;
status = "disabled";
reg = <0x01c16000 0x1000>;
interrupts = <58>;
clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
- <&ccu 9>,
- <&ccu 18>;
+ <&ccu CLK_PLL_VIDEO0_2X>,
+ <&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,
reg = <0x01c16000 0x1000>;
interrupts = <58>;
clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
- <&ccu 9>,
- <&ccu 16>;
+ <&ccu CLK_PLL_VIDEO0_2X>,
+ <&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,
interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
<&ccu CLK_HDMI_DDC>,
- <&ccu 7>,
- <&ccu 13>;
+ <&ccu CLK_PLL_VIDEO0_2X>,
+ <&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
resets = <&ccu RST_AHB1_HDMI>;
reset-names = "ahb";
reg = <0x01c16000 0x1000>;
interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
- <&ccu 9>,
- <&ccu 18>;
+ <&ccu CLK_PLL_VIDEO0_2X>,
+ <&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,
status = "okay";
axp81x: pmic@3a3 {
+ compatible = "x-powers,axp813";
reg = <0x3a3>;
interrupt-parent = <&r_intc>;
interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
reg = <0x6e000 0x400>;
ranges = <0 0x6e000 0x400>;
interrupt-parent = <&gic>;
- interrupt-controller;
#address-cells = <1>;
#size-cells = <1>;
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
-EXPORT_SYMBOL(abort);
void __init trap_init(void)
{
{ "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
{ "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
{ "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
- { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
- { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
- { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
- { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
+ { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
+ { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
+ { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
+ { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
};
static struct edma_soc_info dm365_edma_pdata = {
/* not using TC*_ERR */
};
-static struct platform_device dm365_edma_device = {
- .name = "edma",
- .id = 0,
- .dev.platform_data = &dm365_edma_pdata,
- .num_resources = ARRAY_SIZE(edma_resources),
- .resource = edma_resources,
+static const struct platform_device_info dm365_edma_device __initconst = {
+ .name = "edma",
+ .id = 0,
+ .dma_mask = DMA_BIT_MASK(32),
+ .res = edma_resources,
+ .num_res = ARRAY_SIZE(edma_resources),
+ .data = &dm365_edma_pdata,
+ .size_data = sizeof(dm365_edma_pdata),
};
static struct resource dm365_asp_resources[] = {
static int __init dm365_init_devices(void)
{
+ struct platform_device *edma_pdev;
int ret = 0;
if (!cpu_is_davinci_dm365())
return 0;
davinci_cfg_reg(DM365_INT_EDMA_CC);
- platform_device_register(&dm365_edma_device);
+ edma_pdev = platform_device_register_full(&dm365_edma_device);
+ if (IS_ERR(edma_pdev)) {
+ pr_warn("%s: Failed to register eDMA\n", __func__);
+ return PTR_ERR(edma_pdev);
+ }
platform_device_register(&dm365_mdio_device);
platform_device_register(&dm365_emac_device);
pinctrl-0 = <&rgmii_pins>;
phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
+ phy-supply = <®_dc1sw>;
status = "okay";
};
pinctrl-0 = <&rmii_pins>;
phy-mode = "rmii";
phy-handle = <&ext_rmii_phy1>;
+ phy-supply = <®_dc1sw>;
status = "okay";
};
pinctrl-0 = <&rgmii_pins>;
phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
+ phy-supply = <®_dc1sw>;
status = "okay";
};
&mmc2 {
pinctrl-names = "default";
pinctrl-0 = <&mmc2_pins>;
- vmmc-supply = <®_vcc3v3>;
+ vmmc-supply = <®_dcdc1>;
vqmmc-supply = <®_vcc1v8>;
bus-width = <8>;
non-removable;
#include "sun50i-a64.dtsi"
-/ {
- reg_vcc3v3: vcc3v3 {
- compatible = "regulator-fixed";
- regulator-name = "vcc3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
- vmmc-supply = <®_vcc3v3>;
+ vmmc-supply = <®_dcdc1>;
non-removable;
disable-wp;
bus-width = <4>;
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
vmmc-supply = <®_vcc3v3>;
bus-width = <4>;
- cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+ cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
status = "okay";
};
&avb {
pinctrl-0 = <&avb_pins>;
pinctrl-names = "default";
- renesas,no-ether-link;
phy-handle = <&phy0>;
status = "okay";
&avb {
pinctrl-0 = <&avb_pins>;
pinctrl-names = "default";
- renesas,no-ether-link;
phy-handle = <&phy0>;
status = "okay";
assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
clock_in_out = "input";
+ /* shows instability at 1GBit right now */
+ max-speed = <100>;
phy-supply = <&vcc_io>;
phy-mode = "rgmii";
pinctrl-names = "default";
tsadc: tsadc@ff250000 {
compatible = "rockchip,rk3328-tsadc";
reg = <0x0 0xff250000 0x0 0x100>;
- interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
+ interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
assigned-clocks = <&cru SCLK_TSADC>;
assigned-clock-rates = <50000>;
clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
};
-
- vdd_log: vdd-log {
- compatible = "pwm-regulator";
- pwms = <&pwm2 0 25000 0>;
- regulator-name = "vdd_log";
- regulator-min-microvolt = <800000>;
- regulator-max-microvolt = <1400000>;
- regulator-always-on;
- regulator-boot-on;
- status = "okay";
- };
};
&cpu_b0 {
gpio-controller;
#gpio-cells = <2>;
gpio-ranges = <&pinctrl 0 0 0>,
- <&pinctrl 96 0 0>,
- <&pinctrl 160 0 0>;
+ <&pinctrl 104 0 0>,
+ <&pinctrl 168 0 0>;
gpio-ranges-group-names = "gpio_range0",
"gpio_range1",
"gpio_range2";
}
if (ti->softirq_time) {
- delta = cycle_to_nsec(ti->softirq_time));
+ delta = cycle_to_nsec(ti->softirq_time);
account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
}
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
-EXPORT_SYMBOL(abort);
void __init trap_init(void)
{
has_mt t0, 3f
.set push
+ .set MIPS_ISA_LEVEL_RAW
.set mt
/* Only allow 1 TC per VPE to execute... */
#elif defined(CONFIG_MIPS_MT)
.set push
+ .set MIPS_ISA_LEVEL_RAW
.set mt
/* If the core doesn't support MT then return */
struct task_struct *t;
int max_users;
+ /* If nothing to change, return right away, successfully. */
+ if (value == mips_get_process_fp_mode(task))
+ return 0;
+
+ /* Only accept a mode change if 64-bit FP enabled for o32. */
+ if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+ return -EOPNOTSUPP;
+
+ /* And only for o32 tasks. */
+ if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+ return -EOPNOTSUPP;
+
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
#endif /* CONFIG_64BIT */
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
+ * correspond 1:1 to buffer slots. Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ return user_regset_copyout(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots. Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
- for (i = 0; i < NUM_FPU_REGS; i++) {
- fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
- err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
+ * context's general register slots. Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ return user_regset_copyin(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots. Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+ err = user_regset_copyin(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
if (err)
return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
}
return 0;
}
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
static int fpr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ u32 fcr31;
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ BUG_ON(count % sizeof(elf_fpreg_t));
+
+ if (pos + count > sizeof(elf_fpregset_t))
+ return -EIO;
init_fp_ctx(target);
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
- for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+ if (count > 0) {
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ &fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
if (err)
return err;
- set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+ ptrace_setfcr31(target, fcr31);
}
- return 0;
+ return err;
}
enum mips_regset {
for the semaphore. */
#define __PA_LDCW_ALIGNMENT 16
+#define __PA_LDCW_ALIGN_ORDER 4
#define __ldcw_align(a) ({ \
unsigned long __ret = (unsigned long) &(a)->lock[0]; \
__ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
ldcd). */
#define __PA_LDCW_ALIGNMENT 4
+#define __PA_LDCW_ALIGN_ORDER 2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW "ldcw,co"
static int count;
print_pa_hwpath(dev, hw_path);
- printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
+ printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
#include <asm/pgtable.h>
#include <asm/signal.h>
#include <asm/unistd.h>
+#include <asm/ldcw.h>
#include <asm/thread_info.h>
#include <linux/linkage.h>
#endif
.import pa_tlb_lock,data
+ .macro load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+ load32 PA(pa_tlb_lock), \reg
+#endif
+ .endm
/* space_to_prot macro creates a prot id from a space id */
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
tlb_unlock0 \spc,\tmp
#endif
.endm
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
+#include <asm/ldcw.h>
#include <linux/linkage.h>
.text
.macro tlb_lock la,flags,tmp
#ifdef CONFIG_SMP
- ldil L%pa_tlb_lock,%r1
- ldo R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+ load32 pa_tlb_lock, \la
+#endif
rsm PSW_SM_I,\flags
1: LDCW 0(\la),\tmp
cmpib,<>,n 0,\tmp,3f
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
return 1;
}
+/*
+ * Idle thread support
+ *
+ * Detect when running on QEMU with SeaBIOS PDC Firmware and let
+ * QEMU idle the host too.
+ */
+
+int running_on_qemu __read_mostly;
+
+void __cpuidle arch_cpu_idle_dead(void)
+{
+ /* nop on real hardware, qemu will offline CPU. */
+ asm volatile("or %%r31,%%r31,%%r31\n":::);
+}
+
+void __cpuidle arch_cpu_idle(void)
+{
+ local_irq_enable();
+
+ /* nop on real hardware, qemu will idle sleep. */
+ asm volatile("or %%r10,%%r10,%%r10\n":::);
+}
+
+static int __init parisc_idle_init(void)
+{
+ const char *marker;
+
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ marker = (char *) &PAGE0->pad0;
+ running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
+
+ if (!running_on_qemu)
+ cpu_idle_poll_ctrl(1);
+
+ return 0;
+}
+arch_initcall(parisc_idle_init);
+
/*
* Copy architecture-specific thread state
*/
mem_init_print_info(NULL);
#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
printk("virtual kernel memory layout:\n"
- " vmalloc : 0x%p - 0x%p (%4ld MB)\n"
- " memory : 0x%p - 0x%p (%4ld MB)\n"
- " .init : 0x%p - 0x%p (%4ld kB)\n"
- " .data : 0x%p - 0x%p (%4ld kB)\n"
- " .text : 0x%p - 0x%p (%4ld kB)\n",
+ " vmalloc : 0x%px - 0x%px (%4ld MB)\n"
+ " memory : 0x%px - 0x%px (%4ld MB)\n"
+ " .init : 0x%px - 0x%px (%4ld kB)\n"
+ " .data : 0x%px - 0x%px (%4ld kB)\n"
+ " .text : 0x%px - 0x%px (%4ld kB)\n",
(void*)VMALLOC_START, (void*)VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,
ori r3,r3,vector_offset@l; \
mtspr SPRN_IVOR##vector_number,r3;
+#define RFI_TO_KERNEL \
+ rfi
+
+#define RFI_TO_USER \
+ rfi
+
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
*/
#define EX_R3 EX_DAR
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT \
+ RFI_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define RFI_TO_KERNEL \
+ rfid
+
+#define RFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define HRFI_TO_KERNEL \
+ hrfid
+
+#define HRFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
mtspr SPRN_##h##SRR0,r12; \
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
+ h##RFI_TO_KERNEL; \
b . /* prevent speculative execution */
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
__EXCEPTION_PROLOG_PSERIES_1(label, h)
mtspr SPRN_##h##SRR0,r12; \
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
+ h##RFI_TO_KERNEL; \
b . /* prevent speculative execution */
#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+#define RFI_FLUSH_FIXUP_SECTION \
+951: \
+ .pushsection __rfi_flush_fixup,"a"; \
+ .align 2; \
+952: \
+ FTR_ENTRY_OFFSET 951b-952b; \
+ .popsection;
+
+
#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
void apply_feature_fixups(void);
void setup_feature_keys(void);
#endif
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
+
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10
}
}
+struct h_cpu_char_result {
+ u64 character;
+ u64 behaviour;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * rfi fallback flush must be in its own cacheline to prevent
+ * other paca data leaking into the L1d
+ */
+ u64 exrfi[EX_SIZE] __aligned(0x80);
+ void *rfi_flush_fallback_area;
+ u64 l1d_flush_congruence;
+ u64 l1d_flush_sets;
+#endif
};
extern void copy_mm_to_paca(struct mm_struct *mm);
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
}
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+ if (rc == H_SUCCESS) {
+ p->character = retbuf[0];
+ p->behaviour = retbuf[1];
+ }
+
+ return rc;
+}
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
static inline void pseries_little_endian_exceptions(void) {}
#endif /* CONFIG_PPC_PSERIES */
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+ L1D_FLUSH_NONE = 0x1,
+ L1D_FLUSH_FALLBACK = 0x2,
+ L1D_FLUSH_ORI = 0x4,
+ L1D_FLUSH_MTTRIG = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_SETUP_H */
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+ OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+ OFFSET(PACA_EXRFI, paca_struct, exrfi);
+ OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+ OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
#include <asm/tm.h>
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/*
* System calls.
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+ /* exit to kernel */
1: ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
.Lsyscall_error:
mtmsrd r10, 1
mtspr SPRN_SRR0, r11
mtspr SPRN_SRR1, r12
-
- rfid
+ RFI_TO_USER
b . /* prevent speculative execution */
#endif
_ASM_NOKPROBE_SYMBOL(system_call_common);
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
-1:
+
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
ld r3,GPR3(r1)
ld r4,GPR4(r1)
ld r1,GPR1(r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- rfid
+1: mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3E */
mtspr SPRN_SRR0,r5
mtspr SPRN_SRR1,r6
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
rtas_return_loc:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
andc r11,r11,r12
mtsrr1 r11
- rfid
+ RFI_TO_KERNEL
#endif /* CONFIG_PPC_BOOK3E */
1: /* Return from OF */
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r11
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
2:
/* Stack overflow. Stay on emergency stack and panic.
li r3,MSR_ME
andc r10,r10,r3 /* Turn off MSR_ME */
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
2:
/*
*/
bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
- rfid
+ RFI_TO_USER_OR_KERNEL
9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+ andi. r9,r11,MSR_PR // Check for exception from userspace
+ cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
+
/*
* Test MSR_RI before calling slb_allocate_realmode, because the
* MSR in r11 gets clobbered. However we still want to allocate
/* All done -- return from exception. */
+ bne cr4,1f /* returning to kernel */
+
.machine push
.machine "power4"
mtcrf 0x80,r9
+ mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+1:
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
+ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
+ mtcrf 0x02,r9 /* I/D indication is in cr6 */
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ RESTORE_CTR(r9, PACA_EXSLB)
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ mr r3,r12
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
+
2: std r3,PACA_EXSLB+EX_DAR(r13)
mr r3,r12
mfspr r11,SPRN_SRR0
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
8: std r3,PACA_EXSLB+EX_DAR(r13)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
EXC_COMMON_BEGIN(unrecov_slb)
mtspr SPRN_SRR0,r10 ; \
ld r10,PACAKMSR(r13) ; \
mtspr SPRN_SRR1,r10 ; \
- rfid ; \
+ RFI_TO_KERNEL ; \
b . ; /* prevent speculative execution */
#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
mr r13,r9 ; \
- rfid ; /* return to userspace */ \
+ RFI_TO_USER ; /* return to userspace */ \
b . ; /* prevent speculative execution */
#else
#define SYSCALL_FASTENDIAN_TEST
mtcr r11
REST_GPR(11, r1)
ld r1,GPR1(r1)
- hrfid
+ HRFI_TO_USER_OR_KERNEL
1: mtcr r11
REST_GPR(11, r1)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
ld r13,PACA_EXGEN+EX_R13(r13)
- HRFID
+ HRFI_TO_UNKNOWN
b .
#endif
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
/* returns to kernel where r13 must be set up, so don't restore it */ \
- ##_H##rfid; \
+ ##_H##RFI_TO_KERNEL; \
b .; \
MASKED_DEC_HANDLER(_H)
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
/*
* Real mode exceptions actually use this too, but alternate
* instruction code patches (which end up in the common .text area)
addi r13, r13, 4
mtspr SPRN_SRR0, r13
GET_SCRATCH0(r13)
- rfid
+ RFI_TO_KERNEL
b .
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
addi r13, r13, 4
mtspr SPRN_HSRR0, r13
GET_SCRATCH0(r13)
- hrfid
+ HRFI_TO_KERNEL
b .
#endif
return 0;
}
early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+ pr_info("rfi-flush: disabled on command line.");
+ no_rfi_flush = true;
+ return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+void rfi_flush_enable(bool enable)
+{
+ if (rfi_flush == enable)
+ return;
+
+ if (enable) {
+ do_rfi_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+ rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+ u64 l1d_size, limit;
+ int cpu;
+
+ l1d_size = ppc64_caches.l1d.size;
+ limit = min(safe_stack_limit(), ppc64_rma_size);
+
+ /*
+ * Align to L1d size, and size it at 2x L1d size, to catch possible
+ * hardware prefetch runoff. We don't have a recipe for load patterns to
+ * reliably avoid the prefetcher.
+ */
+ l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+ memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * The fallback flush is currently coded for 8-way
+ * associativity. Different associativity is possible, but it
+ * will be treated as 8-way and may not evict the lines as
+ * effectively.
+ *
+ * 128 byte lines are mandatory.
+ */
+ u64 c = l1d_size / 8;
+
+ paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca[cpu].l1d_flush_congruence = c;
+ paca[cpu].l1d_flush_sets = c / 128;
+ }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+ if (types & L1D_FLUSH_FALLBACK) {
+ pr_info("rfi-flush: Using fallback displacement flush\n");
+ init_fallback_flush();
+ }
+
+ if (types & L1D_FLUSH_ORI)
+ pr_info("rfi-flush: Using ori type flush\n");
+
+ if (types & L1D_FLUSH_MTTRIG)
+ pr_info("rfi-flush: Using mttrig type flush\n");
+
+ enabled_flush_types = types;
+
+ if (!no_rfi_flush)
+ rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* Read-only data */
RO_DATA(PAGE_SIZE)
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+ __start___rfi_flush_fixup = .;
+ *(__rfi_flush_fixup)
+ __stop___rfi_flush_fixup = .;
+ }
+#endif
+
EXCEPTION_TABLE(0)
NOTES :kernel :notes
gpte->may_read = true;
gpte->may_write = true;
gpte->page_size = MMU_PAGE_4K;
+ gpte->wimg = HPTE_R_M;
return 0;
}
u32 order;
/* These fields protected by kvm->lock */
+
+ /* Possible values and their usage:
+ * <0 an error occurred during allocation,
+ * -EBUSY allocation is in the progress,
+ * 0 allocation made successfuly.
+ */
int error;
- bool prepare_done;
- /* Private to the work thread, until prepare_done is true,
- * then protected by kvm->resize_hpt_sem */
+ /* Private to the work thread, until error != -EBUSY,
+ * then protected by kvm->lock.
+ */
struct kvm_hpt_info hpt;
};
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
- /* Ensure that each vcpu will flush its TLB on next entry. */
- cpumask_setall(&kvm->arch.need_tlb_flush);
err = 0;
goto out;
}
kvmppc_set_hpt(kvm, &info);
out:
+ if (err == 0)
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
mutex_unlock(&kvm->lock);
return err;
}
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
{
- BUG_ON(kvm->arch.resize_hpt != resize);
+ if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+ return;
if (!resize)
return;
- if (resize->hpt.virt)
- kvmppc_free_hpt(&resize->hpt);
+ if (resize->error != -EBUSY) {
+ if (resize->hpt.virt)
+ kvmppc_free_hpt(&resize->hpt);
+ kfree(resize);
+ }
- kvm->arch.resize_hpt = NULL;
- kfree(resize);
+ if (kvm->arch.resize_hpt == resize)
+ kvm->arch.resize_hpt = NULL;
}
static void resize_hpt_prepare_work(struct work_struct *work)
struct kvm_resize_hpt,
work);
struct kvm *kvm = resize->kvm;
- int err;
+ int err = 0;
- resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
- resize->order);
-
- err = resize_hpt_allocate(resize);
+ if (WARN_ON(resize->error != -EBUSY))
+ return;
mutex_lock(&kvm->lock);
+ /* Request is still current? */
+ if (kvm->arch.resize_hpt == resize) {
+ /* We may request large allocations here:
+ * do not sleep with kvm->lock held for a while.
+ */
+ mutex_unlock(&kvm->lock);
+
+ resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+ resize->order);
+
+ err = resize_hpt_allocate(resize);
+
+ /* We have strict assumption about -EBUSY
+ * when preparing for HPT resize.
+ */
+ if (WARN_ON(err == -EBUSY))
+ err = -EINPROGRESS;
+
+ mutex_lock(&kvm->lock);
+ /* It is possible that kvm->arch.resize_hpt != resize
+ * after we grab kvm->lock again.
+ */
+ }
+
resize->error = err;
- resize->prepare_done = true;
+
+ if (kvm->arch.resize_hpt != resize)
+ resize_hpt_release(kvm, resize);
mutex_unlock(&kvm->lock);
}
if (resize) {
if (resize->order == shift) {
- /* Suitable resize in progress */
- if (resize->prepare_done) {
- ret = resize->error;
- if (ret != 0)
- resize_hpt_release(kvm, resize);
- } else {
+ /* Suitable resize in progress? */
+ ret = resize->error;
+ if (ret == -EBUSY)
ret = 100; /* estimated time in ms */
- }
+ else if (ret)
+ resize_hpt_release(kvm, resize);
goto out;
}
ret = -ENOMEM;
goto out;
}
+
+ resize->error = -EBUSY;
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
if (!resize || (resize->order != shift))
goto out;
- ret = -EBUSY;
- if (!resize->prepare_done)
- goto out;
-
ret = resize->error;
- if (ret != 0)
+ if (ret)
goto out;
ret = resize_hpt_rehash(resize);
- if (ret != 0)
+ if (ret)
goto out;
resize_hpt_pivot(resize);
mtmsrd r0,1 /* clear RI in MSR */
mtsrr0 r5
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
kvmppc_call_hv_entry:
BEGIN_FTR_SECTION
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- RFI
+ RFI_TO_KERNEL
/* Virtual-mode return */
.Lvirt_return:
ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
-
- hrfid
+ HRFI_TO_GUEST
b .
secondary_too_late:
ld r4, PACAKMSR(r13)
mtspr SPRN_SRR0, r3
mtspr SPRN_SRR1, r4
- rfid
+ RFI_TO_KERNEL
9: addi r3, r1, STACK_FRAME_OVERHEAD
bl kvmppc_bad_interrupt
b 9b
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M _PAGE_COHERENT
#endif
static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
pte.eaddr = eaddr;
pte.vpage = eaddr >> 12;
pte.page_size = MMU_PAGE_64K;
+ pte.wimg = HPTE_R_M;
}
switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
#define FUNC(name) name
+#define RFI_TO_KERNEL RFI
+#define RFI_TO_GUEST RFI
+
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
GET_SCRATCH0(r13)
/* And get back into the code */
- RFI
+ RFI_TO_KERNEL
#endif
/*
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
#include "book3s_segment.S"
PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
- RFI
+ RFI_TO_GUEST
kvmppc_handler_trampoline_enter_end:
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
beqa BOOK3S_INTERRUPT_DOORBELL
- RFI
+ RFI_TO_KERNEL
kvmppc_handler_trampoline_exit_end:
}
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup),
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = 0x48000010;
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
return __bad_area(regs, address, SEGV_MAPERR);
}
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area(regs, address, SEGV_ACCERR);
+}
+
static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int fault)
{
good_area:
if (unlikely(access_error(is_write, is_exec, vma)))
- return bad_area(regs, address);
+ return bad_access(regs, address);
/*
* If for any reason at all we couldn't handle the fault,
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/tm.h>
+#include <asm/setup.h>
#include "powernv.h"
+static void pnv_setup_rfi_flush(void)
+{
+ struct device_node *np, *fw_features;
+ enum l1d_flush_type type;
+ int enable;
+
+ /* Default to fallback in case fw-features are not available */
+ type = L1D_FLUSH_FALLBACK;
+ enable = 1;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+
+ if (fw_features) {
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_MTTRIG;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_ORI;
+
+ of_node_put(np);
+
+ /* Enable unless firmware says NOT to */
+ enable = 2;
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+ of_node_put(fw_features);
+ }
+
+ setup_rfi_flush(type, enable > 0);
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+ pnv_setup_rfi_flush();
+
/* Initialize SMP */
pnv_smp_init();
static CLASS_ATTR_RW(dlpar);
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
{
+ if (pseries_hp_wq)
+ return 0;
+
pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
- WQ_UNBOUND, 1);
+ WQ_UNBOUND, 1);
+
+ return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+ int rc;
+
+ rc = dlpar_workqueue_init();
+ if (rc)
+ return rc;
+
return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
}
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
return CMO_PageSize;
}
+int dlpar_workqueue_init(void);
+
#endif /* _PSERIES_PSERIES_H */
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
- request_event_sources_irqs(np, ras_hotplug_interrupt,
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
of_pci_check_probe_only();
}
+static void pseries_setup_rfi_flush(void)
+{
+ struct h_cpu_char_result result;
+ enum l1d_flush_type types;
+ bool enable;
+ long rc;
+
+ /* Enable by default */
+ enable = true;
+
+ rc = plpar_get_cpu_characteristics(&result);
+ if (rc == H_SUCCESS) {
+ types = L1D_FLUSH_NONE;
+
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ types |= L1D_FLUSH_MTTRIG;
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ types |= L1D_FLUSH_ORI;
+
+ /* Use fallback if nothing set in hcall */
+ if (types == L1D_FLUSH_NONE)
+ types = L1D_FLUSH_FALLBACK;
+
+ if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ enable = false;
+ } else {
+ /* Default to fallback if case hcall is not available */
+ types = L1D_FLUSH_FALLBACK;
+ }
+
+ setup_rfi_flush(types, enable);
+}
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
fwnmi_init();
+ pseries_setup_rfi_flush();
+
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RAS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+# CONFIG_RCU_TRACE is not set
+CONFIG_CRYPTO_USER_API_HASH=y
#include <linux/const.h>
/* Status register flags */
-#define SR_IE _AC(0x00000002, UL) /* Interrupt Enable */
-#define SR_PIE _AC(0x00000020, UL) /* Previous IE */
-#define SR_PS _AC(0x00000100, UL) /* Previously Supervisor */
-#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
+#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
#define SR_FS_OFF _AC(0x00000000, UL)
#include <linux/types.h>
-#ifdef CONFIG_MMU
-
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
/*
extern void iounmap(volatile void __iomem *addr);
-#endif /* CONFIG_MMU */
-
/* Generic IO read/write. These perform native-endian accesses. */
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
/* unconditionally enable interrupts */
static inline void arch_local_irq_enable(void)
{
- csr_set(sstatus, SR_IE);
+ csr_set(sstatus, SR_SIE);
}
/* unconditionally disable interrupts */
static inline void arch_local_irq_disable(void)
{
- csr_clear(sstatus, SR_IE);
+ csr_clear(sstatus, SR_SIE);
}
/* get status and disable interrupts */
static inline unsigned long arch_local_irq_save(void)
{
- return csr_read_clear(sstatus, SR_IE);
+ return csr_read_clear(sstatus, SR_SIE);
}
/* test flags */
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
- return !(flags & SR_IE);
+ return !(flags & SR_SIE);
}
/* test hardware interrupt enable bit */
/* set interrupt enabled status */
static inline void arch_local_irq_restore(unsigned long flags)
{
- csr_set(sstatus, flags & SR_IE);
+ csr_set(sstatus, flags & SR_SIE);
}
#endif /* _ASM_RISCV_IRQFLAGS_H */
#ifndef __ASSEMBLY__
-#ifdef CONFIG_MMU
-
/* Page Upper Directory not used in RISC-V */
#include <asm-generic/pgtable-nopud.h>
#include <asm/page.h>
/* No page table caches to initialize */
}
-#endif /* CONFIG_MMU */
-
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define REG_FMT "%08lx"
#endif
-#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0)
+#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
/* Helpers for working with the instruction pointer */
#ifndef _ASM_RISCV_TLBFLUSH_H
#define _ASM_RISCV_TLBFLUSH_H
-#ifdef CONFIG_MMU
-
#include <linux/mm_types.h>
/*
flush_tlb_all();
}
-#endif /* CONFIG_MMU */
-
#endif /* _ASM_RISCV_TLBFLUSH_H */
* call.
*/
-#ifdef CONFIG_MMU
#define __get_user_asm(insn, x, ptr, err) \
do { \
uintptr_t __tmp; \
__disable_user_access(); \
(x) = __x; \
} while (0)
-#endif /* CONFIG_MMU */
#ifdef CONFIG_64BIT
#define __get_user_8(x, ptr, err) \
__get_user_asm("ld", x, ptr, err)
#else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
#define __get_user_8(x, ptr, err) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
(x) = (__typeof__(x))((__typeof__((x)-(x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
-#endif /* CONFIG_MMU */
#endif /* CONFIG_64BIT */
((x) = 0, -EFAULT); \
})
-
-#ifdef CONFIG_MMU
#define __put_user_asm(insn, x, ptr, err) \
do { \
uintptr_t __tmp; \
: "rJ" (__x), "i" (-EFAULT)); \
__disable_user_access(); \
} while (0)
-#endif /* CONFIG_MMU */
-
#ifdef CONFIG_64BIT
#define __put_user_8(x, ptr, err) \
__put_user_asm("sd", x, ptr, err)
#else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
#define __put_user_8(x, ptr, err) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
: "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
__disable_user_access(); \
} while (0)
-#endif /* CONFIG_MMU */
#endif /* CONFIG_64BIT */
* will set "err" to -EFAULT, while successful accesses return the previous
* value.
*/
-#ifdef CONFIG_MMU
#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
({ \
__typeof__(ptr) __ptr = (ptr); \
(err) = __err; \
__ret; \
})
-#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_UACCESS_H */
#define __ARCH_HAVE_MMU
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+#include <uapi/asm/syscalls.h>
+++ /dev/null
-/*
- * Copyright (C) 2017 SiFive
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
-#define _ASM_RISCV_VDSO_SYSCALLS_H
-
-#ifdef CONFIG_SMP
-
-/* These syscalls are only used by the vDSO and are not in the uapi. */
-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
-
-#endif
-
-#endif /* _ASM_RISCV_VDSO_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2017 SiFive
+ */
+
+#ifndef _ASM__UAPI__SYSCALLS_H
+#define _ASM__UAPI__SYSCALLS_H
+
+/*
+ * Allows the instruction cache to be flushed from userspace. Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart. There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller. We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
addi s2, s2, 0x4
REG_S s2, PT_SEPC(sp)
/* System calls run with interrupts enabled */
- csrs sstatus, SR_IE
+ csrs sstatus, SR_SIE
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_TRACE
ret_from_exception:
REG_L s0, PT_SSTATUS(sp)
- csrc sstatus, SR_IE
- andi s0, s0, SR_PS
+ csrc sstatus, SR_SIE
+ andi s0, s0, SR_SPP
bnez s0, restore_all
resume_userspace:
bnez s1, work_resched
work_notifysig:
/* Handle pending signals and notify-resume requests */
- csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */
+ csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
move a0, sp /* pt_regs */
move a1, s0 /* current_thread_info->flags */
tail do_notify_resume
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL;
+ regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
regs->sepc = pc;
regs->sp = sp;
set_fs(USER_DS);
const register unsigned long gp __asm__ ("gp");
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp;
- childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */
+ childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
p->thread.ra = (unsigned long)ret_from_kernel_thread;
p->thread.s[0] = usp; /* fn */
void *sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
};
#include <linux/linkage.h>
#include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
.text
/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
goto vmalloc_fault;
/* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->sstatus & SR_PIE))
+ if (likely(regs->sstatus & SR_SPIE))
local_irq_enable();
/*
if (kvm->arch.use_cmma) {
/*
- * Get the last slot. They should be sorted by base_gfn, so the
- * last slot is also the one at the end of the address space.
- * We have verified above that at least one slot is present.
+ * Get the first slot. They are reverse sorted by base_gfn, so
+ * the first slot is also the one at the end of the address
+ * space. We have verified above that at least one slot is
+ * present.
*/
- ms = slots->memslots + slots->used_slots - 1;
+ ms = slots->memslots;
/* round up so we only use full longs */
ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
/* allocate enough bytes to store all the bits */
cbrlo[entries] = gfn << PAGE_SHIFT;
}
- if (orc) {
+ if (orc && gfn < ms->bitmap_size) {
/* increment only if we are really flipping the bit to 1 */
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
atomic64_inc(&ms->dirty_pages);
void disable_sacf_uaccess(mm_segment_t old_fs)
{
+ current->thread.mm_segment = old_fs;
if (old_fs == USER_DS && test_facility(27)) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
- current->thread.mm_segment = old_fs;
}
EXPORT_SYMBOL(disable_sacf_uaccess);
static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
size_t size, int flags)
{
+ unsigned long irqflags;
+ int ret;
+
/*
* With zdev->tlb_refresh == 0, rpcit is not required to establish new
* translations when previously invalid translation-table entries are
return 0;
}
- return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
+ ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+ PAGE_ALIGN(size));
+ if (ret == -ENOMEM && !s390_iommu_strict) {
+ /* enable the hypervisor to free some resources */
+ if (zpci_refresh_global(zdev))
+ goto out;
+
+ spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+ bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+ zdev->lazy_bitmap, zdev->iommu_pages);
+ bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+ spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+ ret = 0;
+ }
+out:
+ return ret;
}
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
if (cc)
zpci_err_insn(cc, status, addr, range);
+ if (cc == 1 && (status == 4 || status == 16))
+ return -ENOMEM;
+
return (cc) ? -EIO : 0;
}
*/
#include <linux/init.h>
#include <linux/platform_device.h>
+#include <linux/sh_eth.h>
#include <mach-se/mach/se.h>
#include <mach-se/mach/mrshpc.h>
#include <asm/machvec.h>
#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
defined(CONFIG_CPU_SUBTYPE_SH7712)
/* SH771X Ethernet driver */
+static struct sh_eth_plat_data sh_eth_plat = {
+ .phy = PHY_ID,
+ .phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
static struct resource sh_eth0_resources[] = {
[0] = {
.start = SH_ETH0_BASE,
- .end = SH_ETH0_BASE + 0x1B8,
+ .end = SH_ETH0_BASE + 0x1B8 - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
+ .start = SH_TSU_BASE,
+ .end = SH_TSU_BASE + 0x200 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [2] = {
.start = SH_ETH0_IRQ,
.end = SH_ETH0_IRQ,
.flags = IORESOURCE_IRQ,
.name = "sh771x-ether",
.id = 0,
.dev = {
- .platform_data = PHY_ID,
+ .platform_data = &sh_eth_plat,
},
.num_resources = ARRAY_SIZE(sh_eth0_resources),
.resource = sh_eth0_resources,
static struct resource sh_eth1_resources[] = {
[0] = {
.start = SH_ETH1_BASE,
- .end = SH_ETH1_BASE + 0x1B8,
+ .end = SH_ETH1_BASE + 0x1B8 - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
+ .start = SH_TSU_BASE,
+ .end = SH_TSU_BASE + 0x200 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [2] = {
.start = SH_ETH1_IRQ,
.end = SH_ETH1_IRQ,
.flags = IORESOURCE_IRQ,
.name = "sh771x-ether",
.id = 1,
.dev = {
- .platform_data = PHY_ID,
+ .platform_data = &sh_eth_plat,
},
.num_resources = ARRAY_SIZE(sh_eth1_resources),
.resource = sh_eth1_resources,
/* Base address */
#define SH_ETH0_BASE 0xA7000000
#define SH_ETH1_BASE 0xA7000400
+#define SH_TSU_BASE 0xA7000800
/* PHY ID */
#if defined(CONFIG_CPU_SUBTYPE_SH7710)
# define PHY_ID 0x00
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
-EXPORT_SYMBOL(abort);
void __init trap_init(void)
{
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_PMEM_API if X86_64
- # Causing hangs/crashes, see the commit that added this change for details.
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 16), %rsp;
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 32), %rsp;
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
movzxw (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
- jmp *bufp
+ JMP_NOSPEC bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
* halves:
*/
-#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
-#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
.macro SET_NOFLUSH_BIT reg:req
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
.macro ADJUST_KERNEL_CR3 reg:req
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
- andq $(~PTI_SWITCH_MASK), \reg
+ andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
/* Flush needed, clear the bit */
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
movq \scratch_reg2, \scratch_reg
- jmp .Lwrcr3_\@
+ jmp .Lwrcr3_pcid_\@
.Lnoflush_\@:
movq \scratch_reg2, \scratch_reg
SET_NOFLUSH_BIT \scratch_reg
+.Lwrcr3_pcid_\@:
+ /* Flip the ASID to the user version */
+ orq $(PTI_USER_PCID_MASK), \scratch_reg
+
.Lwrcr3_\@:
- /* Flip the PGD and ASID to the user version */
- orq $(PTI_SWITCH_MASK), \scratch_reg
+ /* Flip the PGD to the user version */
+ orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
movq %cr3, \scratch_reg
movq \scratch_reg, \save_reg
/*
- * Is the "switch mask" all zero? That means that both of
- * these are zero:
- *
- * 1. The user/kernel PCID bit, and
- * 2. The user/kernel "bit" that points CR3 to the
- * bottom half of the 8k PGD
- *
- * That indicates a kernel CR3 value, not a user CR3.
+ * Test the user pagetable bit. If set, then the user page tables
+ * are active. If clear CR3 already has the kernel page table
+ * active.
*/
- testq $(PTI_SWITCH_MASK), \scratch_reg
- jz .Ldone_\@
+ bt $PTI_USER_PGTABLE_BIT, \scratch_reg
+ jnc .Ldone_\@
ADJUST_KERNEL_CR3 \scratch_reg
movq \scratch_reg, %cr3
* KERNEL pages can always resume with NOFLUSH as we do
* explicit flushes.
*/
- bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
+ bt $PTI_USER_PGTABLE_BIT, \save_reg
jnc .Lnoflush_\@
/*
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
/* kernel thread */
1: movl %edi, %eax
- call *%ebx
+ CALL_NOSPEC %ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(common_exception)
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
#include "calling.h"
*/
pushq %rdi
movq $entry_SYSCALL_64_stage2, %rdi
- jmp *%rdi
+ JMP_NOSPEC %rdi
END(entry_SYSCALL_64_trampoline)
.popsection
* It might end up jumping to the slow path. If it jumps, RAX
* and all argument registers are clobbered.
*/
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
.Lentry_SYSCALL_64_after_fastpath_call:
movq %rax, RAX(%rsp)
jmp entry_SYSCALL64_slow_path
1:
- jmp *%rax /* Called from C */
+ JMP_NOSPEC %rax /* Called from C */
END(stub_ptregs_64)
.macro ptregs_stub func
1:
/* kernel thread */
movq %r12, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
/* Interrupts are off on entry. */
swapgs
- /* Stash user ESP and switch to the kernel stack. */
+ /* Stash user ESP */
movl %esp, %r8d
+
+ /* Use %rsp as scratch reg. User ESP is stashed in r8 */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+ /* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */
- /*
- * We just saved %rdi so it is safe to clobber. It is not
- * preserved during the C calls inside TRACE_IRQS_OFF anyway.
- */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
return -ENODEV;
+ if (boot_cpu_has(X86_FEATURE_PTI)) {
+ /*
+ * BTS hardware writes through a virtual memory map we must
+ * either use the kernel physical map, or the user mapping of
+ * the AUX buffer.
+ *
+ * However, since this driver supports per-CPU and per-task inherit
+ * we cannot use the user mapping since it will not be availble
+ * if we're not running the owning process.
+ *
+ * With PTI we can't use the kernal map either, because its not
+ * there when we run userspace.
+ *
+ * For now, disable this driver when using PTI.
+ */
+ return -ENODEV;
+ }
+
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
PERF_PMU_CAP_EXCLUSIVE;
bts_pmu.task_ctx_nr = perf_sw_context;
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
+#include <asm/tlbflush.h>
#include <asm/insn.h>
#include "../perf_event.h"
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
+ unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;
pa = virt_to_phys(addr);
+
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
+
+ /*
+ * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+ * all TLB entries for it.
+ */
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}
static void ds_clear_cea(void *cea, size_t size)
{
+ unsigned long start = (unsigned long)cea;
size_t msz = 0;
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
+
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
- ".popsection"
+ ".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
- ".popsection"
+ ".popsection\n"
/*
* Alternative instructions for different CPU types or capabilities.
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
+#include <asm/asm.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
return U64_MAX;
__asm__ __volatile__("mov %4, %%r8\n"
- "call *%5"
+ CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address), "m" (hv_hypercall_pg)
+ : "r" (output_address),
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
if (!hv_hypercall_pg)
return U64_MAX;
- __asm__ __volatile__("call *%7"
+ __asm__ __volatile__(CALL_NOSPEC
: "=A" (hv_status),
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
#endif /* !x86_64 */
return hv_status;
#ifdef CONFIG_X86_64
{
- __asm__ __volatile__("call *%4"
+ __asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "m" (hv_hypercall_pg)
+ : THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
#else
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);
- __asm__ __volatile__ ("call *%5"
+ __asm__ __volatile__ (CALL_NOSPEC
: "=A"(hv_status),
"+c"(input1_lo),
ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "edi", "esi");
}
#endif
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.nospec
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.nospec\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+ SPECTRE_V2_RETPOLINE_MINIMAL,
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=&r" (loops), ASM_CALL_CONSTRAINT
+ : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
#define PCI_NOASSIGN_ROMS 0x80000
#define PCI_ROOT_NO_CRS 0x100000
#define PCI_NOASSIGN_BARS 0x200000
+#define PCI_BIG_ROOT_WINDOW 0x400000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-4, UL)
+# define LDT_PGD_ENTRY _AC(-3, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
-#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
+#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define CR3_NOFLUSH BIT_ULL(63)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define X86_CR3_PTI_SWITCH_BIT 11
+# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
#else
* Make sure that the dynamic ASID space does not confict with the
* bit we are using to switch between user and kernel ASIDs.
*/
- BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+ BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
/*
* The ASID being passed in here should have respected the
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
*/
- VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+ VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
/*
* The dynamically-assigned ASIDs that get passed in are small
{
u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+ ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
return ret;
}
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
- * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
- * only the iret frame registers are accessible. Use with caution!
+ * If 'partial' returns true, only the iret frame registers are valid.
*/
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+ bool *partial)
{
if (unwind_done(state))
return NULL;
+ if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+ *partial = !state->full_regs;
+#else
+ *partial = false;
+#endif
+ }
+
return state->regs;
}
#else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+ bool *partial)
{
return NULL;
}
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
acpi_nvs_nosave_s3();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
+ if (strncmp(str, "nobl", 4) == 0)
+ acpi_sleep_no_blacklist();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+static void __init spectre_v2_select_mitigation(void);
+
void __init check_bugs(void)
{
identify_boot_cpu();
print_cpu_info(&boot_cpu_data);
}
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
set_memory_4k((unsigned long)__va(0), 1);
#endif
}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret;
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret > 0) {
+ if (match_option(arg, ret, "off")) {
+ goto disable;
+ } else if (match_option(arg, ret, "on")) {
+ spec2_print_if_secure("force enabled on command line.");
+ return SPECTRE_V2_CMD_FORCE;
+ } else if (match_option(arg, ret, "retpoline")) {
+ spec2_print_if_insecure("retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE;
+ } else if (match_option(arg, ret, "retpoline,amd")) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ spec2_print_if_insecure("AMD retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_AMD;
+ } else if (match_option(arg, ret, "retpoline,generic")) {
+ spec2_print_if_insecure("generic retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+ } else if (match_option(arg, ret, "auto")) {
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_AUTO;
+disable:
+ spec2_print_if_insecure("disabled on command line.");
+ return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ /* FALLTRHU */
+ case SPECTRE_V2_CMD_AUTO:
+ goto retpoline_auto;
+
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- /* Assume for now that ALL x86 CPUs are insecure */
- setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
fpu__init_system(c);
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * may result in a system hang. This behavior is documented in item
+ * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
regs->sp, regs->flags);
}
-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+ bool partial)
{
- if (on_stack(info, regs, sizeof(*regs)))
+ /*
+ * These on_stack() checks aren't strictly necessary: the unwind code
+ * has already validated the 'regs' pointer. The checks are done for
+ * ordering reasons: if the registers are on the next stack, we don't
+ * want to print them out yet. Otherwise they'll be shown as part of
+ * the wrong stack. Later, when show_trace_log_lvl() switches to the
+ * next stack, this function will be called again with the same regs so
+ * they can be printed in the right context.
+ */
+ if (!partial && on_stack(info, regs, sizeof(*regs))) {
__show_regs(regs, 0);
- else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
- IRET_FRAME_SIZE)) {
+
+ } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+ IRET_FRAME_SIZE)) {
/*
* When an interrupt or exception occurs in entry code, the
* full pt_regs might not have been saved yet. In that case
struct stack_info stack_info = {0};
unsigned long visit_mask = 0;
int graph_idx = 0;
+ bool partial;
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
stack = stack ? : get_stack_pointer(task, regs);
+ regs = unwind_get_entry_regs(&state, &partial);
/*
* Iterate through the stacks, starting with the current stack pointer.
* - hardirq stack
* - entry stack
*/
- for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
const char *stack_name;
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
printk("%s <%s>\n", log_lvl, stack_name);
if (regs)
- show_regs_safe(&stack_info, regs);
+ show_regs_if_on_stack(&stack_info, regs, partial);
/*
* Scan the stack, printing any text addresses we find. At the
/*
* Don't print regs->ip again if it was already printed
- * by show_regs_safe() below.
+ * by show_regs_if_on_stack().
*/
if (regs && stack == ®s->ip)
goto next;
unwind_next_frame(&state);
/* if the frame has entry regs, print them */
- regs = unwind_get_entry_regs(&state);
+ regs = unwind_get_entry_regs(&state, &partial);
if (regs)
- show_regs_safe(&stack_info, regs);
+ show_regs_if_on_stack(&stack_info, regs, partial);
}
if (stack_name)
#include <asm/segment.h>
#include <asm/export.h>
#include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
#ifdef CC_USING_FENTRY
# define function_hook __fentry__
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
-
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
set_bit(EFI_BOOT, &efi.flags);
set_bit(EFI_64BIT, &efi.flags);
}
-
- if (efi_enabled(EFI_BOOT))
- efi_memblock_x86_reserve_range();
#endif
x86_init.oem.arch_setup();
parse_early_param();
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
- regs = unwind_get_entry_regs(&state);
+ regs = unwind_get_entry_regs(&state, NULL);
if (regs) {
/*
* Kernel mode registers on the stack indicate an
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
pte_unmap(pte);
+
+ /*
+ * PTI poisons low addresses in the kernel page tables in the
+ * name of making them unusable for userspace. To execute
+ * code at such a low address, the poison must be cleared.
+ *
+ * Note: 'pgd' actually gets set in p4d_alloc() _or_
+ * pud_alloc() depending on 4/5-level paging.
+ */
+ pgd->pgd &= ~_PAGE_NX;
+
return 0;
}
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu)))
+ kvm_event_needs_reinjection(vcpu) ||
+ vcpu->arch.exception.pending))
return false;
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
int kvm_mmu_module_init(void)
{
+ int ret = -ENOMEM;
+
kvm_mmu_clear_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
if (!pte_list_desc_cache)
- goto nomem;
+ goto out;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
sizeof(struct kvm_mmu_page),
0, SLAB_ACCOUNT, NULL);
if (!mmu_page_header_cache)
- goto nomem;
+ goto out;
if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
- goto nomem;
+ goto out;
- register_shrinker(&mmu_shrinker);
+ ret = register_shrinker(&mmu_shrinker);
+ if (ret)
+ goto out;
return 0;
-nomem:
+out:
mmu_destroy_caches();
- return -ENOMEM;
+ return ret;
}
/*
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
- u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
h = &svm->nested.hsave->control;
g = &svm->nested;
- /* No need to intercept #UD if L1 doesn't intercept it */
- h_intercept_exceptions =
- h->intercept_exceptions & ~(1U << UD_VECTOR);
-
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions =
- h_intercept_exceptions | g->intercept_exceptions;
+ c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
{
int er;
- WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
"mov %%r13, %c[r13](%[svm]) \n\t"
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
+#endif
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+ "xor %%r8, %%r8 \n\t"
+ "xor %%r9, %%r9 \n\t"
+ "xor %%r10, %%r10 \n\t"
+ "xor %%r11, %%r11 \n\t"
+ "xor %%r12, %%r12 \n\t"
+ "xor %%r13, %%r13 \n\t"
+ "xor %%r14, %%r14 \n\t"
+ "xor %%r15, %%r15 \n\t"
#endif
"pop %%" _ASM_BP
:
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ return -ENOENT;
+
+ /*
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+ * generic mechanism.
+ */
+ asm("lfence");
+
+ if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
{
u32 eb;
- eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- else
- eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
- WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
+ CPU_ENTRY_AREA_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
- CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
#define TB_SHIFT 40
/*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
*
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
unsigned long remain_entropy;
/*
- * All these BUILD_BUG_ON checks ensures the memory layout is
- * consistent with the vaddr_start/vaddr_end variables.
+ * These BUILD_BUG_ON checks ensure the memory layout is consistent
+ * with the vaddr_start/vaddr_end variables. These checks are very
+ * limited....
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
- BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
- vaddr_end >= EFI_VA_END);
- BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
- IS_ENABLED(CONFIG_EFI)) &&
- vaddr_end >= __START_KERNEL_map);
+ BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
if (!kaslr_memory_enabled())
static void __init pti_print_if_insecure(const char *reason)
{
- if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
pr_info("%s\n", reason);
}
static void __init pti_print_if_secure(const char *reason)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
pr_info("%s\n", reason);
}
}
autosel:
- if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return;
enable:
setup_force_cpu_cap(X86_FEATURE_PTI);
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
if (!new_p4d_page)
return NULL;
- if (pgd_none(*pgd)) {
- set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- new_p4d_page = 0;
- }
- if (new_p4d_page)
- free_page(new_p4d_page);
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
if (!new_pud_page)
return NULL;
- if (p4d_none(*p4d)) {
- set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
- new_pud_page = 0;
- }
- if (new_pud_page)
- free_page(new_pud_page);
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
}
pud = pud_offset(p4d, address);
if (!new_pmd_page)
return NULL;
- if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
- new_pmd_page = 0;
- }
- if (new_pmd_page)
- free_page(new_pmd_page);
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
}
return pmd_offset(pud, address);
if (!new_pte_page)
return NULL;
- if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
- new_pte_page = 0;
- }
- if (new_pte_page)
- free_page(new_pte_page);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}
pte = pte_offset_kernel(pmd, address);
static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
- (unsigned long) __irqentry_text_end, _PAGE_RW);
+ (unsigned long) __irqentry_text_end,
+ _PAGE_RW | _PAGE_GLOBAL);
}
/*
} else if (!strcmp(str, "nocrs")) {
pci_probe |= PCI_ROOT_NO_CRS;
return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ } else if (!strcmp(str, "big_root_window")) {
+ pci_probe |= PCI_BIG_ROOT_WINDOW;
+ return NULL;
+#endif
} else if (!strcmp(str, "earlydump")) {
pci_early_dump_regs = 1;
return NULL;
*/
static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
{
- unsigned i;
u32 base, limit, high;
- struct resource *res, *conflict;
struct pci_dev *other;
+ struct resource *res;
+ unsigned i;
+ int r;
+
+ if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+ return;
/* Check that we are the only device of that type */
other = pci_get_device(dev->vendor, dev->device, NULL);
if (!res)
return;
+ /*
+ * Allocate a 256GB window directly below the 0xfd00000000 hardware
+ * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+ */
res->name = "PCI Bus 0000:00";
res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
- res->start = 0x100000000ull;
+ res->start = 0xbd00000000ull;
res->end = 0xfd00000000ull - 1;
- /* Just grab the free area behind system memory for this */
- while ((conflict = request_resource_conflict(&iomem_resource, res))) {
- if (conflict->end >= res->end) {
- kfree(res);
- return;
- }
- res->start = conflict->end + 1;
+ r = request_resource(&iomem_resource, res);
+ if (r) {
+ kfree(res);
+ return;
}
- dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+ dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+ res);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
pud[j] = *pud_offset(p4d_k, vaddr);
}
}
+ pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
}
+
out:
__flush_tlb_all();
/*
* Update the first page pointer to skip over the CSH header.
*/
- cap_info->pages[0] += csh->headersize;
+ cap_info->phys[0] += csh->headersize;
+
+ /*
+ * cap_info->capsule should point at a virtual mapping of the entire
+ * capsule, starting at the capsule header. Our image has the Quark
+ * security header prepended, so we cannot rely on the default vmap()
+ * mapping created by the generic capsule code.
+ * Given that the Quark firmware does not appear to care about the
+ * virtual mapping, let's just point cap_info->capsule at our copy
+ * of the capsule header.
+ */
+ cap_info->capsule = &cap_info->header;
return 1;
}
return 0;
}
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
{
struct {
struct mmuext_op op;
-#ifdef CONFIG_SMP
- DECLARE_BITMAP(mask, num_processors);
-#else
DECLARE_BITMAP(mask, NR_CPUS);
-#endif
} *args;
struct multicall_space mcs;
+ const size_t mc_entry_size = sizeof(args->op) +
+ sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus))
return; /* nothing to do */
- mcs = xen_mc_entry(sizeof(*args));
+ mcs = xen_mc_entry(mc_entry_size);
args = mcs.args;
args->op.arg2.vcpumask = to_cpumask(args->mask);
void xen_setup_cpu_clockevents(void);
void xen_save_time_memory_area(void);
void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
}
}
+void blk_drain_queue(struct request_queue *q)
+{
+ spin_lock_irq(q->queue_lock);
+ __blk_drain_queue(q, true);
+ spin_unlock_irq(q->queue_lock);
+}
+
/**
* blk_queue_bypass_start - enter queue bypass mode
* @q: queue of interest
*/
blk_freeze_queue(q);
spin_lock_irq(lock);
- if (!q->mq_ops)
- __blk_drain_queue(q, true);
queue_flag_set(QUEUE_FLAG_DEAD, q);
spin_unlock_irq(lock);
* exported to drivers as the only user for unfreeze is blk_mq.
*/
blk_freeze_queue_start(q);
+ if (!q->mq_ops)
+ blk_drain_queue(q);
blk_mq_freeze_queue_wait(q);
}
}
#endif /* CONFIG_BOUNCE */
+extern void blk_drain_queue(struct request_queue *q);
+
#endif /* BLK_INTERNAL_H */
unsigned int i;
list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
- ctx->rcvused -= rsgl->sg_num_bytes;
+ atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused);
af_alg_free_sg(&rsgl->sgl);
list_del(&rsgl->list);
if (rsgl != &areq->first_rsgl)
areq->last_rsgl = rsgl;
len += err;
- ctx->rcvused += err;
+ atomic_add(err, &ctx->rcvused);
rsgl->sg_num_bytes = err;
iov_iter_advance(&msg->msg_iter, err);
}
spawn->alg = NULL;
spawns = &inst->alg.cra_users;
+
+ /*
+ * We may encounter an unregistered instance here, since
+ * an instance's spawns are set up prior to the instance
+ * being registered. An unregistered instance will have
+ * NULL ->cra_users.next, since ->cra_users isn't
+ * properly initialized until registration. But an
+ * unregistered instance cannot have any users, so treat
+ * it the same as ->cra_users being empty.
+ */
+ if (spawns->next == NULL)
+ break;
}
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
&secondary_spawns)));
INIT_LIST_HEAD(&ctx->tsgl_list);
ctx->len = len;
ctx->used = 0;
- ctx->rcvused = 0;
+ atomic_set(&ctx->rcvused, 0);
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
INIT_LIST_HEAD(&ctx->tsgl_list);
ctx->len = len;
ctx->used = 0;
- ctx->rcvused = 0;
+ atomic_set(&ctx->rcvused, 0);
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
algt->mask));
if (IS_ERR(poly))
return PTR_ERR(poly);
+ poly_hash = __crypto_hash_alg_common(poly);
+
+ err = -EINVAL;
+ if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+ goto out_put_poly;
err = -ENOMEM;
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
ctx = aead_instance_ctx(inst);
ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
- poly_hash = __crypto_hash_alg_common(poly);
err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
aead_crypto_instance(inst));
if (err)
crypto_free_aead(ctx->child);
}
+static void pcrypt_free(struct aead_instance *inst)
+{
+ struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+ crypto_drop_aead(&ctx->spawn);
+ kfree(inst);
+}
+
static int pcrypt_init_instance(struct crypto_instance *inst,
struct crypto_alg *alg)
{
inst->alg.encrypt = pcrypt_aead_encrypt;
inst->alg.decrypt = pcrypt_aead_decrypt;
+ inst->free = pcrypt_free;
+
err = aead_register_instance(tmpl, inst);
if (err)
goto out_drop_aead;
return -EINVAL;
}
-static void pcrypt_free(struct crypto_instance *inst)
-{
- struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
- crypto_drop_aead(&ctx->spawn);
- kfree(inst);
-}
-
static int pcrypt_cpumask_change_notify(struct notifier_block *self,
unsigned long val, void *data)
{
static struct crypto_template pcrypt_tmpl = {
.name = "pcrypt",
.create = pcrypt_create,
- .free = pcrypt_free,
.module = THIS_MODULE,
};
* the sleep state it is going out of and it has never been resumed till
* now, resume it in case the firmware powered it up.
*/
- if (dev->power.direct_complete && pm_resume_via_firmware())
+ if (pm_runtime_suspended(dev) && pm_resume_via_firmware())
pm_request_resume(dev);
}
EXPORT_SYMBOL_GPL(acpi_subsys_complete);
*/
int acpi_subsys_suspend_noirq(struct device *dev)
{
- if (dev_pm_smart_suspend_and_suspended(dev))
+ int ret;
+
+ if (dev_pm_smart_suspend_and_suspended(dev)) {
+ dev->power.may_skip_resume = true;
return 0;
+ }
+
+ ret = pm_generic_suspend_noirq(dev);
+ if (ret)
+ return ret;
+
+ /*
+ * If the target system sleep state is suspend-to-idle, it is sufficient
+ * to check whether or not the device's wakeup settings are good for
+ * runtime PM. Otherwise, the pm_resume_via_firmware() check will cause
+ * acpi_subsys_complete() to take care of fixing up the device's state
+ * anyway, if need be.
+ */
+ dev->power.may_skip_resume = device_may_wakeup(dev) ||
+ !device_can_wakeup(dev);
- return pm_generic_suspend_noirq(dev);
+ return 0;
}
EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq);
*/
int acpi_subsys_resume_noirq(struct device *dev)
{
+ if (dev_pm_may_skip_resume(dev))
+ return 0;
+
/*
* Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend
* during system suspend, so update their runtime PM status to "active"
{},
};
+static bool ignore_blacklist;
+
+void __init acpi_sleep_no_blacklist(void)
+{
+ ignore_blacklist = true;
+}
+
static void __init acpi_sleep_dmi_check(void)
{
int year;
+ if (ignore_blacklist)
+ return;
+
if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year >= 2012)
acpi_nvs_nosave_s3();
#define ACPI_LPS0_ENTRY 5
#define ACPI_LPS0_EXIT 6
-#define ACPI_S2IDLE_FUNC_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT))
+#define ACPI_LPS0_SCREEN_MASK ((1 << ACPI_LPS0_SCREEN_OFF) | (1 << ACPI_LPS0_SCREEN_ON))
+#define ACPI_LPS0_PLATFORM_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT))
static acpi_handle lps0_device_handle;
static guid_t lps0_dsm_guid;
if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) {
char bitmask = *(char *)out_obj->buffer.pointer;
- if ((bitmask & ACPI_S2IDLE_FUNC_MASK) == ACPI_S2IDLE_FUNC_MASK) {
+ if ((bitmask & ACPI_LPS0_PLATFORM_MASK) == ACPI_LPS0_PLATFORM_MASK ||
+ (bitmask & ACPI_LPS0_SCREEN_MASK) == ACPI_LPS0_SCREEN_MASK) {
lps0_dsm_func_mask = bitmask;
lps0_device_handle = adev->handle;
/*
config GENERIC_CPU_AUTOPROBE
bool
+config GENERIC_CPU_VULNERABILITIES
+ bool
+
config SOC_BUS
bool
select GLOB
#endif
}
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+ .name = "vulnerabilities",
+ .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+ if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_vulnerabilities_group))
+ pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
void __init cpu_dev_init(void)
{
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
panic("Failed to register CPU subsystem");
cpu_dev_register_generic();
+ cpu_register_vulnerabilities();
}
static int genpd_finish_suspend(struct device *dev, bool poweroff)
{
struct generic_pm_domain *genpd;
- int ret;
+ int ret = 0;
genpd = dev_to_genpd(dev);
if (IS_ERR(genpd))
return -EINVAL;
- if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd))
- return 0;
-
if (poweroff)
ret = pm_generic_poweroff_noirq(dev);
else
if (ret)
return ret;
- if (genpd->dev_ops.stop && genpd->dev_ops.start) {
- ret = pm_runtime_force_suspend(dev);
- if (ret)
+ if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd))
+ return 0;
+
+ if (genpd->dev_ops.stop && genpd->dev_ops.start &&
+ !pm_runtime_status_suspended(dev)) {
+ ret = genpd_stop_dev(genpd, dev);
+ if (ret) {
+ if (poweroff)
+ pm_generic_restore_noirq(dev);
+ else
+ pm_generic_resume_noirq(dev);
return ret;
+ }
}
genpd_lock(genpd);
static int genpd_resume_noirq(struct device *dev)
{
struct generic_pm_domain *genpd;
- int ret = 0;
+ int ret;
dev_dbg(dev, "%s()\n", __func__);
return -EINVAL;
if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd))
- return 0;
+ return pm_generic_resume_noirq(dev);
genpd_lock(genpd);
genpd_sync_power_on(genpd, true, 0);
genpd->suspended_count--;
genpd_unlock(genpd);
- if (genpd->dev_ops.stop && genpd->dev_ops.start)
- ret = pm_runtime_force_resume(dev);
-
- ret = pm_generic_resume_noirq(dev);
- if (ret)
- return ret;
+ if (genpd->dev_ops.stop && genpd->dev_ops.start &&
+ !pm_runtime_status_suspended(dev)) {
+ ret = genpd_start_dev(genpd, dev);
+ if (ret)
+ return ret;
+ }
- return ret;
+ return pm_generic_resume_noirq(dev);
}
/**
if (ret)
return ret;
- if (genpd->dev_ops.stop && genpd->dev_ops.start)
- ret = pm_runtime_force_suspend(dev);
+ if (genpd->dev_ops.stop && genpd->dev_ops.start &&
+ !pm_runtime_status_suspended(dev))
+ ret = genpd_stop_dev(genpd, dev);
return ret;
}
if (IS_ERR(genpd))
return -EINVAL;
- if (genpd->dev_ops.stop && genpd->dev_ops.start) {
- ret = pm_runtime_force_resume(dev);
+ if (genpd->dev_ops.stop && genpd->dev_ops.start &&
+ !pm_runtime_status_suspended(dev)) {
+ ret = genpd_start_dev(genpd, dev);
if (ret)
return ret;
}
genpd_sync_power_on(genpd, true, 0);
genpd_unlock(genpd);
- if (genpd->dev_ops.stop && genpd->dev_ops.start) {
- ret = pm_runtime_force_resume(dev);
+ if (genpd->dev_ops.stop && genpd->dev_ops.start &&
+ !pm_runtime_status_suspended(dev)) {
+ ret = genpd_start_dev(genpd, dev);
if (ret)
return ret;
}
ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells", 0, &pd_args);
- if (ret < 0) {
- if (ret != -ENOENT)
- return ret;
-
- /*
- * Try legacy Samsung-specific bindings
- * (for backwards compatibility of DT ABI)
- */
- pd_args.args_count = 0;
- pd_args.np = of_parse_phandle(dev->of_node,
- "samsung,power-domain", 0);
- if (!pd_args.np)
- return -ENOENT;
- }
+ if (ret < 0)
+ return ret;
mutex_lock(&gpd_list_lock);
pd = genpd_get_from_provider(&pd_args);
*/
#include <linux/device.h>
-#include <linux/kallsyms.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/pm.h>
dev->power.is_suspended = false;
}
+/**
+ * suspend_event - Return a "suspend" message for given "resume" one.
+ * @resume_msg: PM message representing a system-wide resume transition.
+ */
+static pm_message_t suspend_event(pm_message_t resume_msg)
+{
+ switch (resume_msg.event) {
+ case PM_EVENT_RESUME:
+ return PMSG_SUSPEND;
+ case PM_EVENT_THAW:
+ case PM_EVENT_RESTORE:
+ return PMSG_FREEZE;
+ case PM_EVENT_RECOVER:
+ return PMSG_HIBERNATE;
+ }
+ return PMSG_ON;
+}
+
+/**
+ * dev_pm_may_skip_resume - System-wide device resume optimization check.
+ * @dev: Target device.
+ *
+ * Checks whether or not the device may be left in suspend after a system-wide
+ * transition to the working state.
+ */
+bool dev_pm_may_skip_resume(struct device *dev)
+{
+ return !dev->power.must_resume && pm_transition.event != PM_EVENT_RESTORE;
+}
+
+static pm_callback_t dpm_subsys_resume_noirq_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p)
+{
+ pm_callback_t callback;
+ const char *info;
+
+ if (dev->pm_domain) {
+ info = "noirq power domain ";
+ callback = pm_noirq_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "noirq type ";
+ callback = pm_noirq_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "noirq class ";
+ callback = pm_noirq_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "noirq bus ";
+ callback = pm_noirq_op(dev->bus->pm, state);
+ } else {
+ return NULL;
+ }
+
+ if (info_p)
+ *info_p = info;
+
+ return callback;
+}
+
+static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p);
+
+static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p);
+
/**
* device_resume_noirq - Execute a "noirq resume" callback for given device.
* @dev: Device to handle.
*/
static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback = NULL;
- const char *info = NULL;
+ pm_callback_t callback;
+ const char *info;
+ bool skip_resume;
int error = 0;
TRACE_DEVICE(dev);
dpm_wait_for_superior(dev, async);
- if (dev->pm_domain) {
- info = "noirq power domain ";
- callback = pm_noirq_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "noirq type ";
- callback = pm_noirq_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "noirq class ";
- callback = pm_noirq_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "noirq bus ";
- callback = pm_noirq_op(dev->bus->pm, state);
+ skip_resume = dev_pm_may_skip_resume(dev);
+
+ callback = dpm_subsys_resume_noirq_cb(dev, state, &info);
+ if (callback)
+ goto Run;
+
+ if (skip_resume)
+ goto Skip;
+
+ if (dev_pm_smart_suspend_and_suspended(dev)) {
+ pm_message_t suspend_msg = suspend_event(state);
+
+ /*
+ * If "freeze" callbacks have been skipped during a transition
+ * related to hibernation, the subsequent "thaw" callbacks must
+ * be skipped too or bad things may happen. Otherwise, resume
+ * callbacks are going to be run for the device, so its runtime
+ * PM status must be changed to reflect the new state after the
+ * transition under way.
+ */
+ if (!dpm_subsys_suspend_late_cb(dev, suspend_msg, NULL) &&
+ !dpm_subsys_suspend_noirq_cb(dev, suspend_msg, NULL)) {
+ if (state.event == PM_EVENT_THAW) {
+ skip_resume = true;
+ goto Skip;
+ } else {
+ pm_runtime_set_active(dev);
+ }
+ }
}
- if (!callback && dev->driver && dev->driver->pm) {
+ if (dev->driver && dev->driver->pm) {
info = "noirq driver ";
callback = pm_noirq_op(dev->driver->pm, state);
}
+Run:
error = dpm_run_callback(callback, dev, state, info);
+
+Skip:
dev->power.is_noirq_suspended = false;
- Out:
+ if (skip_resume) {
+ /*
+ * The device is going to be left in suspend, but it might not
+ * have been in runtime suspend before the system suspended, so
+ * its runtime PM status needs to be updated to avoid confusing
+ * the runtime PM framework when runtime PM is enabled for the
+ * device again.
+ */
+ pm_runtime_set_suspended(dev);
+ dev_pm_skip_next_resume_phases(dev);
+ }
+
+Out:
complete_all(&dev->power.completion);
TRACE_RESUME(error);
return error;
dpm_noirq_end();
}
+static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p)
+{
+ pm_callback_t callback;
+ const char *info;
+
+ if (dev->pm_domain) {
+ info = "early power domain ";
+ callback = pm_late_early_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "early type ";
+ callback = pm_late_early_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "early class ";
+ callback = pm_late_early_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "early bus ";
+ callback = pm_late_early_op(dev->bus->pm, state);
+ } else {
+ return NULL;
+ }
+
+ if (info_p)
+ *info_p = info;
+
+ return callback;
+}
+
/**
* device_resume_early - Execute an "early resume" callback for given device.
* @dev: Device to handle.
*/
static int device_resume_early(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback = NULL;
- const char *info = NULL;
+ pm_callback_t callback;
+ const char *info;
int error = 0;
TRACE_DEVICE(dev);
dpm_wait_for_superior(dev, async);
- if (dev->pm_domain) {
- info = "early power domain ";
- callback = pm_late_early_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "early type ";
- callback = pm_late_early_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "early class ";
- callback = pm_late_early_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "early bus ";
- callback = pm_late_early_op(dev->bus->pm, state);
- }
+ callback = dpm_subsys_resume_early_cb(dev, state, &info);
if (!callback && dev->driver && dev->driver->pm) {
info = "early driver ";
return PMSG_ON;
}
+static void dpm_superior_set_must_resume(struct device *dev)
+{
+ struct device_link *link;
+ int idx;
+
+ if (dev->parent)
+ dev->parent->power.must_resume = true;
+
+ idx = device_links_read_lock();
+
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+ link->supplier->power.must_resume = true;
+
+ device_links_read_unlock(idx);
+}
+
+static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p)
+{
+ pm_callback_t callback;
+ const char *info;
+
+ if (dev->pm_domain) {
+ info = "noirq power domain ";
+ callback = pm_noirq_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "noirq type ";
+ callback = pm_noirq_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "noirq class ";
+ callback = pm_noirq_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "noirq bus ";
+ callback = pm_noirq_op(dev->bus->pm, state);
+ } else {
+ return NULL;
+ }
+
+ if (info_p)
+ *info_p = info;
+
+ return callback;
+}
+
+static bool device_must_resume(struct device *dev, pm_message_t state,
+ bool no_subsys_suspend_noirq)
+{
+ pm_message_t resume_msg = resume_event(state);
+
+ /*
+ * If all of the device driver's "noirq", "late" and "early" callbacks
+ * are invoked directly by the core, the decision to allow the device to
+ * stay in suspend can be based on its current runtime PM status and its
+ * wakeup settings.
+ */
+ if (no_subsys_suspend_noirq &&
+ !dpm_subsys_suspend_late_cb(dev, state, NULL) &&
+ !dpm_subsys_resume_early_cb(dev, resume_msg, NULL) &&
+ !dpm_subsys_resume_noirq_cb(dev, resume_msg, NULL))
+ return !pm_runtime_status_suspended(dev) &&
+ (resume_msg.event != PM_EVENT_RESUME ||
+ (device_can_wakeup(dev) && !device_may_wakeup(dev)));
+
+ /*
+ * The only safe strategy here is to require that if the device may not
+ * be left in suspend, resume callbacks must be invoked for it.
+ */
+ return !dev->power.may_skip_resume;
+}
+
/**
* __device_suspend_noirq - Execute a "noirq suspend" callback for given device.
* @dev: Device to handle.
*/
static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback = NULL;
- const char *info = NULL;
+ pm_callback_t callback;
+ const char *info;
+ bool no_subsys_cb = false;
int error = 0;
TRACE_DEVICE(dev);
if (dev->power.syscore || dev->power.direct_complete)
goto Complete;
- if (dev->pm_domain) {
- info = "noirq power domain ";
- callback = pm_noirq_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "noirq type ";
- callback = pm_noirq_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "noirq class ";
- callback = pm_noirq_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "noirq bus ";
- callback = pm_noirq_op(dev->bus->pm, state);
- }
+ callback = dpm_subsys_suspend_noirq_cb(dev, state, &info);
+ if (callback)
+ goto Run;
- if (!callback && dev->driver && dev->driver->pm) {
+ no_subsys_cb = !dpm_subsys_suspend_late_cb(dev, state, NULL);
+
+ if (dev_pm_smart_suspend_and_suspended(dev) && no_subsys_cb)
+ goto Skip;
+
+ if (dev->driver && dev->driver->pm) {
info = "noirq driver ";
callback = pm_noirq_op(dev->driver->pm, state);
}
+Run:
error = dpm_run_callback(callback, dev, state, info);
- if (!error)
- dev->power.is_noirq_suspended = true;
- else
+ if (error) {
async_error = error;
+ goto Complete;
+ }
+
+Skip:
+ dev->power.is_noirq_suspended = true;
+
+ if (dev_pm_test_driver_flags(dev, DPM_FLAG_LEAVE_SUSPENDED)) {
+ dev->power.must_resume = dev->power.must_resume ||
+ atomic_read(&dev->power.usage_count) > 1 ||
+ device_must_resume(dev, state, no_subsys_cb);
+ } else {
+ dev->power.must_resume = true;
+ }
+
+ if (dev->power.must_resume)
+ dpm_superior_set_must_resume(dev);
Complete:
complete_all(&dev->power.completion);
return ret;
}
+static void dpm_propagate_wakeup_to_parent(struct device *dev)
+{
+ struct device *parent = dev->parent;
+
+ if (!parent)
+ return;
+
+ spin_lock_irq(&parent->power.lock);
+
+ if (dev->power.wakeup_path && !parent->power.ignore_children)
+ parent->power.wakeup_path = true;
+
+ spin_unlock_irq(&parent->power.lock);
+}
+
+static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
+ pm_message_t state,
+ const char **info_p)
+{
+ pm_callback_t callback;
+ const char *info;
+
+ if (dev->pm_domain) {
+ info = "late power domain ";
+ callback = pm_late_early_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "late type ";
+ callback = pm_late_early_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "late class ";
+ callback = pm_late_early_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "late bus ";
+ callback = pm_late_early_op(dev->bus->pm, state);
+ } else {
+ return NULL;
+ }
+
+ if (info_p)
+ *info_p = info;
+
+ return callback;
+}
+
/**
* __device_suspend_late - Execute a "late suspend" callback for given device.
* @dev: Device to handle.
*/
static int __device_suspend_late(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback = NULL;
- const char *info = NULL;
+ pm_callback_t callback;
+ const char *info;
int error = 0;
TRACE_DEVICE(dev);
if (dev->power.syscore || dev->power.direct_complete)
goto Complete;
- if (dev->pm_domain) {
- info = "late power domain ";
- callback = pm_late_early_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "late type ";
- callback = pm_late_early_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "late class ";
- callback = pm_late_early_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "late bus ";
- callback = pm_late_early_op(dev->bus->pm, state);
- }
+ callback = dpm_subsys_suspend_late_cb(dev, state, &info);
+ if (callback)
+ goto Run;
- if (!callback && dev->driver && dev->driver->pm) {
+ if (dev_pm_smart_suspend_and_suspended(dev) &&
+ !dpm_subsys_suspend_noirq_cb(dev, state, NULL))
+ goto Skip;
+
+ if (dev->driver && dev->driver->pm) {
info = "late driver ";
callback = pm_late_early_op(dev->driver->pm, state);
}
+Run:
error = dpm_run_callback(callback, dev, state, info);
- if (!error)
- dev->power.is_late_suspended = true;
- else
+ if (error) {
async_error = error;
+ goto Complete;
+ }
+ dpm_propagate_wakeup_to_parent(dev);
+
+Skip:
+ dev->power.is_late_suspended = true;
Complete:
TRACE_SUSPEND(error);
return error;
}
-static void dpm_clear_suppliers_direct_complete(struct device *dev)
+static void dpm_clear_superiors_direct_complete(struct device *dev)
{
struct device_link *link;
int idx;
+ if (dev->parent) {
+ spin_lock_irq(&dev->parent->power.lock);
+ dev->parent->power.direct_complete = false;
+ spin_unlock_irq(&dev->parent->power.lock);
+ }
+
idx = device_links_read_lock();
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
dev->power.direct_complete = false;
}
+ dev->power.may_skip_resume = false;
+ dev->power.must_resume = false;
+
dpm_watchdog_set(&wd, dev);
device_lock(dev);
End:
if (!error) {
- struct device *parent = dev->parent;
-
dev->power.is_suspended = true;
- if (parent) {
- spin_lock_irq(&parent->power.lock);
-
- dev->parent->power.direct_complete = false;
- if (dev->power.wakeup_path
- && !dev->parent->power.ignore_children)
- dev->parent->power.wakeup_path = true;
+ if (device_may_wakeup(dev))
+ dev->power.wakeup_path = true;
- spin_unlock_irq(&parent->power.lock);
- }
- dpm_clear_suppliers_direct_complete(dev);
+ dpm_propagate_wakeup_to_parent(dev);
+ dpm_clear_superiors_direct_complete(dev);
}
device_unlock(dev);
if (dev->power.syscore)
return 0;
- WARN_ON(dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) &&
- !pm_runtime_enabled(dev));
+ WARN_ON(!pm_runtime_enabled(dev) &&
+ dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_LEAVE_SUSPENDED));
/*
* If a device's parent goes into runtime suspend at the wrong time,
device_lock(dev);
- dev->power.wakeup_path = device_may_wakeup(dev);
+ dev->power.wakeup_path = false;
if (dev->power.no_pm_callbacks) {
ret = 1; /* Let device go direct_complete */
#ifdef CONFIG_PM_SLEEP
-extern int device_wakeup_attach_irq(struct device *dev,
- struct wake_irq *wakeirq);
+extern void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq);
extern void device_wakeup_detach_irq(struct device *dev);
extern void device_wakeup_arm_wake_irqs(void);
extern void device_wakeup_disarm_wake_irqs(void);
#else
-static inline int
-device_wakeup_attach_irq(struct device *dev,
- struct wake_irq *wakeirq)
-{
- return 0;
-}
+static inline void device_wakeup_attach_irq(struct device *dev,
+ struct wake_irq *wakeirq) {}
static inline void device_wakeup_detach_irq(struct device *dev)
{
spin_unlock_irq(&dev->power.lock);
}
+static bool pm_runtime_need_not_resume(struct device *dev)
+{
+ return atomic_read(&dev->power.usage_count) <= 1 &&
+ (atomic_read(&dev->power.child_count) == 0 ||
+ dev->power.ignore_children);
+}
+
/**
* pm_runtime_force_suspend - Force a device into suspend state if needed.
* @dev: Device to suspend.
*
* Disable runtime PM so we safely can check the device's runtime PM status and
- * if it is active, invoke it's .runtime_suspend callback to bring it into
- * suspend state. Keep runtime PM disabled to preserve the state unless we
- * encounter errors.
+ * if it is active, invoke its ->runtime_suspend callback to suspend it and
+ * change its runtime PM status field to RPM_SUSPENDED. Also, if the device's
+ * usage and children counters don't indicate that the device was in use before
+ * the system-wide transition under way, decrement its parent's children counter
+ * (if there is a parent). Keep runtime PM disabled to preserve the state
+ * unless we encounter errors.
*
* Typically this function may be invoked from a system suspend callback to make
- * sure the device is put into low power state.
+ * sure the device is put into low power state and it should only be used during
+ * system-wide PM transitions to sleep states. It assumes that the analogous
+ * pm_runtime_force_resume() will be used to resume the device.
*/
int pm_runtime_force_suspend(struct device *dev)
{
int (*callback)(struct device *);
- int ret = 0;
+ int ret;
pm_runtime_disable(dev);
if (pm_runtime_status_suspended(dev))
callback = RPM_GET_CALLBACK(dev, runtime_suspend);
- if (!callback) {
- ret = -ENOSYS;
- goto err;
- }
-
- ret = callback(dev);
+ ret = callback ? callback(dev) : 0;
if (ret)
goto err;
/*
- * Increase the runtime PM usage count for the device's parent, in case
- * when we find the device being used when system suspend was invoked.
- * This informs pm_runtime_force_resume() to resume the parent
- * immediately, which is needed to be able to resume its children,
- * when not deferring the resume to be managed via runtime PM.
+ * If the device can stay in suspend after the system-wide transition
+ * to the working state that will follow, drop the children counter of
+ * its parent, but set its status to RPM_SUSPENDED anyway in case this
+ * function will be called again for it in the meantime.
*/
- if (dev->parent && atomic_read(&dev->power.usage_count) > 1)
- pm_runtime_get_noresume(dev->parent);
+ if (pm_runtime_need_not_resume(dev))
+ pm_runtime_set_suspended(dev);
+ else
+ __update_runtime_status(dev, RPM_SUSPENDED);
- pm_runtime_set_suspended(dev);
return 0;
+
err:
pm_runtime_enable(dev);
return ret;
*
* Prior invoking this function we expect the user to have brought the device
* into low power state by a call to pm_runtime_force_suspend(). Here we reverse
- * those actions and brings the device into full power, if it is expected to be
- * used on system resume. To distinguish that, we check whether the runtime PM
- * usage count is greater than 1 (the PM core increases the usage count in the
- * system PM prepare phase), as that indicates a real user (such as a subsystem,
- * driver, userspace, etc.) is using it. If that is the case, the device is
- * expected to be used on system resume as well, so then we resume it. In the
- * other case, we defer the resume to be managed via runtime PM.
+ * those actions and bring the device into full power, if it is expected to be
+ * used on system resume. In the other case, we defer the resume to be managed
+ * via runtime PM.
*
* Typically this function may be invoked from a system resume callback.
*/
int (*callback)(struct device *);
int ret = 0;
- callback = RPM_GET_CALLBACK(dev, runtime_resume);
-
- if (!callback) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!pm_runtime_status_suspended(dev))
+ if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
goto out;
/*
- * Decrease the parent's runtime PM usage count, if we increased it
- * during system suspend in pm_runtime_force_suspend().
- */
- if (atomic_read(&dev->power.usage_count) > 1) {
- if (dev->parent)
- pm_runtime_put_noidle(dev->parent);
- } else {
- goto out;
- }
+ * The value of the parent's children counter is correct already, so
+ * just update the status of the device.
+ */
+ __update_runtime_status(dev, RPM_ACTIVE);
- ret = pm_runtime_set_active(dev);
- if (ret)
- goto out;
+ callback = RPM_GET_CALLBACK(dev, runtime_resume);
- ret = callback(dev);
+ ret = callback ? callback(dev) : 0;
if (ret) {
pm_runtime_set_suspended(dev);
goto out;
static ssize_t control_store(struct device * dev, struct device_attribute *attr,
const char * buf, size_t n)
{
- char *cp;
- int len = n;
-
- cp = memchr(buf, '\n', n);
- if (cp)
- len = cp - buf;
device_lock(dev);
- if (len == sizeof ctrl_auto - 1 && strncmp(buf, ctrl_auto, len) == 0)
+ if (sysfs_streq(buf, ctrl_auto))
pm_runtime_allow(dev);
- else if (len == sizeof ctrl_on - 1 && strncmp(buf, ctrl_on, len) == 0)
+ else if (sysfs_streq(buf, ctrl_on))
pm_runtime_forbid(dev);
else
n = -EINVAL;
return n;
}
-static DEVICE_ATTR(control, 0644, control_show, control_store);
+static DEVICE_ATTR_RW(control);
-static ssize_t rtpm_active_time_show(struct device *dev,
+static ssize_t runtime_active_time_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int ret;
return ret;
}
-static DEVICE_ATTR(runtime_active_time, 0444, rtpm_active_time_show, NULL);
+static DEVICE_ATTR_RO(runtime_active_time);
-static ssize_t rtpm_suspended_time_show(struct device *dev,
+static ssize_t runtime_suspended_time_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int ret;
return ret;
}
-static DEVICE_ATTR(runtime_suspended_time, 0444, rtpm_suspended_time_show, NULL);
+static DEVICE_ATTR_RO(runtime_suspended_time);
-static ssize_t rtpm_status_show(struct device *dev,
+static ssize_t runtime_status_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const char *p;
return sprintf(buf, p);
}
-static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL);
+static DEVICE_ATTR_RO(runtime_status);
static ssize_t autosuspend_delay_ms_show(struct device *dev,
struct device_attribute *attr, char *buf)
return n;
}
-static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
- autosuspend_delay_ms_store);
+static DEVICE_ATTR_RW(autosuspend_delay_ms);
-static ssize_t pm_qos_resume_latency_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t pm_qos_resume_latency_us_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
s32 value = dev_pm_qos_requested_resume_latency(dev);
if (value == 0)
return sprintf(buf, "n/a\n");
- else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
+ if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
value = 0;
return sprintf(buf, "%d\n", value);
}
-static ssize_t pm_qos_resume_latency_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t n)
+static ssize_t pm_qos_resume_latency_us_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t n)
{
s32 value;
int ret;
if (value == 0)
value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
- } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) {
+ } else if (sysfs_streq(buf, "n/a")) {
value = 0;
} else {
return -EINVAL;
return ret < 0 ? ret : n;
}
-static DEVICE_ATTR(pm_qos_resume_latency_us, 0644,
- pm_qos_resume_latency_show, pm_qos_resume_latency_store);
+static DEVICE_ATTR_RW(pm_qos_resume_latency_us);
-static ssize_t pm_qos_latency_tolerance_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
s32 value = dev_pm_qos_get_user_latency_tolerance(dev);
if (value < 0)
return sprintf(buf, "auto\n");
- else if (value == PM_QOS_LATENCY_ANY)
+ if (value == PM_QOS_LATENCY_ANY)
return sprintf(buf, "any\n");
return sprintf(buf, "%d\n", value);
}
-static ssize_t pm_qos_latency_tolerance_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t n)
+static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t n)
{
s32 value;
int ret;
if (value < 0)
return -EINVAL;
} else {
- if (!strcmp(buf, "auto") || !strcmp(buf, "auto\n"))
+ if (sysfs_streq(buf, "auto"))
value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
- else if (!strcmp(buf, "any") || !strcmp(buf, "any\n"))
+ else if (sysfs_streq(buf, "any"))
value = PM_QOS_LATENCY_ANY;
else
return -EINVAL;
return ret < 0 ? ret : n;
}
-static DEVICE_ATTR(pm_qos_latency_tolerance_us, 0644,
- pm_qos_latency_tolerance_show, pm_qos_latency_tolerance_store);
+static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us);
static ssize_t pm_qos_no_power_off_show(struct device *dev,
struct device_attribute *attr,
return ret < 0 ? ret : n;
}
-static DEVICE_ATTR(pm_qos_no_power_off, 0644,
- pm_qos_no_power_off_show, pm_qos_no_power_off_store);
+static DEVICE_ATTR_RW(pm_qos_no_power_off);
#ifdef CONFIG_PM_SLEEP
static const char _enabled[] = "enabled";
static const char _disabled[] = "disabled";
-static ssize_t
-wake_show(struct device * dev, struct device_attribute *attr, char * buf)
+static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%s\n", device_can_wakeup(dev)
? (device_may_wakeup(dev) ? _enabled : _disabled)
: "");
}
-static ssize_t
-wake_store(struct device * dev, struct device_attribute *attr,
- const char * buf, size_t n)
+static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t n)
{
- char *cp;
- int len = n;
-
if (!device_can_wakeup(dev))
return -EINVAL;
- cp = memchr(buf, '\n', n);
- if (cp)
- len = cp - buf;
- if (len == sizeof _enabled - 1
- && strncmp(buf, _enabled, sizeof _enabled - 1) == 0)
+ if (sysfs_streq(buf, _enabled))
device_set_wakeup_enable(dev, 1);
- else if (len == sizeof _disabled - 1
- && strncmp(buf, _disabled, sizeof _disabled - 1) == 0)
+ else if (sysfs_streq(buf, _disabled))
device_set_wakeup_enable(dev, 0);
else
return -EINVAL;
return n;
}
-static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
+static DEVICE_ATTR_RW(wakeup);
static ssize_t wakeup_count_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
unsigned long count = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
+static DEVICE_ATTR_RO(wakeup_count);
static ssize_t wakeup_active_count_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
unsigned long count = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL);
+static DEVICE_ATTR_RO(wakeup_active_count);
static ssize_t wakeup_abort_count_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
unsigned long count = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL);
+static DEVICE_ATTR_RO(wakeup_abort_count);
static ssize_t wakeup_expire_count_show(struct device *dev,
struct device_attribute *attr,
return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL);
+static DEVICE_ATTR_RO(wakeup_expire_count);
static ssize_t wakeup_active_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
unsigned int active = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%u\n", active) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_active, 0444, wakeup_active_show, NULL);
+static DEVICE_ATTR_RO(wakeup_active);
-static ssize_t wakeup_total_time_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t wakeup_total_time_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
s64 msec = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_total_time_ms, 0444, wakeup_total_time_show, NULL);
+static DEVICE_ATTR_RO(wakeup_total_time_ms);
-static ssize_t wakeup_max_time_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t wakeup_max_time_ms_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
s64 msec = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_max_time_ms, 0444, wakeup_max_time_show, NULL);
+static DEVICE_ATTR_RO(wakeup_max_time_ms);
-static ssize_t wakeup_last_time_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t wakeup_last_time_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
s64 msec = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL);
+static DEVICE_ATTR_RO(wakeup_last_time_ms);
#ifdef CONFIG_PM_AUTOSLEEP
-static ssize_t wakeup_prevent_sleep_time_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
s64 msec = 0;
bool enabled = false;
return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444,
- wakeup_prevent_sleep_time_show, NULL);
+static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms);
#endif /* CONFIG_PM_AUTOSLEEP */
#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_PM_ADVANCED_DEBUG
-static ssize_t rtpm_usagecount_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t runtime_usage_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", atomic_read(&dev->power.usage_count));
}
+static DEVICE_ATTR_RO(runtime_usage);
-static ssize_t rtpm_children_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t runtime_active_kids_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", dev->power.ignore_children ?
0 : atomic_read(&dev->power.child_count));
}
+static DEVICE_ATTR_RO(runtime_active_kids);
-static ssize_t rtpm_enabled_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t runtime_enabled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- if ((dev->power.disable_depth) && (dev->power.runtime_auto == false))
+ if (dev->power.disable_depth && (dev->power.runtime_auto == false))
return sprintf(buf, "disabled & forbidden\n");
- else if (dev->power.disable_depth)
+ if (dev->power.disable_depth)
return sprintf(buf, "disabled\n");
- else if (dev->power.runtime_auto == false)
+ if (dev->power.runtime_auto == false)
return sprintf(buf, "forbidden\n");
return sprintf(buf, "enabled\n");
}
-
-static DEVICE_ATTR(runtime_usage, 0444, rtpm_usagecount_show, NULL);
-static DEVICE_ATTR(runtime_active_kids, 0444, rtpm_children_show, NULL);
-static DEVICE_ATTR(runtime_enabled, 0444, rtpm_enabled_show, NULL);
+static DEVICE_ATTR_RO(runtime_enabled);
#ifdef CONFIG_PM_SLEEP
static ssize_t async_show(struct device *dev, struct device_attribute *attr,
static ssize_t async_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t n)
{
- char *cp;
- int len = n;
-
- cp = memchr(buf, '\n', n);
- if (cp)
- len = cp - buf;
- if (len == sizeof _enabled - 1 && strncmp(buf, _enabled, len) == 0)
+ if (sysfs_streq(buf, _enabled))
device_enable_async_suspend(dev);
- else if (len == sizeof _disabled - 1 &&
- strncmp(buf, _disabled, len) == 0)
+ else if (sysfs_streq(buf, _disabled))
device_disable_async_suspend(dev);
else
return -EINVAL;
return n;
}
-static DEVICE_ATTR(async, 0644, async_show, async_store);
+static DEVICE_ATTR_RW(async);
#endif /* CONFIG_PM_SLEEP */
#endif /* CONFIG_PM_ADVANCED_DEBUG */
struct wake_irq *wirq)
{
unsigned long flags;
- int err;
if (!dev || !wirq)
return -EINVAL;
return -EEXIST;
}
- err = device_wakeup_attach_irq(dev, wirq);
- if (!err)
- dev->power.wakeirq = wirq;
+ dev->power.wakeirq = wirq;
+ device_wakeup_attach_irq(dev, wirq);
spin_unlock_irqrestore(&dev->power.lock, flags);
- return err;
+ return 0;
}
/**
#include "power.h"
+#ifndef CONFIG_SUSPEND
+suspend_state_t pm_suspend_target_state;
+#define pm_suspend_target_state (PM_SUSPEND_ON)
+#endif
+
/*
* If set, the suspend/hibernate code will abort transitions to a sleep state
* if wakeup events are registered during or immediately before the transition.
if (!dev || !dev->power.can_wakeup)
return -EINVAL;
+ if (pm_suspend_target_state != PM_SUSPEND_ON)
+ dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__);
+
ws = wakeup_source_register(dev_name(dev));
if (!ws)
return -ENOMEM;
*
* Call under the device's power.lock lock.
*/
-int device_wakeup_attach_irq(struct device *dev,
+void device_wakeup_attach_irq(struct device *dev,
struct wake_irq *wakeirq)
{
struct wakeup_source *ws;
ws = dev->power.wakeup;
- if (!ws) {
- dev_err(dev, "forgot to call call device_init_wakeup?\n");
- return -EINVAL;
- }
+ if (!ws)
+ return;
if (ws->wakeirq)
- return -EEXIST;
+ dev_err(dev, "Leftover wakeup IRQ found, overriding\n");
ws->wakeirq = wakeirq;
- return 0;
}
/**
device_set_wakeup_capable(dev, true);
ret = device_wakeup_enable(dev);
} else {
- if (dev->power.can_wakeup)
- device_wakeup_disable(dev);
-
+ device_wakeup_disable(dev);
device_set_wakeup_capable(dev, false);
}
*/
int device_set_wakeup_enable(struct device *dev, bool enable)
{
- if (!dev || !dev->power.can_wakeup)
- return -EINVAL;
-
return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev);
}
EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
return err;
}
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void __lo_release(struct loop_device *lo)
{
- struct loop_device *lo = disk->private_data;
int err;
if (atomic_dec_return(&lo->lo_refcnt))
mutex_unlock(&lo->lo_ctl_mutex);
}
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+ mutex_lock(&loop_index_mutex);
+ __lo_release(disk->private_data);
+ mutex_unlock(&loop_index_mutex);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
.open = lo_open,
mutex_unlock(&rbd_dev->watch_mutex);
}
+static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
+{
+ struct rbd_client_id cid = rbd_get_cid(rbd_dev);
+
+ strcpy(rbd_dev->lock_cookie, cookie);
+ rbd_set_owner_cid(rbd_dev, &cid);
+ queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+}
+
/*
* lock_rwsem must be held for write
*/
static int rbd_lock(struct rbd_device *rbd_dev)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
- struct rbd_client_id cid = rbd_get_cid(rbd_dev);
char cookie[32];
int ret;
return ret;
rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
- strcpy(rbd_dev->lock_cookie, cookie);
- rbd_set_owner_cid(rbd_dev, &cid);
- queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+ __rbd_lock(rbd_dev, cookie);
return 0;
}
queue_delayed_work(rbd_dev->task_wq,
&rbd_dev->lock_dwork, 0);
} else {
- strcpy(rbd_dev->lock_cookie, cookie);
+ __rbd_lock(rbd_dev, cookie);
}
}
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
q->limits.max_sectors = queue_max_hw_sectors(q);
- blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
SRAM, ethernet adapters, FPGAs and LCD displays.
config SIMPLE_PM_BUS
- bool "Simple Power-Managed Bus Driver"
+ tristate "Simple Power-Managed Bus Driver"
depends on OF && PM
help
Driver for transparent busses that don't need a real driver, but
.match = sunxi_rsb_device_match,
.probe = sunxi_rsb_device_probe,
.remove = sunxi_rsb_device_remove,
+ .uevent = of_device_uevent_modalias,
};
static void sunxi_rsb_dev_release(struct device *dev)
# ARM CPU Frequency scaling drivers
#
+config ACPI_CPPC_CPUFREQ
+ tristate "CPUFreq driver based on the ACPI CPPC spec"
+ depends on ACPI_PROCESSOR
+ select ACPI_CPPC_LIB
+ help
+ This adds a CPUFreq driver which uses CPPC methods
+ as described in the ACPIv5.1 spec. CPPC stands for
+ Collaborative Processor Performance Controls. It
+ is based on an abstract continuous scale of CPU
+ performance values which allows the remote power
+ processor to flexibly optimize for power and
+ performance. CPPC relies on power management firmware
+ support for its operation.
+
+ If in doubt, say N.
+
+config ARM_ARMADA_37XX_CPUFREQ
+ tristate "Armada 37xx CPUFreq support"
+ depends on ARCH_MVEBU
+ help
+ This adds the CPUFreq driver support for Marvell Armada 37xx SoCs.
+ The Armada 37xx PMU supports 4 frequency and VDD levels.
+
# big LITTLE core layer and glue drivers
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
+config ARM_DT_BL_CPUFREQ
+ tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver"
+ depends on ARM_BIG_LITTLE_CPUFREQ && OF
+ help
+ This enables probing via DT for Generic CPUfreq driver for ARM
+ big.LITTLE platform. This gets frequency tables from DT.
+
+config ARM_SCPI_CPUFREQ
+ tristate "SCPI based CPUfreq driver"
+ depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
+ help
+ This adds the CPUfreq driver support for ARM big.LITTLE platforms
+ using SCPI protocol for CPU power management.
+
+ This driver uses SCPI Message Protocol driver to interact with the
+ firmware providing the CPU DVFS functionality.
+
+config ARM_VEXPRESS_SPC_CPUFREQ
+ tristate "Versatile Express SPC based CPUfreq driver"
+ depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC
+ help
+ This add the CPUfreq driver support for Versatile Express
+ big.LITTLE platforms using SPC for power management.
+
config ARM_BRCMSTB_AVS_CPUFREQ
tristate "Broadcom STB AVS CPUfreq driver"
depends on ARCH_BRCMSTB || COMPILE_TEST
If in doubt, say N.
-config ARM_DT_BL_CPUFREQ
- tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && OF
- help
- This enables probing via DT for Generic CPUfreq driver for ARM
- big.LITTLE platform. This gets frequency tables from DT.
-
-config ARM_VEXPRESS_SPC_CPUFREQ
- tristate "Versatile Express SPC based CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC
- help
- This add the CPUfreq driver support for Versatile Express
- big.LITTLE platforms using SPC for power management.
-
config ARM_EXYNOS5440_CPUFREQ
tristate "SAMSUNG EXYNOS5440"
depends on SOC_EXYNOS5440
config ARM_SA1110_CPUFREQ
bool
-config ARM_SCPI_CPUFREQ
- tristate "SCPI based CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
- help
- This adds the CPUfreq driver support for ARM big.LITTLE platforms
- using SCPI protocol for CPU power management.
-
- This driver uses SCPI Message Protocol driver to interact with the
- firmware providing the CPU DVFS functionality.
-
config ARM_SPEAR_CPUFREQ
bool "SPEAr CPUFreq support"
depends on PLAT_SPEAR
This add the CPUFreq driver support for Intel PXA2xx SOCs.
If in doubt, say N.
-
-config ACPI_CPPC_CPUFREQ
- tristate "CPUFreq driver based on the ACPI CPPC spec"
- depends on ACPI_PROCESSOR
- select ACPI_CPPC_LIB
- default n
- help
- This adds a CPUFreq driver which uses CPPC methods
- as described in the ACPIv5.1 spec. CPPC stands for
- Collaborative Processor Performance Controls. It
- is based on an abstract continuous scale of CPU
- performance values which allows the remote power
- processor to flexibly optimize for power and
- performance. CPPC relies on power management firmware
- support for its operation.
-
- If in doubt, say N.
# LITTLE drivers, so that it is probed last.
obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o
+obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o
obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o
+obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o
obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o
obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o
obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o
obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o
obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ) += mediatek-cpufreq.o
+obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o
obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o
obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o
obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o
-obj-$(CONFIG_ARM_S3C24XX_CPUFREQ) += s3c24xx-cpufreq.o
-obj-$(CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS) += s3c24xx-cpufreq-debugfs.o
obj-$(CONFIG_ARM_S3C2410_CPUFREQ) += s3c2410-cpufreq.o
obj-$(CONFIG_ARM_S3C2412_CPUFREQ) += s3c2412-cpufreq.o
obj-$(CONFIG_ARM_S3C2416_CPUFREQ) += s3c2416-cpufreq.o
obj-$(CONFIG_ARM_S3C2440_CPUFREQ) += s3c2440-cpufreq.o
obj-$(CONFIG_ARM_S3C64XX_CPUFREQ) += s3c64xx-cpufreq.o
+obj-$(CONFIG_ARM_S3C24XX_CPUFREQ) += s3c24xx-cpufreq.o
+obj-$(CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS) += s3c24xx-cpufreq-debugfs.o
obj-$(CONFIG_ARM_S5PV210_CPUFREQ) += s5pv210-cpufreq.o
obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o
obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o
obj-$(CONFIG_ARM_TEGRA186_CPUFREQ) += tegra186-cpufreq.o
obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-cpufreq.o
obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o
-obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
-obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o
##################################################################################
static void bL_cpufreq_ready(struct cpufreq_policy *policy)
{
- struct device *cpu_dev = get_cpu_device(policy->cpu);
int cur_cluster = cpu_to_cluster(policy->cpu);
- struct device_node *np;
/* Do not register a cpu_cooling device if we are in IKS mode */
if (cur_cluster >= MAX_CLUSTERS)
return;
- np = of_node_get(cpu_dev->of_node);
- if (WARN_ON(!np))
- return;
-
- if (of_find_property(np, "#cooling-cells", NULL)) {
- u32 power_coefficient = 0;
-
- of_property_read_u32(np, "dynamic-power-coefficient",
- &power_coefficient);
-
- cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
- policy, power_coefficient, NULL);
- if (IS_ERR(cdev[cur_cluster])) {
- dev_err(cpu_dev,
- "running cpufreq without cooling device: %ld\n",
- PTR_ERR(cdev[cur_cluster]));
- cdev[cur_cluster] = NULL;
- }
- }
- of_node_put(np);
+ cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
}
static struct cpufreq_driver bL_cpufreq_driver = {
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * CPU frequency scaling support for Armada 37xx platform.
+ *
+ * Copyright (C) 2017 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* Power management in North Bridge register set */
+#define ARMADA_37XX_NB_L0L1 0x18
+#define ARMADA_37XX_NB_L2L3 0x1C
+#define ARMADA_37XX_NB_TBG_DIV_OFF 13
+#define ARMADA_37XX_NB_TBG_DIV_MASK 0x7
+#define ARMADA_37XX_NB_CLK_SEL_OFF 11
+#define ARMADA_37XX_NB_CLK_SEL_MASK 0x1
+#define ARMADA_37XX_NB_CLK_SEL_TBG 0x1
+#define ARMADA_37XX_NB_TBG_SEL_OFF 9
+#define ARMADA_37XX_NB_TBG_SEL_MASK 0x3
+#define ARMADA_37XX_NB_VDD_SEL_OFF 6
+#define ARMADA_37XX_NB_VDD_SEL_MASK 0x3
+#define ARMADA_37XX_NB_CONFIG_SHIFT 16
+#define ARMADA_37XX_NB_DYN_MOD 0x24
+#define ARMADA_37XX_NB_CLK_SEL_EN BIT(26)
+#define ARMADA_37XX_NB_TBG_EN BIT(28)
+#define ARMADA_37XX_NB_DIV_EN BIT(29)
+#define ARMADA_37XX_NB_VDD_EN BIT(30)
+#define ARMADA_37XX_NB_DFS_EN BIT(31)
+#define ARMADA_37XX_NB_CPU_LOAD 0x30
+#define ARMADA_37XX_NB_CPU_LOAD_MASK 0x3
+#define ARMADA_37XX_DVFS_LOAD_0 0
+#define ARMADA_37XX_DVFS_LOAD_1 1
+#define ARMADA_37XX_DVFS_LOAD_2 2
+#define ARMADA_37XX_DVFS_LOAD_3 3
+
+/*
+ * On Armada 37xx the Power management manages 4 level of CPU load,
+ * each level can be associated with a CPU clock source, a CPU
+ * divider, a VDD level, etc...
+ */
+#define LOAD_LEVEL_NR 4
+
+struct armada_37xx_dvfs {
+ u32 cpu_freq_max;
+ u8 divider[LOAD_LEVEL_NR];
+};
+
+static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
+ {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} },
+ {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} },
+ {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} },
+ {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} },
+};
+
+static struct armada_37xx_dvfs *armada_37xx_cpu_freq_info_get(u32 freq)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(armada_37xx_dvfs); i++) {
+ if (freq == armada_37xx_dvfs[i].cpu_freq_max)
+ return &armada_37xx_dvfs[i];
+ }
+
+ pr_err("Unsupported CPU frequency %d MHz\n", freq/1000000);
+ return NULL;
+}
+
+/*
+ * Setup the four level managed by the hardware. Once the four level
+ * will be configured then the DVFS will be enabled.
+ */
+static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
+ struct clk *clk, u8 *divider)
+{
+ int load_lvl;
+ struct clk *parent;
+
+ for (load_lvl = 0; load_lvl < LOAD_LEVEL_NR; load_lvl++) {
+ unsigned int reg, mask, val, offset = 0;
+
+ if (load_lvl <= ARMADA_37XX_DVFS_LOAD_1)
+ reg = ARMADA_37XX_NB_L0L1;
+ else
+ reg = ARMADA_37XX_NB_L2L3;
+
+ if (load_lvl == ARMADA_37XX_DVFS_LOAD_0 ||
+ load_lvl == ARMADA_37XX_DVFS_LOAD_2)
+ offset += ARMADA_37XX_NB_CONFIG_SHIFT;
+
+ /* Set cpu clock source, for all the level we use TBG */
+ val = ARMADA_37XX_NB_CLK_SEL_TBG << ARMADA_37XX_NB_CLK_SEL_OFF;
+ mask = (ARMADA_37XX_NB_CLK_SEL_MASK
+ << ARMADA_37XX_NB_CLK_SEL_OFF);
+
+ /*
+ * Set cpu divider based on the pre-computed array in
+ * order to have balanced step.
+ */
+ val |= divider[load_lvl] << ARMADA_37XX_NB_TBG_DIV_OFF;
+ mask |= (ARMADA_37XX_NB_TBG_DIV_MASK
+ << ARMADA_37XX_NB_TBG_DIV_OFF);
+
+ /* Set VDD divider which is actually the load level. */
+ val |= load_lvl << ARMADA_37XX_NB_VDD_SEL_OFF;
+ mask |= (ARMADA_37XX_NB_VDD_SEL_MASK
+ << ARMADA_37XX_NB_VDD_SEL_OFF);
+
+ val <<= offset;
+ mask <<= offset;
+
+ regmap_update_bits(base, reg, mask, val);
+ }
+
+ /*
+ * Set cpu clock source, for all the level we keep the same
+ * clock source that the one already configured. For this one
+ * we need to use the clock framework
+ */
+ parent = clk_get_parent(clk);
+ clk_set_parent(clk, parent);
+}
+
+static void __init armada37xx_cpufreq_disable_dvfs(struct regmap *base)
+{
+ unsigned int reg = ARMADA_37XX_NB_DYN_MOD,
+ mask = ARMADA_37XX_NB_DFS_EN;
+
+ regmap_update_bits(base, reg, mask, 0);
+}
+
+static void __init armada37xx_cpufreq_enable_dvfs(struct regmap *base)
+{
+ unsigned int val, reg = ARMADA_37XX_NB_CPU_LOAD,
+ mask = ARMADA_37XX_NB_CPU_LOAD_MASK;
+
+ /* Start with the highest load (0) */
+ val = ARMADA_37XX_DVFS_LOAD_0;
+ regmap_update_bits(base, reg, mask, val);
+
+ /* Now enable DVFS for the CPUs */
+ reg = ARMADA_37XX_NB_DYN_MOD;
+ mask = ARMADA_37XX_NB_CLK_SEL_EN | ARMADA_37XX_NB_TBG_EN |
+ ARMADA_37XX_NB_DIV_EN | ARMADA_37XX_NB_VDD_EN |
+ ARMADA_37XX_NB_DFS_EN;
+
+ regmap_update_bits(base, reg, mask, mask);
+}
+
+static int __init armada37xx_cpufreq_driver_init(void)
+{
+ struct armada_37xx_dvfs *dvfs;
+ struct platform_device *pdev;
+ unsigned int cur_frequency;
+ struct regmap *nb_pm_base;
+ struct device *cpu_dev;
+ int load_lvl, ret;
+ struct clk *clk;
+
+ nb_pm_base =
+ syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm");
+
+ if (IS_ERR(nb_pm_base))
+ return -ENODEV;
+
+ /* Before doing any configuration on the DVFS first, disable it */
+ armada37xx_cpufreq_disable_dvfs(nb_pm_base);
+
+ /*
+ * On CPU 0 register the operating points supported (which are
+ * the nominal CPU frequency and full integer divisions of
+ * it).
+ */
+ cpu_dev = get_cpu_device(0);
+ if (!cpu_dev) {
+ dev_err(cpu_dev, "Cannot get CPU\n");
+ return -ENODEV;
+ }
+
+ clk = clk_get(cpu_dev, 0);
+ if (IS_ERR(clk)) {
+ dev_err(cpu_dev, "Cannot get clock for CPU0\n");
+ return PTR_ERR(clk);
+ }
+
+ /* Get nominal (current) CPU frequency */
+ cur_frequency = clk_get_rate(clk);
+ if (!cur_frequency) {
+ dev_err(cpu_dev, "Failed to get clock rate for CPU\n");
+ return -EINVAL;
+ }
+
+ dvfs = armada_37xx_cpu_freq_info_get(cur_frequency);
+ if (!dvfs)
+ return -EINVAL;
+
+ armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
+
+ for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
+ load_lvl++) {
+ unsigned long freq = cur_frequency / dvfs->divider[load_lvl];
+
+ ret = dev_pm_opp_add(cpu_dev, freq, 0);
+ if (ret) {
+ /* clean-up the already added opp before leaving */
+ while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) {
+ freq = cur_frequency / dvfs->divider[load_lvl];
+ dev_pm_opp_remove(cpu_dev, freq);
+ }
+ return ret;
+ }
+ }
+
+ /* Now that everything is setup, enable the DVFS at hardware level */
+ armada37xx_cpufreq_enable_dvfs(nb_pm_base);
+
+ pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+
+ return PTR_ERR_OR_ZERO(pdev);
+}
+/* late_initcall, to guarantee the driver is loaded after A37xx clock driver */
+late_initcall(armada37xx_cpufreq_driver_init);
+
+MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>");
+MODULE_DESCRIPTION("Armada 37xx cpufreq driver");
+MODULE_LICENSE("GPL");
{ .compatible = "marvell,armadaxp", },
+ { .compatible = "mediatek,mt2701", },
+ { .compatible = "mediatek,mt2712", },
+ { .compatible = "mediatek,mt7622", },
+ { .compatible = "mediatek,mt7623", },
+ { .compatible = "mediatek,mt817x", },
+ { .compatible = "mediatek,mt8173", },
+ { .compatible = "mediatek,mt8176", },
+
{ .compatible = "nvidia,tegra124", },
{ .compatible = "st,stih407", },
static void cpufreq_ready(struct cpufreq_policy *policy)
{
struct private_data *priv = policy->driver_data;
- struct device_node *np = of_node_get(priv->cpu_dev->of_node);
- if (WARN_ON(!np))
- return;
-
- /*
- * For now, just loading the cooling device;
- * thermal DT code takes care of matching them.
- */
- if (of_find_property(np, "#cooling-cells", NULL)) {
- u32 power_coefficient = 0;
-
- of_property_read_u32(np, "dynamic-power-coefficient",
- &power_coefficient);
-
- priv->cdev = of_cpufreq_power_cooling_register(np,
- policy, power_coefficient, NULL);
- if (IS_ERR(priv->cdev)) {
- dev_err(priv->cpu_dev,
- "running cpufreq without cooling device: %ld\n",
- PTR_ERR(priv->cdev));
-
- priv->cdev = NULL;
- }
- }
-
- of_node_put(np);
+ priv->cdev = of_cpufreq_cooling_register(policy);
}
static struct cpufreq_driver dt_cpufreq_driver = {
/**
* cpufreq_parse_governor - parse a governor string
*/
-static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
- struct cpufreq_governor **governor)
+static int cpufreq_parse_governor(char *str_governor,
+ struct cpufreq_policy *policy)
{
- int err = -EINVAL;
-
if (cpufreq_driver->setpolicy) {
if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
- *policy = CPUFREQ_POLICY_PERFORMANCE;
- err = 0;
- } else if (!strncasecmp(str_governor, "powersave",
- CPUFREQ_NAME_LEN)) {
- *policy = CPUFREQ_POLICY_POWERSAVE;
- err = 0;
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ return 0;
+ }
+
+ if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) {
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ return 0;
}
} else {
struct cpufreq_governor *t;
mutex_lock(&cpufreq_governor_mutex);
t = find_governor(str_governor);
-
- if (t == NULL) {
+ if (!t) {
int ret;
mutex_unlock(&cpufreq_governor_mutex);
+
ret = request_module("cpufreq_%s", str_governor);
- mutex_lock(&cpufreq_governor_mutex);
+ if (ret)
+ return -EINVAL;
- if (ret == 0)
- t = find_governor(str_governor);
- }
+ mutex_lock(&cpufreq_governor_mutex);
- if (t != NULL) {
- *governor = t;
- err = 0;
+ t = find_governor(str_governor);
}
+ if (t && !try_module_get(t->owner))
+ t = NULL;
mutex_unlock(&cpufreq_governor_mutex);
+
+ if (t) {
+ policy->governor = t;
+ return 0;
+ }
}
- return err;
+
+ return -EINVAL;
}
/**
if (ret != 1)
return -EINVAL;
- if (cpufreq_parse_governor(str_governor, &new_policy.policy,
- &new_policy.governor))
+ if (cpufreq_parse_governor(str_governor, &new_policy))
return -EINVAL;
ret = cpufreq_set_policy(policy, &new_policy);
+
+ if (new_policy.governor)
+ module_put(new_policy.governor->owner);
+
return ret ? ret : count;
}
if (policy->last_policy)
new_policy.policy = policy->last_policy;
else
- cpufreq_parse_governor(gov->name, &new_policy.policy,
- NULL);
+ cpufreq_parse_governor(gov->name, &new_policy);
}
/* set default policy */
return cpufreq_set_policy(policy, &new_policy);
mutex_lock(&cpufreq_governor_mutex);
list_del(&governor->governor_list);
mutex_unlock(&cpufreq_governor_mutex);
- return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
unsigned int *trans_table;
};
-static int cpufreq_stats_update(struct cpufreq_stats *stats)
+static void cpufreq_stats_update(struct cpufreq_stats *stats)
{
unsigned long long cur_time = get_jiffies_64();
stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
stats->last_time = cur_time;
spin_unlock(&cpufreq_stats_lock);
- return 0;
}
static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
static struct regulator *pu_reg;
static struct regulator *soc_reg;
-static struct clk *arm_clk;
-static struct clk *pll1_sys_clk;
-static struct clk *pll1_sw_clk;
-static struct clk *step_clk;
-static struct clk *pll2_pfd2_396m_clk;
-
-/* clk used by i.MX6UL */
-static struct clk *pll2_bus_clk;
-static struct clk *secondary_sel_clk;
+enum IMX6_CPUFREQ_CLKS {
+ ARM,
+ PLL1_SYS,
+ STEP,
+ PLL1_SW,
+ PLL2_PFD2_396M,
+ /* MX6UL requires two more clks */
+ PLL2_BUS,
+ SECONDARY_SEL,
+};
+#define IMX6Q_CPUFREQ_CLK_NUM 5
+#define IMX6UL_CPUFREQ_CLK_NUM 7
+
+static int num_clks;
+static struct clk_bulk_data clks[] = {
+ { .id = "arm" },
+ { .id = "pll1_sys" },
+ { .id = "step" },
+ { .id = "pll1_sw" },
+ { .id = "pll2_pfd2_396m" },
+ { .id = "pll2_bus" },
+ { .id = "secondary_sel" },
+};
static struct device *cpu_dev;
static bool free_opp;
new_freq = freq_table[index].frequency;
freq_hz = new_freq * 1000;
- old_freq = clk_get_rate(arm_clk) / 1000;
+ old_freq = clk_get_rate(clks[ARM].clk) / 1000;
opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
if (IS_ERR(opp)) {
* voltage of 528MHz, so lower the CPU frequency to one
* half before changing CPU frequency.
*/
- clk_set_rate(arm_clk, (old_freq >> 1) * 1000);
- clk_set_parent(pll1_sw_clk, pll1_sys_clk);
- if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk))
- clk_set_parent(secondary_sel_clk, pll2_bus_clk);
+ clk_set_rate(clks[ARM].clk, (old_freq >> 1) * 1000);
+ clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk);
+ if (freq_hz > clk_get_rate(clks[PLL2_PFD2_396M].clk))
+ clk_set_parent(clks[SECONDARY_SEL].clk,
+ clks[PLL2_BUS].clk);
else
- clk_set_parent(secondary_sel_clk, pll2_pfd2_396m_clk);
- clk_set_parent(step_clk, secondary_sel_clk);
- clk_set_parent(pll1_sw_clk, step_clk);
+ clk_set_parent(clks[SECONDARY_SEL].clk,
+ clks[PLL2_PFD2_396M].clk);
+ clk_set_parent(clks[STEP].clk, clks[SECONDARY_SEL].clk);
+ clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk);
+ if (freq_hz > clk_get_rate(clks[PLL2_BUS].clk)) {
+ clk_set_rate(clks[PLL1_SYS].clk, new_freq * 1000);
+ clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk);
+ }
} else {
- clk_set_parent(step_clk, pll2_pfd2_396m_clk);
- clk_set_parent(pll1_sw_clk, step_clk);
- if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) {
- clk_set_rate(pll1_sys_clk, new_freq * 1000);
- clk_set_parent(pll1_sw_clk, pll1_sys_clk);
+ clk_set_parent(clks[STEP].clk, clks[PLL2_PFD2_396M].clk);
+ clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk);
+ if (freq_hz > clk_get_rate(clks[PLL2_PFD2_396M].clk)) {
+ clk_set_rate(clks[PLL1_SYS].clk, new_freq * 1000);
+ clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk);
} else {
/* pll1_sys needs to be enabled for divider rate change to work. */
pll1_sys_temp_enabled = true;
- clk_prepare_enable(pll1_sys_clk);
+ clk_prepare_enable(clks[PLL1_SYS].clk);
}
}
/* Ensure the arm clock divider is what we expect */
- ret = clk_set_rate(arm_clk, new_freq * 1000);
+ ret = clk_set_rate(clks[ARM].clk, new_freq * 1000);
if (ret) {
dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
regulator_set_voltage_tol(arm_reg, volt_old, 0);
/* PLL1 is only needed until after ARM-PODF is set. */
if (pll1_sys_temp_enabled)
- clk_disable_unprepare(pll1_sys_clk);
+ clk_disable_unprepare(clks[PLL1_SYS].clk);
/* scaling down? scale voltage after frequency */
if (new_freq < old_freq) {
{
int ret;
- policy->clk = arm_clk;
+ policy->clk = clks[ARM].clk;
ret = cpufreq_generic_init(policy, freq_table, transition_latency);
policy->suspend_freq = policy->max;
of_node_put(np);
}
+#define OCOTP_CFG3_6UL_SPEED_696MHZ 0x2
+
+static void imx6ul_opp_check_speed_grading(struct device *dev)
+{
+ struct device_node *np;
+ void __iomem *base;
+ u32 val;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp");
+ if (!np)
+ return;
+
+ base = of_iomap(np, 0);
+ if (!base) {
+ dev_err(dev, "failed to map ocotp\n");
+ goto put_node;
+ }
+
+ /*
+ * Speed GRADING[1:0] defines the max speed of ARM:
+ * 2b'00: Reserved;
+ * 2b'01: 528000000Hz;
+ * 2b'10: 696000000Hz;
+ * 2b'11: Reserved;
+ * We need to set the max speed of ARM according to fuse map.
+ */
+ val = readl_relaxed(base + OCOTP_CFG3);
+ val >>= OCOTP_CFG3_SPEED_SHIFT;
+ val &= 0x3;
+ if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
+ if (dev_pm_opp_disable(dev, 696000000))
+ dev_warn(dev, "failed to disable 696MHz OPP\n");
+ iounmap(base);
+put_node:
+ of_node_put(np);
+}
+
static int imx6q_cpufreq_probe(struct platform_device *pdev)
{
struct device_node *np;
return -ENOENT;
}
- arm_clk = clk_get(cpu_dev, "arm");
- pll1_sys_clk = clk_get(cpu_dev, "pll1_sys");
- pll1_sw_clk = clk_get(cpu_dev, "pll1_sw");
- step_clk = clk_get(cpu_dev, "step");
- pll2_pfd2_396m_clk = clk_get(cpu_dev, "pll2_pfd2_396m");
- if (IS_ERR(arm_clk) || IS_ERR(pll1_sys_clk) || IS_ERR(pll1_sw_clk) ||
- IS_ERR(step_clk) || IS_ERR(pll2_pfd2_396m_clk)) {
- dev_err(cpu_dev, "failed to get clocks\n");
- ret = -ENOENT;
- goto put_clk;
- }
-
if (of_machine_is_compatible("fsl,imx6ul") ||
- of_machine_is_compatible("fsl,imx6ull")) {
- pll2_bus_clk = clk_get(cpu_dev, "pll2_bus");
- secondary_sel_clk = clk_get(cpu_dev, "secondary_sel");
- if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) {
- dev_err(cpu_dev, "failed to get clocks specific to imx6ul\n");
- ret = -ENOENT;
- goto put_clk;
- }
- }
+ of_machine_is_compatible("fsl,imx6ull"))
+ num_clks = IMX6UL_CPUFREQ_CLK_NUM;
+ else
+ num_clks = IMX6Q_CPUFREQ_CLK_NUM;
+
+ ret = clk_bulk_get(cpu_dev, num_clks, clks);
+ if (ret)
+ goto put_node;
arm_reg = regulator_get(cpu_dev, "arm");
pu_reg = regulator_get_optional(cpu_dev, "pu");
goto put_reg;
}
- imx6q_opp_check_speed_grading(cpu_dev);
+ if (of_machine_is_compatible("fsl,imx6ul"))
+ imx6ul_opp_check_speed_grading(cpu_dev);
+ else
+ imx6q_opp_check_speed_grading(cpu_dev);
/* Because we have added the OPPs here, we must free them */
free_opp = true;
regulator_put(pu_reg);
if (!IS_ERR(soc_reg))
regulator_put(soc_reg);
-put_clk:
- if (!IS_ERR(arm_clk))
- clk_put(arm_clk);
- if (!IS_ERR(pll1_sys_clk))
- clk_put(pll1_sys_clk);
- if (!IS_ERR(pll1_sw_clk))
- clk_put(pll1_sw_clk);
- if (!IS_ERR(step_clk))
- clk_put(step_clk);
- if (!IS_ERR(pll2_pfd2_396m_clk))
- clk_put(pll2_pfd2_396m_clk);
- if (!IS_ERR(pll2_bus_clk))
- clk_put(pll2_bus_clk);
- if (!IS_ERR(secondary_sel_clk))
- clk_put(secondary_sel_clk);
+
+ clk_bulk_put(num_clks, clks);
+put_node:
of_node_put(np);
+
return ret;
}
if (!IS_ERR(pu_reg))
regulator_put(pu_reg);
regulator_put(soc_reg);
- clk_put(arm_clk);
- clk_put(pll1_sys_clk);
- clk_put(pll1_sw_clk);
- clk_put(step_clk);
- clk_put(pll2_pfd2_396m_clk);
- clk_put(pll2_bus_clk);
- clk_put(secondary_sel_clk);
+
+ clk_bulk_put(num_clks, clks);
return 0;
}
.get_val = core_get_val,
};
-static const struct pstate_funcs bxt_funcs = {
- .get_max = core_get_max_pstate,
- .get_max_physical = core_get_max_pstate_physical,
- .get_min = core_get_min_pstate,
- .get_turbo = core_get_turbo_pstate,
- .get_scaling = core_get_scaling,
- .get_val = core_get_val,
-};
-
#define ICPU(model, policy) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
(unsigned long)&policy }
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs),
ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs),
- ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_funcs),
- ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, bxt_funcs),
+ ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs),
+ ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs),
+ ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
longhaul_setup_voltagescaling();
- policy->cpuinfo.transition_latency = 200000; /* nsec */
+ policy->transition_delay_us = 200000; /* usec */
return cpufreq_table_validate_and_show(policy, longhaul_table);
}
static void mtk_cpufreq_ready(struct cpufreq_policy *policy)
{
struct mtk_cpu_dvfs_info *info = policy->driver_data;
- struct device_node *np = of_node_get(info->cpu_dev->of_node);
- u32 capacitance = 0;
- if (WARN_ON(!np))
- return;
-
- if (of_find_property(np, "#cooling-cells", NULL)) {
- of_property_read_u32(np, DYNAMIC_POWER, &capacitance);
-
- info->cdev = of_cpufreq_power_cooling_register(np,
- policy, capacitance, NULL);
-
- if (IS_ERR(info->cdev)) {
- dev_err(info->cpu_dev,
- "running cpufreq without cooling device: %ld\n",
- PTR_ERR(info->cdev));
-
- info->cdev = NULL;
- }
- }
-
- of_node_put(np);
+ info->cdev = of_cpufreq_cooling_register(policy);
}
static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
/* List of machines supported by this driver */
static const struct of_device_id mtk_cpufreq_machines[] __initconst = {
{ .compatible = "mediatek,mt2701", },
+ { .compatible = "mediatek,mt2712", },
{ .compatible = "mediatek,mt7622", },
{ .compatible = "mediatek,mt7623", },
{ .compatible = "mediatek,mt817x", },
return PTR_ERR(clk);
}
- /*
- * In case of a failure of dev_pm_opp_add(), we don't
- * bother with cleaning up the registered OPP (there's
- * no function to do so), and simply cancel the
- * registration of the cpufreq device.
- */
ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
if (ret) {
clk_put(clk);
ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
if (ret) {
clk_put(clk);
- return ret;
+ dev_err(cpu_dev, "Failed to register OPPs\n");
+ goto opp_register_failed;
}
ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
if (ret)
dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
__func__, ret);
+ clk_put(clk);
}
platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
return 0;
+
+opp_register_failed:
+ /* As registering has failed remove all the opp for all cpus */
+ dev_pm_opp_cpumask_remove_table(cpu_possible_mask);
+
+ return ret;
}
device_initcall(armada_xp_pmsu_cpufreq_init);
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/hashtable.h>
#include <trace/events/power.h>
#include <asm/cputhreads.h>
#include <asm/opal.h>
#include <linux/timer.h>
-#define POWERNV_MAX_PSTATES 256
+#define POWERNV_MAX_PSTATES_ORDER 8
+#define POWERNV_MAX_PSTATES (1UL << (POWERNV_MAX_PSTATES_ORDER))
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
-#define PMSR_MAX(x) ((x >> 32) & 0xFF)
+#define MAX_PSTATE_SHIFT 32
#define LPSTATE_SHIFT 48
#define GPSTATE_SHIFT 56
-#define GET_LPSTATE(x) (((x) >> LPSTATE_SHIFT) & 0xFF)
-#define GET_GPSTATE(x) (((x) >> GPSTATE_SHIFT) & 0xFF)
#define MAX_RAMP_DOWN_TIME 5120
/*
};
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
+
+DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER);
+/**
+ * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap
+ * indexed by a function of pstate id.
+ *
+ * @pstate_id: pstate id for this entry.
+ *
+ * @cpufreq_table_idx: Index into the powernv_freqs
+ * cpufreq_frequency_table for frequency
+ * corresponding to pstate_id.
+ *
+ * @hentry: hlist_node that hooks this entry into the pstate_revmap
+ * hashtable
+ */
+struct pstate_idx_revmap_data {
+ u8 pstate_id;
+ unsigned int cpufreq_table_idx;
+ struct hlist_node hentry;
+};
+
static bool rebooting, throttled, occ_reset;
static const char * const throttle_reason[] = {
bool wof_enabled;
} powernv_pstate_info;
-/* Use following macros for conversions between pstate_id and index */
-static inline int idx_to_pstate(unsigned int i)
+static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift)
+{
+ return ((pmsr_val >> shift) & 0xFF);
+}
+
+#define extract_local_pstate(x) extract_pstate(x, LPSTATE_SHIFT)
+#define extract_global_pstate(x) extract_pstate(x, GPSTATE_SHIFT)
+#define extract_max_pstate(x) extract_pstate(x, MAX_PSTATE_SHIFT)
+
+/* Use following functions for conversions between pstate_id and index */
+
+/**
+ * idx_to_pstate : Returns the pstate id corresponding to the
+ * frequency in the cpufreq frequency table
+ * powernv_freqs indexed by @i.
+ *
+ * If @i is out of bound, this will return the pstate
+ * corresponding to the nominal frequency.
+ */
+static inline u8 idx_to_pstate(unsigned int i)
{
if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
- pr_warn_once("index %u is out of bound\n", i);
+ pr_warn_once("idx_to_pstate: index %u is out of bound\n", i);
return powernv_freqs[powernv_pstate_info.nominal].driver_data;
}
return powernv_freqs[i].driver_data;
}
-static inline unsigned int pstate_to_idx(int pstate)
+/**
+ * pstate_to_idx : Returns the index in the cpufreq frequencytable
+ * powernv_freqs for the frequency whose corresponding
+ * pstate id is @pstate.
+ *
+ * If no frequency corresponding to @pstate is found,
+ * this will return the index of the nominal
+ * frequency.
+ */
+static unsigned int pstate_to_idx(u8 pstate)
{
- int min = powernv_freqs[powernv_pstate_info.min].driver_data;
- int max = powernv_freqs[powernv_pstate_info.max].driver_data;
+ unsigned int key = pstate % POWERNV_MAX_PSTATES;
+ struct pstate_idx_revmap_data *revmap_data;
- if (min > 0) {
- if (unlikely((pstate < max) || (pstate > min))) {
- pr_warn_once("pstate %d is out of bound\n", pstate);
- return powernv_pstate_info.nominal;
- }
- } else {
- if (unlikely((pstate > max) || (pstate < min))) {
- pr_warn_once("pstate %d is out of bound\n", pstate);
- return powernv_pstate_info.nominal;
- }
+ hash_for_each_possible(pstate_revmap, revmap_data, hentry, key) {
+ if (revmap_data->pstate_id == pstate)
+ return revmap_data->cpufreq_table_idx;
}
- /*
- * abs() is deliberately used so that is works with
- * both monotonically increasing and decreasing
- * pstate values
- */
- return abs(pstate - idx_to_pstate(powernv_pstate_info.max));
+
+ pr_warn_once("pstate_to_idx: pstate 0x%x not found\n", pstate);
+ return powernv_pstate_info.nominal;
}
static inline void reset_gpstates(struct cpufreq_policy *policy)
powernv_pstate_info.wof_enabled = true;
next:
- pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
+ pr_info("cpufreq pstate min 0x%x nominal 0x%x max 0x%x\n", pstate_min,
pstate_nominal, pstate_max);
pr_info("Workload Optimized Frequency is %s in the platform\n",
(powernv_pstate_info.wof_enabled) ? "enabled" : "disabled");
powernv_pstate_info.nr_pstates = nr_pstates;
pr_debug("NR PStates %d\n", nr_pstates);
+
for (i = 0; i < nr_pstates; i++) {
u32 id = be32_to_cpu(pstate_ids[i]);
u32 freq = be32_to_cpu(pstate_freqs[i]);
+ struct pstate_idx_revmap_data *revmap_data;
+ unsigned int key;
pr_debug("PState id %d freq %d MHz\n", id, freq);
powernv_freqs[i].frequency = freq * 1000; /* kHz */
- powernv_freqs[i].driver_data = id;
+ powernv_freqs[i].driver_data = id & 0xFF;
+
+ revmap_data = (struct pstate_idx_revmap_data *)
+ kmalloc(sizeof(*revmap_data), GFP_KERNEL);
+
+ revmap_data->pstate_id = id & 0xFF;
+ revmap_data->cpufreq_table_idx = i;
+ key = (revmap_data->pstate_id) % POWERNV_MAX_PSTATES;
+ hash_add(pstate_revmap, &revmap_data->hentry, key);
if (id == pstate_max)
powernv_pstate_info.max = i;
- else if (id == pstate_nominal)
+ if (id == pstate_nominal)
powernv_pstate_info.nominal = i;
- else if (id == pstate_min)
+ if (id == pstate_min)
powernv_pstate_info.min = i;
if (powernv_pstate_info.wof_enabled && id == pstate_turbo) {
}
/* Returns the CPU frequency corresponding to the pstate_id. */
-static unsigned int pstate_id_to_freq(int pstate_id)
+static unsigned int pstate_id_to_freq(u8 pstate_id)
{
int i;
i = pstate_to_idx(pstate_id);
if (i >= powernv_pstate_info.nr_pstates || i < 0) {
- pr_warn("PState id %d outside of PState table, "
- "reporting nominal id %d instead\n",
+ pr_warn("PState id 0x%x outside of PState table, reporting nominal id 0x%x instead\n",
pstate_id, idx_to_pstate(powernv_pstate_info.nominal));
i = powernv_pstate_info.nominal;
}
*/
struct powernv_smp_call_data {
unsigned int freq;
- int pstate_id;
- int gpstate_id;
+ u8 pstate_id;
+ u8 gpstate_id;
};
/*
static void powernv_read_cpu_freq(void *arg)
{
unsigned long pmspr_val;
- s8 local_pstate_id;
struct powernv_smp_call_data *freq_data = arg;
pmspr_val = get_pmspr(SPRN_PMSR);
-
- /*
- * The local pstate id corresponds bits 48..55 in the PMSR.
- * Note: Watch out for the sign!
- */
- local_pstate_id = (pmspr_val >> 48) & 0xFF;
- freq_data->pstate_id = local_pstate_id;
+ freq_data->pstate_id = extract_local_pstate(pmspr_val);
freq_data->freq = pstate_id_to_freq(freq_data->pstate_id);
- pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n",
- raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
- freq_data->freq);
+ pr_debug("cpu %d pmsr %016lX pstate_id 0x%x frequency %d kHz\n",
+ raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
+ freq_data->freq);
}
/*
struct chip *chip;
unsigned int cpu = smp_processor_id();
unsigned long pmsr;
- int pmsr_pmax;
+ u8 pmsr_pmax;
unsigned int pmsr_pmax_idx;
pmsr = get_pmspr(SPRN_PMSR);
chip = this_cpu_read(chip_info);
/* Check for Pmax Capping */
- pmsr_pmax = (s8)PMSR_MAX(pmsr);
+ pmsr_pmax = extract_max_pstate(pmsr);
pmsr_pmax_idx = pstate_to_idx(pmsr_pmax);
if (pmsr_pmax_idx != powernv_pstate_info.max) {
if (chip->throttled)
goto next;
chip->throttled = true;
if (pmsr_pmax_idx > powernv_pstate_info.nominal) {
- pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n",
+ pr_warn_once("CPU %d on Chip %u has Pmax(0x%x) reduced below that of nominal frequency(0x%x)\n",
cpu, chip->id, pmsr_pmax,
idx_to_pstate(powernv_pstate_info.nominal));
chip->throttle_sub_turbo++;
* value. Hence, read from PMCR to get correct data.
*/
val = get_pmspr(SPRN_PMCR);
- freq_data.gpstate_id = (s8)GET_GPSTATE(val);
- freq_data.pstate_id = (s8)GET_LPSTATE(val);
+ freq_data.gpstate_id = extract_global_pstate(val);
+ freq_data.pstate_id = extract_local_pstate(val);
if (freq_data.gpstate_id == freq_data.pstate_id) {
reset_gpstates(policy);
spin_unlock(&gpstates->gpstate_lock);
static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
{
struct cpu_data *cpud = policy->driver_data;
- struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
- if (of_find_property(np, "#cooling-cells", NULL)) {
- cpud->cdev = of_cpufreq_cooling_register(np, policy);
-
- if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
- pr_err("cpu%d is not running as cooling device: %ld\n",
- policy->cpu, PTR_ERR(cpud->cdev));
-
- cpud->cdev = NULL;
- }
- }
-
- of_node_put(np);
+ cpud->cdev = of_cpufreq_cooling_register(policy);
}
static struct cpufreq_driver qoriq_cpufreq_driver = {
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/clk.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
+#include <linux/export.h>
#include <linux/module.h>
-#include <linux/platform_device.h>
+#include <linux/of_platform.h>
#include <linux/pm_opp.h>
#include <linux/scpi_protocol.h>
+#include <linux/slab.h>
#include <linux/types.h>
-#include "arm_big_little.h"
+struct scpi_data {
+ struct clk *clk;
+ struct device *cpu_dev;
+ struct thermal_cooling_device *cdev;
+};
static struct scpi_ops *scpi_ops;
-static int scpi_get_transition_latency(struct device *cpu_dev)
+static unsigned int scpi_cpufreq_get_rate(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
+ struct scpi_data *priv = policy->driver_data;
+ unsigned long rate = clk_get_rate(priv->clk);
+
+ return rate / 1000;
+}
+
+static int
+scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index)
+{
+ struct scpi_data *priv = policy->driver_data;
+ u64 rate = policy->freq_table[index].frequency * 1000;
+ int ret;
+
+ ret = clk_set_rate(priv->clk, rate);
+ if (!ret && (clk_get_rate(priv->clk) != rate))
+ ret = -EIO;
+
+ return ret;
+}
+
+static int
+scpi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
{
- return scpi_ops->get_transition_latency(cpu_dev);
+ int cpu, domain, tdomain;
+ struct device *tcpu_dev;
+
+ domain = scpi_ops->device_domain_id(cpu_dev);
+ if (domain < 0)
+ return domain;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == cpu_dev->id)
+ continue;
+
+ tcpu_dev = get_cpu_device(cpu);
+ if (!tcpu_dev)
+ continue;
+
+ tdomain = scpi_ops->device_domain_id(tcpu_dev);
+ if (tdomain == domain)
+ cpumask_set_cpu(cpu, cpumask);
+ }
+
+ return 0;
}
-static int scpi_init_opp_table(const struct cpumask *cpumask)
+static int scpi_cpufreq_init(struct cpufreq_policy *policy)
{
int ret;
- struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
+ unsigned int latency;
+ struct device *cpu_dev;
+ struct scpi_data *priv;
+ struct cpufreq_frequency_table *freq_table;
+
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev) {
+ pr_err("failed to get cpu%d device\n", policy->cpu);
+ return -ENODEV;
+ }
ret = scpi_ops->add_opps_to_device(cpu_dev);
if (ret) {
return ret;
}
- ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask);
- if (ret)
+ ret = scpi_get_sharing_cpus(cpu_dev, policy->cpus);
+ if (ret) {
+ dev_warn(cpu_dev, "failed to get sharing cpumask\n");
+ return ret;
+ }
+
+ ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
+ if (ret) {
dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
__func__, ret);
+ return ret;
+ }
+
+ ret = dev_pm_opp_get_opp_count(cpu_dev);
+ if (ret <= 0) {
+ dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
+ ret = -EPROBE_DEFER;
+ goto out_free_opp;
+ }
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto out_free_opp;
+ }
+
+ ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
+ if (ret) {
+ dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
+ goto out_free_priv;
+ }
+
+ priv->cpu_dev = cpu_dev;
+ priv->clk = clk_get(cpu_dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d\n",
+ __func__, cpu_dev->id);
+ goto out_free_cpufreq_table;
+ }
+
+ policy->driver_data = priv;
+
+ ret = cpufreq_table_validate_and_show(policy, freq_table);
+ if (ret) {
+ dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
+ ret);
+ goto out_put_clk;
+ }
+
+ /* scpi allows DVFS request for any domain from any CPU */
+ policy->dvfs_possible_from_any_cpu = true;
+
+ latency = scpi_ops->get_transition_latency(cpu_dev);
+ if (!latency)
+ latency = CPUFREQ_ETERNAL;
+
+ policy->cpuinfo.transition_latency = latency;
+
+ policy->fast_switch_possible = false;
+ return 0;
+
+out_put_clk:
+ clk_put(priv->clk);
+out_free_cpufreq_table:
+ dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
+out_free_priv:
+ kfree(priv);
+out_free_opp:
+ dev_pm_opp_cpumask_remove_table(policy->cpus);
+
return ret;
}
-static const struct cpufreq_arm_bL_ops scpi_cpufreq_ops = {
- .name = "scpi",
- .get_transition_latency = scpi_get_transition_latency,
- .init_opp_table = scpi_init_opp_table,
- .free_opp_table = dev_pm_opp_cpumask_remove_table,
+static int scpi_cpufreq_exit(struct cpufreq_policy *policy)
+{
+ struct scpi_data *priv = policy->driver_data;
+
+ cpufreq_cooling_unregister(priv->cdev);
+ clk_put(priv->clk);
+ dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
+ kfree(priv);
+ dev_pm_opp_cpumask_remove_table(policy->related_cpus);
+
+ return 0;
+}
+
+static void scpi_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ struct scpi_data *priv = policy->driver_data;
+ struct thermal_cooling_device *cdev;
+
+ cdev = of_cpufreq_cooling_register(policy);
+ if (!IS_ERR(cdev))
+ priv->cdev = cdev;
+}
+
+static struct cpufreq_driver scpi_cpufreq_driver = {
+ .name = "scpi-cpufreq",
+ .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
+ CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+ .verify = cpufreq_generic_frequency_table_verify,
+ .attr = cpufreq_generic_attr,
+ .get = scpi_cpufreq_get_rate,
+ .init = scpi_cpufreq_init,
+ .exit = scpi_cpufreq_exit,
+ .ready = scpi_cpufreq_ready,
+ .target_index = scpi_cpufreq_set_target,
};
static int scpi_cpufreq_probe(struct platform_device *pdev)
{
+ int ret;
+
scpi_ops = get_scpi_ops();
if (!scpi_ops)
return -EIO;
- return bL_cpufreq_register(&scpi_cpufreq_ops);
+ ret = cpufreq_register_driver(&scpi_cpufreq_driver);
+ if (ret)
+ dev_err(&pdev->dev, "%s: registering cpufreq failed, err: %d\n",
+ __func__, ret);
+ return ret;
}
static int scpi_cpufreq_remove(struct platform_device *pdev)
{
- bL_cpufreq_unregister(&scpi_cpufreq_ops);
+ cpufreq_unregister_driver(&scpi_cpufreq_driver);
scpi_ops = NULL;
return 0;
}
#include <linux/cpu.h>
#include <linux/io.h>
#include <linux/mfd/syscon.h>
+#include <linux/module.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_platform.h>
unsigned long efuse_mask;
unsigned long efuse_shift;
unsigned long rev_offset;
+ bool multi_regulator;
};
struct ti_cpufreq_data {
struct device_node *opp_node;
struct regmap *syscon;
const struct ti_cpufreq_soc_data *soc_data;
+ struct opp_table *opp_table;
};
static unsigned long amx3_efuse_xlate(struct ti_cpufreq_data *opp_data,
.efuse_offset = 0x07fc,
.efuse_mask = 0x1fff,
.rev_offset = 0x600,
+ .multi_regulator = false,
};
static struct ti_cpufreq_soc_data am4x_soc_data = {
.efuse_offset = 0x0610,
.efuse_mask = 0x3f,
.rev_offset = 0x600,
+ .multi_regulator = false,
};
static struct ti_cpufreq_soc_data dra7_soc_data = {
.efuse_mask = 0xf80000,
.efuse_shift = 19,
.rev_offset = 0x204,
+ .multi_regulator = true,
};
/**
{},
};
-static int ti_cpufreq_init(void)
+static int ti_cpufreq_probe(struct platform_device *pdev)
{
u32 version[VERSION_COUNT];
struct device_node *np;
const struct of_device_id *match;
+ struct opp_table *ti_opp_table;
struct ti_cpufreq_data *opp_data;
+ const char * const reg_names[] = {"vdd", "vbb"};
int ret;
np = of_find_node_by_path("/");
if (ret)
goto fail_put_node;
- ret = PTR_ERR_OR_ZERO(dev_pm_opp_set_supported_hw(opp_data->cpu_dev,
- version, VERSION_COUNT));
- if (ret) {
+ ti_opp_table = dev_pm_opp_set_supported_hw(opp_data->cpu_dev,
+ version, VERSION_COUNT);
+ if (IS_ERR(ti_opp_table)) {
dev_err(opp_data->cpu_dev,
"Failed to set supported hardware\n");
+ ret = PTR_ERR(ti_opp_table);
goto fail_put_node;
}
- of_node_put(opp_data->opp_node);
+ opp_data->opp_table = ti_opp_table;
+
+ if (opp_data->soc_data->multi_regulator) {
+ ti_opp_table = dev_pm_opp_set_regulators(opp_data->cpu_dev,
+ reg_names,
+ ARRAY_SIZE(reg_names));
+ if (IS_ERR(ti_opp_table)) {
+ dev_pm_opp_put_supported_hw(opp_data->opp_table);
+ ret = PTR_ERR(ti_opp_table);
+ goto fail_put_node;
+ }
+ }
+ of_node_put(opp_data->opp_node);
register_cpufreq_dt:
platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
return ret;
}
-device_initcall(ti_cpufreq_init);
+
+static int ti_cpufreq_init(void)
+{
+ platform_device_register_simple("ti-cpufreq", -1, NULL, 0);
+ return 0;
+}
+module_init(ti_cpufreq_init);
+
+static struct platform_driver ti_cpufreq_driver = {
+ .probe = ti_cpufreq_probe,
+ .driver = {
+ .name = "ti-cpufreq",
+ },
+};
+module_platform_driver(ti_cpufreq_driver);
+
+MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver");
+MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>");
+MODULE_LICENSE("GPL v2");
/**
* cpuidle_switch_governor - changes the governor
* @gov: the new target governor
- *
- * NOTE: "gov" can be NULL to specify disabled
* Must be called with cpuidle_lock acquired.
*/
int cpuidle_switch_governor(struct cpuidle_governor *gov)
{
struct cpuidle_device *dev;
+ if (!gov)
+ return -EINVAL;
+
if (gov == cpuidle_curr_governor)
return 0;
select CRYPTO_SHA256
select CRYPTO_SHA512
select CRYPTO_AUTHENC
+ select CRYPTO_GF128MUL
---help---
The Chelsio Crypto Co-processor driver for T6 adapters.
ndesc = ctx->handle_result(priv, ring, sreq->req,
&should_complete, &ret);
if (ndesc < 0) {
+ kfree(sreq);
dev_err(priv->dev, "failed to handle result (%d)", ndesc);
return;
}
#include <crypto/aes.h>
#include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
#include "safexcel.h"
unsigned int key_len;
};
+struct safexcel_cipher_req {
+ bool needs_inv;
+};
+
static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
struct crypto_async_request *async,
struct safexcel_command_desc *cdesc,
return 0;
}
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
- struct crypto_async_request *async,
- bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+ struct crypto_async_request *async,
+ bool *should_complete, int *ret)
{
struct skcipher_request *req = skcipher_request_cast(async);
struct safexcel_result_desc *rdesc;
spin_unlock_bh(&priv->ring[ring].egress_lock);
request->req = &req->base;
- ctx->base.handle_result = safexcel_handle_result;
*commands = n_cdesc;
*results = n_rdesc;
ring = safexcel_select_ring(priv);
ctx->base.ring = ring;
- ctx->base.needs_inv = false;
- ctx->base.send = safexcel_aes_send;
spin_lock_bh(&priv->ring[ring].queue_lock);
enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
return ndesc;
}
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+ struct crypto_async_request *async,
+ bool *should_complete, int *ret)
+{
+ struct skcipher_request *req = skcipher_request_cast(async);
+ struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+ int err;
+
+ if (sreq->needs_inv) {
+ sreq->needs_inv = false;
+ err = safexcel_handle_inv_result(priv, ring, async,
+ should_complete, ret);
+ } else {
+ err = safexcel_handle_req_result(priv, ring, async,
+ should_complete, ret);
+ }
+
+ return err;
+}
+
static int safexcel_cipher_send_inv(struct crypto_async_request *async,
int ring, struct safexcel_request *request,
int *commands, int *results)
struct safexcel_crypto_priv *priv = ctx->priv;
int ret;
- ctx->base.handle_result = safexcel_handle_inv_result;
-
ret = safexcel_invalidate_cache(async, &ctx->base, priv,
ctx->base.ctxr_dma, ring, request);
if (unlikely(ret))
return 0;
}
+static int safexcel_send(struct crypto_async_request *async,
+ int ring, struct safexcel_request *request,
+ int *commands, int *results)
+{
+ struct skcipher_request *req = skcipher_request_cast(async);
+ struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+ int ret;
+
+ if (sreq->needs_inv)
+ ret = safexcel_cipher_send_inv(async, ring, request,
+ commands, results);
+ else
+ ret = safexcel_aes_send(async, ring, request,
+ commands, results);
+ return ret;
+}
+
static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
{
struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
- struct skcipher_request req;
+ SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm));
+ struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
- memset(&req, 0, sizeof(struct skcipher_request));
+ memset(req, 0, sizeof(struct skcipher_request));
/* create invalidation request */
init_completion(&result.completion);
- skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- safexcel_inv_complete, &result);
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ safexcel_inv_complete, &result);
- skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
- ctx = crypto_tfm_ctx(req.base.tfm);
+ skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
+ ctx = crypto_tfm_ctx(req->base.tfm);
ctx->base.exit_inv = true;
- ctx->base.send = safexcel_cipher_send_inv;
+ sreq->needs_inv = true;
spin_lock_bh(&priv->ring[ring].queue_lock);
- crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+ crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
spin_unlock_bh(&priv->ring[ring].queue_lock);
if (!priv->ring[ring].need_dequeue)
enum safexcel_cipher_direction dir, u32 mode)
{
struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+ struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
struct safexcel_crypto_priv *priv = ctx->priv;
int ret, ring;
+ sreq->needs_inv = false;
ctx->direction = dir;
ctx->mode = mode;
if (ctx->base.ctxr) {
- if (ctx->base.needs_inv)
- ctx->base.send = safexcel_cipher_send_inv;
+ if (ctx->base.needs_inv) {
+ sreq->needs_inv = true;
+ ctx->base.needs_inv = false;
+ }
} else {
ctx->base.ring = safexcel_select_ring(priv);
- ctx->base.send = safexcel_aes_send;
-
ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
EIP197_GFP_FLAGS(req->base),
&ctx->base.ctxr_dma);
alg.skcipher.base);
ctx->priv = tmpl->priv;
+ ctx->base.send = safexcel_send;
+ ctx->base.handle_result = safexcel_handle_result;
+
+ crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+ sizeof(struct safexcel_cipher_req));
return 0;
}
bool last_req;
bool finish;
bool hmac;
+ bool needs_inv;
u8 state_sz; /* expected sate size, only set once */
- u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+ u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
u64 len;
u64 processed;
}
}
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
- struct crypto_async_request *async,
- bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+ struct crypto_async_request *async,
+ bool *should_complete, int *ret)
{
struct safexcel_result_desc *rdesc;
struct ahash_request *areq = ahash_request_cast(async);
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
- int cache_len, result_sz = sreq->state_sz;
+ int cache_len;
*ret = 0;
spin_unlock_bh(&priv->ring[ring].egress_lock);
if (sreq->finish)
- result_sz = crypto_ahash_digestsize(ahash);
- memcpy(sreq->state, areq->result, result_sz);
+ memcpy(areq->result, sreq->state,
+ crypto_ahash_digestsize(ahash));
dma_unmap_sg(priv->dev, areq->src,
sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
return 1;
}
-static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
- struct safexcel_request *request, int *commands,
- int *results)
+static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
+ struct safexcel_request *request,
+ int *commands, int *results)
{
struct ahash_request *areq = ahash_request_cast(async);
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
/* Add the token */
safexcel_hash_token(first_cdesc, len, req->state_sz);
- ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+ ctx->base.result_dma = dma_map_single(priv->dev, req->state,
req->state_sz, DMA_FROM_DEVICE);
if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
ret = -EINVAL;
req->processed += len;
request->req = &areq->base;
- ctx->base.handle_result = safexcel_handle_result;
*commands = n_cdesc;
*results = 1;
ring = safexcel_select_ring(priv);
ctx->base.ring = ring;
- ctx->base.needs_inv = false;
- ctx->base.send = safexcel_ahash_send;
spin_lock_bh(&priv->ring[ring].queue_lock);
enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
return 1;
}
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+ struct crypto_async_request *async,
+ bool *should_complete, int *ret)
+{
+ struct ahash_request *areq = ahash_request_cast(async);
+ struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+ int err;
+
+ if (req->needs_inv) {
+ req->needs_inv = false;
+ err = safexcel_handle_inv_result(priv, ring, async,
+ should_complete, ret);
+ } else {
+ err = safexcel_handle_req_result(priv, ring, async,
+ should_complete, ret);
+ }
+
+ return err;
+}
+
static int safexcel_ahash_send_inv(struct crypto_async_request *async,
int ring, struct safexcel_request *request,
int *commands, int *results)
struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
int ret;
- ctx->base.handle_result = safexcel_handle_inv_result;
ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
ctx->base.ctxr_dma, ring, request);
if (unlikely(ret))
return 0;
}
+static int safexcel_ahash_send(struct crypto_async_request *async,
+ int ring, struct safexcel_request *request,
+ int *commands, int *results)
+{
+ struct ahash_request *areq = ahash_request_cast(async);
+ struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+ int ret;
+
+ if (req->needs_inv)
+ ret = safexcel_ahash_send_inv(async, ring, request,
+ commands, results);
+ else
+ ret = safexcel_ahash_send_req(async, ring, request,
+ commands, results);
+ return ret;
+}
+
static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
{
struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
- struct ahash_request req;
+ AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm));
+ struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
- memset(&req, 0, sizeof(struct ahash_request));
+ memset(req, 0, sizeof(struct ahash_request));
/* create invalidation request */
init_completion(&result.completion);
- ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
safexcel_inv_complete, &result);
- ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
- ctx = crypto_tfm_ctx(req.base.tfm);
+ ahash_request_set_tfm(req, __crypto_ahash_cast(tfm));
+ ctx = crypto_tfm_ctx(req->base.tfm);
ctx->base.exit_inv = true;
- ctx->base.send = safexcel_ahash_send_inv;
+ rctx->needs_inv = true;
spin_lock_bh(&priv->ring[ring].queue_lock);
- crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+ crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
spin_unlock_bh(&priv->ring[ring].queue_lock);
if (!priv->ring[ring].need_dequeue)
struct safexcel_crypto_priv *priv = ctx->priv;
int ret, ring;
- ctx->base.send = safexcel_ahash_send;
+ req->needs_inv = false;
if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
if (ctx->base.ctxr) {
- if (ctx->base.needs_inv)
- ctx->base.send = safexcel_ahash_send_inv;
+ if (ctx->base.needs_inv) {
+ ctx->base.needs_inv = false;
+ req->needs_inv = true;
+ }
} else {
ctx->base.ring = safexcel_select_ring(priv);
ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
struct safexcel_alg_template, alg.ahash);
ctx->priv = tmpl->priv;
+ ctx->base.send = safexcel_ahash_send;
+ ctx->base.handle_result = safexcel_handle_result;
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct safexcel_ahash_req));
CWQ_ENTRY_SIZE, 0, NULL);
if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
return -ENOMEM;
}
return 0;
{
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+ queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
}
static long spu_queue_register_workfn(void *arg)
* Power management
*/
-#ifdef CONFIG_PM_SLEEP
-static int rcar_dmac_sleep_suspend(struct device *dev)
-{
- /*
- * TODO: Wait for the current transfer to complete and stop the device.
- */
- return 0;
-}
-
-static int rcar_dmac_sleep_resume(struct device *dev)
-{
- /* TODO: Resume transfers, if any. */
- return 0;
-}
-#endif
-
#ifdef CONFIG_PM
static int rcar_dmac_runtime_suspend(struct device *dev)
{
#endif
static const struct dev_pm_ops rcar_dmac_pm = {
- SET_SYSTEM_SLEEP_PM_OPS(rcar_dmac_sleep_suspend, rcar_dmac_sleep_resume)
+ /*
+ * TODO for system sleep/resume:
+ * - Wait for the current transfer to complete and stop the device,
+ * - Resume transfers, if any.
+ */
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
NULL)
};
#define NO_FURTHER_WRITE_ACTION -1
-#ifndef phys_to_page
-#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT)
-#endif
-
/**
* efi_free_all_buff_pages - free all previous allocated buffer pages
* @cap_info: pointer to current instance of capsule_info structure
static void efi_free_all_buff_pages(struct capsule_info *cap_info)
{
while (cap_info->index > 0)
- __free_page(phys_to_page(cap_info->pages[--cap_info->index]));
+ __free_page(cap_info->pages[--cap_info->index]);
cap_info->index = NO_FURTHER_WRITE_ACTION;
}
cap_info->pages = temp_page;
+ temp_page = krealloc(cap_info->phys,
+ pages_needed * sizeof(phys_addr_t *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!temp_page)
+ return -ENOMEM;
+
+ cap_info->phys = temp_page;
+
return 0;
}
**/
static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
{
+ bool do_vunmap = false;
int ret;
- ret = efi_capsule_update(&cap_info->header, cap_info->pages);
+ /*
+ * cap_info->capsule may have been assigned already by a quirk
+ * handler, so only overwrite it if it is NULL
+ */
+ if (!cap_info->capsule) {
+ cap_info->capsule = vmap(cap_info->pages, cap_info->index,
+ VM_MAP, PAGE_KERNEL);
+ if (!cap_info->capsule)
+ return -ENOMEM;
+ do_vunmap = true;
+ }
+
+ ret = efi_capsule_update(cap_info->capsule, cap_info->phys);
+ if (do_vunmap)
+ vunmap(cap_info->capsule);
if (ret) {
pr_err("capsule update failed\n");
return ret;
goto failed;
}
- cap_info->pages[cap_info->index++] = page_to_phys(page);
+ cap_info->pages[cap_info->index] = page;
+ cap_info->phys[cap_info->index] = page_to_phys(page);
cap_info->page_bytes_remain = PAGE_SIZE;
+ cap_info->index++;
} else {
- page = phys_to_page(cap_info->pages[cap_info->index - 1]);
+ page = cap_info->pages[cap_info->index - 1];
}
kbuff = kmap(page);
struct capsule_info *cap_info = file->private_data;
kfree(cap_info->pages);
+ kfree(cap_info->phys);
kfree(file->private_data);
file->private_data = NULL;
return 0;
return -ENOMEM;
}
+ cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+ if (!cap_info->phys) {
+ kfree(cap_info->pages);
+ kfree(cap_info);
+ return -ENOMEM;
+ }
+
file->private_data = cap_info;
return 0;
return 0;
}
-static int find_clusters(const struct cpumask *cpus,
- const struct cpumask **clusters)
+static int find_cpu_groups(const struct cpumask *cpus,
+ const struct cpumask **cpu_groups)
{
unsigned int nb = 0;
cpumask_var_t tmp;
cpumask_copy(tmp, cpus);
while (!cpumask_empty(tmp)) {
- const struct cpumask *cluster =
+ const struct cpumask *cpu_group =
topology_core_cpumask(cpumask_any(tmp));
- clusters[nb++] = cluster;
- cpumask_andnot(tmp, tmp, cluster);
+ cpu_groups[nb++] = cpu_group;
+ cpumask_andnot(tmp, tmp, cpu_group);
}
free_cpumask_var(tmp);
{
int err;
cpumask_var_t offlined_cpus;
- int i, nb_cluster;
- const struct cpumask **clusters;
+ int i, nb_cpu_group;
+ const struct cpumask **cpu_groups;
char *page_buf;
err = -ENOMEM;
if (!alloc_cpumask_var(&offlined_cpus, GFP_KERNEL))
return err;
- /* We may have up to nb_available_cpus clusters. */
- clusters = kmalloc_array(nb_available_cpus, sizeof(*clusters),
- GFP_KERNEL);
- if (!clusters)
+ /* We may have up to nb_available_cpus cpu_groups. */
+ cpu_groups = kmalloc_array(nb_available_cpus, sizeof(*cpu_groups),
+ GFP_KERNEL);
+ if (!cpu_groups)
goto out_free_cpus;
page_buf = (char *)__get_free_page(GFP_KERNEL);
if (!page_buf)
- goto out_free_clusters;
+ goto out_free_cpu_groups;
err = 0;
- nb_cluster = find_clusters(cpu_online_mask, clusters);
+ nb_cpu_group = find_cpu_groups(cpu_online_mask, cpu_groups);
/*
* Of course the last CPU cannot be powered down and cpu_down() should
err += down_and_up_cpus(cpu_online_mask, offlined_cpus);
/*
- * Take down CPUs by cluster this time. When the last CPU is turned
- * off, the cluster itself should shut down.
+ * Take down CPUs by cpu group this time. When the last CPU is turned
+ * off, the cpu group itself should shut down.
*/
- for (i = 0; i < nb_cluster; ++i) {
- int cluster_id =
- topology_physical_package_id(cpumask_any(clusters[i]));
+ for (i = 0; i < nb_cpu_group; ++i) {
ssize_t len = cpumap_print_to_pagebuf(true, page_buf,
- clusters[i]);
+ cpu_groups[i]);
/* Remove trailing newline. */
page_buf[len - 1] = '\0';
- pr_info("Trying to turn off and on again cluster %d "
- "(CPUs %s)\n", cluster_id, page_buf);
- err += down_and_up_cpus(clusters[i], offlined_cpus);
+ pr_info("Trying to turn off and on again group %d (CPUs %s)\n",
+ i, page_buf);
+ err += down_and_up_cpus(cpu_groups[i], offlined_cpus);
}
free_page((unsigned long)page_buf);
-out_free_clusters:
- kfree(clusters);
+out_free_cpu_groups:
+ kfree(cpu_groups);
out_free_cpus:
free_cpumask_var(offlined_cpus);
return err;
}
EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
+/**
+ * gpiod_set_value_nocheck() - set a GPIO line value without checking
+ * @desc: the descriptor to set the value on
+ * @value: value to set
+ *
+ * This sets the value of a GPIO line backing a descriptor, applying
+ * different semantic quirks like active low and open drain/source
+ * handling.
+ */
+static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value)
+{
+ if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+ value = !value;
+ if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
+ gpio_set_open_drain_value_commit(desc, value);
+ else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
+ gpio_set_open_source_value_commit(desc, value);
+ else
+ gpiod_set_raw_value_commit(desc, value);
+}
+
/**
* gpiod_set_value() - assign a gpio's value
* @desc: gpio whose value will be assigned
void gpiod_set_value(struct gpio_desc *desc, int value)
{
VALIDATE_DESC_VOID(desc);
- /* Should be using gpiod_set_value_cansleep() */
WARN_ON(desc->gdev->chip->can_sleep);
- if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
- value = !value;
- if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
- gpio_set_open_drain_value_commit(desc, value);
- else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
- gpio_set_open_source_value_commit(desc, value);
- else
- gpiod_set_raw_value_commit(desc, value);
+ gpiod_set_value_nocheck(desc, value);
}
EXPORT_SYMBOL_GPL(gpiod_set_value);
{
might_sleep_if(extra_checks);
VALIDATE_DESC_VOID(desc);
- if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
- value = !value;
- gpiod_set_raw_value_commit(desc, value);
+ gpiod_set_value_nocheck(desc, value);
}
EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
void dpp1_cm_set_output_csc_default(
struct dpp *dpp_base,
- const struct default_adjustment *default_adjust);
+ enum dc_color_space colorspace);
void dpp1_cm_set_gamut_remap(
struct dpp *dpp,
void dpp1_cm_set_output_csc_default(
struct dpp *dpp_base,
- const struct default_adjustment *default_adjust)
+ enum dc_color_space colorspace)
{
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
uint32_t ocsc_mode = 0;
- if (default_adjust != NULL) {
- switch (default_adjust->out_color_space) {
+ switch (colorspace) {
case COLOR_SPACE_SRGB:
case COLOR_SPACE_2020_RGB_FULLRANGE:
ocsc_mode = 0;
case COLOR_SPACE_UNKNOWN:
default:
break;
- }
}
REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
tbl_entry.color_space = color_space;
//tbl_entry.regval = matrix;
pipe_ctx->plane_res.dpp->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry);
+ } else {
+ pipe_ctx->plane_res.dpp->funcs->opp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace);
}
}
static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
void (*opp_set_csc_default)(
struct dpp *dpp,
- const struct default_adjustment *default_adjust);
+ enum dc_color_space colorspace);
void (*opp_set_csc_adjustment)(
struct dpp *dpp,
void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
int x, int y)
{
+ const struct drm_format_info *format = fb->format;
+ unsigned int num_planes = format->num_planes;
u32 addr = drm_fb_obj(fb)->dev_addr;
- int num_planes = fb->format->num_planes;
int i;
if (num_planes > 3)
num_planes = 3;
- for (i = 0; i < num_planes; i++)
+ addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] +
+ x * format->cpp[0];
+
+ y /= format->vsub;
+ x /= format->hsub;
+
+ for (i = 1; i < num_planes; i++)
addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] +
- x * fb->format->cpp[i];
+ x * format->cpp[i];
for (; i < 3; i++)
addrs[i] = 0;
}
if (plane->fb)
drm_framebuffer_put(plane->fb);
- /* Power down the Y/U/V FIFOs */
- sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
-
/* Power down most RAMs and FIFOs if this is the primary plane */
if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
- sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
- CFG_PDWN32x32 | CFG_PDWN64x66;
+ sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
+ CFG_PDWN32x32 | CFG_PDWN64x66;
dma_ctrl0_mask = CFG_GRA_ENA;
} else {
+ /* Power down the Y/U/V FIFOs */
+ sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
dma_ctrl0_mask = CFG_DMA_ENA;
}
ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc",
dcrtc);
- if (ret < 0) {
- kfree(dcrtc);
- return ret;
- }
+ if (ret < 0)
+ goto err_crtc;
if (dcrtc->variant->init) {
ret = dcrtc->variant->init(dcrtc, dev);
- if (ret) {
- kfree(dcrtc);
- return ret;
- }
+ if (ret)
+ goto err_crtc;
}
/* Ensure AXI pipeline is enabled */
dcrtc->crtc.port = port;
primary = kzalloc(sizeof(*primary), GFP_KERNEL);
- if (!primary)
- return -ENOMEM;
+ if (!primary) {
+ ret = -ENOMEM;
+ goto err_crtc;
+ }
ret = armada_drm_plane_init(primary);
if (ret) {
kfree(primary);
- return ret;
+ goto err_crtc;
}
ret = drm_universal_plane_init(drm, &primary->base, 0,
DRM_PLANE_TYPE_PRIMARY, NULL);
if (ret) {
kfree(primary);
- return ret;
+ goto err_crtc;
}
ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL,
err_crtc_init:
primary->base.funcs->destroy(&primary->base);
+err_crtc:
+ kfree(dcrtc);
+
return ret;
}
};
struct armada_plane_state {
+ u16 src_x;
+ u16 src_y;
u32 src_hw;
u32 dst_hw;
u32 dst_yx;
{
struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+ const struct drm_format_info *format;
struct drm_rect src = {
.x1 = src_x,
.y1 = src_y,
};
uint32_t val, ctrl0;
unsigned idx = 0;
- bool visible;
+ bool visible, fb_changed;
int ret;
trace_armada_ovl_plane_update(plane, crtc, fb,
if (!visible)
ctrl0 &= ~CFG_DMA_ENA;
+ /*
+ * Shifting a YUV packed format image by one pixel causes the U/V
+ * planes to swap. Compensate for it by also toggling the UV swap.
+ */
+ format = fb->format;
+ if (format->num_planes == 1 && src.x1 >> 16 & (format->hsub - 1))
+ ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+
+ fb_changed = plane->fb != fb ||
+ dplane->base.state.src_x != src.x1 >> 16 ||
+ dplane->base.state.src_y != src.y1 >> 16;
+
if (!dcrtc->plane) {
dcrtc->plane = plane;
armada_ovl_update_attr(&dplane->prop, dcrtc);
/* FIXME: overlay on an interlaced display */
/* Just updating the position/size? */
- if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) {
+ if (!fb_changed && dplane->base.state.ctrl0 == ctrl0) {
val = (drm_rect_height(&src) & 0xffff0000) |
drm_rect_width(&src) >> 16;
dplane->base.state.src_hw = val;
if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
armada_drm_plane_work_cancel(dcrtc, &dplane->base);
- if (plane->fb != fb) {
- u32 addrs[3], pixel_format;
- int num_planes, hsub;
+ if (fb_changed) {
+ u32 addrs[3];
/*
* Take a reference on the new framebuffer - we want to
if (plane->fb)
armada_ovl_retire_fb(dplane, plane->fb);
- src_y = src.y1 >> 16;
- src_x = src.x1 >> 16;
+ dplane->base.state.src_y = src_y = src.y1 >> 16;
+ dplane->base.state.src_x = src_x = src.x1 >> 16;
armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y);
- pixel_format = fb->format->format;
- hsub = drm_format_horz_chroma_subsampling(pixel_format);
- num_planes = fb->format->num_planes;
-
- /*
- * Annoyingly, shifting a YUYV-format image by one pixel
- * causes the U/V planes to toggle. Toggle the UV swap.
- * (Unfortunately, this causes momentary colour flickering.)
- */
- if (src_x & (hsub - 1) && num_planes == 1)
- ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
-
armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
LCD_SPU_DMA_START_ADDR_Y0);
armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
}
static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
- unsigned int opcode, int rings)
+ unsigned int opcode, unsigned long rings)
{
struct cmd_info *info = NULL;
unsigned int ring;
- for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+ for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
info = find_cmd_entry(gvt, opcode, ring);
if (info)
break;
return ret;
} else {
if (!test_bit(index, spt->post_shadow_bitmap)) {
+ int type = spt->shadow_page.type;
+
ppgtt_get_shadow_entry(spt, &se, index);
ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
if (ret)
return ret;
+ ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+ ppgtt_set_shadow_entry(spt, &se, index);
}
-
ppgtt_set_post_shadow(spt, index);
}
*/
struct workqueue_struct *wq;
+ /* ordered wq for modesets */
+ struct workqueue_struct *modeset_wq;
+
/* Display functions */
struct drm_i915_display_funcs display;
struct drm_i915_gem_request *rq;
struct intel_engine_cs *engine;
- if (!dma_fence_is_i915(fence))
+ if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
return;
rq = to_request(fence);
#define RESET_PCH_HANDSHAKE_ENABLE (1<<4)
#define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430)
+#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30)
#define MASK_WAKEMEM (1<<13)
#define SKL_DFSM _MMIO(0x51000)
#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
#define DISABLE_PIXEL_MASK_CAMMING (1<<14)
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
+
#define GEN7_L3SQCREG1 _MMIO(0xB010)
#define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000
#define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22)
#define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22)
#define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20)
+#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19)
#define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3)
#define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16)
#define CDCLK_FREQ_DECIMAL_MASK (0x7ff)
static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
{
- int min_cdclk = skl_calc_cdclk(0, vco);
u32 val;
WARN_ON(vco != 8100000 && vco != 8640000);
- /* select the minimum CDCLK before enabling DPLL 0 */
- val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk);
- I915_WRITE(CDCLK_CTL, val);
- POSTING_READ(CDCLK_CTL);
-
/*
* We always enable DPLL0 with the lowest link rate possible, but still
* taking into account the VCO required to operate the eDP panel at the
{
int cdclk = cdclk_state->cdclk;
int vco = cdclk_state->vco;
- u32 freq_select, pcu_ack;
+ u32 freq_select, pcu_ack, cdclk_ctl;
int ret;
WARN_ON((cdclk == 24000) != (vco == 0));
return;
}
- /* set CDCLK_CTL */
+ /* Choose frequency for this cdclk */
switch (cdclk) {
case 450000:
case 432000:
dev_priv->cdclk.hw.vco != vco)
skl_dpll0_disable(dev_priv);
+ cdclk_ctl = I915_READ(CDCLK_CTL);
+
+ if (dev_priv->cdclk.hw.vco != vco) {
+ /* Wa Display #1183: skl,kbl,cfl */
+ cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+ cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+ I915_WRITE(CDCLK_CTL, cdclk_ctl);
+ }
+
+ /* Wa Display #1183: skl,kbl,cfl */
+ cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE;
+ I915_WRITE(CDCLK_CTL, cdclk_ctl);
+ POSTING_READ(CDCLK_CTL);
+
if (dev_priv->cdclk.hw.vco != vco)
skl_dpll0_enable(dev_priv, vco);
- I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk));
+ /* Wa Display #1183: skl,kbl,cfl */
+ cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+ I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+ cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+ I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+ /* Wa Display #1183: skl,kbl,cfl */
+ cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE;
+ I915_WRITE(CDCLK_CTL, cdclk_ctl);
POSTING_READ(CDCLK_CTL);
/* inform PCU of the change */
INIT_WORK(&state->commit_work, intel_atomic_commit_work);
i915_sw_fence_commit(&intel_state->commit_ready);
- if (nonblock)
+ if (nonblock && intel_state->modeset) {
+ queue_work(dev_priv->modeset_wq, &state->commit_work);
+ } else if (nonblock) {
queue_work(system_unbound_wq, &state->commit_work);
- else
+ } else {
+ if (intel_state->modeset)
+ flush_workqueue(dev_priv->modeset_wq);
intel_atomic_commit_tail(state);
-
+ }
return 0;
}
enum pipe pipe;
struct intel_crtc *crtc;
+ dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0);
+
drm_mode_config_init(dev);
dev->mode_config.min_width = 0;
intel_cleanup_gt_powersave(dev_priv);
intel_teardown_gmbus(dev_priv);
+
+ destroy_workqueue(dev_priv->modeset_wq);
}
void intel_connector_attach_encoder(struct intel_connector *connector,
if (ret)
return ret;
+ /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+ ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+ if (ret)
+ return ret;
+
/* WaToEnableHwFixForPushConstHWBug:glk */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+ if (i915_gem_request_completed(request))
+ return;
+
if (prio <= READ_ONCE(request->priotree.priority))
return;
struct drm_i915_private *dev_priv = to_i915(dev);
if (dev_priv->psr.active) {
- i915_reg_t psr_ctl;
+ i915_reg_t psr_status;
u32 psr_status_mask;
if (dev_priv->psr.aux_frame_sync)
0);
if (dev_priv->psr.psr2_support) {
- psr_ctl = EDP_PSR2_CTL;
+ psr_status = EDP_PSR2_STATUS_CTL;
psr_status_mask = EDP_PSR2_STATUS_STATE_MASK;
- I915_WRITE(psr_ctl,
- I915_READ(psr_ctl) &
+ I915_WRITE(EDP_PSR2_CTL,
+ I915_READ(EDP_PSR2_CTL) &
~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE));
} else {
- psr_ctl = EDP_PSR_STATUS_CTL;
+ psr_status = EDP_PSR_STATUS_CTL;
psr_status_mask = EDP_PSR_STATUS_STATE_MASK;
- I915_WRITE(psr_ctl,
- I915_READ(psr_ctl) & ~EDP_PSR_ENABLE);
+ I915_WRITE(EDP_PSR_CTL,
+ I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
}
/* Wait till PSR is idle */
if (intel_wait_for_register(dev_priv,
- psr_ctl, psr_status_mask, 0,
+ psr_status, psr_status_mask, 0,
2000))
DRM_ERROR("Timed out waiting for PSR Idle State\n");
DRM_DEBUG_KMS("Enabling DC5\n");
+ /* Wa Display #1183: skl,kbl,cfl */
+ if (IS_GEN9_BC(dev_priv))
+ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+ SKL_SELECT_ALTERNATE_DC_EXIT);
+
gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
}
{
DRM_DEBUG_KMS("Disabling DC6\n");
+ /* Wa Display #1183: skl,kbl,cfl */
+ if (IS_GEN9_BC(dev_priv))
+ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+ SKL_SELECT_ALTERNATE_DC_EXIT);
+
gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
}
GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) | \
+ BIT_ULL(POWER_DOMAIN_GMBUS) | \
BIT_ULL(POWER_DOMAIN_INIT))
#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \
.links = gf119_sor_dp_links,
.power = g94_sor_dp_power,
.pattern = gf119_sor_dp_pattern,
+ .drive = gf119_sor_dp_drive,
.vcpi = gf119_sor_dp_vcpi,
.audio = gf119_sor_dp_audio,
.audio_sym = gf119_sor_dp_audio_sym,
/* then read the message */
msg.len = cnt & 0xf;
+ if (msg.len > CEC_MAX_MSG_SIZE - 2)
+ msg.len = CEC_MAX_MSG_SIZE - 2;
msg.msg[0] = hdmi_read_reg(core->base,
HDMI_CEC_RX_CMD_HEADER);
msg.msg[1] = hdmi_read_reg(core->base,
}
}
-static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1)
-{
- if (stat1 & 2) {
- u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
-
- cec_transmit_done(core->adap,
- CEC_TX_STATUS_NACK |
- CEC_TX_STATUS_MAX_RETRIES,
- 0, (dbg3 >> 4) & 7, 0, 0);
- } else if (stat1 & 1) {
- cec_transmit_done(core->adap,
- CEC_TX_STATUS_ARB_LOST |
- CEC_TX_STATUS_MAX_RETRIES,
- 0, 0, 0, 0);
- } else if (stat1 == 0) {
- cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
- 0, 0, 0, 0);
- }
-}
-
void hdmi4_cec_irq(struct hdmi_core_data *core)
{
u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0);
hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0);
hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1);
- if (stat0 & 0x40)
+ if (stat0 & 0x20) {
+ cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
+ 0, 0, 0, 0);
REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
- else if (stat0 & 0x24)
- hdmi_cec_transmit_fifo_empty(core, stat1);
- if (stat1 & 2) {
+ } else if (stat1 & 0x02) {
u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
cec_transmit_done(core->adap,
CEC_TX_STATUS_NACK |
CEC_TX_STATUS_MAX_RETRIES,
0, (dbg3 >> 4) & 7, 0, 0);
- } else if (stat1 & 1) {
- cec_transmit_done(core->adap,
- CEC_TX_STATUS_ARB_LOST |
- CEC_TX_STATUS_MAX_RETRIES,
- 0, 0, 0, 0);
+ REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
}
if (stat0 & 0x02)
hdmi_cec_received_msg(core);
- if (stat1 & 0x3)
- REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
}
static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap)
/*
* Enable CEC interrupts:
* Transmit Buffer Full/Empty Change event
- * Transmitter FIFO Empty event
* Receiver FIFO Not Empty event
*/
- hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26);
+ hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22);
/*
* Enable CEC interrupts:
- * RX FIFO Overrun Error event
- * Short Pulse Detected event
* Frame Retransmit Count Exceeded event
- * Start Bit Irregularity event
*/
- hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f);
+ hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02);
/* cec calibration enable (self clearing) */
hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03);
name, err);
goto remove;
}
+ } else {
+ /* fall back to the module clock on SOR0 (eDP/LVDS only) */
+ sor->clk_out = sor->clk;
}
sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
pr_info("Initializing pool allocator\n");
_manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
+ if (!_manager)
+ return -ENOMEM;
ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0);
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
- /* Undo the effects of a previous vc4_irq_uninstall. */
- enable_irq(dev->irq);
-
/* Enable both the render done and out of memory interrupts. */
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
return ret;
vc4_v3d_init_hw(vc4->dev);
+
+ /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+ enable_irq(vc4->dev->irq);
vc4_irq_postinstall(vc4->dev);
return 0;
}
view_type = vmw_view_cmd_to_type(header->id);
+ if (view_type == vmw_view_max)
+ return -EINVAL;
cmd = container_of(header, typeof(*cmd), header);
ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
user_surface_converter,
vps->pinned = 0;
/* Mapping is managed by prepare_fb/cleanup_fb */
- memset(&vps->guest_map, 0, sizeof(vps->guest_map));
memset(&vps->host_map, 0, sizeof(vps->host_map));
vps->cpp = 0;
/* Should have been freed by cleanup_fb */
- if (vps->guest_map.virtual) {
- DRM_ERROR("Guest mapping not freed\n");
- ttm_bo_kunmap(&vps->guest_map);
- }
-
if (vps->host_map.virtual) {
DRM_ERROR("Host mapping not freed\n");
ttm_bo_kunmap(&vps->host_map);
int pinned;
/* For CPU Blit */
- struct ttm_bo_kmap_obj host_map, guest_map;
+ struct ttm_bo_kmap_obj host_map;
unsigned int cpp;
};
bool defined;
/* For CPU Blit */
- struct ttm_bo_kmap_obj host_map, guest_map;
+ struct ttm_bo_kmap_obj host_map;
unsigned int cpp;
};
s32 src_pitch, dst_pitch;
u8 *src, *dst;
bool not_used;
-
+ struct ttm_bo_kmap_obj guest_map;
+ int ret;
if (!dirty->num_hits)
return;
if (width == 0 || height == 0)
return;
+ ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
+ &guest_map);
+ if (ret) {
+ DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
+ ret);
+ goto out_cleanup;
+ }
/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
dst_pitch = ddirty->pitch;
- dst = ttm_kmap_obj_virtual(&stdu->guest_map, ¬_used);
+ dst = ttm_kmap_obj_virtual(&guest_map, ¬_used);
dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
vmw_fifo_commit(dev_priv, sizeof(*cmd));
}
+ ttm_bo_kunmap(&guest_map);
out_cleanup:
ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
ddirty->right = ddirty->bottom = S32_MIN;
{
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
- if (vps->guest_map.virtual)
- ttm_bo_kunmap(&vps->guest_map);
-
if (vps->host_map.virtual)
ttm_bo_kunmap(&vps->host_map);
*/
if (vps->content_fb_type == SEPARATE_DMA &&
!(dev_priv->capabilities & SVGA_CAP_3D)) {
-
- struct vmw_framebuffer_dmabuf *new_vfbd;
-
- new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
-
- ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
- NULL);
- if (ret)
- goto out_srf_unpin;
-
- ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
- new_vfbd->buffer->base.num_pages,
- &vps->guest_map);
-
- ttm_bo_unreserve(&new_vfbd->buffer->base);
-
- if (ret) {
- DRM_ERROR("Failed to map content buffer to CPU\n");
- goto out_srf_unpin;
- }
-
ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
vps->surf->res.backup->base.num_pages,
&vps->host_map);
if (ret) {
DRM_ERROR("Failed to map display buffer to CPU\n");
- ttm_bo_kunmap(&vps->guest_map);
goto out_srf_unpin;
}
stdu->display_srf = vps->surf;
stdu->content_fb_type = vps->content_fb_type;
stdu->cpp = vps->cpp;
- memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
if (!stdu->defined)
int (*acquire_lock)(struct dw_i2c_dev *dev);
void (*release_lock)(struct dw_i2c_dev *dev);
bool pm_disabled;
- bool suspended;
- bool skip_resume;
void (*disable)(struct dw_i2c_dev *dev);
void (*disable_int)(struct dw_i2c_dev *dev);
int (*init)(struct dw_i2c_dev *dev);
#include <linux/reset.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/suspend.h>
#include "i2c-designware-core.h"
ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev));
adap->dev.of_node = pdev->dev.of_node;
+ dev_pm_set_driver_flags(&pdev->dev,
+ DPM_FLAG_SMART_PREPARE |
+ DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_LEAVE_SUSPENDED);
+
/* The code below assumes runtime PM to be disabled. */
WARN_ON(pm_runtime_enabled(&pdev->dev));
#ifdef CONFIG_PM_SLEEP
static int dw_i2c_plat_prepare(struct device *dev)
{
- return pm_runtime_suspended(dev);
+ /*
+ * If the ACPI companion device object is present for this device, it
+ * may be accessed during suspend and resume of other devices via I2C
+ * operation regions, so tell the PM core and middle layers to avoid
+ * skipping system suspend/resume callbacks for it in that case.
+ */
+ return !has_acpi_companion(dev);
}
static void dw_i2c_plat_complete(struct device *dev)
{
- if (dev->power.direct_complete)
+ /*
+ * The device can only be in runtime suspend at this point if it has not
+ * been resumed throughout the ending system suspend/resume cycle, so if
+ * the platform firmware might mess up with it, request the runtime PM
+ * framework to resume it.
+ */
+ if (pm_runtime_suspended(dev) && pm_resume_via_firmware())
pm_request_resume(dev);
}
#else
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
- if (i_dev->suspended) {
- i_dev->skip_resume = true;
- return 0;
- }
-
i_dev->disable(i_dev);
i2c_dw_plat_prepare_clk(i_dev, false);
- i_dev->suspended = true;
-
return 0;
}
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
- if (!i_dev->suspended)
- return 0;
-
- if (i_dev->skip_resume) {
- i_dev->skip_resume = false;
- return 0;
- }
-
i2c_dw_plat_prepare_clk(i_dev, true);
i_dev->init(i_dev);
- i_dev->suspended = false;
-
return 0;
}
}
#endif
-struct ib_device *__ib_device_get_by_index(u32 ifindex);
+struct ib_device *ib_device_get_by_index(u32 ifindex);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
return 0;
}
-struct ib_device *__ib_device_get_by_index(u32 index)
+static struct ib_device *__ib_device_get_by_index(u32 index)
{
struct ib_device *device;
return NULL;
}
+/*
+ * Caller is responsible to return refrerence count by calling put_device()
+ */
+struct ib_device *ib_device_get_by_index(u32 index)
+{
+ struct ib_device *device;
+
+ down_read(&lists_rwsem);
+ device = __ib_device_get_by_index(index);
+ if (device)
+ get_device(&device->dev);
+
+ up_read(&lists_rwsem);
+ return device;
+}
+
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
- device = __ib_device_get_by_index(index);
+ device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
+ if (!msg) {
+ err = -ENOMEM;
+ goto err;
+ }
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
err = fill_dev_info(msg, device);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
+ if (err)
+ goto err_free;
nlmsg_end(msg, nlh);
+ put_device(&device->dev);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ put_device(&device->dev);
+ return err;
}
static int _nldev_get_dumpit(struct ib_device *device,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
- device = __ib_device_get_by_index(index);
+ device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
- if (!rdma_is_port_valid(device, port))
- return -EINVAL;
+ if (!rdma_is_port_valid(device, port)) {
+ err = -EINVAL;
+ goto err;
+ }
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
+ if (!msg) {
+ err = -ENOMEM;
+ goto err;
+ }
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
err = fill_port_info(msg, device, port);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
+ if (err)
+ goto err_free;
nlmsg_end(msg, nlh);
+ put_device(&device->dev);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ put_device(&device->dev);
+ return err;
}
static int nldev_port_get_dumpit(struct sk_buff *skb,
return -EINVAL;
ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
- device = __ib_device_get_by_index(ifindex);
+ device = ib_device_get_by_index(ifindex);
if (!device)
return -EINVAL;
nlmsg_end(skb, nlh);
}
-out: cb->args[0] = idx;
+out:
+ put_device(&device->dev);
+ cb->args[0] = idx;
return skb->len;
}
goto err_free_mr;
mr->max_pages = max_num_sg;
-
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_free_pl;
return &mr->ibmr;
err_free_pl:
+ mr->ibmr.device = pd->device;
mlx4_free_priv_pages(mr);
err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
return 0;
}
-static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
- struct net_device *dev)
+static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
+ struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rdma_netdev *rn = netdev_priv(dev);
spin_unlock_irqrestore(&priv->lock, flags);
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
- return;
+ return NULL;
+ }
+
+ /* To avoid race condition, make sure that the
+ * neigh will be added only once.
+ */
+ if (unlikely(!list_empty(&neigh->list))) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return neigh;
}
path = __path_find(dev, daddr + 4);
path->ah->last_send = rn->send(dev, skb, path->ah->ah,
IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
- return;
+ return NULL;
}
} else {
neigh->ah = NULL;
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_neigh_put(neigh);
- return;
+ return NULL;
err_path:
ipoib_neigh_free(neigh);
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_neigh_put(neigh);
+
+ return NULL;
}
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
case htons(ETH_P_TIPC):
neigh = ipoib_neigh_get(dev, phdr->hwaddr);
if (unlikely(!neigh)) {
- neigh_add_path(skb, phdr->hwaddr, dev);
- return NETDEV_TX_OK;
+ neigh = neigh_add_path(skb, phdr->hwaddr, dev);
+ if (likely(!neigh))
+ return NETDEV_TX_OK;
}
break;
case htons(ETH_P_ARP):
spin_lock_irqsave(&priv->lock, flags);
if (!neigh) {
neigh = ipoib_neigh_alloc(daddr, dev);
- if (neigh) {
+ /* Make sure that the neigh will be added only
+ * once to mcast list.
+ */
+ if (neigh && list_empty(&neigh->list)) {
kref_get(&mcast->ah->ref);
neigh->ah = mcast->ah;
list_add_tail(&neigh->list, &mcast->neigh_list);
return -ENOMEM;
attr->qp_state = IB_QPS_INIT;
- attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
+ attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
attr->port_num = ch->sport->port;
attr->pkey_index = 0;
goto destroy_ib;
}
- guid = (__be16 *)¶m->primary_path->sgid.global.interface_id;
+ guid = (__be16 *)¶m->primary_path->dgid.global.interface_id;
snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0)
#define DELTA(x,y) ((y)-(x))
#define TIME_NAME "TSC"
-#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE)
+#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE)
#define GET_TIME(x) do { x = get_cycles(); } while (0)
#define DELTA(x,y) ((y)-(x))
#define TIME_NAME "get_cycles"
return union_desc;
dev_err(&intf->dev,
- "Union descriptor to short (%d vs %zd\n)",
+ "Union descriptor too short (%d vs %zd)\n",
union_desc->bLength, sizeof(*union_desc));
return NULL;
}
0, width, 0, 0);
input_set_abs_params(mtouch, ABS_MT_POSITION_Y,
0, height, 0, 0);
- input_set_abs_params(mtouch, ABS_MT_PRESSURE,
- 0, 255, 0, 0);
ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT);
if (ret) {
case 5:
etd->hw_version = 3;
break;
- case 6 ... 14:
+ case 6 ... 15:
etd->hw_version = 4;
break;
default:
#include <linux/module.h>
#include <linux/input.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/async.h>
#include <linux/i2c.h>
}
/*
- * Systems using device tree should set up interrupt via DTS,
- * the rest will use the default falling edge interrupts.
+ * Platform code (ACPI, DTS) should normally set up interrupt
+ * for us, but in case it did not let's fall back to using falling
+ * edge to be compatible with older Chromebooks.
*/
- irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING;
+ irqflags = irq_get_trigger_type(client->irq);
+ if (!irqflags)
+ irqflags = IRQF_TRIGGER_FALLING;
error = devm_request_threaded_irq(&client->dev, client->irq,
NULL, elants_i2c_irq,
#include <linux/of.h>
#include <linux/firmware.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
-#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/acpi.h>
#include <linux/interrupt.h>
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1UL << ias) - 1;
domain->geometry.force_aperture = true;
- smmu_domain->pgtbl_ops = pgtbl_ops;
ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
- if (ret < 0)
+ if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
+ return ret;
+ }
- return ret;
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ return 0;
}
static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
{
- int i;
+ int i, j;
struct arm_smmu_master_data *master = fwspec->iommu_priv;
struct arm_smmu_device *smmu = master->smmu;
u32 sid = fwspec->ids[i];
__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
+ /* Bridged PCI devices may end up with duplicated IDs */
+ for (j = 0; j < i; j++)
+ if (fwspec->ids[j] == sid)
+ break;
+ if (j < i)
+ continue;
+
arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
}
}
unsigned long *delay_on,
unsigned long *delay_off)
{
- led_stop_software_blink(led_cdev);
+ del_timer_sync(&led_cdev->blink_timer);
+ clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
if (ret)
goto err_remove_ltr;
+ dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
+
return 0;
err_remove_ltr:
static int resume_lpss_device(struct device *dev, void *data)
{
- pm_runtime_resume(dev);
+ if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND))
+ pm_runtime_resume(dev);
+
return 0;
}
rtsx_pci_power_off(pcr, HOST_ENTER_S1);
pci_disable_device(pcidev);
+ free_irq(pcr->irq, (void *)pcr);
+ if (pcr->msi_en)
+ pci_disable_msi(pcr->pci);
}
#else /* CONFIG_PM */
#include <linux/kernel.h>
#include <linux/clk.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/mmc/host.h>
return 0;
}
EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
+
+MODULE_LICENSE("GPL v2");
struct s3cmci_reg {
unsigned short addr;
unsigned char *name;
-} debug_regs[] = {
+};
+
+static const struct s3cmci_reg debug_regs[] = {
DBG_REG(CON),
DBG_REG(PRE),
DBG_REG(CMDARG),
static int s3cmci_regs_show(struct seq_file *seq, void *v)
{
struct s3cmci_host *host = seq->private;
- struct s3cmci_reg *rptr = debug_regs;
+ const struct s3cmci_reg *rptr = debug_regs;
for (; rptr->name; rptr++)
seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,
switch (command) {
case NAND_CMD_READ0:
+ case NAND_CMD_READOOB:
case NAND_CMD_PAGEPROG:
info->use_ecc = 1;
break;
return dev->of_node == data;
}
+/* Note this function returns a reference to the mux_chip dev. */
static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
{
struct device *dev;
(!args.args_count && (mux_chip->controllers > 1))) {
dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
np, args.np);
+ put_device(&mux_chip->dev);
return ERR_PTR(-EINVAL);
}
if (controller >= mux_chip->controllers) {
dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
np, controller, args.np);
+ put_device(&mux_chip->dev);
return ERR_PTR(-EINVAL);
}
- get_device(&mux_chip->dev);
return &mux_chip->mux[controller];
}
EXPORT_SYMBOL_GPL(mux_control_get);
data = be32_to_cpup((__be32 *)&cf->data[0]);
flexcan_write(data, &priv->tx_mb->data[0]);
}
- if (cf->can_dlc > 3) {
+ if (cf->can_dlc > 4) {
data = be32_to_cpup((__be32 *)&cf->data[4]);
flexcan_write(data, &priv->tx_mb->data[1]);
}
if (dev->can.state == CAN_STATE_ERROR_WARNING ||
dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+ cf->can_id |= CAN_ERR_CRTL;
cf->data[1] = (txerr > rxerr) ?
CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
}
dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
rc);
- return rc;
+ return (rc > 0) ? 0 : rc;
}
static void gs_usb_xmit_callback(struct urb *urb)
tbp = peer_tb;
}
- if (tbp[IFLA_IFNAME]) {
+ if (ifmp && tbp[IFLA_IFNAME]) {
nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
name_assign_type = NET_NAME_USER;
} else {
{
struct b53_device *dev = ds->priv;
- /* Older models support a different tag format that we do not
- * support in net/dsa/tag_brcm.c yet.
+ /* Older models (5325, 5365) support a different tag format that we do
+ * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed
+ * mode to be turned on which means we need to specifically manage ARL
+ * misses on multicast addresses (TBD).
*/
- if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port))
+ if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) ||
+ !b53_can_enable_brcm_tags(ds, port))
return DSA_TAG_PROTO_NONE;
/* Broadcom BCM58xx chips have a flow accelerator on Port 8
struct sk_buff* rx_skbuff[RX_RING_SIZE];
struct sk_buff* tx_skbuff[TX_RING_SIZE];
unsigned int cur_rx, cur_tx; /* The next free ring entry */
- unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */
+ unsigned int dirty_tx; /* The ring entries to be free()ed. */
struct vortex_extra_stats xstats; /* NIC-specific extra stats */
struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */
dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */
/* The remainder are related to chip state, mostly media selection. */
struct timer_list timer; /* Media selection timer. */
- struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */
int options; /* User-settable misc. driver options. */
unsigned int media_override:4, /* Passed-in media type. */
default_media:4, /* Read from the EEPROM/Wn3_Config. */
static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
static void vortex_timer(struct timer_list *t);
-static void rx_oom_timer(struct timer_list *t);
static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
timer_setup(&vp->timer, vortex_timer, 0);
mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
- timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
if (vortex_debug > 1)
pr_debug("%s: Initial media type %s.\n",
window_write16(vp, 0x0040, 4, Wn4_NetDiag);
if (vp->full_bus_master_rx) { /* Boomerang bus master. */
- vp->cur_rx = vp->dirty_rx = 0;
+ vp->cur_rx = 0;
/* Initialize the RxEarly register as recommended. */
iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
iowrite32(0x0020, ioaddr + PktStatus);
struct vortex_private *vp = netdev_priv(dev);
int i;
int retval;
+ dma_addr_t dma;
/* Use the now-standard shared IRQ implementation. */
if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
break; /* Bad news! */
skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
- vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+ dma = pci_map_single(VORTEX_PCI(vp), skb->data,
+ PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+ break;
+ vp->rx_ring[i].addr = cpu_to_le32(dma);
}
if (i != RX_RING_SIZE) {
pr_emerg("%s: no memory for rx ring\n", dev->name);
int len = (skb->len + 3) & ~3;
vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
PCI_DMA_TODEVICE);
+ if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+ dev_kfree_skb_any(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
spin_lock_irq(&vp->window_lock);
window_set(vp, 7);
iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr);
int entry = vp->cur_rx % RX_RING_SIZE;
void __iomem *ioaddr = vp->ioaddr;
int rx_status;
- int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+ int rx_work_limit = RX_RING_SIZE;
if (vortex_debug > 5)
pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS));
} else {
/* The packet length: up to 4.5K!. */
int pkt_len = rx_status & 0x1fff;
- struct sk_buff *skb;
+ struct sk_buff *skb, *newskb;
+ dma_addr_t newdma;
dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
if (vortex_debug > 4)
pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
vp->rx_copy++;
} else {
+ /* Pre-allocate the replacement skb. If it or its
+ * mapping fails then recycle the buffer thats already
+ * in place
+ */
+ newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
+ if (!newskb) {
+ dev->stats.rx_dropped++;
+ goto clear_complete;
+ }
+ newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
+ PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+ dev->stats.rx_dropped++;
+ consume_skb(newskb);
+ goto clear_complete;
+ }
+
/* Pass up the skbuff already on the Rx ring. */
skb = vp->rx_skbuff[entry];
- vp->rx_skbuff[entry] = NULL;
+ vp->rx_skbuff[entry] = newskb;
+ vp->rx_ring[entry].addr = cpu_to_le32(newdma);
skb_put(skb, pkt_len);
pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
vp->rx_nocopy++;
netif_rx(skb);
dev->stats.rx_packets++;
}
- entry = (++vp->cur_rx) % RX_RING_SIZE;
- }
- /* Refill the Rx ring buffers. */
- for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
- struct sk_buff *skb;
- entry = vp->dirty_rx % RX_RING_SIZE;
- if (vp->rx_skbuff[entry] == NULL) {
- skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
- if (skb == NULL) {
- static unsigned long last_jif;
- if (time_after(jiffies, last_jif + 10 * HZ)) {
- pr_warn("%s: memory shortage\n",
- dev->name);
- last_jif = jiffies;
- }
- if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
- mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
- break; /* Bad news! */
- }
- vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
- vp->rx_skbuff[entry] = skb;
- }
+clear_complete:
vp->rx_ring[entry].status = 0; /* Clear complete bit. */
iowrite16(UpUnstall, ioaddr + EL3_CMD);
+ entry = (++vp->cur_rx) % RX_RING_SIZE;
}
return 0;
}
-/*
- * If we've hit a total OOM refilling the Rx ring we poll once a second
- * for some memory. Otherwise there is no way to restart the rx process.
- */
-static void
-rx_oom_timer(struct timer_list *t)
-{
- struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
- struct net_device *dev = vp->mii.dev;
-
- spin_lock_irq(&vp->lock);
- if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */
- boomerang_rx(dev);
- if (vortex_debug > 1) {
- pr_debug("%s: rx_oom_timer %s\n", dev->name,
- ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
- }
- spin_unlock_irq(&vp->lock);
-}
-
static void
vortex_down(struct net_device *dev, int final_down)
{
netdev_reset_queue(dev);
netif_stop_queue(dev);
- del_timer_sync(&vp->rx_oom_timer);
del_timer_sync(&vp->timer);
/* Turn off statistics ASAP. We update dev->stats below. */
MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
static int ena_rss_init_default(struct ena_adapter *adapter);
+static void check_for_admin_com_state(struct ena_adapter *adapter);
+static void ena_destroy_device(struct ena_adapter *adapter);
+static int ena_restore_device(struct ena_adapter *adapter);
static void ena_tx_timeout(struct net_device *dev)
{
static int ena_up_complete(struct ena_adapter *adapter)
{
- int rc, i;
+ int rc;
rc = ena_rss_configure(adapter);
if (rc)
ena_napi_enable_all(adapter);
- /* Enable completion queues interrupt */
- for (i = 0; i < adapter->num_queues; i++)
- ena_unmask_interrupt(&adapter->tx_ring[i],
- &adapter->rx_ring[i]);
-
- /* schedule napi in case we had pending packets
- * from the last time we disable napi
- */
- for (i = 0; i < adapter->num_queues; i++)
- napi_schedule(&adapter->ena_napi[i].napi);
-
return 0;
}
static int ena_up(struct ena_adapter *adapter)
{
- int rc;
+ int rc, i;
netdev_dbg(adapter->netdev, "%s\n", __func__);
set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ /* Enable completion queues interrupt */
+ for (i = 0; i < adapter->num_queues; i++)
+ ena_unmask_interrupt(&adapter->tx_ring[i],
+ &adapter->rx_ring[i]);
+
+ /* schedule napi in case we had pending packets
+ * from the last time we disable napi
+ */
+ for (i = 0; i < adapter->num_queues; i++)
+ napi_schedule(&adapter->ena_napi[i].napi);
+
return rc;
err_up:
if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
ena_down(adapter);
+ /* Check for device status and issue reset if needed*/
+ check_for_admin_com_state(adapter);
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ netif_err(adapter, ifdown, adapter->netdev,
+ "Destroy failure, restarting device\n");
+ ena_dump_stats_to_dmesg(adapter);
+ /* rtnl lock already obtained in dev_ioctl() layer */
+ ena_destroy_device(adapter);
+ ena_restore_device(adapter);
+ }
+
return 0;
}
ena_com_set_admin_running_state(ena_dev, false);
- ena_close(netdev);
+ if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ ena_down(adapter);
/* Before releasing the ENA resources, a device reset is required.
* (to prevent the device from accessing them).
- * In case the reset flag is set and the device is up, ena_close
+ * In case the reset flag is set and the device is up, ena_down()
* already perform the reset, so it can be skipped.
*/
if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
return -EINVAL;
}
- if (vf_id >= bp->pf.max_vfs) {
+ if (vf_id >= bp->pf.active_vfs) {
netdev_err(bp->dev, "Invalid VF id %d\n", vf_id);
return -EINVAL;
}
}
/* If all IP and L4 fields are wildcarded then this is an L2 flow */
- if (is_wildcard(&l3_mask, sizeof(l3_mask)) &&
+ if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
} else {
unsigned int sf_size; /* serial flash size in bytes */
unsigned int sf_nsec; /* # of flash sectors */
- unsigned int sf_fw_start; /* start of FW image in flash */
unsigned int fw_vers; /* firmware version */
unsigned int bs_vers; /* bootstrap version */
SF_RD_DATA_FAST = 0xb, /* read flash */
SF_RD_ID = 0x9f, /* read ID */
SF_ERASE_SECTOR = 0xd8, /* erase sector */
-
- FW_MAX_SIZE = 16 * SF_SEC_SIZE,
};
/**
const __be32 *p = (const __be32 *)fw_data;
const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data;
unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
- unsigned int fw_img_start = adap->params.sf_fw_start;
- unsigned int fw_start_sec = fw_img_start / sf_sec_size;
+ unsigned int fw_start_sec = FLASH_FW_START_SEC;
+ unsigned int fw_size = FLASH_FW_MAX_SIZE;
+ unsigned int fw_start = FLASH_FW_START;
if (!size) {
dev_err(adap->pdev_dev, "FW image has no data\n");
"FW image size differs from size in FW header\n");
return -EINVAL;
}
- if (size > FW_MAX_SIZE) {
+ if (size > fw_size) {
dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n",
- FW_MAX_SIZE);
+ fw_size);
return -EFBIG;
}
if (!t4_fw_matches_chip(adap, hdr))
*/
memcpy(first_page, fw_data, SF_PAGE_SIZE);
((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
- ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page);
+ ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
if (ret)
goto out;
- addr = fw_img_start;
+ addr = fw_start;
for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
addr += SF_PAGE_SIZE;
fw_data += SF_PAGE_SIZE;
}
ret = t4_write_flash(adap,
- fw_img_start + offsetof(struct fw_hdr, fw_ver),
+ fw_start + offsetof(struct fw_hdr, fw_ver),
sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
out:
if (ret)
goto failed_regulator;
}
} else {
+ if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto failed_regulator;
+ }
fep->reg_phy = NULL;
}
failed_clk:
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
-failed_phy:
of_node_put(phy_node);
+failed_phy:
+ dev_id--;
failed_ioremap:
free_netdev(ndev);
now = tmr_cnt_read(etsects);
now += delta;
tmr_cnt_write(etsects, now);
+ set_fipers(etsects);
spin_unlock_irqrestore(&etsects->lock, flags);
- set_fipers(etsects);
-
return 0;
}
enum e1000_state_t {
__E1000_TESTING,
__E1000_RESETTING,
- __E1000_DOWN
+ __E1000_DOWN,
+ __E1000_DISABLED
};
#undef pr_fmt
static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct net_device *netdev;
- struct e1000_adapter *adapter;
+ struct e1000_adapter *adapter = NULL;
struct e1000_hw *hw;
static int cards_found;
u16 tmp = 0;
u16 eeprom_apme_mask = E1000_EEPROM_APME;
int bars, need_ioport;
+ bool disable_dev = false;
/* do not allocate ioport bars when not needed */
need_ioport = e1000_is_need_ioport(pdev);
iounmap(hw->ce4100_gbe_mdio_base_virt);
iounmap(hw->hw_addr);
err_ioremap:
+ disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
free_netdev(netdev);
err_alloc_etherdev:
pci_release_selected_regions(pdev, bars);
err_pci_reg:
- pci_disable_device(pdev);
+ if (!adapter || disable_dev)
+ pci_disable_device(pdev);
return err;
}
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
+ bool disable_dev;
e1000_down_and_stop(adapter);
e1000_release_manageability(adapter);
iounmap(hw->flash_address);
pci_release_selected_regions(pdev, adapter->bars);
+ disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
free_netdev(netdev);
- pci_disable_device(pdev);
+ if (disable_dev)
+ pci_disable_device(pdev);
}
/**
if (netif_running(netdev))
e1000_free_irq(adapter);
- pci_disable_device(pdev);
+ if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+ pci_disable_device(pdev);
return 0;
}
pr_err("Cannot enable PCI device from suspend\n");
return err;
}
+
+ /* flush memory to make sure state is correct */
+ smp_mb__before_atomic();
+ clear_bit(__E1000_DISABLED, &adapter->flags);
pci_set_master(pdev);
pci_enable_wake(pdev, PCI_D3hot, 0);
if (netif_running(netdev))
e1000_down(adapter);
- pci_disable_device(pdev);
+
+ if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+ pci_disable_device(pdev);
/* Request a slot slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
pr_err("Cannot re-enable PCI device after reset.\n");
return PCI_ERS_RESULT_DISCONNECT;
}
+
+ /* flush memory to make sure state is correct */
+ smp_mb__before_atomic();
+ clear_bit(__E1000_DISABLED, &adapter->flags);
pci_set_master(pdev);
pci_enable_wake(pdev, PCI_D3hot, 0);
* Checks to see of the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
+ *
+ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ * up).
**/
static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
{
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 0;
+ return 1;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val)
+ if (ret_val) {
e_dbg("Error configuring flow control\n");
+ return ret_val;
+ }
- return ret_val;
+ return 1;
}
static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
else
netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
+ /* Copy the address first, so that we avoid a possible race with
+ * .set_rx_mode(). If we copy after changing the address in the filter
+ * list, we might open ourselves to a narrow race window where
+ * .set_rx_mode could delete our dev_addr filter and prevent traffic
+ * from passing.
+ */
+ ether_addr_copy(netdev->dev_addr, addr->sa_data);
+
spin_lock_bh(&vsi->mac_filter_hash_lock);
i40e_del_mac_filter(vsi, netdev->dev_addr);
i40e_add_mac_filter(vsi, addr->sa_data);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
- ether_addr_copy(netdev->dev_addr, addr->sa_data);
if (vsi->type == I40E_VSI_MAIN) {
i40e_status ret;
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
+ /* Under some circumstances, we might receive a request to delete
+ * our own device address from our uc list. Because we store the
+ * device address in the VSI's MAC/VLAN filter list, we need to ignore
+ * such requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
+
i40e_del_mac_filter(vsi, addr);
return 0;
/* Set Bit 7 to be valid */
mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
- /* Set L4type to both TCP and UDP support */
- mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+ /* Set L4type for TCP support */
+ mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
/* Set cloud filter mode */
mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
is_valid_ether_addr(filter->src_mac)) ||
(is_multicast_ether_addr(filter->dst_mac) &&
is_multicast_ether_addr(filter->src_mac)))
- return -EINVAL;
+ return -EOPNOTSUPP;
- /* Make sure port is specified, otherwise bail out, for channel
- * specific cloud filter needs 'L4 port' to be non-zero
+ /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
+ * ports are not supported via big buffer now.
*/
- if (!filter->dst_port)
- return -EINVAL;
+ if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
+ return -EOPNOTSUPP;
/* adding filter using src_port/src_ip is not supported at this stage */
if (filter->src_port || filter->src_ipv4 ||
!ipv6_addr_any(&filter->ip.v6.src_ip6))
- return -EINVAL;
+ return -EOPNOTSUPP;
/* copy element needed to add cloud filter from filter */
i40e_set_cld_element(filter, &cld_filter.element);
is_multicast_ether_addr(filter->src_mac)) {
/* MAC + IP : unsupported mode */
if (filter->dst_ipv4)
- return -EINVAL;
+ return -EOPNOTSUPP;
/* since we validated that L4 port must be valid before
* we get here, start with respective "flags" value
if (tc < 0) {
dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
/* Walk through fragments adding latest fragment, testing it, and
* then removing stale fragments from the sum.
*/
- stale = &skb_shinfo(skb)->frags[0];
- for (;;) {
+ for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+ int stale_size = skb_frag_size(stale);
+
sum += skb_frag_size(frag++);
+ /* The stale fragment may present us with a smaller
+ * descriptor than the actual fragment size. To account
+ * for that we need to remove all the data on the front and
+ * figure out what the remainder would be in the last
+ * descriptor associated with the fragment.
+ */
+ if (stale_size > I40E_MAX_DATA_PER_TXD) {
+ int align_pad = -(stale->page_offset) &
+ (I40E_MAX_READ_REQ_SIZE - 1);
+
+ sum -= align_pad;
+ stale_size -= align_pad;
+
+ do {
+ sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+ stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+ } while (stale_size > I40E_MAX_DATA_PER_TXD);
+ }
+
/* if sum is negative we failed to make sufficient progress */
if (sum < 0)
return true;
if (!nr_frags--)
break;
- sum -= skb_frag_size(stale++);
+ sum -= stale_size;
}
return false;
/* Walk through fragments adding latest fragment, testing it, and
* then removing stale fragments from the sum.
*/
- stale = &skb_shinfo(skb)->frags[0];
- for (;;) {
+ for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+ int stale_size = skb_frag_size(stale);
+
sum += skb_frag_size(frag++);
+ /* The stale fragment may present us with a smaller
+ * descriptor than the actual fragment size. To account
+ * for that we need to remove all the data on the front and
+ * figure out what the remainder would be in the last
+ * descriptor associated with the fragment.
+ */
+ if (stale_size > I40E_MAX_DATA_PER_TXD) {
+ int align_pad = -(stale->page_offset) &
+ (I40E_MAX_READ_REQ_SIZE - 1);
+
+ sum -= align_pad;
+ stale_size -= align_pad;
+
+ do {
+ sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+ stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+ } while (stale_size > I40E_MAX_DATA_PER_TXD);
+ }
+
/* if sum is negative we failed to make sufficient progress */
if (sum < 0)
return true;
if (!nr_frags--)
break;
- sum -= skb_frag_size(stale++);
+ sum -= stale_size;
}
return false;
return 0;
}
- wmb(); /* reset needs to be written before we read control register */
+ /* Reset needs to be written before we read control register, and
+ * we must wait for the HW to become responsive once again
+ */
+ wmb();
+ msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
do {
u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
}
if (!info->linking)
break;
- if (netdev_has_any_upper_dev(upper_dev)) {
+ if (netdev_has_any_upper_dev(upper_dev) &&
+ (!netif_is_bridge_master(upper_dev) ||
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+ upper_dev))) {
NL_SET_ERR_MSG(extack,
"spectrum: Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
u16 vid)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct netdev_notifier_changeupper_info *info = ptr;
struct netlink_ext_ack *extack;
struct net_device *upper_dev;
}
if (!info->linking)
break;
- if (netdev_has_any_upper_dev(upper_dev)) {
+ if (netdev_has_any_upper_dev(upper_dev) &&
+ (!netif_is_bridge_master(upper_dev) ||
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+ upper_dev))) {
NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *brport_dev,
struct net_device *br_dev);
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev);
/* spectrum.c */
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
u32 probability, bool is_ecn)
{
- char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
+ char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+ char cwtp_cmd[MLXSW_REG_CWTP_LEN];
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
int err;
if (err)
return err;
- mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
+ mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
}
static int
{
if (!removing)
nh->should_offload = 1;
- else if (nh->offloaded)
+ else
nh->should_offload = 0;
nh->update = 1;
}
return NULL;
}
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev)
+{
+ return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+}
+
static struct mlxsw_sp_bridge_device *
mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
struct net_device *br_dev)
return err;
}
nn_writeb(nn, ctrl_offset, entry->entry);
+ nfp_net_irq_unmask(nn, entry->entry);
return 0;
}
unsigned int vector_idx)
{
nn_writeb(nn, ctrl_offset, 0xff);
+ nn_pci_flush(nn);
free_irq(nn->irq_entries[vector_idx].vector, nn);
}
[FWNLCR0] = 0x0090,
[FWALCR0] = 0x0094,
[TXNLCR1] = 0x00a0,
- [TXALCR1] = 0x00a0,
+ [TXALCR1] = 0x00a4,
[RXNLCR1] = 0x00a8,
[RXALCR1] = 0x00ac,
[FWNLCR1] = 0x00b0,
[FWNLCR0] = 0x0090,
[FWALCR0] = 0x0094,
[TXNLCR1] = 0x00a0,
- [TXALCR1] = 0x00a0,
+ [TXALCR1] = 0x00a4,
[RXNLCR1] = 0x00a8,
[RXALCR1] = 0x00ac,
[FWNLCR1] = 0x00b0,
/* ioremap the TSU registers */
if (mdp->cd->tsu) {
struct resource *rtsu;
+
rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
- if (IS_ERR(mdp->tsu_addr)) {
- ret = PTR_ERR(mdp->tsu_addr);
+ if (!rtsu) {
+ dev_err(&pdev->dev, "no TSU resource\n");
+ ret = -ENODEV;
+ goto out_release;
+ }
+ /* We can only request the TSU region for the first port
+ * of the two sharing this TSU for the probe to succeed...
+ */
+ if (devno % 2 == 0 &&
+ !devm_request_mem_region(&pdev->dev, rtsu->start,
+ resource_size(rtsu),
+ dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "can't request TSU resource.\n");
+ ret = -EBUSY;
+ goto out_release;
+ }
+ mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+ resource_size(rtsu));
+ if (!mdp->tsu_addr) {
+ dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+ ret = -ENOMEM;
goto out_release;
}
mdp->port = devno % 2;
ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
}
- /* initialize first or needed device */
- if (!devno || pd->needs_init) {
+ /* Need to init only the first port of the two sharing a TSU */
+ if (devno % 2 == 0) {
if (mdp->cd->chip_reset)
mdp->cd->chip_reset(ndev);
bool stmmac_eee_init(struct stmmac_priv *priv)
{
struct net_device *ndev = priv->dev;
+ int interface = priv->plat->interface;
unsigned long flags;
bool ret = false;
+ if ((interface != PHY_INTERFACE_MODE_MII) &&
+ (interface != PHY_INTERFACE_MODE_GMII) &&
+ !phy_interface_mode_is_rgmii(interface))
+ goto out;
+
/* Using PCS we cannot dial with the phy registers at this stage
* so we do not support extra feature like EEE.
*/
if (IS_ERR(rt))
return PTR_ERR(rt);
+ if (skb_dst(skb)) {
+ int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
+ GENEVE_BASE_HLEN - info->options_len - 14;
+
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ }
+
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->collect_md) {
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
if (IS_ERR(dst))
return PTR_ERR(dst);
+ if (skb_dst(skb)) {
+ int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
+ GENEVE_BASE_HLEN - info->options_len - 14;
+
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ }
+
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->collect_md) {
prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
return 0;
unregister_netdev:
+ /* macvlan_uninit would free the macvlan port */
unregister_netdevice(dev);
+ return err;
destroy_macvlan_port:
- if (create)
+ /* the macvlan port may be freed by macvlan_uninit when fail to register.
+ * so we destroy the macvlan port only when it's valid.
+ */
+ if (create && macvlan_port_get_rtnl(dev))
macvlan_port_destroy(port->dev);
return err;
}
data->regulator = devm_regulator_get(&pdev->dev, "phy");
if (IS_ERR(data->regulator)) {
- if (PTR_ERR(data->regulator) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ if (PTR_ERR(data->regulator) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto err_out_free_mdiobus;
+ }
dev_info(&pdev->dev, "no regulator found\n");
data->regulator = NULL;
switch (cmd) {
case SIOCGMIIPHY:
mii->phy_id = pl->phydev->mdio.addr;
+ /* fall through */
case SIOCGMIIREG:
ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
switch (cmd) {
case SIOCGMIIPHY:
mii->phy_id = 0;
+ /* fall through */
case SIOCGMIIREG:
ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
WARN_ON(!lockdep_rtnl_is_held());
set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+ queue_work(system_power_efficient_wq, &pl->resolve);
flush_work(&pl->resolve);
-
- netif_carrier_off(pl->netdev);
}
static void phylink_sfp_link_up(void *upstream)
void sfp_unregister_upstream(struct sfp_bus *bus)
{
rtnl_lock();
- sfp_unregister_bus(bus);
+ if (bus->sfp)
+ sfp_unregister_bus(bus);
bus->upstream = NULL;
bus->netdev = NULL;
rtnl_unlock();
void sfp_unregister_socket(struct sfp_bus *bus)
{
rtnl_lock();
- sfp_unregister_bus(bus);
+ if (bus->netdev)
+ sfp_unregister_bus(bus);
bus->sfp_dev = NULL;
bus->sfp = NULL;
bus->socket_ops = NULL;
{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+ {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
{QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
}
}
+ if (changed & IEEE80211_CONF_CHANGE_PS) {
+ list_for_each_entry(tmp, &wcn->vif_list, list) {
+ vif = wcn36xx_priv_to_vif(tmp);
+ if (hw->conf.flags & IEEE80211_CONF_PS) {
+ if (vif->bss_conf.ps) /* ps allowed ? */
+ wcn36xx_pmc_enter_bmps_state(wcn, vif);
+ } else {
+ wcn36xx_pmc_exit_bmps_state(wcn, vif);
+ }
+ }
+ }
+
mutex_unlock(&wcn->conf_mutex);
return 0;
vif_priv->dtim_period = bss_conf->dtim_period;
}
- if (changed & BSS_CHANGED_PS) {
- wcn36xx_dbg(WCN36XX_DBG_MAC,
- "mac bss PS set %d\n",
- bss_conf->ps);
- if (bss_conf->ps) {
- wcn36xx_pmc_enter_bmps_state(wcn, vif);
- } else {
- wcn36xx_pmc_exit_bmps_state(wcn, vif);
- }
- }
-
if (changed & BSS_CHANGED_BSSID) {
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
bss_conf->bssid);
struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
if (WCN36XX_BMPS != vif_priv->pw_state) {
- wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n");
- return -EINVAL;
+ /* Unbalanced call or last BMPS enter failed */
+ wcn36xx_dbg(WCN36XX_DBG_PMC,
+ "Not in BMPS mode, no need to exit\n");
+ return -EALREADY;
}
wcn36xx_smd_exit_bmps(wcn, vif);
vif_priv->pw_state = WCN36XX_FULL_POWER;
return index & (q->n_window - 1);
}
-static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
+static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
struct iwl_txq *txq, int idx)
{
- return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
- idx);
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+ if (trans->cfg->use_tfh)
+ idx = iwl_pcie_get_cmd_index(txq, idx);
+
+ return txq->tfds + trans_pcie->tfd_size * idx;
}
static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
* idx is bounded by n_window
*/
lockdep_assert_held(&txq->lock);
iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
- iwl_pcie_get_tfd(trans_pcie, txq, idx));
+ iwl_pcie_get_tfd(trans, txq, idx));
/* free SKB */
if (txq->entries) {
struct sk_buff *skb,
struct iwl_cmd_meta *out_meta)
{
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
- struct iwl_tfh_tfd *tfd =
- iwl_pcie_get_tfd(trans_pcie, txq, idx);
+ struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
dma_addr_t tb_phys;
bool amsdu;
int i, len, tb1_len, tb2_len, hdr_len;
u8 group_id = iwl_cmd_groupid(cmd->id);
const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
- struct iwl_tfh_tfd *tfd =
- iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+ struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
memset(tfd, 0, sizeof(*tfd));
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int i, num_tbs;
- void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
+ void *tfd = iwl_pcie_get_tfd(trans, txq, index);
/* Sanity check on number of chunks */
num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
}
trace_iwlwifi_dev_tx(trans->dev, skb,
- iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+ iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
trans_pcie->tfd_size,
&dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
hdr_len);
IEEE80211_CCMP_HDR_LEN : 0;
trace_iwlwifi_dev_tx(trans->dev, skb,
- iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+ iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
trans_pcie->tfd_size,
&dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
IWL_FIRST_TB_SIZE);
- tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+ tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
/* Set up entry for this TFD in Tx byte-count array */
iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
iwl_pcie_tfd_get_num_tbs(trans, tfd));
netif_carrier_off(netdev);
+ xenbus_switch_state(dev, XenbusStateInitialising);
return netdev;
exit:
struct nvme_ns *ns, struct nvme_id_ns *id)
{
sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+ unsigned short bs = 1 << ns->lba_shift;
unsigned stream_alignment = 0;
if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
- blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+ blk_queue_logical_block_size(disk->queue, bs);
+ blk_queue_physical_block_size(disk->queue, bs);
+ blk_queue_io_min(disk->queue, bs);
+
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type);
mutex_unlock(&ns->ctrl->namespaces_mutex);
synchronize_srcu(&ns->head->srcu);
+ nvme_mpath_check_last_path(ns);
nvme_put_ns(ns);
}
return NULL;
kref_init(&host->ref);
+ uuid_gen(&host->id);
snprintf(host->nqn, NVMF_NQN_SIZE,
"nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+
+ if (head->disk && list_empty(&head->list))
+ kblockd_schedule_work(&head->requeue_work);
+}
+
#else
static inline void nvme_failover_req(struct request *req)
{
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
}
+static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ unsigned int avg_seg_size;
+
+ avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
+ blk_rq_nr_phys_segments(req));
+
+ if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+ return false;
+ if (!iod->nvmeq->qid)
+ return false;
+ if (!sgl_threshold || avg_seg_size < sgl_threshold)
+ return false;
+ return true;
+}
+
static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
unsigned int size = blk_rq_payload_bytes(rq);
+ iod->use_sgl = nvme_pci_use_sgls(dev, rq);
+
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
iod->use_sgl);
dma_addr_t prp_dma;
int nprps, i;
- iod->use_sgl = false;
-
length -= (page_size - offset);
if (length <= 0) {
iod->first_dma = 0;
int entries = iod->nents, i = 0;
dma_addr_t sgl_dma;
- iod->use_sgl = true;
-
/* setting the transfer type as SGL */
cmd->flags = NVME_CMD_SGL_METABUF;
return BLK_STS_OK;
}
-static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
-{
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- unsigned int avg_seg_size;
-
- avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
- blk_rq_nr_phys_segments(req));
-
- if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
- return false;
- if (!iod->nvmeq->qid)
- return false;
- if (!sgl_threshold || avg_seg_size < sgl_threshold)
- return false;
- return true;
-}
-
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
DMA_ATTR_NO_WARN))
goto out;
- if (nvme_pci_use_sgls(dev, req))
+ if (iod->use_sgl)
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ /* state change failure should never happen */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
nvme_rdma_reconnect_or_remove(ctrl);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
{
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;
queue_work(nvme_wq, &ctrl->err_work);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ /* state change failure should never happen */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto out_fail;
const char *buf, size_t count)
{
struct fcloop_nport *nport = NULL, *tmpport;
- struct fcloop_tport *tport;
+ struct fcloop_tport *tport = NULL;
u64 nodename, portname;
unsigned long flags;
int ret;
rc = of_mdiobus_register_phy(mdio, child, addr);
else
rc = of_mdiobus_register_device(mdio, child, addr);
- if (rc)
+
+ if (rc == -ENODEV)
+ dev_err(&mdio->dev,
+ "MDIO device at address %d is missing.\n",
+ addr);
+ else if (rc)
goto unregister;
}
if (of_mdiobus_child_is_phy(child)) {
rc = of_mdiobus_register_phy(mdio, child, addr);
- if (rc)
+ if (rc && rc != -ENODEV)
goto unregister;
}
}
obj-y += core.o cpu.o
obj-$(CONFIG_OF) += of.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
+obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-opp-supply.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Nishanth Menon <nm@ti.com>
+ * Dave Gerlach <d-gerlach@ti.com>
+ *
+ * TI OPP supply driver that provides override into the regulator control
+ * for generic opp core to handle devices with ABB regulator and/or
+ * SmartReflex Class0.
+ */
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+/**
+ * struct ti_opp_supply_optimum_voltage_table - optimized voltage table
+ * @reference_uv: reference voltage (usually Nominal voltage)
+ * @optimized_uv: Optimized voltage from efuse
+ */
+struct ti_opp_supply_optimum_voltage_table {
+ unsigned int reference_uv;
+ unsigned int optimized_uv;
+};
+
+/**
+ * struct ti_opp_supply_data - OMAP specific opp supply data
+ * @vdd_table: Optimized voltage mapping table
+ * @num_vdd_table: number of entries in vdd_table
+ * @vdd_absolute_max_voltage_uv: absolute maximum voltage in UV for the supply
+ */
+struct ti_opp_supply_data {
+ struct ti_opp_supply_optimum_voltage_table *vdd_table;
+ u32 num_vdd_table;
+ u32 vdd_absolute_max_voltage_uv;
+};
+
+static struct ti_opp_supply_data opp_data;
+
+/**
+ * struct ti_opp_supply_of_data - device tree match data
+ * @flags: specific type of opp supply
+ * @efuse_voltage_mask: mask required for efuse register representing voltage
+ * @efuse_voltage_uv: Are the efuse entries in micro-volts? if not, assume
+ * milli-volts.
+ */
+struct ti_opp_supply_of_data {
+#define OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE BIT(1)
+#define OPPDM_HAS_NO_ABB BIT(2)
+ const u8 flags;
+ const u32 efuse_voltage_mask;
+ const bool efuse_voltage_uv;
+};
+
+/**
+ * _store_optimized_voltages() - store optimized voltages
+ * @dev: ti opp supply device for which we need to store info
+ * @data: data specific to the device
+ *
+ * Picks up efuse based optimized voltages for VDD unique per device and
+ * stores it in internal data structure for use during transition requests.
+ *
+ * Return: If successful, 0, else appropriate error value.
+ */
+static int _store_optimized_voltages(struct device *dev,
+ struct ti_opp_supply_data *data)
+{
+ void __iomem *base;
+ struct property *prop;
+ struct resource *res;
+ const __be32 *val;
+ int proplen, i;
+ int ret = 0;
+ struct ti_opp_supply_optimum_voltage_table *table;
+ const struct ti_opp_supply_of_data *of_data = dev_get_drvdata(dev);
+
+ /* pick up Efuse based voltages */
+ res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(dev, "Unable to get IO resource\n");
+ ret = -ENODEV;
+ goto out_map;
+ }
+
+ base = ioremap_nocache(res->start, resource_size(res));
+ if (!base) {
+ dev_err(dev, "Unable to map Efuse registers\n");
+ ret = -ENOMEM;
+ goto out_map;
+ }
+
+ /* Fetch efuse-settings. */
+ prop = of_find_property(dev->of_node, "ti,efuse-settings", NULL);
+ if (!prop) {
+ dev_err(dev, "No 'ti,efuse-settings' property found\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ proplen = prop->length / sizeof(int);
+ data->num_vdd_table = proplen / 2;
+ /* Verify for corrupted OPP entries in dt */
+ if (data->num_vdd_table * 2 * sizeof(int) != prop->length) {
+ dev_err(dev, "Invalid 'ti,efuse-settings'\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = of_property_read_u32(dev->of_node, "ti,absolute-max-voltage-uv",
+ &data->vdd_absolute_max_voltage_uv);
+ if (ret) {
+ dev_err(dev, "ti,absolute-max-voltage-uv is missing\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ table = kzalloc(sizeof(*data->vdd_table) *
+ data->num_vdd_table, GFP_KERNEL);
+ if (!table) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ data->vdd_table = table;
+
+ val = prop->value;
+ for (i = 0; i < data->num_vdd_table; i++, table++) {
+ u32 efuse_offset;
+ u32 tmp;
+
+ table->reference_uv = be32_to_cpup(val++);
+ efuse_offset = be32_to_cpup(val++);
+
+ tmp = readl(base + efuse_offset);
+ tmp &= of_data->efuse_voltage_mask;
+ tmp >>= __ffs(of_data->efuse_voltage_mask);
+
+ table->optimized_uv = of_data->efuse_voltage_uv ? tmp :
+ tmp * 1000;
+
+ dev_dbg(dev, "[%d] efuse=0x%08x volt_table=%d vset=%d\n",
+ i, efuse_offset, table->reference_uv,
+ table->optimized_uv);
+
+ /*
+ * Some older samples might not have optimized efuse
+ * Use reference voltage for those - just add debug message
+ * for them.
+ */
+ if (!table->optimized_uv) {
+ dev_dbg(dev, "[%d] efuse=0x%08x volt_table=%d:vset0\n",
+ i, efuse_offset, table->reference_uv);
+ table->optimized_uv = table->reference_uv;
+ }
+ }
+out:
+ iounmap(base);
+out_map:
+ return ret;
+}
+
+/**
+ * _free_optimized_voltages() - free resources for optvoltages
+ * @dev: device for which we need to free info
+ * @data: data specific to the device
+ */
+static void _free_optimized_voltages(struct device *dev,
+ struct ti_opp_supply_data *data)
+{
+ kfree(data->vdd_table);
+ data->vdd_table = NULL;
+ data->num_vdd_table = 0;
+}
+
+/**
+ * _get_optimal_vdd_voltage() - Finds optimal voltage for the supply
+ * @dev: device for which we need to find info
+ * @data: data specific to the device
+ * @reference_uv: reference voltage (OPP voltage) for which we need value
+ *
+ * Return: if a match is found, return optimized voltage, else return
+ * reference_uv, also return reference_uv if no optimization is needed.
+ */
+static int _get_optimal_vdd_voltage(struct device *dev,
+ struct ti_opp_supply_data *data,
+ int reference_uv)
+{
+ int i;
+ struct ti_opp_supply_optimum_voltage_table *table;
+
+ if (!data->num_vdd_table)
+ return reference_uv;
+
+ table = data->vdd_table;
+ if (!table)
+ return -EINVAL;
+
+ /* Find a exact match - this list is usually very small */
+ for (i = 0; i < data->num_vdd_table; i++, table++)
+ if (table->reference_uv == reference_uv)
+ return table->optimized_uv;
+
+ /* IF things are screwed up, we'd make a mess on console.. ratelimit */
+ dev_err_ratelimited(dev, "%s: Failed optimized voltage match for %d\n",
+ __func__, reference_uv);
+ return reference_uv;
+}
+
+static int _opp_set_voltage(struct device *dev,
+ struct dev_pm_opp_supply *supply,
+ int new_target_uv, struct regulator *reg,
+ char *reg_name)
+{
+ int ret;
+ unsigned long vdd_uv, uv_max;
+
+ if (new_target_uv)
+ vdd_uv = new_target_uv;
+ else
+ vdd_uv = supply->u_volt;
+
+ /*
+ * If we do have an absolute max voltage specified, then we should
+ * use that voltage instead to allow for cases where the voltage rails
+ * are ganged (example if we set the max for an opp as 1.12v, and
+ * the absolute max is 1.5v, for another rail to get 1.25v, it cannot
+ * be achieved if the regulator is constrainted to max of 1.12v, even
+ * if it can function at 1.25v
+ */
+ if (opp_data.vdd_absolute_max_voltage_uv)
+ uv_max = opp_data.vdd_absolute_max_voltage_uv;
+ else
+ uv_max = supply->u_volt_max;
+
+ if (vdd_uv > uv_max ||
+ vdd_uv < supply->u_volt_min ||
+ supply->u_volt_min > uv_max) {
+ dev_warn(dev,
+ "Invalid range voltages [Min:%lu target:%lu Max:%lu]\n",
+ supply->u_volt_min, vdd_uv, uv_max);
+ return -EINVAL;
+ }
+
+ dev_dbg(dev, "%s scaling to %luuV[min %luuV max %luuV]\n", reg_name,
+ vdd_uv, supply->u_volt_min,
+ uv_max);
+
+ ret = regulator_set_voltage_triplet(reg,
+ supply->u_volt_min,
+ vdd_uv,
+ uv_max);
+ if (ret) {
+ dev_err(dev, "%s failed for %luuV[min %luuV max %luuV]\n",
+ reg_name, vdd_uv, supply->u_volt_min,
+ uv_max);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ti_opp_supply_set_opp() - do the opp supply transition
+ * @data: information on regulators and new and old opps provided by
+ * opp core to use in transition
+ *
+ * Return: If successful, 0, else appropriate error value.
+ */
+static int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data)
+{
+ struct dev_pm_opp_supply *old_supply_vdd = &data->old_opp.supplies[0];
+ struct dev_pm_opp_supply *old_supply_vbb = &data->old_opp.supplies[1];
+ struct dev_pm_opp_supply *new_supply_vdd = &data->new_opp.supplies[0];
+ struct dev_pm_opp_supply *new_supply_vbb = &data->new_opp.supplies[1];
+ struct device *dev = data->dev;
+ unsigned long old_freq = data->old_opp.rate, freq = data->new_opp.rate;
+ struct clk *clk = data->clk;
+ struct regulator *vdd_reg = data->regulators[0];
+ struct regulator *vbb_reg = data->regulators[1];
+ int vdd_uv;
+ int ret;
+
+ vdd_uv = _get_optimal_vdd_voltage(dev, &opp_data,
+ new_supply_vbb->u_volt);
+
+ /* Scaling up? Scale voltage before frequency */
+ if (freq > old_freq) {
+ ret = _opp_set_voltage(dev, new_supply_vdd, vdd_uv, vdd_reg,
+ "vdd");
+ if (ret)
+ goto restore_voltage;
+
+ ret = _opp_set_voltage(dev, new_supply_vbb, 0, vbb_reg, "vbb");
+ if (ret)
+ goto restore_voltage;
+ }
+
+ /* Change frequency */
+ dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
+ __func__, old_freq, freq);
+
+ ret = clk_set_rate(clk, freq);
+ if (ret) {
+ dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+ ret);
+ goto restore_voltage;
+ }
+
+ /* Scaling down? Scale voltage after frequency */
+ if (freq < old_freq) {
+ ret = _opp_set_voltage(dev, new_supply_vbb, 0, vbb_reg, "vbb");
+ if (ret)
+ goto restore_freq;
+
+ ret = _opp_set_voltage(dev, new_supply_vdd, vdd_uv, vdd_reg,
+ "vdd");
+ if (ret)
+ goto restore_freq;
+ }
+
+ return 0;
+
+restore_freq:
+ ret = clk_set_rate(clk, old_freq);
+ if (ret)
+ dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+ __func__, old_freq);
+restore_voltage:
+ /* This shouldn't harm even if the voltages weren't updated earlier */
+ if (old_supply_vdd->u_volt) {
+ ret = _opp_set_voltage(dev, old_supply_vbb, 0, vbb_reg, "vbb");
+ if (ret)
+ return ret;
+
+ ret = _opp_set_voltage(dev, old_supply_vdd, 0, vdd_reg,
+ "vdd");
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static const struct ti_opp_supply_of_data omap_generic_of_data = {
+};
+
+static const struct ti_opp_supply_of_data omap_omap5_of_data = {
+ .flags = OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE,
+ .efuse_voltage_mask = 0xFFF,
+ .efuse_voltage_uv = false,
+};
+
+static const struct ti_opp_supply_of_data omap_omap5core_of_data = {
+ .flags = OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE | OPPDM_HAS_NO_ABB,
+ .efuse_voltage_mask = 0xFFF,
+ .efuse_voltage_uv = false,
+};
+
+static const struct of_device_id ti_opp_supply_of_match[] = {
+ {.compatible = "ti,omap-opp-supply", .data = &omap_generic_of_data},
+ {.compatible = "ti,omap5-opp-supply", .data = &omap_omap5_of_data},
+ {.compatible = "ti,omap5-core-opp-supply",
+ .data = &omap_omap5core_of_data},
+ {},
+};
+MODULE_DEVICE_TABLE(of, ti_opp_supply_of_match);
+
+static int ti_opp_supply_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device *cpu_dev = get_cpu_device(0);
+ const struct of_device_id *match;
+ const struct ti_opp_supply_of_data *of_data;
+ int ret = 0;
+
+ match = of_match_device(ti_opp_supply_of_match, dev);
+ if (!match) {
+ /* We do not expect this to happen */
+ dev_err(dev, "%s: Unable to match device\n", __func__);
+ return -ENODEV;
+ }
+ if (!match->data) {
+ /* Again, unlikely.. but mistakes do happen */
+ dev_err(dev, "%s: Bad data in match\n", __func__);
+ return -EINVAL;
+ }
+ of_data = match->data;
+
+ dev_set_drvdata(dev, (void *)of_data);
+
+ /* If we need optimized voltage */
+ if (of_data->flags & OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE) {
+ ret = _store_optimized_voltages(dev, &opp_data);
+ if (ret)
+ return ret;
+ }
+
+ ret = PTR_ERR_OR_ZERO(dev_pm_opp_register_set_opp_helper(cpu_dev,
+ ti_opp_supply_set_opp));
+ if (ret)
+ _free_optimized_voltages(dev, &opp_data);
+
+ return ret;
+}
+
+static struct platform_driver ti_opp_supply_driver = {
+ .probe = ti_opp_supply_probe,
+ .driver = {
+ .name = "ti_opp_supply",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(ti_opp_supply_of_match),
+ },
+};
+module_platform_driver(ti_opp_supply_driver);
+
+MODULE_DESCRIPTION("Texas Instruments OMAP OPP Supply driver");
+MODULE_AUTHOR("Texas Instruments Inc.");
+MODULE_LICENSE("GPL v2");
struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
- DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+ DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
/* Clear the matching bit in the IMR register */
dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq));
int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
u32 tmp;
- DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+ DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
/*
** clear pending IRQ bits
if (mask) {
if (--ilr_loop > 0)
goto ilr_again;
- printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n",
+ printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n",
dino_dev->hba.base_addr, mask);
return IRQ_NONE;
}
struct pci_dev *dev;
struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge));
- DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n",
+ DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n",
__func__, bus, bus->busn_res.start,
bus->bridge->platform_data);
res->flags = IORESOURCE_IO; /* do not mark it busy ! */
if (request_resource(&ioport_resource, res) < 0) {
printk(KERN_ERR "%s: request I/O Port region failed "
- "0x%lx/%lx (hpa 0x%p)\n",
+ "0x%lx/%lx (hpa 0x%px)\n",
name, (unsigned long)res->start, (unsigned long)res->end,
dino_dev->hba.base_addr);
return 1;
return retval;
}
- printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr);
+ printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr);
return 0;
}
pm_generic_complete(dev);
/* Resume device if platform firmware has put it in reset-power-on */
- if (dev->power.direct_complete && pm_resume_via_firmware()) {
+ if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) {
pci_power_t pre_sleep_state = pci_dev->current_state;
pci_update_current_state(pci_dev, pci_dev->current_state);
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_smart_suspend_and_suspended(dev)) {
+ dev->power.may_skip_resume = true;
return 0;
+ }
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend_late(dev, PMSG_SUSPEND);
Fixup:
pci_fixup_device(pci_fixup_suspend_late, pci_dev);
+ /*
+ * If the target system sleep state is suspend-to-idle, it is sufficient
+ * to check whether or not the device's wakeup settings are good for
+ * runtime PM. Otherwise, the pm_resume_via_firmware() check will cause
+ * pci_pm_complete() to take care of fixing up the device's state
+ * anyway, if need be.
+ */
+ dev->power.may_skip_resume = device_may_wakeup(dev) ||
+ !device_can_wakeup(dev);
+
return 0;
}
struct device_driver *drv = dev->driver;
int error = 0;
+ if (dev_pm_may_skip_resume(dev))
+ return 0;
+
/*
* Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend
* during system suspend, so update their runtime PM status to "active"
if (dev_pm_smart_suspend_and_suspended(dev))
return 0;
- return pm_generic_freeze_late(dev);;
+ return pm_generic_freeze_late(dev);
}
static int pci_pm_freeze_noirq(struct device *dev)
pci_save_state(dev);
+ dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_LEAVE_SUSPENDED);
+
if (pci_bridge_d3_possible(dev)) {
/*
* Keep the port resumed 100ms to make sure things like
if (key_code == KEY_RESERVED)
return;
if (pressed)
- pm_wakeup_event(&device->dev, 0);
+ pm_wakeup_dev_event(&device->dev, 0, button->suspended);
if (button->suspended)
return;
input_report_key(input, key_code, pressed?1:0);
error = input_register_device(input);
if (error)
goto err_free_input;
+
+ device_init_wakeup(&device->dev, true);
dev_info(&device->dev,
"%s [%s]\n", name, acpi_device_bid(device));
return 0;
class_unregister(&wmi_bus_class);
}
-subsys_initcall(acpi_wmi_init);
+subsys_initcall_sync(acpi_wmi_init);
module_exit(acpi_wmi_exit);
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/powercap.h>
+#include <linux/suspend.h>
#include <asm/iosf_mbi.h>
#include <asm/processor.h>
int prim_id; /* primitive ID used to enable */
struct rapl_domain *domain;
const char *name;
+ u64 last_power_limit;
};
static const char pl1_name[] = "long_term";
struct rapl_domain *rd;
char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/
struct powercap_zone *power_zone = NULL;
- int nr_pl, ret;;
+ int nr_pl, ret;
/* Update the domain data of the new package */
rapl_update_domain_data(rp);
static enum cpuhp_state pcap_rapl_online;
+static void power_limit_state_save(void)
+{
+ struct rapl_package *rp;
+ struct rapl_domain *rd;
+ int nr_pl, ret, i;
+
+ get_online_cpus();
+ list_for_each_entry(rp, &rapl_packages, plist) {
+ if (!rp->power_zone)
+ continue;
+ rd = power_zone_to_rapl_domain(rp->power_zone);
+ nr_pl = find_nr_power_limit(rd);
+ for (i = 0; i < nr_pl; i++) {
+ switch (rd->rpl[i].prim_id) {
+ case PL1_ENABLE:
+ ret = rapl_read_data_raw(rd,
+ POWER_LIMIT1,
+ true,
+ &rd->rpl[i].last_power_limit);
+ if (ret)
+ rd->rpl[i].last_power_limit = 0;
+ break;
+ case PL2_ENABLE:
+ ret = rapl_read_data_raw(rd,
+ POWER_LIMIT2,
+ true,
+ &rd->rpl[i].last_power_limit);
+ if (ret)
+ rd->rpl[i].last_power_limit = 0;
+ break;
+ }
+ }
+ }
+ put_online_cpus();
+}
+
+static void power_limit_state_restore(void)
+{
+ struct rapl_package *rp;
+ struct rapl_domain *rd;
+ int nr_pl, i;
+
+ get_online_cpus();
+ list_for_each_entry(rp, &rapl_packages, plist) {
+ if (!rp->power_zone)
+ continue;
+ rd = power_zone_to_rapl_domain(rp->power_zone);
+ nr_pl = find_nr_power_limit(rd);
+ for (i = 0; i < nr_pl; i++) {
+ switch (rd->rpl[i].prim_id) {
+ case PL1_ENABLE:
+ if (rd->rpl[i].last_power_limit)
+ rapl_write_data_raw(rd,
+ POWER_LIMIT1,
+ rd->rpl[i].last_power_limit);
+ break;
+ case PL2_ENABLE:
+ if (rd->rpl[i].last_power_limit)
+ rapl_write_data_raw(rd,
+ POWER_LIMIT2,
+ rd->rpl[i].last_power_limit);
+ break;
+ }
+ }
+ }
+ put_online_cpus();
+}
+
+static int rapl_pm_callback(struct notifier_block *nb,
+ unsigned long mode, void *_unused)
+{
+ switch (mode) {
+ case PM_SUSPEND_PREPARE:
+ power_limit_state_save();
+ break;
+ case PM_POST_SUSPEND:
+ power_limit_state_restore();
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block rapl_pm_notifier = {
+ .notifier_call = rapl_pm_callback,
+};
+
static int __init rapl_init(void)
{
const struct x86_cpu_id *id;
/* Don't bail out if PSys is not supported */
rapl_register_psys();
+
+ ret = register_pm_notifier(&rapl_pm_notifier);
+ if (ret)
+ goto err_unreg_all;
+
return 0;
+err_unreg_all:
+ cpuhp_remove_state(pcap_rapl_online);
+
err_unreg:
rapl_unregister_powercap();
return ret;
static void __exit rapl_exit(void)
{
+ unregister_pm_notifier(&rapl_pm_notifier);
cpuhp_remove_state(pcap_rapl_online);
rapl_unregister_powercap();
}
static int __init powercap_init(void)
{
- int result = 0;
+ int result;
result = seed_constraint_attributes();
if (result)
return result;
- result = class_register(&powercap_class);
-
- return result;
+ return class_register(&powercap_class);
}
device_initcall(powercap_init);
erp = dasd_3990_erp_handle_match_erp(cqr, erp);
}
+
+ /*
+ * For path verification work we need to stick with the path that was
+ * originally chosen so that the per path configuration data is
+ * assigned correctly.
+ */
+ if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) {
+ erp->lpm = cqr->lpm;
+ }
+
if (device->features & DASD_FEATURE_ERPLOG) {
/* print current erp_chain */
dev_err(&device->cdev->dev,
CFLAGS_sclp_early_core.o += -march=z900
endif
+CFLAGS_sclp_early_core.o += -D__NO_FORTIFY
+
obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \
sclp_early.o sclp_early_core.o
break;
case ASHMEM_SET_SIZE:
ret = -EINVAL;
+ mutex_lock(&ashmem_mutex);
if (!asma->file) {
ret = 0;
asma->size = (size_t)arg;
}
+ mutex_unlock(&ashmem_mutex);
break;
case ASHMEM_GET_SIZE:
ret = asma->size;
* @policy: cpufreq policy.
* @node: list_head to link all cpufreq_cooling_device together.
* @idle_time: idle time stats
- * @plat_get_static_power: callback to calculate the static power
*
* This structure is required for keeping information of each registered
* cpufreq_cooling_device.
struct cpufreq_policy *policy;
struct list_head node;
struct time_in_idle *idle_time;
- get_static_t plat_get_static_power;
};
static DEFINE_IDA(cpufreq_ida);
return load;
}
-/**
- * get_static_power() - calculate the static power consumed by the cpus
- * @cpufreq_cdev: struct &cpufreq_cooling_device for this cpu cdev
- * @tz: thermal zone device in which we're operating
- * @freq: frequency in KHz
- * @power: pointer in which to store the calculated static power
- *
- * Calculate the static power consumed by the cpus described by
- * @cpu_actor running at frequency @freq. This function relies on a
- * platform specific function that should have been provided when the
- * actor was registered. If it wasn't, the static power is assumed to
- * be negligible. The calculated static power is stored in @power.
- *
- * Return: 0 on success, -E* on failure.
- */
-static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
- struct thermal_zone_device *tz, unsigned long freq,
- u32 *power)
-{
- struct dev_pm_opp *opp;
- unsigned long voltage;
- struct cpufreq_policy *policy = cpufreq_cdev->policy;
- struct cpumask *cpumask = policy->related_cpus;
- unsigned long freq_hz = freq * 1000;
- struct device *dev;
-
- if (!cpufreq_cdev->plat_get_static_power) {
- *power = 0;
- return 0;
- }
-
- dev = get_cpu_device(policy->cpu);
- WARN_ON(!dev);
-
- opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
- if (IS_ERR(opp)) {
- dev_warn_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
- freq_hz, PTR_ERR(opp));
- return -EINVAL;
- }
-
- voltage = dev_pm_opp_get_voltage(opp);
- dev_pm_opp_put(opp);
-
- if (voltage == 0) {
- dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n",
- freq_hz);
- return -EINVAL;
- }
-
- return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay,
- voltage, power);
-}
-
/**
* get_dynamic_power() - calculate the dynamic power
* @cpufreq_cdev: &cpufreq_cooling_device for this cdev
u32 *power)
{
unsigned long freq;
- int i = 0, cpu, ret;
- u32 static_power, dynamic_power, total_load = 0;
+ int i = 0, cpu;
+ u32 total_load = 0;
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
struct cpufreq_policy *policy = cpufreq_cdev->policy;
u32 *load_cpu = NULL;
cpufreq_cdev->last_load = total_load;
- dynamic_power = get_dynamic_power(cpufreq_cdev, freq);
- ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
- if (ret) {
- kfree(load_cpu);
- return ret;
- }
+ *power = get_dynamic_power(cpufreq_cdev, freq);
if (load_cpu) {
trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
- load_cpu, i, dynamic_power,
- static_power);
+ load_cpu, i, *power);
kfree(load_cpu);
}
- *power = static_power + dynamic_power;
return 0;
}
unsigned long state, u32 *power)
{
unsigned int freq, num_cpus;
- u32 static_power, dynamic_power;
- int ret;
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
/* Request state should be less than max_level */
num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
freq = cpufreq_cdev->freq_table[state].frequency;
- dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
- ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
- if (ret)
- return ret;
+ *power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
- *power = static_power + dynamic_power;
- return ret;
+ return 0;
}
/**
unsigned long *state)
{
unsigned int cur_freq, target_freq;
- int ret;
- s32 dyn_power;
- u32 last_load, normalised_power, static_power;
+ u32 last_load, normalised_power;
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
struct cpufreq_policy *policy = cpufreq_cdev->policy;
cur_freq = cpufreq_quick_get(policy->cpu);
- ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
- if (ret)
- return ret;
-
- dyn_power = power - static_power;
- dyn_power = dyn_power > 0 ? dyn_power : 0;
+ power = power > 0 ? power : 0;
last_load = cpufreq_cdev->last_load ?: 1;
- normalised_power = (dyn_power * 100) / last_load;
+ normalised_power = (power * 100) / last_load;
target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
*state = get_level(cpufreq_cdev, target_freq);
* @policy: cpufreq policy
* Normally this should be same as cpufreq policy->related_cpus.
* @capacitance: dynamic power coefficient for these cpus
- * @plat_static_func: function to calculate the static power consumed by these
- * cpus (optional)
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
*/
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy, u32 capacitance,
- get_static_t plat_static_func)
+ struct cpufreq_policy *policy, u32 capacitance)
{
struct thermal_cooling_device *cdev;
struct cpufreq_cooling_device *cpufreq_cdev;
}
if (capacitance) {
- cpufreq_cdev->plat_get_static_power = plat_static_func;
-
ret = update_freq_table(cpufreq_cdev, capacitance);
if (ret) {
cdev = ERR_PTR(ret);
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy)
{
- return __cpufreq_cooling_register(NULL, policy, 0, NULL);
+ return __cpufreq_cooling_register(NULL, policy, 0);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
/**
* of_cpufreq_cooling_register - function to create cpufreq cooling device.
- * @np: a valid struct device_node to the cooling device device tree node
* @policy: cpufreq policy
*
* This interface function registers the cpufreq cooling device with the name
* cooling devices. Using this API, the cpufreq cooling device will be
* linked to the device tree node provided.
*
- * Return: a valid struct thermal_cooling_device pointer on success,
- * on failure, it returns a corresponding ERR_PTR().
- */
-struct thermal_cooling_device *
-of_cpufreq_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy)
-{
- if (!np)
- return ERR_PTR(-EINVAL);
-
- return __cpufreq_cooling_register(np, policy, 0, NULL);
-}
-EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
-
-/**
- * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
- * @policy: cpufreq policy
- * @capacitance: dynamic power coefficient for these cpus
- * @plat_static_func: function to calculate the static power consumed by these
- * cpus (optional)
- *
- * This interface function registers the cpufreq cooling device with
- * the name "thermal-cpufreq-%x". This api can support multiple
- * instances of cpufreq cooling devices. Using this function, the
- * cooling device will implement the power extensions by using a
- * simple cpu power model. The cpus must have registered their OPPs
- * using the OPP library.
- *
- * An optional @plat_static_func may be provided to calculate the
- * static power consumed by these cpus. If the platform's static
- * power consumption is unknown or negligible, make it NULL.
- *
- * Return: a valid struct thermal_cooling_device pointer on success,
- * on failure, it returns a corresponding ERR_PTR().
- */
-struct thermal_cooling_device *
-cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
- get_static_t plat_static_func)
-{
- return __cpufreq_cooling_register(NULL, policy, capacitance,
- plat_static_func);
-}
-EXPORT_SYMBOL(cpufreq_power_cooling_register);
-
-/**
- * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
- * @np: a valid struct device_node to the cooling device device tree node
- * @policy: cpufreq policy
- * @capacitance: dynamic power coefficient for these cpus
- * @plat_static_func: function to calculate the static power consumed by these
- * cpus (optional)
- *
- * This interface function registers the cpufreq cooling device with
- * the name "thermal-cpufreq-%x". This api can support multiple
- * instances of cpufreq cooling devices. Using this API, the cpufreq
- * cooling device will be linked to the device tree node provided.
* Using this function, the cooling device will implement the power
* extensions by using a simple cpu power model. The cpus must have
* registered their OPPs using the OPP library.
*
- * An optional @plat_static_func may be provided to calculate the
- * static power consumed by these cpus. If the platform's static
- * power consumption is unknown or negligible, make it NULL.
+ * It also takes into account, if property present in policy CPU node, the
+ * static power consumed by the cpu.
*
* Return: a valid struct thermal_cooling_device pointer on success,
- * on failure, it returns a corresponding ERR_PTR().
+ * and NULL on failure.
*/
struct thermal_cooling_device *
-of_cpufreq_power_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy,
- u32 capacitance,
- get_static_t plat_static_func)
+of_cpufreq_cooling_register(struct cpufreq_policy *policy)
{
- if (!np)
- return ERR_PTR(-EINVAL);
+ struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
+ struct thermal_cooling_device *cdev = NULL;
+ u32 capacitance = 0;
+
+ if (!np) {
+ pr_err("cpu_cooling: OF node not available for cpu%d\n",
+ policy->cpu);
+ return NULL;
+ }
+
+ if (of_find_property(np, "#cooling-cells", NULL)) {
+ of_property_read_u32(np, "dynamic-power-coefficient",
+ &capacitance);
- return __cpufreq_cooling_register(np, policy, capacitance,
- plat_static_func);
+ cdev = __cpufreq_cooling_register(np, policy, capacitance);
+ if (IS_ERR(cdev)) {
+ pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n",
+ policy->cpu, PTR_ERR(cdev));
+ cdev = NULL;
+ }
+ }
+
+ of_node_put(np);
+ return cdev;
}
-EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
+EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
udc = kzalloc(sizeof(*udc), GFP_KERNEL);
if (!udc)
- goto err1;
-
- ret = device_add(&gadget->dev);
- if (ret)
- goto err2;
+ goto err_put_gadget;
device_initialize(&udc->dev);
udc->dev.release = usb_udc_release;
udc->dev.parent = parent;
ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
if (ret)
- goto err3;
+ goto err_put_udc;
+
+ ret = device_add(&gadget->dev);
+ if (ret)
+ goto err_put_udc;
udc->gadget = gadget;
gadget->udc = udc;
ret = device_add(&udc->dev);
if (ret)
- goto err4;
+ goto err_unlist_udc;
usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
udc->vbus = true;
/* pick up one of pending gadget drivers */
ret = check_pending_gadget_drivers(udc);
if (ret)
- goto err5;
+ goto err_del_udc;
mutex_unlock(&udc_lock);
return 0;
-err5:
+ err_del_udc:
device_del(&udc->dev);
-err4:
+ err_unlist_udc:
list_del(&udc->list);
mutex_unlock(&udc_lock);
-err3:
- put_device(&udc->dev);
device_del(&gadget->dev);
-err2:
- kfree(udc);
+ err_put_udc:
+ put_device(&udc->dev);
-err1:
+ err_put_gadget:
put_device(&gadget->dev);
return ret;
}
if (gpio_is_valid(hub->gpio_reset)) {
err = devm_gpio_request_one(dev, hub->gpio_reset,
GPIOF_OUT_INIT_LOW, "usb3503 reset");
+ /* Datasheet defines a hardware reset to be at least 100us */
+ usleep_range(100, 10000);
if (err) {
dev_err(dev,
"unable to request GPIO %d as reset pin (%d)\n",
break;
case MON_IOCQ_RING_SIZE:
+ mutex_lock(&rp->fetch_lock);
ret = rp->b_size;
+ mutex_unlock(&rp->fetch_lock);
break;
case MON_IOCT_RING_SIZE:
unsigned long offset, chunk_idx;
struct page *pageptr;
+ mutex_lock(&rp->fetch_lock);
offset = vmf->pgoff << PAGE_SHIFT;
- if (offset >= rp->b_size)
+ if (offset >= rp->b_size) {
+ mutex_unlock(&rp->fetch_lock);
return VM_FAULT_SIGBUS;
+ }
chunk_idx = offset / CHUNK_SIZE;
pageptr = rp->b_vec[chunk_idx].pg;
get_page(pageptr);
+ mutex_unlock(&rp->fetch_lock);
vmf->page = pageptr;
return 0;
}
{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_ATA_1X),
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+ "Norelsys",
+ "NS1068X",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
/* Reported-by: Takeo Nakayama <javhera@gmx.com> */
UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
"JMicron",
dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
udev->devnum, udev->devpath, usb_speed_string(udev->speed));
- pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+ pr_debug("tt hub ttport %d\n", udev->ttport);
dev_dbg(dev, " ");
for (i = 0; i < 16; i++)
}
pr_debug("\n");
- dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
- dev_dbg(dev,
- "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
- &udev->descriptor, udev->config,
- udev->actconfig, udev->rawdescriptors);
+ dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+ udev->bus->bus_name);
dev_dbg(dev, "have_langid %d, string_langid %d\n",
udev->have_langid, udev->string_langid);
dev = &urb->dev->dev;
- dev_dbg(dev, " urb :%p\n", urb);
- dev_dbg(dev, " dev :%p\n", urb->dev);
-
usbip_dump_usb_device(urb->dev);
dev_dbg(dev, " pipe :%08x ", urb->pipe);
dev_dbg(dev, " status :%d\n", urb->status);
dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
- dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
dev_dbg(dev, " transfer_buffer_length:%d\n",
urb->transfer_buffer_length);
dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
- dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
usbip_dump_usb_ctrlrequest(
dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
dev_dbg(dev, " interval :%d\n", urb->interval);
dev_dbg(dev, " error_count :%d\n", urb->error_count);
- dev_dbg(dev, " context :%p\n", urb->context);
- dev_dbg(dev, " complete :%p\n", urb->complete);
}
EXPORT_SYMBOL_GPL(usbip_dump_urb);
urb_p->new = 1;
urb_p->seqnum = pdu->base.seqnum;
+ if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+ /* validate packet size and number of packets */
+ unsigned int maxp, packets, bytes;
+
+ maxp = usb_endpoint_maxp(urb_p->ep->desc);
+ maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+ bytes = pdu->u.cmd_submit.transfer_buffer_length;
+ packets = DIV_ROUND_UP(bytes, maxp);
+
+ if (pdu->u.cmd_submit.number_of_packets < 0 ||
+ pdu->u.cmd_submit.number_of_packets > packets) {
+ dev_err(&udc->gadget.dev,
+ "CMD_SUBMIT: isoc invalid num packets %d\n",
+ pdu->u.cmd_submit.number_of_packets);
+ ret = -EMSGSIZE;
+ goto free_urbp;
+ }
+ }
+
ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
if (ret) {
usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
memset(&pdu_header, 0, sizeof(pdu_header));
memset(&msg, 0, sizeof(msg));
+ if (urb->actual_length > 0 && !urb->transfer_buffer) {
+ dev_err(&udc->gadget.dev,
+ "urb: actual_length %d transfer_buffer null\n",
+ urb->actual_length);
+ return -1;
+ }
+
if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
iovnum = 2 + urb->number_of_packets;
else
/* 1. setup usbip_header */
setup_ret_submit_pdu(&pdu_header, urb_p);
- usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
- pdu_header.base.seqnum, urb);
+ usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+ pdu_header.base.seqnum);
usbip_header_correct_endian(&pdu_header, 1);
iov[iovnum].iov_base = &pdu_header;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1) {
- range--;
+ if (map->unmap_ops[offset+range].handle == -1)
break;
- }
range++;
}
err = __unmap_grant_pages(map, offset, range);
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod)
+ if (use_ptemod) {
map->vma = NULL;
+ unmap_grant_pages(map, 0, map->count);
+ }
gntdev_put_map(priv, map);
return err;
}
pvcalls_exit();
return ret;
}
- map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+ map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
if (map2 == NULL) {
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
* However, if we didn't have a callback promise outstanding, or it was
* outstanding on a different server, then it won't break it either...
*/
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+ unsigned long d_version_before,
+ unsigned long d_version_after)
{
+ bool dir_valid;
int ret = 0;
+ /* There were no intervening changes on the server if the version
+ * number we got back was incremented by exactly 1.
+ */
+ dir_valid = (d_version_after == d_version_before + 1);
+
if (d_really_is_positive(dentry)) {
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- kdebug("AFS_VNODE_DELETED");
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-
- ret = afs_validate(vnode, key);
- if (ret == -ESTALE)
+ if (dir_valid) {
+ drop_nlink(&vnode->vfs_inode);
+ if (vnode->vfs_inode.i_nlink == 0) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
ret = 0;
+ } else {
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ kdebug("AFS_VNODE_DELETED");
+
+ ret = afs_validate(vnode, key);
+ if (ret == -ESTALE)
+ ret = 0;
+ }
_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
}
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct key *key;
+ unsigned long d_version = (unsigned long)dentry->d_fsdata;
int ret;
_enter("{%x:%u},{%pd}",
afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
ret = afs_end_vnode_operation(&fc);
if (ret == 0)
- ret = afs_dir_remove_link(dentry, key);
+ ret = afs_dir_remove_link(
+ dentry, key, d_version,
+ (unsigned long)dvnode->status.data_version);
}
error_key:
}
read_sequnlock_excl(&vnode->cb_lock);
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ clear_nlink(&vnode->vfs_inode);
+
if (valid)
goto valid;
{
struct afs_net *net = call->net;
enum afs_call_state state;
- u32 remote_abort;
+ u32 remote_abort = 0;
int ret;
_enter("{%s,%zu},,%zu,%d",
ret = afs_fill_page(vnode, key, pos + copied,
len - copied, page);
if (ret < 0)
- return ret;
+ goto out;
}
SetPageUptodate(page);
}
set_page_dirty(page);
if (PageDirty(page))
_debug("dirtied");
+ ret = copied;
+
+out:
unlock_page(page);
put_page(page);
-
- return copied;
+ return ret;
}
/*
spin_lock(&root->inode_lock);
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
if (node) {
if (btrfs_inode->delayed_node) {
refcount_inc(&node->refs); /* can be accessed */
spin_unlock(&root->inode_lock);
return node;
}
- btrfs_inode->delayed_node = node;
- /* can be accessed and cached in the inode */
- refcount_add(2, &node->refs);
+
+ /*
+ * It's possible that we're racing into the middle of removing
+ * this node from the radix tree. In this case, the refcount
+ * was zero and it should never go back to one. Just return
+ * NULL like it was never in the radix at all; our release
+ * function is in the process of removing it.
+ *
+ * Some implementations of refcount_inc refuse to bump the
+ * refcount once it has hit zero. If we don't do this dance
+ * here, refcount_inc() may decide to just WARN_ONCE() instead
+ * of actually bumping the refcount.
+ *
+ * If this node is properly in the radix, we want to bump the
+ * refcount twice, once for the inode and once for this get
+ * operation.
+ */
+ if (refcount_inc_not_zero(&node->refs)) {
+ refcount_inc(&node->refs);
+ btrfs_inode->delayed_node = node;
+ } else {
+ node = NULL;
+ }
+
spin_unlock(&root->inode_lock);
return node;
}
mutex_unlock(&delayed_node->mutex);
if (refcount_dec_and_test(&delayed_node->refs)) {
- bool free = false;
struct btrfs_root *root = delayed_node->root;
+
spin_lock(&root->inode_lock);
- if (refcount_read(&delayed_node->refs) == 0) {
- radix_tree_delete(&root->delayed_nodes_tree,
- delayed_node->inode_id);
- free = true;
- }
+ /*
+ * Once our refcount goes to zero, nobody is allowed to bump it
+ * back up. We can delete it now.
+ */
+ ASSERT(refcount_read(&delayed_node->refs) == 0);
+ radix_tree_delete(&root->delayed_nodes_tree,
+ delayed_node->inode_id);
spin_unlock(&root->inode_lock);
- if (free)
- kmem_cache_free(delayed_node_cache, delayed_node);
+ kmem_cache_free(delayed_node_cache, delayed_node);
}
}
kfree(dev);
return ERR_PTR(-ENOMEM);
}
- bio_get(dev->flush_bio);
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
current->sas_ss_sp = current->sas_ss_size = 0;
- /* Figure out dumpability. */
+ /*
+ * Figure out dumpability. Note that this checking only of current
+ * is wrong, but userspace depends on it. This should be testing
+ * bprm->secureexec instead.
+ */
if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
- bprm->secureexec)
+ !(uid_eq(current_euid(), current_uid()) &&
+ gid_eq(current_egid(), current_gid())))
set_dumpable(current->mm, suid_dumpable);
else
set_dumpable(current->mm, SUID_DUMP_USER);
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
- register_shrinker(&s->s_shrink);
+ err = register_shrinker(&s->s_shrink);
+ if (err) {
+ deactivate_locked_super(s);
+ s = ERR_PTR(err);
+ }
return s;
}
static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
struct userfaultfd_wait_queue *ewq)
{
+ struct userfaultfd_ctx *release_new_ctx;
+
if (WARN_ON_ONCE(current->flags & PF_EXITING))
goto out;
ewq->ctx = ctx;
init_waitqueue_entry(&ewq->wq, current);
+ release_new_ctx = NULL;
spin_lock(&ctx->event_wqh.lock);
/*
new = (struct userfaultfd_ctx *)
(unsigned long)
ewq->msg.arg.reserved.reserved1;
-
- userfaultfd_ctx_put(new);
+ release_new_ctx = new;
}
break;
}
__set_current_state(TASK_RUNNING);
spin_unlock(&ctx->event_wqh.lock);
+ if (release_new_ctx) {
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = release_new_ctx->mm;
+
+ /* the various vma->vm_userfaultfd_ctx still points to it */
+ down_write(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ up_write(&mm->mmap_sem);
+
+ userfaultfd_ctx_put(release_new_ctx);
+ }
+
/*
* ctx may go away after this if the userfault pseudo fd is
* already released.
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - count)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - size)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
}
ASSERT(offset <= mp->m_super->s_maxbytes);
- if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - length)
length = mp->m_super->s_maxbytes - offset;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-
+STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
* We use the batch lookup interface to iterate over the dquots as it
qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&qinf->qi_shrinker);
+
+ error = register_shrinker(&qinf->qi_shrinker);
+ if (error)
+ goto out_free_inos;
+
return 0;
+out_free_inos:
+ mutex_destroy(&qinf->qi_quotaofflock);
+ mutex_destroy(&qinf->qi_tree_lock);
+ xfs_qm_destroy_quotainos(qinf);
out_free_lru:
list_lru_destroy(&qinf->qi_lru);
out_free_qinf:
return error;
}
-
/*
* Gets called when unmounting a filesystem or when all quotas get
* turned off.
unregister_shrinker(&qi->qi_shrinker);
list_lru_destroy(&qi->qi_lru);
-
- if (qi->qi_uquotaip) {
- IRELE(qi->qi_uquotaip);
- qi->qi_uquotaip = NULL; /* paranoia */
- }
- if (qi->qi_gquotaip) {
- IRELE(qi->qi_gquotaip);
- qi->qi_gquotaip = NULL;
- }
- if (qi->qi_pquotaip) {
- IRELE(qi->qi_pquotaip);
- qi->qi_pquotaip = NULL;
- }
+ xfs_qm_destroy_quotainos(qi);
+ mutex_destroy(&qi->qi_tree_lock);
mutex_destroy(&qi->qi_quotaofflock);
kmem_free(qi);
mp->m_quotainfo = NULL;
return error;
}
+STATIC void
+xfs_qm_destroy_quotainos(
+ xfs_quotainfo_t *qi)
+{
+ if (qi->qi_uquotaip) {
+ IRELE(qi->qi_uquotaip);
+ qi->qi_uquotaip = NULL; /* paranoia */
+ }
+ if (qi->qi_gquotaip) {
+ IRELE(qi->qi_gquotaip);
+ qi->qi_gquotaip = NULL;
+ }
+ if (qi->qi_pquotaip) {
+ IRELE(qi->qi_pquotaip);
+ qi->qi_pquotaip = NULL;
+ }
+}
+
STATIC void
xfs_qm_dqfree_one(
struct xfs_dquot *dqp)
#include <linux/if_alg.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
+#include <linux/atomic.h>
#include <net/sock.h>
#include <crypto/aead.h>
struct crypto_wait wait;
size_t used;
- size_t rcvused;
+ atomic_t rcvused;
bool more;
bool merge;
struct af_alg_ctx *ctx = ask->private;
return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
- ctx->rcvused, 0);
+ atomic_read(&ctx->rcvused), 0);
}
/**
void __init acpi_old_suspend_ordering(void);
void __init acpi_nvs_nosave(void);
void __init acpi_nvs_nosave_s3(void);
+void __init acpi_sleep_no_blacklist(void);
#endif /* CONFIG_PM_SLEEP */
struct acpi_osc_context {
};
struct bpf_map {
- atomic_t refcnt;
+ /* 1st cacheline with read-mostly members of which some
+ * are also accessed in fast-path (e.g. ops, max_entries).
+ */
+ const struct bpf_map_ops *ops ____cacheline_aligned;
+ struct bpf_map *inner_map_meta;
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
u32 pages;
u32 id;
int numa_node;
- struct user_struct *user;
- const struct bpf_map_ops *ops;
- struct work_struct work;
+ bool unpriv_array;
+ /* 7 bytes hole */
+
+ /* 2nd cacheline with misc members to avoid false sharing
+ * particularly with refcounting.
+ */
+ struct user_struct *user ____cacheline_aligned;
+ atomic_t refcnt;
atomic_t usercnt;
- struct bpf_map *inner_map_meta;
+ struct work_struct work;
char name[BPF_OBJ_NAME_LEN];
-#ifdef CONFIG_SECURITY
- void *security;
-#endif
};
/* function argument constraints */
struct bpf_array {
struct bpf_map map;
u32 elem_size;
+ u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
attr->numa_node : NUMA_NO_NODE;
}
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
{
return 0;
}
+
+static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
+ enum bpf_prog_type type)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
return bpf_prog_get_type_dev(ufd, type, false);
}
+bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
+
int bpf_prog_offload_compile(struct bpf_prog *prog);
void bpf_prog_offload_destroy(struct bpf_prog *prog);
#define init_completion(x) __init_completion(x)
static inline void complete_acquire(struct completion *x) {}
static inline void complete_release(struct completion *x) {}
-static inline void complete_release_commit(struct completion *x) {}
#define COMPLETION_INITIALIZER(work) \
{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+extern ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
const struct attribute_group **groups,
struct cpufreq_policy;
-typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
- unsigned long voltage, u32 *power);
-
#ifdef CONFIG_CPU_THERMAL
/**
* cpufreq_cooling_register - function to create cpufreq cooling device.
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy);
-struct thermal_cooling_device *
-cpufreq_power_cooling_register(struct cpufreq_policy *policy,
- u32 capacitance, get_static_t plat_static_func);
-
-/**
- * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
- * @np: a valid struct device_node to the cooling device device tree node.
- * @policy: cpufreq policy.
- */
-#ifdef CONFIG_THERMAL_OF
-struct thermal_cooling_device *
-of_cpufreq_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy);
-
-struct thermal_cooling_device *
-of_cpufreq_power_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy,
- u32 capacitance,
- get_static_t plat_static_func);
-#else
-static inline struct thermal_cooling_device *
-of_cpufreq_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy)
-{
- return ERR_PTR(-ENOSYS);
-}
-
-static inline struct thermal_cooling_device *
-of_cpufreq_power_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy,
- u32 capacitance,
- get_static_t plat_static_func)
-{
- return NULL;
-}
-#endif
-
/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
{
return ERR_PTR(-ENOSYS);
}
-static inline struct thermal_cooling_device *
-cpufreq_power_cooling_register(struct cpufreq_policy *policy,
- u32 capacitance, get_static_t plat_static_func)
-{
- return NULL;
-}
-static inline struct thermal_cooling_device *
-of_cpufreq_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy)
+static inline
+void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
- return ERR_PTR(-ENOSYS);
+ return;
}
+#endif /* CONFIG_CPU_THERMAL */
+#if defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL)
+/**
+ * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
+ * @policy: cpufreq policy.
+ */
+struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct cpufreq_policy *policy);
+#else
static inline struct thermal_cooling_device *
-of_cpufreq_power_cooling_register(struct device_node *np,
- struct cpufreq_policy *policy,
- u32 capacitance,
- get_static_t plat_static_func)
+of_cpufreq_cooling_register(struct cpufreq_policy *policy)
{
return NULL;
}
-
-static inline
-void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
-{
- return;
-}
-#endif /* CONFIG_CPU_THERMAL */
+#endif /* defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL) */
#endif /* __CPU_COOLING_H__ */
vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
#define VMCOREINFO_SYMBOL(name) \
vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
#define VMCOREINFO_SIZE(name) \
vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
(unsigned long)sizeof(name))
struct capsule_info {
efi_capsule_header_t header;
+ efi_capsule_header_t *capsule;
int reset_type;
long index;
size_t count;
size_t total_size;
- phys_addr_t *pages;
+ struct page **pages;
+ phys_addr_t *phys;
size_t page_bytes_remain;
};
{
if (fscache_cookie_valid(cookie) && PageFsCache(page))
return __fscache_maybe_release_page(cookie, page, gfp);
- return false;
+ return true;
}
/**
# define trace_hardirq_enter() \
do { \
current->hardirq_context++; \
- crossrelease_hist_start(XHLOCK_HARD); \
} while (0)
# define trace_hardirq_exit() \
do { \
current->hardirq_context--; \
- crossrelease_hist_end(XHLOCK_HARD); \
} while (0)
# define lockdep_softirq_enter() \
do { \
current->softirq_context++; \
- crossrelease_hist_start(XHLOCK_SOFT); \
} while (0)
# define lockdep_softirq_exit() \
do { \
current->softirq_context--; \
- crossrelease_hist_end(XHLOCK_SOFT); \
} while (0)
# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
#else
#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
{ .name = (_name), .key = (void *)(_key), }
-static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
-static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
static inline void lockdep_invariant_state(bool force) {}
static inline void lockdep_init_task(struct task_struct *task) {}
static inline void lockdep_free_task(struct task_struct *task) {}
* These flags can be set by device drivers at the probe time. They need not be
* cleared by the drivers as the driver core will take care of that.
*
- * NEVER_SKIP: Do not skip system suspend/resume callbacks for the device.
+ * NEVER_SKIP: Do not skip all system suspend/resume callbacks for the device.
* SMART_PREPARE: Check the return value of the driver's ->prepare callback.
* SMART_SUSPEND: No need to resume the device from runtime suspend.
+ * LEAVE_SUSPENDED: Avoid resuming the device during system resume if possible.
*
* Setting SMART_PREPARE instructs bus types and PM domains which may want
* system suspend/resume callbacks to be skipped for the device to return 0 from
* necessary from the driver's perspective. It also may cause them to skip
* invocations of the ->suspend_late and ->suspend_noirq callbacks provided by
* the driver if they decide to leave the device in runtime suspend.
+ *
+ * Setting LEAVE_SUSPENDED informs the PM core and middle-layer code that the
+ * driver prefers the device to be left in suspend after system resume.
*/
-#define DPM_FLAG_NEVER_SKIP BIT(0)
-#define DPM_FLAG_SMART_PREPARE BIT(1)
-#define DPM_FLAG_SMART_SUSPEND BIT(2)
+#define DPM_FLAG_NEVER_SKIP BIT(0)
+#define DPM_FLAG_SMART_PREPARE BIT(1)
+#define DPM_FLAG_SMART_SUSPEND BIT(2)
+#define DPM_FLAG_LEAVE_SUSPENDED BIT(3)
struct dev_pm_info {
pm_message_t power_state;
bool wakeup_path:1;
bool syscore:1;
bool no_pm_callbacks:1; /* Owned by the PM core */
+ unsigned int must_resume:1; /* Owned by the PM core */
+ unsigned int may_skip_resume:1; /* Set by subsystems */
#else
unsigned int should_wakeup:1;
#endif
extern void pm_generic_complete(struct device *dev);
extern void dev_pm_skip_next_resume_phases(struct device *dev);
+extern bool dev_pm_may_skip_resume(struct device *dev);
extern bool dev_pm_smart_suspend_and_suspended(struct device *dev);
#else /* !CONFIG_PM_SLEEP */
return dev->power.can_wakeup && !!dev->power.wakeup;
}
+static inline void device_set_wakeup_path(struct device *dev)
+{
+ dev->power.wakeup_path = true;
+}
+
/* drivers/base/power/wakeup.c */
extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name);
extern struct wakeup_source *wakeup_source_create(const char *name);
return dev->power.can_wakeup && dev->power.should_wakeup;
}
+static inline void device_set_wakeup_path(struct device *dev) {}
+
static inline void __pm_stay_awake(struct wakeup_source *ws) {}
static inline void pm_stay_awake(struct device *dev) {}
unsigned char mac_addr[ETH_ALEN];
unsigned no_ether_link:1;
unsigned ether_link_active_low:1;
- unsigned needs_init:1;
};
#endif
void sctp_transport_burst_reset(struct sctp_transport *);
unsigned long sctp_transport_timeout(struct sctp_transport *);
void sctp_transport_reset(struct sctp_transport *t);
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
np_applied:1,
instance_applied:1,
version:2,
-reserved_flags2:2;
+ reserved_flags2:2;
#elif defined(__BIG_ENDIAN_BITFIELD)
u8 reserved_flags2:2,
version:2,
#ifdef CONFIG_CPU_THERMAL
TRACE_EVENT(thermal_power_cpu_get_power,
TP_PROTO(const struct cpumask *cpus, unsigned long freq, u32 *load,
- size_t load_len, u32 dynamic_power, u32 static_power),
+ size_t load_len, u32 dynamic_power),
- TP_ARGS(cpus, freq, load, load_len, dynamic_power, static_power),
+ TP_ARGS(cpus, freq, load, load_len, dynamic_power),
TP_STRUCT__entry(
__bitmask(cpumask, num_possible_cpus())
__dynamic_array(u32, load, load_len)
__field(size_t, load_len )
__field(u32, dynamic_power )
- __field(u32, static_power )
),
TP_fast_assign(
load_len * sizeof(*load));
__entry->load_len = load_len;
__entry->dynamic_power = dynamic_power;
- __entry->static_power = static_power;
),
- TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d static_power=%d",
+ TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d",
__get_bitmask(cpumask), __entry->freq,
__print_array(__get_dynamic_array(load), __entry->load_len, 4),
- __entry->dynamic_power, __entry->static_power)
+ __entry->dynamic_power)
);
TRACE_EVENT(thermal_power_cpu_limit,
#define _UAPI_LINUX_IF_ETHER_H
#include <linux/types.h>
+#include <linux/libc-compat.h>
/*
* IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
* This is an Ethernet frame header.
*/
+#if __UAPI_DEF_ETHHDR
struct ethhdr {
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
unsigned char h_source[ETH_ALEN]; /* source ether addr */
__be16 h_proto; /* packet type ID field */
} __attribute__((packed));
+#endif
#endif /* _UAPI_LINUX_IF_ETHER_H */
/* If we did not see any headers from any supported C libraries,
* or we are being included in the kernel, then define everything
- * that we need. */
+ * that we need. Check for previous __UAPI_* definitions to give
+ * unsupported C libraries a way to opt out of any kernel definition. */
#else /* !defined(__GLIBC__) */
/* Definitions for if.h */
+#ifndef __UAPI_DEF_IF_IFCONF
#define __UAPI_DEF_IF_IFCONF 1
+#endif
+#ifndef __UAPI_DEF_IF_IFMAP
#define __UAPI_DEF_IF_IFMAP 1
+#endif
+#ifndef __UAPI_DEF_IF_IFNAMSIZ
#define __UAPI_DEF_IF_IFNAMSIZ 1
+#endif
+#ifndef __UAPI_DEF_IF_IFREQ
#define __UAPI_DEF_IF_IFREQ 1
+#endif
/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS
#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+#endif
/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif
/* Definitions for in.h */
+#ifndef __UAPI_DEF_IN_ADDR
#define __UAPI_DEF_IN_ADDR 1
+#endif
+#ifndef __UAPI_DEF_IN_IPPROTO
#define __UAPI_DEF_IN_IPPROTO 1
+#endif
+#ifndef __UAPI_DEF_IN_PKTINFO
#define __UAPI_DEF_IN_PKTINFO 1
+#endif
+#ifndef __UAPI_DEF_IP_MREQ
#define __UAPI_DEF_IP_MREQ 1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN
#define __UAPI_DEF_SOCKADDR_IN 1
+#endif
+#ifndef __UAPI_DEF_IN_CLASS
#define __UAPI_DEF_IN_CLASS 1
+#endif
/* Definitions for in6.h */
+#ifndef __UAPI_DEF_IN6_ADDR
#define __UAPI_DEF_IN6_ADDR 1
+#endif
+#ifndef __UAPI_DEF_IN6_ADDR_ALT
#define __UAPI_DEF_IN6_ADDR_ALT 1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN6
#define __UAPI_DEF_SOCKADDR_IN6 1
+#endif
+#ifndef __UAPI_DEF_IPV6_MREQ
#define __UAPI_DEF_IPV6_MREQ 1
+#endif
+#ifndef __UAPI_DEF_IPPROTO_V6
#define __UAPI_DEF_IPPROTO_V6 1
+#endif
+#ifndef __UAPI_DEF_IPV6_OPTIONS
#define __UAPI_DEF_IPV6_OPTIONS 1
+#endif
+#ifndef __UAPI_DEF_IN6_PKTINFO
#define __UAPI_DEF_IN6_PKTINFO 1
+#endif
+#ifndef __UAPI_DEF_IP6_MTUINFO
#define __UAPI_DEF_IP6_MTUINFO 1
+#endif
/* Definitions for ipx.h */
+#ifndef __UAPI_DEF_SOCKADDR_IPX
#define __UAPI_DEF_SOCKADDR_IPX 1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
+#endif
+#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
+#endif
+#ifndef __UAPI_DEF_IPX_CONFIG_DATA
#define __UAPI_DEF_IPX_CONFIG_DATA 1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEF
#define __UAPI_DEF_IPX_ROUTE_DEF 1
+#endif
/* Definitions for xattr.h */
+#ifndef __UAPI_DEF_XATTR
#define __UAPI_DEF_XATTR 1
+#endif
#endif /* __GLIBC__ */
+/* Definitions for if_ether.h */
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR 1
+#endif
+
#endif /* _UAPI_LIBC_COMPAT_H */
#define NF_CT_STATE_INVALID_BIT (1 << 0)
#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
-#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1))
+#define NF_CT_STATE_UNTRACKED_BIT (1 << 6)
/* Bitset representing status of connection. */
enum ip_conntrack_status {
config CPU_ISOLATION
bool "CPU isolation"
+ depends on SMP || COMPILE_TEST
default y
help
Make sure that CPUs running critical tasks are not disturbed by
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
+config BPF_JIT_ALWAYS_ON
+ bool "Permanently enable BPF JIT and remove BPF interpreter"
+ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+ help
+ Enables BPF JIT and removes BPF interpreter to avoid
+ speculative execution of BPF instructions by the interpreter
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
{
struct kstatfs sbuf;
- if (time_is_before_jiffies(acct->needcheck))
+ if (time_is_after_jiffies(acct->needcheck))
goto out;
/* May block */
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
+ u32 elem_size, index_mask, max_entries;
+ bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size;
- u32 elem_size;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
elem_size = round_up(attr->value_size, 8);
+ max_entries = attr->max_entries;
+ index_mask = roundup_pow_of_two(max_entries) - 1;
+
+ if (unpriv)
+ /* round up array size to nearest power of 2,
+ * since cpu will speculate within index_mask limits
+ */
+ max_entries = index_mask + 1;
+
array_size = sizeof(*array);
if (percpu)
- array_size += (u64) attr->max_entries * sizeof(void *);
+ array_size += (u64) max_entries * sizeof(void *);
else
- array_size += (u64) attr->max_entries * elem_size;
+ array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
+ array->index_mask = index_mask;
+ array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
if (unlikely(index >= array->map.max_entries))
return NULL;
- return array->value + array->elem_size * index;
+ return array->value + array->elem_size * (index & array->index_mask);
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
- *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+ if (map->unpriv_array) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+ }
if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
if (unlikely(index >= array->map.max_entries))
return NULL;
- return this_cpu_ptr(array->pptrs[index]);
+ return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
- pptr = array->pptrs[index];
+ pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
- memcpy(this_cpu_ptr(array->pptrs[index]),
+ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
- memcpy(array->value + array->elem_size * index,
+ memcpy(array->value +
+ array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
- pptr = array->pptrs[index];
+ pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
- *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ if (map->unpriv_array) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ }
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
* @ctx: is the data we are operating on
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+ const struct bpf_insn *insn)
+{
+ return 0;
+}
+#endif
+
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+ fp->bpf_func = __bpf_prog_ret0;
+#endif
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
*/
if (!bpf_prog_is_dev_bound(fp->aux)) {
fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ if (!fp->jited) {
+ *err = -ENOTSUPP;
+ return fp;
+ }
+#endif
} else {
*err = bpf_prog_offload_compile(fp);
if (*err)
putname(pname);
return ret;
}
-EXPORT_SYMBOL_GPL(bpf_obj_get_user);
+
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+{
+ struct bpf_prog *prog;
+ int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (inode->i_op == &bpf_map_iops)
+ return ERR_PTR(-EINVAL);
+ if (inode->i_op != &bpf_prog_iops)
+ return ERR_PTR(-EACCES);
+
+ prog = inode->i_private;
+
+ ret = security_bpf_prog(prog);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ if (!bpf_prog_get_ok(prog, &type, false))
+ return ERR_PTR(-EINVAL);
+
+ return bpf_prog_inc(prog);
+}
+
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+{
+ struct bpf_prog *prog;
+ struct path path;
+ int ret = kern_path(name, LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ERR_PTR(ret);
+ prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+ if (!IS_ERR(prog))
+ touch_atime(&path);
+ path_put(&path);
+ return prog;
+}
+EXPORT_SYMBOL(bpf_prog_get_type_path);
static void bpf_evict_inode(struct inode *inode)
{
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
- smap_list_remove(psock, &stab->sock_map[i]);
- smap_release_sock(psock, sock);
+ /* This check handles a racing sock event that can get the
+ * sk_callback_lock before this case but after xchg happens
+ * causing the refcnt to hit zero and sock user data (psock)
+ * to be null and queued for garbage collection.
+ */
+ if (likely(psock)) {
+ smap_list_remove(psock, &stab->sock_map[i]);
+ smap_release_sock(psock, sock);
+ }
write_unlock_bh(&sock->sk_callback_lock);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
-static bool bpf_prog_get_ok(struct bpf_prog *prog,
+bool bpf_prog_get_ok(struct bpf_prog *prog,
enum bpf_prog_type *attach_type, bool attach_drv)
{
/* not an attachment, just a refcount inc, always allow */
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
+ if (func_id == BPF_FUNC_tail_call) {
+ if (meta.map_ptr == NULL) {
+ verbose(env, "verifier bug\n");
+ return -EINVAL;
+ }
+ env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+ }
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
*/
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+ /* instead of changing every JIT dealing with tail_call
+ * emit two extra insns:
+ * if (index >= max_entries) goto out;
+ * index &= array->index_mask;
+ * to avoid out-of-bounds cpu speculation
+ */
+ map_ptr = env->insn_aux_data[i + delta].map_ptr;
+ if (map_ptr == BPF_MAP_PTR_POISON) {
+ verbose(env, "tail_call obusing map_ptr\n");
+ return -EINVAL;
+ }
+ if (!map_ptr->unpriv_array)
+ continue;
+ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+ map_ptr->max_entries, 2);
+ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+ container_of(map_ptr,
+ struct bpf_array,
+ map)->index_mask);
+ insn_buf[2] = *insn;
+ cnt = 3;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
continue;
}
*/
do {
css_task_iter_start(&from->self, 0, &it);
- task = css_task_iter_next(&it);
+
+ do {
+ task = css_task_iter_next(&it);
+ } while (task && (task->flags & PF_EXITING));
+
if (task)
get_task_struct(task);
css_task_iter_end(&it);
cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
cft->name);
else
- strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
+ strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
return buf;
}
root->flags = opts->flags;
if (opts->release_agent)
- strcpy(root->release_agent_path, opts->release_agent);
+ strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX);
if (opts->name)
- strcpy(root->name, opts->name);
+ strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
if (opts->cpuset_clone_children)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
static void css_task_iter_advance(struct css_task_iter *it)
{
- struct list_head *l = it->task_pos;
+ struct list_head *next;
lockdep_assert_held(&css_set_lock);
- WARN_ON_ONCE(!l);
-
repeat:
/*
* Advance iterator to find next entry. cset->tasks is consumed
* first and then ->mg_tasks. After ->mg_tasks, we move onto the
* next cset.
*/
- l = l->next;
+ next = it->task_pos->next;
- if (l == it->tasks_head)
- l = it->mg_tasks_head->next;
+ if (next == it->tasks_head)
+ next = it->mg_tasks_head->next;
- if (l == it->mg_tasks_head)
+ if (next == it->mg_tasks_head)
css_task_iter_advance_css_set(it);
else
- it->task_pos = l;
+ it->task_pos = next;
/* if PROCS, skip over tasks which aren't group leaders */
if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
--- /dev/null
+CONFIG_PM=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+
+# Triggers PM on OMAP
+CONFIG_CPU_IDLE=n
+
+# Triggers enablement via hibernate callbacks
+CONFIG_XEN=n
+
+# ARM/ARM64 architectures that select PM unconditionally
+CONFIG_ARCH_OMAP2PLUS_TYPICAL=n
+CONFIG_ARCH_RENESAS=n
+CONFIG_ARCH_TEGRA=n
+CONFIG_ARCH_VEXPRESS=n
VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
- VMCOREINFO_SYMBOL(mem_section);
+ VMCOREINFO_SYMBOL_ARRAY(mem_section);
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
+EXPORT_SYMBOL(abort);
}
if (unlikely(is_child_reaper(pid))) {
- if (pid_ns_prepare_proc(ns)) {
- disable_pid_allocation(ns);
+ if (pid_ns_prepare_proc(ns))
goto out_free;
- }
}
get_pid_ns(ns);
while (++i <= ns->level)
idr_remove(&ns->idr, (pid->numbers + i)->nr);
+ /* On failure to allocate the first pid, reset the state */
+ if (ns->pid_allocated == PIDNS_ADDING)
+ idr_set_cursor(&ns->idr, 0);
+
spin_unlock_irq(&pidmap_lock);
kmem_cache_free(ns->pid_cachep, pid);
spin_lock_irqsave(&x->wait.lock, flags);
- /*
- * Perform commit of crossrelease here.
- */
- complete_release_commit(x);
-
if (x->done != UINT_MAX)
x->done++;
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
rcu_read_unlock();
}
if (!fallback) {
+ preempt_disable();
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+ preempt_enable();
free_cpumask_var(tmpmask);
}
cpus_read_unlock();
************** MIPS/64 **************
***************************************/
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+/*
+ * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
+ * code below, so we special case MIPS64r6 until the compiler can do better.
+ */
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+ __asm__ ("dmulu %0,%1,%2" \
+ : "=d" ((UDItype)(w0)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v))); \
+ __asm__ ("dmuhu %0,%1,%2" \
+ : "=d" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v))); \
+} while (0)
+#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
#define umul_ppmm(w1, w0, u, v) \
do { \
typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
return NULL;
}
}
- /* We don't expect to fail. */
if (*err) {
- pr_cont("FAIL to attach err=%d len=%d\n",
+ pr_cont("FAIL to prog_create err=%d len=%d\n",
*err, fprog.len);
return NULL;
}
* checks.
*/
fp = bpf_prog_select_runtime(fp, err);
+ if (*err) {
+ pr_cont("FAIL to select_runtime err=%d\n", *err);
+ return NULL;
+ }
break;
}
pass_cnt++;
continue;
}
-
- return err;
+ err_cnt++;
+ continue;
}
pr_cont("jited:%u ", fp->jited);
*/
int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
- pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
+ pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
page, page_ref_count(page), mapcount,
page->mapping, page_to_pgoff(page));
if (PageCompound(page))
#ifdef CONFIG_MEMCG
if (page->mem_cgroup)
- pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup);
+ pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
#endif
}
void dump_vma(const struct vm_area_struct *vma)
{
- pr_emerg("vma %p start %p end %p\n"
- "next %p prev %p mm %p\n"
- "prot %lx anon_vma %p vm_ops %p\n"
- "pgoff %lx file %p private_data %p\n"
+ pr_emerg("vma %px start %px end %px\n"
+ "next %px prev %px mm %px\n"
+ "prot %lx anon_vma %px vm_ops %px\n"
+ "pgoff %lx file %px private_data %px\n"
"flags: %#lx(%pGv)\n",
vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
vma->vm_prev, vma->vm_mm,
void dump_mm(const struct mm_struct *mm)
{
- pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
+ pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
#ifdef CONFIG_MMU
- "get_unmapped_area %p\n"
+ "get_unmapped_area %px\n"
#endif
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
- "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
+ "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
"start_brk %lx brk %lx start_stack %lx\n"
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
- "binfmt %p flags %lx core_state %p\n"
+ "binfmt %px flags %lx core_state %px\n"
#ifdef CONFIG_AIO
- "ioctx_table %p\n"
+ "ioctx_table %px\n"
#endif
#ifdef CONFIG_MEMCG
- "owner %p "
+ "owner %px "
#endif
- "exe_file %p\n"
+ "exe_file %px\n"
#ifdef CONFIG_MMU_NOTIFIER
- "mmu_notifier_mm %p\n"
+ "mmu_notifier_mm %px\n"
#endif
#ifdef CONFIG_NUMA_BALANCING
"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
/* GFP bitmask for kmemleak internal allocations */
#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
__GFP_NORETRY | __GFP_NOMEMALLOC | \
- __GFP_NOWARN)
+ __GFP_NOWARN | __GFP_NOFAIL)
/* scanning area inside a memory block */
struct kmemleak_scan_area {
next = pmd_addr_end(addr, end);
if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
&& pmd_none_or_clear_bad(pmd))
- continue;
+ goto next;
/* invoke the mmu notifier if the pmd is populated */
if (!mni_start) {
}
/* huge pmd was handled */
- continue;
+ goto next;
}
}
/* fall through, the trans huge pmd just split */
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
dirty_accountable, prot_numa);
pages += this_pages;
+next:
+ cond_resched();
} while (pmd++, addr = next, addr != end);
if (mni_start)
pgcnt = 0;
for_each_resv_unavail_range(i, &start, &end) {
for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
+ if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+ continue;
mm_zero_struct_page(pfn_to_page(pfn));
pgcnt++;
}
if (unlikely(!mem_section)) {
unsigned long size, align;
- size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+ size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
align = 1 << (INTERNODE_CACHE_SHIFT);
mem_section = memblock_virt_alloc(size, align);
}
*/
void unregister_shrinker(struct shrinker *shrinker)
{
+ if (!shrinker->nr_deferred)
+ return;
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
up_write(&shrinker_rwsem);
kfree(shrinker->nr_deferred);
+ shrinker->nr_deferred = NULL;
}
EXPORT_SYMBOL(unregister_shrinker);
#include <linux/mount.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>
+#include <linux/fs.h>
#define ZSPAGE_MAGIC 0x58
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
break;
case L2CAP_CONF_EFS:
- remote_efs = 1;
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
+ remote_efs = 1;
memcpy(&efs, (void *) val, olen);
+ }
break;
case L2CAP_CONF_EWS:
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
memcpy(&efs, (void *)val, olen);
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
+ }
break;
case L2CAP_CONF_FCS:
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strncpy(caifd->layer.name, dev->name,
- sizeof(caifd->layer.name) - 1);
- caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+ strlcpy(caifd->layer.name, dev->name,
+ sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
cfcnfg_add_phy_layer(cfg,
dev,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strncpy(layer->name, dev->name,
- sizeof(layer->name) - 1);
- layer->name[sizeof(layer->name) - 1] = 0;
+ strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
}
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
- sizeof(l->u.rfm.volume)-1);
- l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+ strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strncpy(l->u.utility.name, s->sockaddr.u.util.service,
- sizeof(l->u.utility.name)-1);
- l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+ strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
if (l->u.utility.paramlen > sizeof(l->u.utility.params))
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strncpy(utility_name, param->u.utility.name,
- UTILITY_NAME_LENGTH - 1);
+ strlcpy(utility_name, param->u.utility.name,
+ UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- return dev_alloc_name_ns(net, dev, name);
+ BUG_ON(!net);
+
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (strchr(name, '%'))
+ return dev_alloc_name_ns(net, dev, name);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST;
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
+
+ return 0;
}
EXPORT_SYMBOL(dev_get_valid_name);
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
*/
goto out_err_free;
- /* We are guaranteed to never error here with cBPF to eBPF
- * transitions, since there's no issue with type compatibility
- * checks on program arrays.
- */
fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err_free;
kfree(old_prog);
return fp;
return false;
}
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
{
struct net *net;
- net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
if (!net)
return ERR_PTR(-EINVAL);
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns.
*/
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
return ERR_PTR(-EACCES);
}
ifla_policy, NULL) >= 0) {
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
tgt_net = net;
netnsid = -1;
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
+#else
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
goto out;
/* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
*/
hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
sr_phdr->segments[0] = **addr_p;
*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
+ if (sr_ihdr->hdrlen > hops * 2) {
+ int tlvs_offset, tlvs_length;
+
+ tlvs_offset = (1 + hops * 2) << 3;
+ tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+ memcpy((char *)sr_phdr + tlvs_offset,
+ (char *)sr_ihdr + tlvs_offset, tlvs_length);
+ }
+
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
struct net *net = NULL;
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
rt6_release(leaf);
+ /* remove null_entry in the root node */
+ } else if (fn->fn_flags & RTN_TL_ROOT &&
+ rcu_access_pointer(fn->leaf) ==
+ net->ipv6.ip6_null_entry) {
+ RCU_INIT_POINTER(fn->leaf, NULL);
}
return fn;
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
- lockdep_is_held(&table->tb6_lock));
- if (pn != fn && pn_leaf == rt) {
- pn_leaf = NULL;
- RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
- }
- if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
- pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (pn != fn) {
+ struct rt6_info *pn_leaf =
+ rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ atomic_dec(&rt->rt6i_ref);
}
+ if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table,
+ pn);
+#if RT6_DEBUG >= 2
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf =
+ info->nl_net->ipv6.ip6_null_entry;
+ }
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
- rcu_assign_pointer(pn->leaf, pn_leaf);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
+ }
}
#endif
goto failure;
return err;
failure:
- /* fn->leaf could be NULL if fn is an intermediate node and we
- * failed to add the new route to it in both subtree creation
- * failure and fib6_add_rt2node() failure case.
- * In both cases, fib6_repair_tree() should be called to fix
- * fn->leaf.
+ /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+ * 1. fn is an intermediate node and we failed to add the new
+ * route to it in both subtree creation failure and fib6_add_rt2node()
+ * failure case.
+ * 2. fn is the root node in the table and we fail to add the first
+ * default route to it.
*/
- if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ if (fn &&
+ (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+ (fn->fn_flags & RTN_TL_ROOT &&
+ !rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
struct fib6_walker *w;
int iter = 0;
+ /* Set fn->leaf to null_entry for root node. */
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+ return fn;
+ }
+
for (;;) {
struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
lockdep_is_held(&table->tb6_lock));
}
read_unlock(&net->ipv6.fib6_walker_lock);
- /* If it was last route, expunge its radix tree node */
+ /* If it was last route, call fib6_repair_tree() to:
+ * 1. For root node, put back null_entry as how the table was created.
+ * 2. For other nodes, expunge its radix tree node.
+ */
if (!rcu_access_pointer(fn->leaf)) {
- fn->fn_flags &= ~RTN_RTINFO;
- net->ipv6.rt6_stats->fib_route_nodes--;
+ if (!(fn->fn_flags & RTN_TL_ROOT)) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ }
fn = fib6_repair_tree(net, table, fn);
}
cork.base.opt = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
-
+ }
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
{
struct ip6_tnl *tnl = netdev_priv(dev);
- if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < ETH_MIN_MTU)
+ if (tnl->parms.proto == IPPROTO_IPV6) {
+ if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
} else {
- if (new_mtu < IPV6_MIN_MTU)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
if (new_mtu > 0xFFF8 - dev->hard_header_len)
}
return true;
case NL80211_IFTYPE_MESH_POINT:
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+ return false;
if (multicast)
return true;
return ether_addr_equal(sdata->vif.addr, hdr->addr1);
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain[0] &&
+ if (ctx && ctx->chain &&
strcmp(ctx->chain, chain->name) != 0)
continue;
{
struct nft_obj_filter *filter = cb->data;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}
static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
{
- mm_segment_t oldfs = get_fs();
- int retval, fd;
-
if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
return -EINVAL;
- set_fs(KERNEL_DS);
- fd = bpf_obj_get_user(path, 0);
- set_fs(oldfs);
- if (fd < 0)
- return fd;
-
- retval = __bpf_mt_check_fd(fd, ret);
- sys_close(fd);
- return retval;
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
}
static int bpf_mt_check(const struct xt_mtchk_param *par)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
return;
}
- if (t->param_flags & SPP_PMTUD_ENABLE) {
- /* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
-
- /* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
- }
+ if (!(t->param_flags & SPP_PMTUD_ENABLE))
+ /* We can't allow retransmitting in such case, as the
+ * retransmission would be sized just as before, and thus we
+ * would get another icmp, and retransmit again.
+ */
+ return;
- /* Retransmit with the new pmtu setting.
- * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
- * Needed will never be sent, but if a message was sent before
- * PMTU discovery was disabled that was larger than the PMTU, it
- * would not be fragmented, so it must be re-transmitted fragmented.
+ /* Update transports view of the MTU. Return if no update was needed.
+ * If an update wasn't needed/possible, it also doesn't make sense to
+ * try to retransmit now.
*/
+ if (!sctp_transport_update_pmtu(t, pmtu))
+ return;
+
+ /* Update association pmtu. */
+ sctp_assoc_sync_pmtu(asoc);
+
+ /* Retransmit with the new pmtu setting. */
sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
- GFP_ATOMIC);
+ GFP_USER | __GFP_NOWARN);
if (!event)
return -ENOMEM;
if (optlen < sizeof(struct sctp_hmacalgo))
return -EINVAL;
+ optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+ SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen);
if (IS_ERR(hmacs))
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
+ /* authkey->sca_keylength is u16, so optlen can't be bigger than
+ * this.
+ */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen);
if (IS_ERR(authkey))
if (optlen < sizeof(*params))
return -EINVAL;
+ /* srs_number_streams is u16, so optlen can't be bigger than this. */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen);
if (IS_ERR(params))
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+ if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
return -EFAULT;
return 0;
}
err = -EFAULT;
goto out;
}
+ /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+ * but we can't change it anymore.
+ */
if (put_user(bytes_copied, optlen))
err = -EFAULT;
out:
params.assoc_id = 0;
} else if (len >= sizeof(struct sctp_assoc_value)) {
len = sizeof(struct sctp_assoc_value);
- if (copy_from_user(¶ms, optval, sizeof(params)))
+ if (copy_from_user(¶ms, optval, len))
return -EFAULT;
} else
return -EINVAL;
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+ len = sizeof(struct sctp_authkeyid);
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id);
else
val.scact_keynumber = ep->active_key_id;
- len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &val, len))
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
sctp_stream_outq_migrate(stream, NULL, outcnt);
sched->sched_all(stream);
- i = sctp_stream_alloc_out(stream, outcnt, gfp);
- if (i)
- return i;
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+ goto out;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
if (!incnt)
goto out;
- i = sctp_stream_alloc_in(stream, incnt, gfp);
- if (i) {
- ret = -ENOMEM;
- goto free;
+ ret = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (ret) {
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+ stream->outcnt = 0;
+ goto out;
}
stream->incnt = incnt;
- goto out;
-free:
- sched->free(stream);
- kfree(stream->out);
- stream->out = NULL;
out:
return ret;
}
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst = sctp_transport_dst_check(t);
+ bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
- pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
- /* Use default minimum segment size and disable
- * pmtu discovery on this transport.
- */
- t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- } else {
- t->pathmtu = pmtu;
+ pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+ /* Use default minimum segment instead */
+ pmtu = SCTP_DEFAULT_MINSEGMENT;
}
+ pmtu = SCTP_TRUNC4(pmtu);
if (dst) {
dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
}
- if (!dst)
+ if (!dst) {
t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+ dst = t->dst;
+ }
+
+ if (dst) {
+ /* Re-fetch, as under layers may have a higher minimum size */
+ pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ change = t->pathmtu != pmtu;
+ }
+ t->pathmtu = pmtu;
+
+ return change;
}
/* Caches the dst entry and source address for a transport's destination
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
- if (unlikely(fd < 0))
+ if (unlikely(fd < 0)) {
+ sock_release(sock);
return fd;
+ }
newfile = sock_alloc_file(sock, flags, NULL);
if (likely(!IS_ERR(newfile))) {
core_initcall(sock_init); /* early initcall */
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ bpf_jit_enable = 1;
+#endif
+ return 0;
+}
+pure_initcall(jit_init);
+
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m)
{
- if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+ m->state == MBR_REMITTED)
grp->active_cnt--;
}
int max_active = grp->max_active;
int reclaim_limit = max_active * 3 / 4;
int active_cnt = grp->active_cnt;
- struct tipc_member *m, *rm;
+ struct tipc_member *m, *rm, *pm;
m = tipc_group_find_member(grp, node, port);
if (!m)
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
}
+ grp->active_cnt--;
+ list_del_init(&m->list);
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
break;
case MBR_RECLAIMING:
case MBR_DISCOVERED:
if (!m || m->state != MBR_RECLAIMING)
return;
- list_del_init(&m->list);
- grp->active_cnt--;
remitted = msg_grp_remitted(hdr);
/* Messages preceding the REMIT still in receive queue */
if (m->advertised > remitted) {
m->state = MBR_REMITTED;
in_flight = m->advertised - remitted;
+ m->advertised = ADV_IDLE + in_flight;
+ return;
}
/* All messages preceding the REMIT have been read */
if (m->advertised <= remitted) {
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
m->advertised = ADV_IDLE + in_flight;
+ grp->active_cnt--;
+ list_del_init(&m->list);
/* Set oldest pending member to active and advertise */
if (list_empty(&grp->pending))
break;
case NL80211_NAN_FUNC_FOLLOW_UP:
if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
- !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
err = -EINVAL;
goto out;
}
-*.hash.c
*.lex.c
*.tab.c
*.tab.h
switch (type) {
case S_BOOLEAN:
case S_TRISTATE:
- return k_string;
+ val->s = !strcmp(str, "n") ? 0 :
+ !strcmp(str, "m") ? 1 :
+ !strcmp(str, "y") ? 2 : -1;
+ return k_signed;
case S_INT:
val->s = strtoll(str, &tail, 10);
kind = k_signed;
config PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
+ default y
depends on X86_64 && !UML
help
This feature reduces the number of hardware side channels by
ensuring that the majority of kernel addresses are not mapped
into userspace.
- See Documentation/x86/pagetable-isolation.txt for more details.
+ See Documentation/x86/pti.txt for more details.
config SECURITY_INFINIBAND
bool "Infiniband Security Hooks"
continue;
if (profile->xmatch) {
- if (profile->xmatch_len == len) {
- conflict = true;
- continue;
- } else if (profile->xmatch_len > len) {
+ if (profile->xmatch_len >= len) {
unsigned int state;
u32 perm;
perm = dfa_user_allow(profile->xmatch, state);
/* any accepting state means a valid match. */
if (perm & MAY_EXEC) {
+ if (profile->xmatch_len == len) {
+ conflict = true;
+ continue;
+ }
candidate = profile;
len = profile->xmatch_len;
conflict = false;
#define xcheck_labels_profiles(L1, L2, FN, args...) \
xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
+#define xcheck_labels(L1, L2, P, FN1, FN2) \
+ xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
+
void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
FLAGS_NONE, GFP_ATOMIC);
}
+/* assumes check for PROFILE_MEDIATES is already done */
/* TODO: conditionals */
static int profile_ptrace_perm(struct aa_profile *profile,
- struct aa_profile *peer, u32 request,
- struct common_audit_data *sa)
+ struct aa_label *peer, u32 request,
+ struct common_audit_data *sa)
{
struct aa_perms perms = { };
- /* need because of peer in cross check */
- if (profile_unconfined(profile) ||
- !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
- return 0;
-
- aad(sa)->peer = &peer->label;
- aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
+ aad(sa)->peer = peer;
+ aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
&perms);
aa_apply_modes_to_perms(profile, &perms);
return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
}
-static int cross_ptrace_perm(struct aa_profile *tracer,
- struct aa_profile *tracee, u32 request,
- struct common_audit_data *sa)
+static int profile_tracee_perm(struct aa_profile *tracee,
+ struct aa_label *tracer, u32 request,
+ struct common_audit_data *sa)
{
+ if (profile_unconfined(tracee) || unconfined(tracer) ||
+ !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+ return 0;
+
+ return profile_ptrace_perm(tracee, tracer, request, sa);
+}
+
+static int profile_tracer_perm(struct aa_profile *tracer,
+ struct aa_label *tracee, u32 request,
+ struct common_audit_data *sa)
+{
+ if (profile_unconfined(tracer))
+ return 0;
+
if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
- return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
- profile_ptrace_perm(tracee, tracer,
- request << PTRACE_PERM_SHIFT,
- sa));
- /* policy uses the old style capability check for ptrace */
- if (profile_unconfined(tracer) || tracer == tracee)
+ return profile_ptrace_perm(tracer, tracee, request, sa);
+
+ /* profile uses the old style capability check for ptrace */
+ if (&tracer->label == tracee)
return 0;
aad(sa)->label = &tracer->label;
- aad(sa)->peer = &tracee->label;
+ aad(sa)->peer = tracee;
aad(sa)->request = 0;
aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
u32 request)
{
+ struct aa_profile *profile;
+ u32 xrequest = request << PTRACE_PERM_SHIFT;
DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
- return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
- request, &sa);
+ return xcheck_labels(tracer, tracee, profile,
+ profile_tracer_perm(profile, tracee, request, &sa),
+ profile_tracee_perm(profile, tracer, xrequest, &sa));
}
AA_BUG(!mntpath);
AA_BUG(!buffer);
+ if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+ return 0;
+
error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
&mntpnt, &info, profile->disconnected);
if (error)
AA_BUG(!profile);
AA_BUG(devpath && !devbuffer);
+ if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+ return 0;
+
if (devpath) {
error = aa_path_name(devpath, path_flags(profile, devpath),
devbuffer, &devname, &info,
AA_BUG(!profile);
AA_BUG(!path);
+ if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+ return 0;
+
error = aa_path_name(path, path_flags(profile, path), buffer, &name,
&info, profile->disconnected);
if (error)
AA_BUG(!new_path);
AA_BUG(!old_path);
- if (profile_unconfined(profile))
+ if (profile_unconfined(profile) ||
+ !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
return aa_get_newest_label(&profile->label);
error = aa_path_name(old_path, path_flags(profile, old_path),
return m & ~VFS_CAP_FLAGS_EFFECTIVE;
}
-static bool is_v2header(size_t size, __le32 magic)
+static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
{
- __u32 m = le32_to_cpu(magic);
if (size != XATTR_CAPS_SZ_2)
return false;
- return sansflags(m) == VFS_CAP_REVISION_2;
+ return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
}
-static bool is_v3header(size_t size, __le32 magic)
+static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
{
- __u32 m = le32_to_cpu(magic);
-
if (size != XATTR_CAPS_SZ_3)
return false;
- return sansflags(m) == VFS_CAP_REVISION_3;
+ return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
}
/*
fs_ns = inode->i_sb->s_user_ns;
cap = (struct vfs_cap_data *) tmpbuf;
- if (is_v2header((size_t) ret, cap->magic_etc)) {
+ if (is_v2header((size_t) ret, cap)) {
/* If this is sizeof(vfs_cap_data) then we're ok with the
* on-disk value, so return that. */
if (alloc)
else
kfree(tmpbuf);
return ret;
- } else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+ } else if (!is_v3header((size_t) ret, cap)) {
kfree(tmpbuf);
return -EINVAL;
}
return make_kuid(task_ns, rootid);
}
-static bool validheader(size_t size, __le32 magic)
+static bool validheader(size_t size, const struct vfs_cap_data *cap)
{
- return is_v2header(size, magic) || is_v3header(size, magic);
+ return is_v2header(size, cap) || is_v3header(size, cap);
}
/*
if (!*ivalue)
return -EINVAL;
- if (!validheader(size, cap->magic_etc))
+ if (!validheader(size, cap))
return -EINVAL;
if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
return -EPERM;
v = snd_pcm_hw_param_last(pcm, params, var, dir);
else
v = snd_pcm_hw_param_first(pcm, params, var, dir);
- snd_BUG_ON(v < 0);
return v;
}
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
tmp = bytes;
if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
xfer += tmp;
if ((substream->f_flags & O_NONBLOCK) != 0 &&
tmp != runtime->oss.period_bytes)
- break;
+ tmp = -EAGAIN;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
if (runtime->oss.buffer_used == 0) {
tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
bytes -= tmp;
xfer += tmp;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
snd_pcm_sframes_t frames = size;
plugin = snd_pcm_plug_first(plug);
- while (plugin && frames > 0) {
+ while (plugin) {
+ if (frames <= 0)
+ return frames;
if ((next = plugin->next) != NULL) {
snd_pcm_sframes_t frames1 = frames;
- if (plugin->dst_frames)
+ if (plugin->dst_frames) {
frames1 = plugin->dst_frames(plugin, frames);
+ if (frames1 <= 0)
+ return frames1;
+ }
if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
return err;
}
if (err != frames1) {
frames = err;
- if (plugin->src_frames)
+ if (plugin->src_frames) {
frames = plugin->src_frames(plugin, frames1);
+ if (frames <= 0)
+ return frames;
+ }
}
} else
dst_channels = NULL;
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
return ret < 0 ? ret : frames;
}
-/* decrease the appl_ptr; returns the processed frames or a negative error */
+/* decrease the appl_ptr; returns the processed frames or zero for error */
static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
snd_pcm_uframes_t frames,
snd_pcm_sframes_t avail)
if (appl_ptr < 0)
appl_ptr += runtime->boundary;
ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
- return ret < 0 ? ret : frames;
+ /* NOTE: we return zero for errors because PulseAudio gets depressed
+ * upon receiving an error from rewind ioctl and stops processing
+ * any longer. Returning zero means that no rewind is done, so
+ * it's not absolutely wrong to answer like that.
+ */
+ return ret < 0 ? 0 : frames;
}
static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
#include <sound/core.h>
#include <sound/control.h>
#include <sound/pcm.h>
+#include <sound/pcm_params.h>
#include <sound/info.h>
#include <sound/initval.h>
return 0;
}
-static void params_change_substream(struct loopback_pcm *dpcm,
- struct snd_pcm_runtime *runtime)
-{
- struct snd_pcm_runtime *dst_runtime;
-
- if (dpcm == NULL || dpcm->substream == NULL)
- return;
- dst_runtime = dpcm->substream->runtime;
- if (dst_runtime == NULL)
- return;
- dst_runtime->hw = dpcm->cable->hw;
-}
-
static void params_change(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
cable->hw.rate_max = runtime->rate;
cable->hw.channels_min = runtime->channels;
cable->hw.channels_max = runtime->channels;
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
- runtime);
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
- runtime);
}
static int loopback_prepare(struct snd_pcm_substream *substream)
static int rule_format(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
+ struct snd_mask m;
- struct snd_pcm_hardware *hw = rule->private;
- struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
- maskp->bits[0] &= (u_int32_t)hw->formats;
- maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
- memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
- if (! maskp->bits[0] && ! maskp->bits[1])
- return -EINVAL;
- return 0;
+ snd_mask_none(&m);
+ mutex_lock(&dpcm->loopback->cable_lock);
+ m.bits[0] = (u_int32_t)cable->hw.formats;
+ m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+ mutex_unlock(&dpcm->loopback->cable_lock);
+ return snd_mask_refine(hw_param_mask(params, rule->var), &m);
}
static int rule_rate(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->rate_min;
- t.max = hw->rate_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.rate_min;
+ t.max = cable->hw.rate_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
static int rule_channels(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->channels_min;
- t.max = hw->channels_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.channels_min;
+ t.max = cable->hw.channels_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
}
+static void free_cable(struct snd_pcm_substream *substream)
+{
+ struct loopback *loopback = substream->private_data;
+ int dev = get_cable_index(substream);
+ struct loopback_cable *cable;
+
+ cable = loopback->cables[substream->number][dev];
+ if (!cable)
+ return;
+ if (cable->streams[!substream->stream]) {
+ /* other stream is still alive */
+ cable->streams[substream->stream] = NULL;
+ } else {
+ /* free the cable */
+ loopback->cables[substream->number][dev] = NULL;
+ kfree(cable);
+ }
+}
+
static int loopback_open(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm;
- struct loopback_cable *cable;
+ struct loopback_cable *cable = NULL;
int err = 0;
int dev = get_cable_index(substream);
if (!cable) {
cable = kzalloc(sizeof(*cable), GFP_KERNEL);
if (!cable) {
- kfree(dpcm);
err = -ENOMEM;
goto unlock;
}
/* are cached -> they do not reflect the actual state */
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_FORMAT,
- rule_format, &runtime->hw,
+ rule_format, dpcm,
SNDRV_PCM_HW_PARAM_FORMAT, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_RATE,
- rule_rate, &runtime->hw,
+ rule_rate, dpcm,
SNDRV_PCM_HW_PARAM_RATE, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_CHANNELS,
- rule_channels, &runtime->hw,
+ rule_channels, dpcm,
SNDRV_PCM_HW_PARAM_CHANNELS, -1);
if (err < 0)
goto unlock;
else
runtime->hw = cable->hw;
unlock:
+ if (err < 0) {
+ free_cable(substream);
+ kfree(dpcm);
+ }
mutex_unlock(&loopback->cable_lock);
return err;
}
{
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm = substream->runtime->private_data;
- struct loopback_cable *cable;
- int dev = get_cable_index(substream);
loopback_timer_stop(dpcm);
mutex_lock(&loopback->cable_lock);
- cable = loopback->cables[substream->number][dev];
- if (cable->streams[!substream->stream]) {
- /* other stream is still alive */
- cable->streams[substream->stream] = NULL;
- } else {
- /* free the cable */
- loopback->cables[substream->number][dev] = NULL;
- kfree(cable);
- }
+ free_cable(substream);
mutex_unlock(&loopback->cable_lock);
return 0;
}
@$(MAKE) $(build)=objtool
$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
- @./sync-check.sh
+ @$(CONFIG_SHELL) ./sync-check.sh
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
}
}
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines. This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+ struct section *sec;
+ struct rela *rela;
+ struct instruction *insn;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.nospec entry");
+ return -1;
+ }
+
+ insn->ignore_alts = true;
+ }
+
+ return 0;
+}
+
/*
* Find the destination instructions for all jumps.
*/
} else if (rela->sym->sec->idx) {
dest_sec = rela->sym->sec;
dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
+ */
+ insn->type = INSN_JUMP_DYNAMIC;
+ continue;
} else {
/* sibling call */
insn->jump_dest = 0;
dest_off = insn->offset + insn->len + insn->immediate;
insn->call_dest = find_symbol_by_offset(insn->sec,
dest_off);
+ /*
+ * FIXME: Thanks to retpolines, it's now considered
+ * normal for a function to call within itself. So
+ * disable this warning for now.
+ */
+#if 0
if (!insn->call_dest) {
WARN_FUNC("can't find call dest symbol at offset 0x%lx",
insn->sec, insn->offset, dest_off);
return -1;
}
+#endif
} else if (rela->sym->type == STT_SECTION) {
insn->call_dest = find_symbol_by_offset(rela->sym->sec,
rela->addend+4);
return ret;
list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
- alt = malloc(sizeof(*alt));
- if (!alt) {
- WARN("malloc failed");
- ret = -1;
- goto out;
- }
orig_insn = find_insn(file, special_alt->orig_sec,
special_alt->orig_off);
goto out;
}
+ /* Ignore retpoline alternatives. */
+ if (orig_insn->ignore_alts)
+ continue;
+
new_insn = NULL;
if (!special_alt->group || special_alt->new_len) {
new_insn = find_insn(file, special_alt->new_sec,
goto out;
}
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ ret = -1;
+ goto out;
+ }
+
alt->insn = new_insn;
list_add_tail(&alt->list, &orig_insn->alts);
add_ignores(file);
+ ret = add_nospec_ignores(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
unsigned int len;
unsigned char type;
unsigned long immediate;
- bool alt_group, visited, dead_end, ignore, hint, save, restore;
+ bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
struct symbol *call_dest;
struct instruction *jump_dest;
struct list_head alts;
.result = REJECT,
.matches = {
{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
- /* ptr & 0x40 == either 0 or 0x40 */
- {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
- /* ptr << 2 == unknown, (4n) */
- {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
- /* (4n) + 14 == (4n+2). We blow our bounds, because
- * the add could overflow.
- */
- {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
- /* Checked s>=0 */
- {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
- /* packet pointer + nonnegative (4n+2) */
- {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
- {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
- /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
- * We checked the bounds, but it might have been able
- * to overflow if the packet pointer started in the
- * upper half of the address space.
- * So we did not get a 'range' on R6, and the access
- * attempt will fail.
- */
- {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* R5 bitwise operator &= on pointer prohibited */
}
},
{
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
- protection_keys test_vdso
+ protection_keys test_vdso test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[128];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ char r, x;
+ void *start, *end;
+ char name[128];
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking for native vsyscall\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+ * In native mode, we expect two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_native_vsyscall();
+#endif
+
+ return nerrs ? 1 : 0;
+}