Merge branch 'for-4.18/mcsafe' into libnvdimm-for-next

author Dan Williams <dan.j.williams@intel.com>

Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)

committer Dan Williams <dan.j.williams@intel.com>

Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)
author Dan Williams <dan.j.williams@intel.com>
Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit

new file mode 100644 (file)

index 0000000..ae8c1ca
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -0,0 +1,17 @@
+What:          /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
+Date:          Aug, 2017
+KernelVersion: v4.14 (Removed v4.18)
+Contact:       linux-nvdimm@lists.01.org
+Description:
+               (RO) Size of a write request to a DIMM that will not incur a
+               read-modify-write cycle at the memory controller.
+
+               When the nfit driver initializes it runs an ARS (Address Range
+               Scrub) operation across every pmem range. Part of that process
+               involves determining the ARS capabilities of a given address
+               range. One of the capabilities that is reported is the 'Clear
+               Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
+               section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
+               This property indicates the boundary at which the NVDIMM may
+               need to perform read-modify-write cycles to maintain ECC (Error
+               Correcting Code) blocks.
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit

index 619eb8ca0f991a91625450e0afe4cd9d247f5ba1..a1cb44dcb90808201f58d18f9006a229a9be8296 100644 (file)
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -212,22 +212,3 @@ Description:
                 range. Used by NVDIMM Region Mapping Structure to uniquely refer
                 to this structure. Value of 0 is reserved and not used as an
                 index.
-
-
-What:          /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
-Date:          Aug, 2017
-KernelVersion: v4.14
-Contact:       linux-nvdimm@lists.01.org
-Description:
-               (RO) Size of a write request to a DIMM that will not incur a
-               read-modify-write cycle at the memory controller.
-
-               When the nfit driver initializes it runs an ARS (Address Range
-               Scrub) operation across every pmem range. Part of that process
-               involves determining the ARS capabilities of a given address
-               range. One of the capabilities that is reported is the 'Clear
-               Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
-               section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
-               This property indicates the boundary at which the NVDIMM may
-               need to perform read-modify-write cycles to maintain ECC (Error
-               Correcting Code) blocks.
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl

index 640f65e79ef1c00c94508b6b9f9fe8b63a1305a6..8e69345c37ccd9e1d74bd86f5858a45b1573f838 100644 (file)
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -244,3 +244,11 @@ Description:    read only
                  Returns 1 if the psl timebase register is synchronized
                  with the core timebase register, 0 otherwise.
  Users:          https://github.com/ibm-capi/libcxl
+
+What:           /sys/class/cxl/<card>/tunneled_ops_supported
+Date:           May 2018
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Returns 1 if tunneled operations are supported in capi mode,
+                0 otherwise.
+Users:          https://github.com/ibm-capi/libcxl
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu

index 025b7cf3768dc62f3e5e68129fcc36bdf994c659..bd4975e132d3438984d2f838daa814b1397fae5f 100644 (file)
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -478,6 +478,7 @@ What:               /sys/devices/system/cpu/vulnerabilities
                 /sys/devices/system/cpu/vulnerabilities/meltdown
                 /sys/devices/system/cpu/vulnerabilities/spectre_v1
                 /sys/devices/system/cpu/vulnerabilities/spectre_v2
+               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
  Date:          January 2018
  Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
  Description:   Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 11fc28ecdb6d9f2ea1ce28807caf59c96c93c164..f2040d46f0956bcc8c4502404d8f6d46892f0351 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2680,6 +2680,9 @@
                         allow data leaks with this option, which is equivalent
                         to spectre_v2=off.
  
+       nospec_store_bypass_disable
+                       [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
         noxsave         [BUGS=X86] Disables x86 extended register state save
                         and restore using xsave. The kernel will fallback to
                         enabling legacy floating-point and sse state.
@@ -4025,6 +4028,48 @@
                         Not specifying this option is equivalent to
                         spectre_v2=auto.
  
+       spec_store_bypass_disable=
+                       [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+                       (Speculative Store Bypass vulnerability)
+
+                       Certain CPUs are vulnerable to an exploit against a
+                       common industry-wide performance optimization known
+                       as "Speculative Store Bypass" in which recent stores
+                       to the same memory location may not be observed by
+                       later loads during speculative execution. The idea
+                       is that such stores are unlikely and that they can
+                       be detected prior to instruction retirement at the
+                       end of a particular speculative execution window.
+
+                       In vulnerable processors, the speculatively forwarded
+                       store can be used in a cache side channel attack, for
+                       example to read memory to which the attacker does not
+                       directly have access (e.g. inside sandboxed code).
+
+                       This parameter controls whether the Speculative Store
+                       Bypass optimization is used.
+
+                       on      - Unconditionally disable Speculative Store Bypass
+                       off     - Unconditionally enable Speculative Store Bypass
+                       auto    - Kernel detects whether the CPU model contains an
+                                 implementation of Speculative Store Bypass and
+                                 picks the most appropriate mitigation. If the
+                                 CPU is not vulnerable, "off" is selected. If the
+                                 CPU is vulnerable the default mitigation is
+                                 architecture and Kconfig dependent. See below.
+                       prctl   - Control Speculative Store Bypass per thread
+                                 via prctl. Speculative Store Bypass is enabled
+                                 for a process by default. The state of the control
+                                 is inherited on fork.
+                       seccomp - Same as "prctl" above, but all seccomp threads
+                                 will disable SSB unless they explicitly opt out.
+
+                       Not specifying this option is equivalent to
+                       spec_store_bypass_disable=auto.
+
+                       Default mitigations:
+                       X86:    If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
         spia_io_base=   [HW,MTD]
         spia_fio_base=
         spia_pedr=
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt

index 1814fa13f6ab8078a52a32fa2a55156124d696bb..fc019df0d8638df3c08e51bc8827987623c0c244 100644 (file)
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -21,9 +21,10 @@ Required properties:
         - main controller clock (for both armada-375-pp2 and armada-7k-pp2)
         - GOP clock (for both armada-375-pp2 and armada-7k-pp2)
         - MG clock (only for armada-7k-pp2)
+       - MG Core clock (only for armada-7k-pp2)
         - AXI clock (only for armada-7k-pp2)
-- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk"
-  and "axi_clk" (the 2 latter only for armada-7k-pp2).
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk",
+  "mg_core_clk" and "axi_clk" (the 3 latter only for armada-7k-pp2).
  
  The ethernet ports are represented by subnodes. At least one port is
  required.
@@ -80,8 +81,8 @@ cpm_ethernet: ethernet@0 {
         compatible = "marvell,armada-7k-pp22";
         reg = <0x0 0x100000>, <0x129000 0xb000>;
         clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>,
-                <&cpm_syscon0 1 5>, <&cpm_syscon0 1 18>;
-       clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk";
+                <&cpm_syscon0 1 5>, <&cpm_syscon0 1 6>, <&cpm_syscon0 1 18>;
+       clock-names = "pp_clk", "gop_clk", "mg_clk", "mg_core_clk", "axi_clk";
  
         eth0: eth0 {
                 interrupts = <ICU_GRP_NSR 39 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt

index 42a248301615d9e69915a424104f1fc932605175..e22d8cfea687435550f72e56385da9b07a0b739e 100644 (file)
--- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
@@ -57,6 +57,13 @@ KSZ9031:
        - txd2-skew-ps : Skew control of TX data 2 pad
        - txd3-skew-ps : Skew control of TX data 3 pad
  
+    - micrel,force-master:
+        Boolean, force phy to master mode. Only set this option if the phy
+        reference clock provided at CLK125_NDO pin is used as MAC reference
+        clock because the clock jitter in slave mode is too high (errata#2).
+        Attention: The link partner must be configurable as slave otherwise
+        no link will be established.
+
  Examples:
  
         mdio {
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt

index 091d20273dcbbce70c75d1c1de76af25e100e344..61daf4b39600917a3baec3d9e200047f0831493a 100644 (file)
--- a/Documentation/networking/ppp_generic.txt
+++ b/Documentation/networking/ppp_generic.txt
@@ -300,12 +300,6 @@ unattached instance are:
  The ioctl calls available on an instance of /dev/ppp attached to a
  channel are:
  
-* PPPIOCDETACH detaches the instance from the channel.  This ioctl is
-  deprecated since the same effect can be achieved by closing the
-  instance.  In order to prevent possible races this ioctl will fail
-  with an EINVAL error if more than one file descriptor refers to this
-  instance (i.e. as a result of dup(), dup2() or fork()).
-
  * PPPIOCCONNECT connects this channel to a PPP interface.  The
    argument should point to an int containing the interface unit
    number.  It will return an EINVAL error if the channel is already
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst

index 7b2eb1b7d4cab3f68b2a7569977a5fa8b7a39f23..a3233da7fa88ed94bf73aebceaf2b12a6a1169fc 100644 (file)
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -19,6 +19,7 @@ place where this information is gathered.
     no_new_privs
     seccomp_filter
     unshare
+   spec_ctrl
  
  .. only::  subproject and html
  
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst

new file mode 100644 (file)

index 0000000..32f3d55
--- /dev/null
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -0,0 +1,94 @@
+===================
+Speculation Control
+===================
+
+Many CPUs have speculation-related misfeatures which are in
+fact vulnerabilities causing data leaks in various forms even across
+privilege domains.
+
+The kernel provides mitigation for such vulnerabilities in various
+forms. Some of these mitigations are compile-time configurable and some
+can be supplied on the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
+the following meaning:
+
+==== ===================== ===================================================
+Bit  Define                Description
+==== ===================== ===================================================
+0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
+                           PR_SET_SPECULATION_CTRL.
+1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
+                           disabled.
+2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
+                           enabled.
+3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+                           subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
+
+If all bits are 0 the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+
+PR_SET_SPECULATION_CTRL allows per-task control of the speculation misfeature
+selected by arg2 of :manpage:`prctl(2)`. arg3 is used to hand
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+EINVAL  The prctl is not implemented by the architecture or unused
+        prctl(2) arguments are not 0.
+
+ENODEV  arg2 selects an unsupported speculation misfeature.
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+0       Success
+
+ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
+
+ENXIO   Control of the selected speculation misfeature is not possible.
+        See PR_GET_SPECULATION_CTRL.
+
+EPERM   Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+        tried to enable it again.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt

index d4f33eb805dd228a8d8aff796bd77327334aa24b..ab022dcd09117571d215294ed51c5675543165f1 100644 (file)
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
  
  flag                               || value || meaning
  ==================================================================================
-KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
-                                   ||       || determine if there is vCPU pinning
-                                   ||       || and there is no vCPU over-commitment,
+KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
+                                   ||       || determine that vCPUs are never
+                                   ||       || preempted for an unlimited time,
                                     ||       || allowing optimizations
  ----------------------------------------------------------------------------------
diff --git a/MAINTAINERS b/MAINTAINERS

index 58b9861ccf995d73c96da993ce8900f180982e8f..ca4afd68530ca7e83f403b9bb1f5025f6579cd01 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2332,7 +2332,7 @@ F:        drivers/gpio/gpio-ath79.c
  F:     Documentation/devicetree/bindings/gpio/gpio-ath79.txt
  
  ATHEROS ATH GENERIC UTILITIES
-M:     "Luis R. Rodriguez" <mcgrof@do-not-panic.com>
+M:     Kalle Valo <kvalo@codeaurora.org>
  L:     linux-wireless@vger.kernel.org
  S:     Supported
  F:     drivers/net/wireless/ath/*
@@ -2347,7 +2347,7 @@ S:        Maintained
  F:     drivers/net/wireless/ath/ath5k/
  
  ATHEROS ATH6KL WIRELESS DRIVER
-M:     Kalle Valo <kvalo@qca.qualcomm.com>
+M:     Kalle Valo <kvalo@codeaurora.org>
  L:     linux-wireless@vger.kernel.org
  W:     http://wireless.kernel.org/en/users/Drivers/ath6kl
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -4309,7 +4309,7 @@ F:        Documentation/driver-api/dma-buf.rst
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  
  DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
-M:     Vinod Koul <vinod.koul@intel.com>
+M:     Vinod Koul <vkoul@kernel.org>
  L:     dmaengine@vger.kernel.org
  Q:     https://patchwork.kernel.org/project/linux-dmaengine/list/
  S:     Maintained
@@ -5388,7 +5388,6 @@ S:        Maintained
  F:     drivers/iommu/exynos-iommu.c
  
  EZchip NPS platform support
-M:     Elad Kanfi <eladkan@mellanox.com>
  M:     Vineet Gupta <vgupta@synopsys.com>
  S:     Supported
  F:     arch/arc/plat-eznps
@@ -6504,9 +6503,15 @@ F:       Documentation/networking/hinic.txt
  F:     drivers/net/ethernet/huawei/hinic/
  
  HUGETLB FILESYSTEM
-M:     Nadia Yvette Chambers <nyc@holomorphy.com>
+M:     Mike Kravetz <mike.kravetz@oracle.com>
+L:     linux-mm@kvack.org
  S:     Maintained
  F:     fs/hugetlbfs/
+F:     mm/hugetlb.c
+F:     include/linux/hugetlb.h
+F:     Documentation/admin-guide/mm/hugetlbpage.rst
+F:     Documentation/vm/hugetlbfs_reserv.rst
+F:     Documentation/ABI/testing/sysfs-kernel-mm-hugepages
  
  HVA ST MEDIA DRIVER
  M:     Jean-Christophe Trotin <jean-christophe.trotin@st.com>
@@ -7698,6 +7703,7 @@ KERNEL SELFTEST FRAMEWORK
  M:     Shuah Khan <shuah@kernel.org>
  L:     linux-kselftest@vger.kernel.org
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
+Q:     https://patchwork.kernel.org/project/linux-kselftest/list/
  S:     Maintained
  F:     tools/testing/selftests/
  F:     Documentation/dev-tools/kselftest*
@@ -9020,7 +9026,6 @@ Q:        http://patchwork.ozlabs.org/project/netdev/list/
  F:     drivers/net/ethernet/mellanox/mlx5/core/en_*
  
  MELLANOX ETHERNET INNOVA DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
  R:     Boris Pismenny <borisp@mellanox.com>
  L:     netdev@vger.kernel.org
  S:     Supported
@@ -9030,7 +9035,6 @@ F:        drivers/net/ethernet/mellanox/mlx5/core/fpga/*
  F:     include/linux/mlx5/mlx5_ifc_fpga.h
  
  MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
  R:     Boris Pismenny <borisp@mellanox.com>
  L:     netdev@vger.kernel.org
  S:     Supported
@@ -9086,7 +9090,6 @@ F:        include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX5 core VPI driver
  M:     Saeed Mahameed <saeedm@mellanox.com>
-M:     Matan Barak <matanb@mellanox.com>
  M:     Leon Romanovsky <leonro@mellanox.com>
  L:     netdev@vger.kernel.org
  L:     linux-rdma@vger.kernel.org
@@ -9097,7 +9100,6 @@ F:        drivers/net/ethernet/mellanox/mlx5/core/
  F:     include/linux/mlx5/
  
  MELLANOX MLX5 IB driver
-M:     Matan Barak <matanb@mellanox.com>
  M:     Leon Romanovsky <leonro@mellanox.com>
  L:     linux-rdma@vger.kernel.org
  W:     http://www.mellanox.com
@@ -9831,7 +9833,6 @@ F:        net/netfilter/xt_CONNSECMARK.c
  F:     net/netfilter/xt_SECMARK.c
  
  NETWORKING [TLS]
-M:     Ilya Lesokhin <ilyal@mellanox.com>
  M:     Aviad Yehezkel <aviadye@mellanox.com>
  M:     Dave Watson <davejwatson@fb.com>
  L:     netdev@vger.kernel.org
@@ -11631,7 +11632,7 @@ S:      Maintained
  F:     drivers/media/tuners/qt1010*
  
  QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
-M:     Kalle Valo <kvalo@qca.qualcomm.com>
+M:     Kalle Valo <kvalo@codeaurora.org>
  L:     ath10k@lists.infradead.org
  W:     http://wireless.kernel.org/en/users/Drivers/ath10k
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -11682,7 +11683,7 @@ S:      Maintained
  F:     drivers/media/platform/qcom/venus/
  
  QUALCOMM WCN36XX WIRELESS DRIVER
-M:     Eugene Krasnikov <k.eugene.e@gmail.com>
+M:     Kalle Valo <kvalo@codeaurora.org>
  L:     wcn36xx@lists.infradead.org
  W:     http://wireless.kernel.org/en/users/Drivers/wcn36xx
  T:     git git://github.com/KrasnikovEugene/wcn36xx.git
@@ -12220,7 +12221,7 @@ F:      Documentation/s390/vfio-ccw.txt
  F:     include/uapi/linux/vfio_ccw.h
  
  S390 ZCRYPT DRIVER
-M:     Harald Freudenberger <freude@de.ibm.com>
+M:     Harald Freudenberger <freude@linux.ibm.com>
  L:     linux-s390@vger.kernel.org
  W:     http://www.ibm.com/developerworks/linux/linux390/
  S:     Supported
@@ -13264,6 +13265,12 @@ M:     Jan-Benedict Glaw <jbglaw@lug-owl.de>
  S:     Maintained
  F:     arch/alpha/kernel/srm_env.c
  
+ST STM32 I2C/SMBUS DRIVER
+M:     Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     drivers/i2c/busses/i2c-stm32*
+
  STABLE BRANCH
  M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  L:     stable@vger.kernel.org
diff --git a/Makefile b/Makefile

index ba3106b3659783c563d84cf6683960d18a23bcf3..56ba070dfa09022616736b8add505bb681adfadf 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
  VERSION = 4
  PATCHLEVEL = 17
  SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc7
  NAME = Merciless Moray
  
  # *DOCUMENTATION*
@@ -500,6 +500,9 @@ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
  RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
  export RETPOLINE_CFLAGS
  
+KBUILD_CFLAGS  += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS  += $(call cc-option,-fno-PIE)
+
  # check for 'asm goto'
  ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
    CC_HAVE_ASM_GOTO := 1
@@ -621,9 +624,9 @@ endif # $(dot-config)
  # Defaults to vmlinux, but the arch makefile usually adds further targets
  all: vmlinux
  
-KBUILD_CFLAGS  += $(call cc-option,-fno-PIE)
-KBUILD_AFLAGS  += $(call cc-option,-fno-PIE)
-CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
+CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage \
+       $(call cc-option,-fno-tree-loop-im) \
+       $(call cc-disable-warning,maybe-uninitialized,)
  export CFLAGS_GCOV CFLAGS_KCOV
  
  # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig

index b2022885ced8ab05f9f1837b2c6cfde62f37a249..f19dc31288c83e11fec0e8850835b2fc53c50fc4 100644 (file)
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -211,6 +211,7 @@ config ALPHA_EIGER
  config ALPHA_JENSEN
         bool "Jensen"
         depends on BROKEN
+       select DMA_DIRECT_OPS
         help
           DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one
           of the first-generation Alpha systems. A number of these systems
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h

index b78f61f20796b2ea20e6b5b17777f44193783a5d..8beeafd4f68e45c8e7e1a6a006719f549cf70ca3 100644 (file)
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -2,11 +2,15 @@
  #ifndef _ALPHA_DMA_MAPPING_H
  #define _ALPHA_DMA_MAPPING_H
  
-extern const struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops alpha_pci_ops;
  
  static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
  {
-       return dma_ops;
+#ifdef CONFIG_ALPHA_JENSEN
+       return &dma_direct_ops;
+#else
+       return &alpha_pci_ops;
+#endif
  }
  
  #endif /* _ALPHA_DMA_MAPPING_H */
diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c

index 3e3d49c254c52ef09d80c1b964530c9289d00ec7..c025a3e5e3578beb3ecaa521333e3b1d97e98760 100644 (file)
--- a/arch/alpha/kernel/io.c
+++ b/arch/alpha/kernel/io.c
@@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr)
  
  void iowrite8(u8 b, void __iomem *addr)
  {
-       IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
         mb();
+       IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
  }
  
  void iowrite16(u16 b, void __iomem *addr)
  {
-       IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
         mb();
+       IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
  }
  
  void iowrite32(u32 b, void __iomem *addr)
  {
-       IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
         mb();
+       IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
  }
  
  EXPORT_SYMBOL(ioread8);
@@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr)
  
  void writeb(u8 b, volatile void __iomem *addr)
  {
-       __raw_writeb(b, addr);
         mb();
+       __raw_writeb(b, addr);
  }
  
  void writew(u16 b, volatile void __iomem *addr)
  {
-       __raw_writew(b, addr);
         mb();
+       __raw_writew(b, addr);
  }
  
  void writel(u32 b, volatile void __iomem *addr)
  {
-       __raw_writel(b, addr);
         mb();
+       __raw_writel(b, addr);
  }
  
  void writeq(u64 b, volatile void __iomem *addr)
  {
-       __raw_writeq(b, addr);
         mb();
+       __raw_writeq(b, addr);
  }
  
  EXPORT_SYMBOL(readb);
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c

index b6ebb65127a80e8bfc3dfce3580655bcc7d0a540..c7c5879869d35092d45fcb9c35a1245f916fbdf4 100644 (file)
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
         else
                 return -ENODEV;
  }
-
-static void *alpha_noop_alloc_coherent(struct device *dev, size_t size,
-                                      dma_addr_t *dma_handle, gfp_t gfp,
-                                      unsigned long attrs)
-{
-       void *ret;
-
-       if (!dev || *dev->dma_mask >= 0xffffffffUL)
-               gfp &= ~GFP_DMA;
-       ret = (void *)__get_free_pages(gfp, get_order(size));
-       if (ret) {
-               memset(ret, 0, size);
-               *dma_handle = virt_to_phys(ret);
-       }
-       return ret;
-}
-
-static int alpha_noop_supported(struct device *dev, u64 mask)
-{
-       return mask < 0x00ffffffUL ? 0 : 1;
-}
-
-const struct dma_map_ops alpha_noop_ops = {
-       .alloc                  = alpha_noop_alloc_coherent,
-       .free                   = dma_noop_free_coherent,
-       .map_page               = dma_noop_map_page,
-       .map_sg                 = dma_noop_map_sg,
-       .mapping_error          = dma_noop_mapping_error,
-       .dma_supported          = alpha_noop_supported,
-};
-
-const struct dma_map_ops *dma_ops = &alpha_noop_ops;
-EXPORT_SYMBOL(dma_ops);
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c

index 83b34b9188ea192517ce72a0d9d260abb5b2e2ac..6923b0d9c1e195d1751d19e4335271b40d3c0226 100644 (file)
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = {
         .mapping_error          = alpha_pci_mapping_error,
         .dma_supported          = alpha_pci_supported,
  };
-
-const struct dma_map_ops *dma_ops = &alpha_pci_ops;
-EXPORT_SYMBOL(dma_ops);
+EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile

index 45a6b9b7af2a56f179427d353ad70c3cd920bb69..6a4e7341ecd33284f835a65fcc83a3e52ee4d524 100644 (file)
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -117,11 +117,9 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
  asflags-y := -DZIMAGE
  
  # Supply kernel BSS size to the decompressor via a linker symbol.
-KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
-               perl -e 'while (<>) { \
-                       $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \
-                       $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \
-               }; printf "%d\n", $$bss_end - $$bss_start;')
+KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
+               sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
+                      -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
  LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
  # Supply ZRELADDR to the decompressor via a linker symbol.
  ifneq ($(CONFIG_AUTO_ZRELADDR),y)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S

index 45c8823c37503d3bfdc2beee80304c556db8bf53..517e0e18f0b8307855447abfec63f0827cb72cec 100644 (file)
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -29,19 +29,19 @@
  #if defined(CONFIG_DEBUG_ICEDCC)
  
  #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                 .endm
                 .macro  writeb, ch, rb
                 mcr     p14, 0, \ch, c0, c5, 0
                 .endm
  #elif defined(CONFIG_CPU_XSCALE)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                 .endm
                 .macro  writeb, ch, rb
                 mcr     p14, 0, \ch, c8, c0, 0
                 .endm
  #else
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                 .endm
                 .macro  writeb, ch, rb
                 mcr     p14, 0, \ch, c1, c0, 0
@@ -57,7 +57,7 @@
                 .endm
  
  #if defined(CONFIG_ARCH_SA1100)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                 mov     \rb, #0x80000000        @ physical base address
  #ifdef CONFIG_DEBUG_LL_SER3
                 add     \rb, \rb, #0x00050000   @ Ser3
@@ -66,8 +66,8 @@
  #endif
                 .endm
  #else
-               .macro  loadsp, rb, tmp
-               addruart \rb, \tmp
+               .macro  loadsp, rb, tmp1, tmp2
+               addruart \rb, \tmp1, \tmp2
                 .endm
  #endif
  #endif
@@ -561,8 +561,6 @@ not_relocated:      mov     r0, #0
                 bl      decompress_kernel
                 bl      cache_clean_flush
                 bl      cache_off
-               mov     r1, r7                  @ restore architecture number
-               mov     r2, r8                  @ restore atags pointer
  
  #ifdef CONFIG_ARM_VIRT_EXT
                 mrs     r0, spsr                @ Get saved CPU boot mode
@@ -1297,7 +1295,7 @@ phex:             adr     r3, phexbuf
                 b       1b
  
  @ puts corrupts {r0, r1, r2, r3}
-puts:          loadsp  r3, r1
+puts:          loadsp  r3, r2, r1
  1:             ldrb    r2, [r0], #1
                 teq     r2, #0
                 moveq   pc, lr
@@ -1314,8 +1312,8 @@ puts:             loadsp  r3, r1
  @ putc corrupts {r0, r1, r2, r3}
  putc:
                 mov     r2, r0
+               loadsp  r3, r1, r0
                 mov     r0, #0
-               loadsp  r3, r1
                 b       2b
  
  @ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -1365,6 +1363,8 @@ __hyp_reentry_vectors:
  
  __enter_kernel:
                 mov     r0, #0                  @ must be 0
+               mov     r1, r7                  @ restore architecture number
+               mov     r2, r8                  @ restore atags pointer
   ARM(          mov     pc, r4          )       @ call kernel
   M_CLASS(      add     r4, r4, #1      )       @ enter in Thumb mode for M class
   THUMB(                bx      r4              )       @ entry point is always ARM for A/R classes
diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi

index 699fdf94d139bdffea5b28fbc4bab912895583f2..9fe4f5a6379e3b60d79a6ed8a0327f680434861e 100644 (file)
--- a/arch/arm/boot/dts/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
@@ -69,7 +69,7 @@
                 timer@20200 {
                         compatible = "arm,cortex-a9-global-timer";
                         reg = <0x20200 0x100>;
-                       interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                         clocks = <&periph_clk>;
                 };
  
diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts

index a1f4d6d5a569bbb740b6a5d27f7232155853ec4a..0edf769ea95c93c1449dfef12b71213251d7c973 100644 (file)
--- a/arch/arm/boot/dts/da850-lcdk.dts
+++ b/arch/arm/boot/dts/da850-lcdk.dts
@@ -21,8 +21,8 @@
                 stdout-path = "serial2:115200n8";
         };
  
-       memory {
-               device_type = "memory";
+       memory@c0000000 {
+               /* 128 MB DDR2 SDRAM @ 0xc0000000 */
                 reg = <0xc0000000 0x08000000>;
         };
  
diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi

index c66cf78953639db3ed025e4997beb6ef77719064..12010002dbdb65cbdf9b87d2f37bf39e72903b3e 100644 (file)
--- a/arch/arm/boot/dts/da850.dtsi
+++ b/arch/arm/boot/dts/da850.dtsi
@@ -7,10 +7,19 @@
   * Free Software Foundation;  either version 2 of the  License, or (at your
   * option) any later version.
   */
-#include "skeleton.dtsi"
  #include <dt-bindings/interrupt-controller/irq.h>
  
  / {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       chosen { };
+       aliases { };
+
+       memory@c0000000 {
+               device_type = "memory";
+               reg = <0xc0000000 0x0>;
+       };
+
         arm {
                 #address-cells = <1>;
                 #size-cells = <1>;
@@ -46,8 +55,6 @@
                 pmx_core: pinmux@14120 {
                         compatible = "pinctrl-single";
                         reg = <0x14120 0x50>;
-                       #address-cells = <1>;
-                       #size-cells = <0>;
                         #pinctrl-cells = <2>;
                         pinctrl-single,bit-per-mux;
                         pinctrl-single,register-width = <32>;
diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts

index d6657b3bae84b11395b46bc4d130964179b0bde1..85d7b5148b0ac6c6fc0009b72dac20c89ee16271 100644 (file)
--- a/arch/arm/boot/dts/dm8148-evm.dts
+++ b/arch/arm/boot/dts/dm8148-evm.dts
@@ -10,7 +10,7 @@
  
  / {
         model = "DM8148 EVM";
-       compatible = "ti,dm8148-evm", "ti,dm8148";
+       compatible = "ti,dm8148-evm", "ti,dm8148", "ti,dm814";
  
         memory@80000000 {
                 device_type = "memory";
diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts

index 63883b3479f95f22cd4787f8c10730dc2c20dc9b..6418f9cdbe83faaada09aeb75371724afe5bdda1 100644 (file)
--- a/arch/arm/boot/dts/dm8148-t410.dts
+++ b/arch/arm/boot/dts/dm8148-t410.dts
@@ -9,7 +9,7 @@
  
  / {
         model = "HP t410 Smart Zero Client";
-       compatible = "hp,t410", "ti,dm8148";
+       compatible = "hp,t410", "ti,dm8148", "ti,dm814";
  
         memory@80000000 {
                 device_type = "memory";
diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts

index c72a2132aa823b053c5ab9450a92faf266c85878..1d030d567307172b17766e1a649924c5b5a48eb7 100644 (file)
--- a/arch/arm/boot/dts/dm8168-evm.dts
+++ b/arch/arm/boot/dts/dm8168-evm.dts
@@ -10,7 +10,7 @@
  
  / {
         model = "DM8168 EVM";
-       compatible = "ti,dm8168-evm", "ti,dm8168";
+       compatible = "ti,dm8168-evm", "ti,dm8168", "ti,dm816";
  
         memory@80000000 {
                 device_type = "memory";
diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts

index fee0547f7302eca5b18fab7ed2d26d7bc0857134..31b824ad5d29fa822ffba094a8151fd0789369dc 100644 (file)
--- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts
+++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
@@ -10,7 +10,7 @@
  
  / {
         model = "DRA62x J5 Eco EVM";
-       compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148";
+       compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148", "ti,dm814";
  
         memory@80000000 {
                 device_type = "memory";
diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts

index 0c99ac04ad08b2fb9e95910ab055d8e539f181d7..6464f2560e066b559fd51aa5c806f80f57dd1493 100644 (file)
--- a/arch/arm/boot/dts/imx51-zii-rdu1.dts
+++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts
@@ -523,7 +523,7 @@
         };
  
         touchscreen@20 {
-               compatible = "syna,rmi4_i2c";
+               compatible = "syna,rmi4-i2c";
                 reg = <0x20>;
                 pinctrl-names = "default";
                 pinctrl-0 = <&pinctrl_ts>;
@@ -541,8 +541,8 @@
  
                 rmi4-f11@11 {
                         reg = <0x11>;
-                       touch-inverted-y;
-                       touch-swapped-x-y;
+                       touchscreen-inverted-y;
+                       touchscreen-swapped-x-y;
                         syna,sensor-type = <1>;
                 };
         };
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi

index 4d42335c0dee991aa18ff8c40526865eb89cd97c..ce85b3ca1a55fc3eebc13607240ab499231e131d 100644 (file)
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -868,6 +868,7 @@
  
                         crypto: caam@30900000 {
                                 compatible = "fsl,sec-v4.0";
+                               fsl,sec-era = <8>;
                                 #address-cells = <1>;
                                 #size-cells = <1>;
                                 reg = <0x30900000 0x40000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi

index b47cac23a04be5d18b9324248ab091885a908077..6fa7bba3e801508fde024cac085e0d9f7b43e96c 100644 (file)
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -26,7 +26,7 @@
                 gpio = <&gpio1 3 0>;   /* gpio_3 */
                 startup-delay-us = <70000>;
                 enable-active-high;
-               vin-supply = <&vmmc2>;
+               vin-supply = <&vaux3>;
         };
  
         /* HS USB Host PHY on PORT 1 */
@@ -82,6 +82,7 @@
                 twl_audio: audio {
                         compatible = "ti,twl4030-audio";
                         codec {
+                               ti,hs_extmute_gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>;
                         };
                 };
         };
@@ -199,6 +200,7 @@
                 pinctrl-single,pins = <
                         OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
                         OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+                       OMAP3_CORE1_IOPAD(0x20ba, PIN_OUTPUT | MUX_MODE4)        /* gpmc_ncs6.gpio_57 */
                 >;
         };
  };
@@ -213,7 +215,7 @@
         };
         wl127x_gpio: pinmux_wl127x_gpio_pin {
                 pinctrl-single,pins = <
-                       OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4)         /* sys_boot0.gpio_2 */
+                       OMAP3_WKUP_IOPAD(0x2a0a, PIN_INPUT | MUX_MODE4)         /* sys_boot0.gpio_2 */
                         OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)        /* sys_boot1.gpio_3 */
                 >;
         };
@@ -260,6 +262,11 @@
  #include "twl4030.dtsi"
  #include "twl4030_omap3.dtsi"
  
+&vaux3 {
+       regulator-min-microvolt = <2800000>;
+       regulator-max-microvolt = <2800000>;
+};
+
  &twl {
         twl_power: power {
                 compatible = "ti,twl4030-power-idle-osc-off", "ti,twl4030-power-idle";
diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts

index 063fdb65dc60dfc22c915103853a2ce9a137dbf0..f07f9018c3e72e4631967dc3ca3c3da8d6772aef 100644 (file)
--- a/arch/arm/boot/dts/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/r8a7790-lager.dts
@@ -379,7 +379,7 @@
                                 port@0 {
                                         reg = <0>;
                                         adv7511_in: endpoint {
-                                               remote-endpoint = <&du_out_lvds0>;
+                                               remote-endpoint = <&lvds0_out>;
                                         };
                                 };
  
@@ -467,10 +467,8 @@
         status = "okay";
  
         clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>,
-                <&cpg CPG_MOD 726>, <&cpg CPG_MOD 725>,
                  <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "du.2", "dclkin.0", "dclkin.1";
  
         ports {
                 port@0 {
@@ -478,12 +476,26 @@
                                 remote-endpoint = <&adv7123_in>;
                         };
                 };
+       };
+};
+
+&lvds0 {
+       status = "okay";
+
+       ports {
                 port@1 {
                         endpoint {
                                 remote-endpoint = <&adv7511_in>;
                         };
                 };
-               port@2 {
+       };
+};
+
+&lvds1 {
+       status = "okay";
+
+       ports {
+               port@1 {
                         lvds_connector: endpoint {
                         };
                 };
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi

index e4367cecad18a1d0f5e3c21dde4c1985b0fb7297..05a0fc23ac88f8ded558fe31da8ae15358c1ad15 100644 (file)
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1627,18 +1627,13 @@
  
                 du: display@feb00000 {
                         compatible = "renesas,du-r8a7790";
-                       reg = <0 0xfeb00000 0 0x70000>,
-                             <0 0xfeb90000 0 0x1c>,
-                             <0 0xfeb94000 0 0x1c>;
-                       reg-names = "du", "lvds.0", "lvds.1";
+                       reg = <0 0xfeb00000 0 0x70000>;
                         interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                      <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
                                      <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>;
                         clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 722>, <&cpg CPG_MOD 726>,
-                                <&cpg CPG_MOD 725>;
-                       clock-names = "du.0", "du.1", "du.2", "lvds.0",
-                                     "lvds.1";
+                                <&cpg CPG_MOD 722>;
+                       clock-names = "du.0", "du.1", "du.2";
                         status = "disabled";
  
                         ports {
@@ -1653,11 +1648,65 @@
                                 port@1 {
                                         reg = <1>;
                                         du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
                                         };
                                 };
                                 port@2 {
                                         reg = <2>;
                                         du_out_lvds1: endpoint {
+                                               remote-endpoint = <&lvds1_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7790-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
+                                       };
+                               };
+                       };
+               };
+
+               lvds1: lvds@feb94000 {
+                       compatible = "renesas,r8a7790-lvds";
+                       reg = <0 0xfeb94000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 725>;
+                       power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+                       resets = <&cpg 725>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds1_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds1>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds1_out: endpoint {
                                         };
                                 };
                         };
diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts

index f40321a1c917e7d1c7546176319541a0da37ba08..9d7213a0b8b826506e14c51b95fe10724434b9b6 100644 (file)
--- a/arch/arm/boot/dts/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/r8a7791-koelsch.dts
@@ -468,10 +468,9 @@
         pinctrl-names = "default";
         status = "okay";
  
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                  <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
  
         ports {
                 port@0 {
@@ -479,6 +478,13 @@
                                 remote-endpoint = <&adv7511_in>;
                         };
                 };
+       };
+};
+
+&lvds0 {
+       status = "okay";
+
+       ports {
                 port@1 {
                         lvds_connector: endpoint {
                         };
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts

index c14e6fe9e4f69b1f070da271a03842ad699d898d..ae9ed9ff53efde994a4d4735227bb0a8199fab81 100644 (file)
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -441,10 +441,9 @@
         pinctrl-names = "default";
         status = "okay";
  
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                  <&x3_clk>, <&x16_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
  
         ports {
                 port@0 {
@@ -455,6 +454,17 @@
         };
  };
  
+&lvds0 {
+       status = "okay";
+
+       ports {
+               port@1 {
+                       lvds_connector: endpoint {
+                       };
+               };
+       };
+};
+
  &rcar_sound {
         pinctrl-0 = <&ssi_pins &audio_clk_pins>;
         pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi

index f11dab71b03a9f7aaef287fe38f324b4c9b0d137..506b20885413398825a353b6b84938175cfd255f 100644 (file)
--- a/arch/arm/boot/dts/r8a7791.dtsi
+++ b/arch/arm/boot/dts/r8a7791.dtsi
@@ -1633,15 +1633,12 @@
  
                 du: display@feb00000 {
                         compatible = "renesas,du-r8a7791";
-                       reg = <0 0xfeb00000 0 0x40000>,
-                             <0 0xfeb90000 0 0x1c>;
-                       reg-names = "du", "lvds.0";
+                       reg = <0 0xfeb00000 0 0x40000>;
                         interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                      <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
                         clocks = <&cpg CPG_MOD 724>,
-                                <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 726>;
-                       clock-names = "du.0", "du.1", "lvds.0";
+                                <&cpg CPG_MOD 723>;
+                       clock-names = "du.0", "du.1";
                         status = "disabled";
  
                         ports {
@@ -1656,6 +1653,33 @@
                                 port@1 {
                                         reg = <1>;
                                         du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7791-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
                                         };
                                 };
                         };
diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts

index 9ed6961f2d9a2c4a1a671980f3c336f5d137cc7d..96e117d8b2cce0f8e4070d05fdb48995775ee667 100644 (file)
--- a/arch/arm/boot/dts/r8a7793-gose.dts
+++ b/arch/arm/boot/dts/r8a7793-gose.dts
@@ -447,10 +447,9 @@
         pinctrl-names = "default";
         status = "okay";
  
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                  <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
  
         ports {
                 port@0 {
@@ -458,6 +457,11 @@
                                 remote-endpoint = <&adv7511_in>;
                         };
                 };
+       };
+};
+
+&lvds0 {
+       ports {
                 port@1 {
                         lvds_connector: endpoint {
                         };
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi

index f9c5a557107d932756e02c9c10da743dfe9be673..4f526030dc7cb7011e67f8ff26fad601d6af867f 100644 (file)
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1292,15 +1292,12 @@
  
                 du: display@feb00000 {
                         compatible = "renesas,du-r8a7793";
-                       reg = <0 0xfeb00000 0 0x40000>,
-                             <0 0xfeb90000 0 0x1c>;
-                       reg-names = "du", "lvds.0";
+                       reg = <0 0xfeb00000 0 0x40000>;
                         interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                      <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
                         clocks = <&cpg CPG_MOD 724>,
-                                <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 726>;
-                       clock-names = "du.0", "du.1", "lvds.0";
+                                <&cpg CPG_MOD 723>;
+                       clock-names = "du.0", "du.1";
                         status = "disabled";
  
                         ports {
@@ -1315,6 +1312,34 @@
                                 port@1 {
                                         reg = <1>;
                                         du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7793-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7793_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
                                         };
                                 };
                         };
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi

index 77e8436beed46c2a5530dd9987af8f17bc9ef413..3a1c6b45c9a1cd3e6ef699d9c3b45fdb2712fcfd 100644 (file)
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -76,7 +76,7 @@
                         allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi";
                         clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_HDMI0>,
                                  <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
-                                <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
+                                <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>,
                                  <&ccu CLK_TCON0_CH1>, <&ccu CLK_HDMI>,
                                  <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
                         status = "disabled";
@@ -88,7 +88,7 @@
                         allwinner,pipeline = "de_fe0-de_be0-lcd0";
                         clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>,
                                  <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_BE0>,
-                                <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_TCON0_CH0>,
+                                <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH0>,
                                  <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
                         status = "disabled";
                 };
@@ -99,7 +99,7 @@
                         allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0";
                         clocks = <&ccu CLK_AHB_TVE0>, <&ccu CLK_AHB_LCD0>,
                                  <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
-                                <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
+                                <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>,
                                  <&ccu CLK_TCON0_CH1>, <&ccu CLK_DRAM_TVE0>,
                                  <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
                         status = "disabled";
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts

index 3328fe583c9ba3f3d52bb615d33a0a9f03c29647..232f124ce62c0d241864e719aa17adf10f53f286 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -117,6 +117,7 @@
         phy-handle = <&int_mii_phy>;
         phy-mode = "mii";
         allwinner,leds-active-low;
+       status = "okay";
  };
  
  &hdmi {
diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts

index d1311098ea459b3cf65d96f7a67a1297728bea6b..ad173605b1b890f13a2bde6be4726a44f93ba095 100644 (file)
--- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts
+++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts
@@ -51,7 +51,7 @@
  
         leds {
                 /* The LEDs use PG0~2 pins, which conflict with MMC1 */
-               status = "disbaled";
+               status = "disabled";
         };
  };
  
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi

index 0a7136462a1a6dfa698f1f5b909918e6ec3b1cf7..983dd5c1479459f2ba27f43dbefdbaaa411cffb5 100644 (file)
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -741,7 +741,7 @@
                 phy_type = "ulpi";
                 clocks = <&tegra_car TEGRA20_CLK_USB2>,
                          <&tegra_car TEGRA20_CLK_PLL_U>,
-                        <&tegra_car TEGRA20_CLK_PLL_P_OUT4>;
+                        <&tegra_car TEGRA20_CLK_CDEV2>;
                 clock-names = "reg", "pll_u", "ulpi-link";
                 resets = <&tegra_car 58>, <&tegra_car 22>;
                 reset-names = "usb", "utmi-pads";
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h

index bc8d4bbd82e27719a990c7972fd77bfca9dc7aef..9342904cccca67ac3cfba4e0b75bcbc17893ade0 100644 (file)
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -536,4 +536,14 @@ THUMB(     orr     \reg , \reg , #PSR_T_BIT        )
  #endif
         .endm
  
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(entry)                           \
+       .pushsection "_kprobe_blacklist", "aw" ;        \
+       .balign 4 ;                                     \
+       .long entry;                                    \
+       .popsection
+#else
+#define _ASM_NOKPROBE(entry)
+#endif
+
  #endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h

index 707a1f06dc5d5e207f0d2c18e0b37844569ca199..f675162663f09a054253b8ad6d86361c74968618 100644 (file)
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
         return 8;
  }
  
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+                                     gpa_t gpa, void *data, unsigned long len)
+{
+       int srcu_idx = srcu_read_lock(&kvm->srcu);
+       int ret = kvm_read_guest(kvm, gpa, data, len);
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       return ret;
+}
+
  static inline void *kvm_get_hyp_vector(void)
  {
         return kvm_ksym_ref(__kvm_hyp_vector);
diff --git a/arch/arm/include/uapi/asm/siginfo.h b/arch/arm/include/uapi/asm/siginfo.h

deleted file mode 100644 (file)

index d051388..0000000
--- a/arch/arm/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_SIGINFO_H
-#define __ASM_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME      0       /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c

index 6b38d7a634c19ffd279f98ca8cc3a113484d1fd3..dd2eb5f76b9f0a7d64f50169dd0d04a402b2ae67 100644 (file)
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -83,7 +83,7 @@ void machine_crash_nonpanic_core(void *unused)
  {
         struct pt_regs regs;
  
-       crash_setup_regs(&regs, NULL);
+       crash_setup_regs(&regs, get_irq_regs());
         printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
                smp_processor_id());
         crash_save_cpu(&regs, smp_processor_id());
@@ -95,6 +95,27 @@ void machine_crash_nonpanic_core(void *unused)
                 cpu_relax();
  }
  
+void crash_smp_send_stop(void)
+{
+       static int cpus_stopped;
+       unsigned long msecs;
+
+       if (cpus_stopped)
+               return;
+
+       atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+       smp_call_function(machine_crash_nonpanic_core, NULL, false);
+       msecs = 1000; /* Wait at most a second for the other cpus to stop */
+       while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+               mdelay(1);
+               msecs--;
+       }
+       if (atomic_read(&waiting_for_crash_ipi) > 0)
+               pr_warn("Non-crashing CPUs did not react to IPI\n");
+
+       cpus_stopped = 1;
+}
+
  static void machine_kexec_mask_interrupts(void)
  {
         unsigned int i;
@@ -120,19 +141,8 @@ static void machine_kexec_mask_interrupts(void)
  
  void machine_crash_shutdown(struct pt_regs *regs)
  {
-       unsigned long msecs;
-
         local_irq_disable();
-
-       atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-       smp_call_function(machine_crash_nonpanic_core, NULL, false);
-       msecs = 1000; /* Wait at most a second for the other cpus to stop */
-       while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
-               mdelay(1);
-               msecs--;
-       }
-       if (atomic_read(&waiting_for_crash_ipi) > 0)
-               pr_warn("Non-crashing CPUs did not react to IPI\n");
+       crash_smp_send_stop();
  
         crash_save_cpu(regs, smp_processor_id());
         machine_kexec_mask_interrupts();
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c

index 5e3633c24e636575c19059cdb99bd247a666b5f5..2fe87109ae468bce6d38ff575f395fc2f41cc72e 100644 (file)
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -19,6 +19,7 @@
  #include <linux/uaccess.h>
  #include <linux/hardirq.h>
  #include <linux/kdebug.h>
+#include <linux/kprobes.h>
  #include <linux/module.h>
  #include <linux/kexec.h>
  #include <linux/bug.h>
@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook)
         raw_spin_unlock_irqrestore(&undef_lock, flags);
  }
  
-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+static nokprobe_inline
+int call_undef_hook(struct pt_regs *regs, unsigned int instr)
  {
         struct undef_hook *hook;
         unsigned long flags;
@@ -490,6 +492,7 @@ die_sig:
  
         arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
  }
+NOKPROBE_SYMBOL(do_undefinstr)
  
  /*
   * Handle FIQ similarly to NMI on x86 systems.
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S

index df73914e81c8344feccac5df8d5791dcbe92ed60..746e7801dcdf70fed9e339c2d6800b3f275c49b7 100644 (file)
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
  
  ENTRY(__get_user_2)
         check_uaccess r0, 2, r1, r2, __get_user_bad
@@ -58,6 +59,7 @@ rb    .req    r0
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
  
  ENTRY(__get_user_4)
         check_uaccess r0, 4, r1, r2, __get_user_bad
@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
  
  ENTRY(__get_user_8)
         check_uaccess r0, 8, r1, r2, __get_user_bad8
@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
  
  #ifdef __ARMEB__
  ENTRY(__get_user_32t_8)
@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
  
  ENTRY(__get_user_64t_1)
         check_uaccess r0, 1, r1, r2, __get_user_bad8
@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
  
  ENTRY(__get_user_64t_2)
         check_uaccess r0, 2, r1, r2, __get_user_bad8
@@ -114,6 +120,7 @@ rb  .req    r0
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
  
  ENTRY(__get_user_64t_4)
         check_uaccess r0, 4, r1, r2, __get_user_bad8
@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
         mov     r0, #0
         ret     lr
  ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
  #endif
  
  __get_user_bad8:
@@ -131,6 +139,8 @@ __get_user_bad:
         ret     lr
  ENDPROC(__get_user_bad)
  ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
  
  .pushsection __ex_table, "a"
         .long   1b, __get_user_bad
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c

index 004f9c8de0329cacdb25bfc7c7819a528a78fe77..d1e8ce7b4bd21245d41901709041c113f95f344d 100644 (file)
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -205,12 +205,17 @@ static const short da830_evm_mmc_sd_pins[] = {
         -1
  };
  
+#define DA830_MMCSD_WP_PIN             GPIO_TO_PIN(2, 1)
+#define DA830_MMCSD_CD_PIN             GPIO_TO_PIN(2, 2)
+
  static struct gpiod_lookup_table mmc_gpios_table = {
         .dev_id = "da830-mmc.0",
         .table = {
                 /* gpio chip 1 contains gpio range 32-63 */
-               GPIO_LOOKUP("davinci_gpio.1", 2, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.1", 1, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
         },
  };
  
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c

index 3063478bcc366315d98ed861876380e06b358cf7..158ed9a1483fc87582d66de2746620590f800dd5 100644 (file)
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -763,12 +763,17 @@ static const short da850_evm_mcasp_pins[] __initconst = {
         -1
  };
  
+#define DA850_MMCSD_CD_PIN             GPIO_TO_PIN(4, 0)
+#define DA850_MMCSD_WP_PIN             GPIO_TO_PIN(4, 1)
+
  static struct gpiod_lookup_table mmc_gpios_table = {
         .dev_id = "da830-mmc.0",
         .table = {
                 /* gpio chip 2 contains gpio range 64-95 */
-               GPIO_LOOKUP("davinci_gpio.2", 0, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.2", 1, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
         },
  };
  
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c

index cb30637d9eaf8e379c7ed1dd3e447c40bf56d058..23ab9e8bc04c0e3b37d7a940d23755d1e48a2c80 100644 (file)
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -19,6 +19,7 @@
  #include <linux/gpio.h>
  #include <linux/gpio/machine.h>
  #include <linux/clk.h>
+#include <linux/dm9000.h>
  #include <linux/videodev2.h>
  #include <media/i2c/tvp514x.h>
  #include <linux/spi/spi.h>
@@ -109,12 +110,15 @@ static struct platform_device davinci_nand_device = {
         },
  };
  
+#define DM355_I2C_SDA_PIN      GPIO_TO_PIN(0, 15)
+#define DM355_I2C_SCL_PIN      GPIO_TO_PIN(0, 14)
+
  static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
-       .dev_id = "i2c_davinci",
+       .dev_id = "i2c_davinci.1",
         .table = {
-               GPIO_LOOKUP("davinci_gpio", 15, "sda",
+               GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SDA_PIN, "sda",
                             GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
-               GPIO_LOOKUP("davinci_gpio", 14, "scl",
+               GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SCL_PIN, "scl",
                             GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
         },
  };
@@ -179,11 +183,16 @@ static struct resource dm355evm_dm9000_rsrc[] = {
         },
  };
  
+static struct dm9000_plat_data dm335evm_dm9000_platdata;
+
  static struct platform_device dm355evm_dm9000 = {
         .name           = "dm9000",
         .id             = -1,
         .resource       = dm355evm_dm9000_rsrc,
         .num_resources  = ARRAY_SIZE(dm355evm_dm9000_rsrc),
+       .dev            = {
+               .platform_data = &dm335evm_dm9000_platdata,
+       },
  };
  
  static struct tvp514x_platform_data tvp5146_pdata = {
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c

index 95b55aae1366f6ef9fd6f19277dce0ed30f626fc..509e64ab1994ac3b0c5d94e1144772fd93d99465 100644 (file)
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -17,6 +17,7 @@
  #include <linux/i2c.h>
  #include <linux/platform_data/pcf857x.h>
  #include <linux/platform_data/at24.h>
+#include <linux/platform_data/gpio-davinci.h>
  #include <linux/mtd/mtd.h>
  #include <linux/mtd/rawnand.h>
  #include <linux/mtd/partitions.h>
@@ -596,12 +597,15 @@ static struct i2c_board_info __initdata i2c_info[] =  {
         },
  };
  
+#define DM644X_I2C_SDA_PIN     GPIO_TO_PIN(2, 12)
+#define DM644X_I2C_SCL_PIN     GPIO_TO_PIN(2, 11)
+
  static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
-       .dev_id = "i2c_davinci",
+       .dev_id = "i2c_davinci.1",
         .table = {
-               GPIO_LOOKUP("davinci_gpio", 44, "sda",
+               GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SDA_PIN, "sda",
                             GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
-               GPIO_LOOKUP("davinci_gpio", 43, "scl",
+               GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SCL_PIN, "scl",
                             GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
         },
  };
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c

index 2d37f5b0e1f5ce1ab8e85e0ad19bd25846154cab..a3c0d1e87647847605cfdaca2ac26f5c05aa8894 100644 (file)
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -532,11 +532,12 @@ static struct vpif_display_config dm646x_vpif_display_config = {
         .set_clock      = set_vpif_clock,
         .subdevinfo     = dm646x_vpif_subdev,
         .subdev_count   = ARRAY_SIZE(dm646x_vpif_subdev),
+       .i2c_adapter_id = 1,
         .chan_config[0] = {
                 .outputs = dm6467_ch0_outputs,
                 .output_count = ARRAY_SIZE(dm6467_ch0_outputs),
         },
-       .card_name      = "DM646x EVM",
+       .card_name      = "DM646x EVM Video Display",
  };
  
  /**
@@ -674,6 +675,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
         .setup_input_channel_mode = setup_vpif_input_channel_mode,
         .subdev_info = vpif_capture_sdev_info,
         .subdev_count = ARRAY_SIZE(vpif_capture_sdev_info),
+       .i2c_adapter_id = 1,
         .chan_config[0] = {
                 .inputs = dm6467_ch0_inputs,
                 .input_count = ARRAY_SIZE(dm6467_ch0_inputs),
@@ -694,6 +696,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
                         .fid_pol = 0,
                 },
         },
+       .card_name = "DM646x EVM Video Capture",
  };
  
  static void __init evm_init_video(void)
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c

index 0d32042b728fa447a77dc286b9310d10c6b7f96d..be8b892a6ea7061a8af9b932e007cc8652701b0f 100644 (file)
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -123,12 +123,16 @@ static const short hawk_mmcsd0_pins[] = {
         -1
  };
  
+#define DA850_HAWK_MMCSD_CD_PIN                GPIO_TO_PIN(3, 12)
+#define DA850_HAWK_MMCSD_WP_PIN                GPIO_TO_PIN(3, 13)
+
  static struct gpiod_lookup_table mmc_gpios_table = {
         .dev_id = "da830-mmc.0",
         .table = {
-               /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/
-               GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
         },
  };
  
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c

index 109ab1fa0d2c32b4f0840310281dd8ac1865ad34..c32ca27ab343d1258399648b5e9dfb9aa1e215e0 100644 (file)
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -488,7 +488,8 @@ static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
         [IRQ_DM646X_MCASP0TXINT]        = 7,
         [IRQ_DM646X_MCASP0RXINT]        = 7,
         [IRQ_DM646X_RESERVED_3]         = 7,
-       [IRQ_DM646X_MCASP1TXINT]        = 7,    /* clockevent */
+       [IRQ_DM646X_MCASP1TXINT]        = 7,
+       [IRQ_TINT0_TINT12]              = 7,    /* clockevent */
         [IRQ_TINT0_TINT34]              = 7,    /* clocksource */
         [IRQ_TINT1_TINT12]              = 7,    /* DSP timer */
         [IRQ_TINT1_TINT34]              = 7,    /* system tick */
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c

index e70feec6fad5e296b869d76e90d31bf5b71d1334..0581ffbedddd3761b832dc5b457bb3269ee34c17 100644 (file)
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -323,7 +323,7 @@ void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr)
  
  /* All EP93xx devices use the same two GPIO pins for I2C bit-banging */
  static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 /* Use local offsets on gpiochip/port "G" */
                 GPIO_LOOKUP_IDX("G", 1, NULL, 0,
diff --git a/arch/arm/mach-ixp4xx/avila-setup.c b/arch/arm/mach-ixp4xx/avila-setup.c

index 77def6169f506a0134e2fd337b396b93747cccad..44cbbce6bda6a85da36b895ef3b212bb0e036f2b 100644 (file)
--- a/arch/arm/mach-ixp4xx/avila-setup.c
+++ b/arch/arm/mach-ixp4xx/avila-setup.c
@@ -51,7 +51,7 @@ static struct platform_device avila_flash = {
  };
  
  static struct gpiod_lookup_table avila_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", AVILA_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c

index 0f5c99941a7d5b14e39663cad535a61698328e68..397190f3a8da6cdbc87a3fac2296f793a497cf8f 100644 (file)
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -70,7 +70,7 @@ static struct platform_device dsmg600_flash = {
  };
  
  static struct gpiod_lookup_table dsmg600_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", DSMG600_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c

index 033f79b35d5125cbaf0b8cd5fb8e725f260a998f..f0a152e365b10cd63ad523667cf2c09f05f98522 100644 (file)
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -56,7 +56,7 @@ static struct platform_device fsg_flash = {
  };
  
  static struct gpiod_lookup_table fsg_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", FSG_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c

index b168e2fbdbeb4f55647fccfb137c963a62dffa14..3ec829d52cdd2143a76113f9380cd54696b23395 100644 (file)
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -124,7 +124,7 @@ static struct platform_device ixdp425_flash_nand = {
  #endif /* CONFIG_MTD_NAND_PLATFORM */
  
  static struct gpiod_lookup_table ixdp425_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", IXDP425_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c

index 76dfff03cb714e575cee08e608a32e8d5143cca4..4138d6aa4c52e6f4ab61a31af908eecdceb1e0ce 100644 (file)
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -102,7 +102,7 @@ static struct platform_device nas100d_leds = {
  };
  
  static struct gpiod_lookup_table nas100d_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NAS100D_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c

index 91da63a7d7b5f9d6022d30dc23269e71979496a7..341b263482ef98a7045d4ea31a1a9ddc2ab890d6 100644 (file)
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -70,7 +70,7 @@ static struct platform_device nslu2_flash = {
  };
  
  static struct gpiod_lookup_table nslu2_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NSLU2_SDA_PIN,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c

index fe57e26926292fe3dccc1adaf7d6c5e8552fd055..abca83d22ff3f1d217d642ed31e6719354ae4a3f 100644 (file)
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -29,6 +29,7 @@ static struct dev_pm_domain keystone_pm_domain = {
  
  static struct pm_clk_notifier_block platform_domain_notifier = {
         .pm_domain = &keystone_pm_domain,
+       .con_ids = { NULL },
  };
  
  static const struct of_device_id of_keystone_table[] = {
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c

index 793a24a53c5261c20d1e9a39955615bf863fa9df..d7ca9e2b40d274c096333c7488011ac7dcc746db 100644 (file)
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -58,22 +58,24 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id)
                 irq_num = gpio_to_irq(gpio);
                 fiq_count = fiq_buffer[FIQ_CNT_INT_00 + gpio];
  
-               while (irq_counter[gpio] < fiq_count) {
-                       if (gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
-                               struct irq_data *d = irq_get_irq_data(irq_num);
-
-                               /*
-                                * It looks like handle_edge_irq() that
-                                * OMAP GPIO edge interrupts default to,
-                                * expects interrupt already unmasked.
-                                */
-                               if (irq_chip && irq_chip->irq_unmask)
+               if (irq_counter[gpio] < fiq_count &&
+                               gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
+                       struct irq_data *d = irq_get_irq_data(irq_num);
+
+                       /*
+                        * handle_simple_irq() that OMAP GPIO edge
+                        * interrupts default to since commit 80ac93c27441
+                        * requires interrupt already acked and unmasked.
+                        */
+                       if (irq_chip) {
+                               if (irq_chip->irq_ack)
+                                       irq_chip->irq_ack(d);
+                               if (irq_chip->irq_unmask)
                                         irq_chip->irq_unmask(d);
                         }
-                       generic_handle_irq(irq_num);
-
-                       irq_counter[gpio]++;
                 }
+               for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++)
+                       generic_handle_irq(irq_num);
         }
         return IRQ_HANDLED;
  }
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c

index 76eb6ec5f157e9753cf7bc9773801a35d55ffd27..1e6a967cd2d5890342fb76bbe3b0c8c42ec6491d 100644 (file)
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -188,7 +188,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag)
                                        ((prev & OMAP_POWERSTATE_MASK) << 0));
                         trace_power_domain_target_rcuidle(pwrdm->name,
                                                           trace_state,
-                                                         smp_processor_id());
+                                                         raw_smp_processor_id());
                 }
                 break;
         default:
@@ -518,7 +518,7 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst)
         if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
                 /* Trace the pwrdm desired target state */
                 trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
-                                                 smp_processor_id());
+                                                 raw_smp_processor_id());
                 /* Program the pwrdm desired target state */
                 ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
         }
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c

index 5877e547cecd84d40a8ed6a48f0f91a7c8c4ad0c..0adb1bd6208e27ab349c88ce2e912ad6a22109e2 100644 (file)
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -322,7 +322,7 @@ static struct soc_camera_link palmz72_iclink = {
  };
  
  static struct gpiod_lookup_table palmz72_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.0",
         .table          = {
                 GPIO_LOOKUP_IDX("gpio-pxa", 118, NULL, 0,
                                 GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c

index 90d0f277de55a6606fda445e66744085bd93ec16..207dcc2e94e70694376423a4a3c938d434368ff8 100644 (file)
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -460,7 +460,7 @@ static struct platform_device smc91x_device = {
  
  /* i2c */
  static struct gpiod_lookup_table viper_i2c_gpiod_table = {
-       .dev_id         = "i2c-gpio",
+       .dev_id         = "i2c-gpio.1",
         .table          = {
                 GPIO_LOOKUP_IDX("gpio-pxa", VIPER_RTC_I2C_SDA_GPIO,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
@@ -789,7 +789,7 @@ static int __init viper_tpm_setup(char *str)
  __setup("tpm=", viper_tpm_setup);
  
  struct gpiod_lookup_table viper_tpm_i2c_gpiod_table = {
-       .dev_id = "i2c-gpio",
+       .dev_id = "i2c-gpio.2",
         .table = {
                 GPIO_LOOKUP_IDX("gpio-pxa", VIPER_TPM_I2C_SDA_GPIO,
                                 NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c

index ace010479eb6c6d538663acbde7fea4a83984c52..f45aed2519ba21979ce107b492059e26857bbae3 100644 (file)
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -327,7 +327,7 @@ static struct platform_device simpad_gpio_leds = {
   * i2c
   */
  static struct gpiod_lookup_table simpad_i2c_gpiod_table = {
-       .dev_id = "i2c-gpio",
+       .dev_id = "i2c-gpio.0",
         .table = {
                 GPIO_LOOKUP_IDX("gpio", 21, NULL, 0,
                                 GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c

index 8c398fedbbb6af30b461fb09cabd89b04cc09648..ada8eb206a90b6824427d24c5019100895d225f2 100644 (file)
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
  void __init dma_contiguous_remap(void)
  {
         int i;
-
-       if (!dma_mmu_remap_num)
-               return;
-
-       /* call flush_cache_all() since CMA area would be large enough */
-       flush_cache_all();
         for (i = 0; i < dma_mmu_remap_num; i++) {
                 phys_addr_t start = dma_mmu_remap[i].base;
                 phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void)
                 flush_tlb_kernel_range(__phys_to_virt(start),
                                        __phys_to_virt(end));
  
-               /*
-                * All the memory in CMA region will be on ZONE_MOVABLE.
-                * If that zone is considered as highmem, the memory in CMA
-                * region is also considered as highmem even if it's
-                * physical address belong to lowmem. In this case,
-                * re-mapping isn't required.
-                */
-               if (!is_highmem_idx(ZONE_MOVABLE))
-                       iotable_init(&map, 1);
+               iotable_init(&map, 1);
         }
  }
  
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c

index bcdecc25461bcaa51f6df405807935bb481de2c5..b2aa9b32bff2b5e9d2e6d102a4cd58f6cf8c5676 100644 (file)
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
  {
         unsigned long flags;
         struct kprobe *p = &op->kp;
-       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       struct kprobe_ctlblk *kcb;
  
         /* Save skipped registers */
         regs->ARM_pc = (unsigned long)op->kp.addr;
         regs->ARM_ORIG_r0 = ~0UL;
  
         local_irq_save(flags);
+       kcb = get_kprobe_ctlblk();
  
         if (kprobe_running()) {
                 kprobes_inc_nmissed_count(&op->kp);
@@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
  
         local_irq_restore(flags);
  }
+NOKPROBE_SYMBOL(optimized_callback)
  
  int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
  {
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c

index 4c375e11ae9531bec8b5a05bd14ab8424ccd6653..af4ee2cef2f9650e699de3a335eaedc58dc9d37b 100644 (file)
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -257,7 +257,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
  
         if (exceptions == VFP_EXCEPTION_ERROR) {
                 vfp_panic("unhandled bounce", inst);
-               vfp_raise_sigfpe(FPE_FIXME, regs);
+               vfp_raise_sigfpe(FPE_FLTINV, regs);
                 return;
         }
  
diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi

index c0231d077fa61f83dd66a7d9cd5aeb7423c6f6ff..1ad8677f6a0a622c66f842ef16478091ee22ee73 100644 (file)
--- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
@@ -1317,7 +1317,7 @@
                         reg = <0x14d60000 0x100>;
                         dmas = <&pdma0 31 &pdma0 30>;
                         dma-names = "tx", "rx";
-                       interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>;
+                       interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>;
                         clocks = <&cmu_peric CLK_PCLK_I2S1>,
                                  <&cmu_peric CLK_PCLK_I2S1>,
                                  <&cmu_peric CLK_SCLK_I2S1>;
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts

index 724a0d3b76837a06e9aa9735228135609ac3c334..edb4ee0b8896b2c9a5572e1160e273eac42e062d 100644 (file)
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -299,7 +299,6 @@
                 /* GPIO blocks 16 thru 19 do not appear to be routed to pins */
  
                 dwmmc_0: dwmmc0@f723d000 {
-                       max-frequency = <150000000>;
                         cap-mmc-highspeed;
                         mmc-hs200-1_8v;
                         non-removable;
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi

index 48cad7919efa3809e5cbdc84895492304c56d84e..ed2f1237ea1e9a9c320e7b2da56145f07e147079 100644 (file)
--- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
@@ -38,9 +38,10 @@
                         compatible = "marvell,armada-7k-pp22";
                         reg = <0x0 0x100000>, <0x129000 0xb000>;
                         clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>,
-                                <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>;
+                                <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
+                                <&CP110_LABEL(clk) 1 18>;
                         clock-names = "pp_clk", "gop_clk",
-                                     "mg_clk", "axi_clk";
+                                     "mg_clk", "mg_core_clk", "axi_clk";
                         marvell,system-controller = <&CP110_LABEL(syscon0)>;
                         status = "disabled";
                         dma-coherent;
@@ -141,6 +142,8 @@
                         #size-cells = <0>;
                         compatible = "marvell,xmdio";
                         reg = <0x12a600 0x10>;
+                       clocks = <&CP110_LABEL(clk) 1 5>,
+                                <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
                         status = "disabled";
                 };
  
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi

index a8baad7b80df2d4c9bd39c38e39d3435ded8d0dc..13f57fff147742c22f889412e866dfc64a40ecd5 100644 (file)
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
@@ -46,7 +46,7 @@
                                 compatible = "ethernet-phy-ieee802.3-c22";
                                 reg = <0x0>;
                                 interrupt-parent = <&gpio>;
-                               interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>;
                         };
                 };
         };
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi

index e62bda1cf2d9db9b7058a2ab48529508417b92e4..c32dd3419c870ef080e58780f7ac46e93c136455 100644 (file)
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
@@ -414,7 +414,7 @@
                         mmc-ddr-1_8v;
                         mmc-hs200-1_8v;
                         mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                         cdns,phy-input-delay-mmc-highspeed = <2>;
                         cdns,phy-input-delay-mmc-ddr = <3>;
                         cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts

index 2c1a92fafbfbe053808b00e4b8b66804f6744e4d..440c2e6a638b998c163b3f8aea91f94e115f516b 100644 (file)
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -67,3 +67,11 @@
                 reg = <0>;
         };
  };
+
+&pinctrl_ether_rgmii {
+       tx {
+               pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1",
+                      "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL";
+               drive-strength = <9>;
+       };
+};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi

index 9efe20d075890ee24638bfa125040e17ac8034cd..3a5ed789c056e37bd8dc07e9aa21f8d7e44ea4b8 100644 (file)
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -519,7 +519,7 @@
                         mmc-ddr-1_8v;
                         mmc-hs200-1_8v;
                         mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                         cdns,phy-input-delay-mmc-highspeed = <2>;
                         cdns,phy-input-delay-mmc-ddr = <3>;
                         cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi

index 7c8f710d9bfa11fb3f9a3432c6e16cb4e18ef06c..e85d6ddea3c2171bec09a6c2a9bf3bfe817c2478 100644 (file)
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -334,7 +334,7 @@
                         mmc-ddr-1_8v;
                         mmc-hs200-1_8v;
                         mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                         cdns,phy-input-delay-mmc-highspeed = <2>;
                         cdns,phy-input-delay-mmc-ddr = <3>;
                         cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h

index 9ef0797380cbbdf182a86e934c2eec5aa97d889d..f9b0b09153e0eaa3b15728fd42471c77c2d1955a 100644 (file)
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v)
         /* LSE atomics */
         "       mvn     %w[i], %w[i]\n"
         "       stclr   %w[i], %[v]")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : [i] "+&r" (w0), [v] "+Q" (v->counter)
         : "r" (x1)
         : __LL_SC_CLOBBERS);
  }
@@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v)                \
         /* LSE atomics */                                               \
         "       mvn     %w[i], %w[i]\n"                                 \
         "       ldclr" #mb "    %w[i], %w[i], %[v]")                    \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                         \
@@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v)
         /* LSE atomics */
         "       neg     %w[i], %w[i]\n"
         "       stadd   %w[i], %[v]")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : [i] "+&r" (w0), [v] "+Q" (v->counter)
         : "r" (x1)
         : __LL_SC_CLOBBERS);
  }
@@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)               \
         "       neg     %w[i], %w[i]\n"                                 \
         "       ldadd" #mb "    %w[i], w30, %[v]\n"                     \
         "       add     %w[i], %w[i], w30")                             \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS , ##cl);                                     \
                                                                         \
@@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v)                \
         /* LSE atomics */                                               \
         "       neg     %w[i], %w[i]\n"                                 \
         "       ldadd" #mb "    %w[i], %w[i], %[v]")                    \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                         \
@@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
         /* LSE atomics */
         "       mvn     %[i], %[i]\n"
         "       stclr   %[i], %[v]")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : [i] "+&r" (x0), [v] "+Q" (v->counter)
         : "r" (x1)
         : __LL_SC_CLOBBERS);
  }
@@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v)  \
         /* LSE atomics */                                               \
         "       mvn     %[i], %[i]\n"                                   \
         "       ldclr" #mb "    %[i], %[i], %[v]")                      \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                         \
@@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
         /* LSE atomics */
         "       neg     %[i], %[i]\n"
         "       stadd   %[i], %[v]")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : [i] "+&r" (x0), [v] "+Q" (v->counter)
         : "r" (x1)
         : __LL_SC_CLOBBERS);
  }
@@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
         "       neg     %[i], %[i]\n"                                   \
         "       ldadd" #mb "    %[i], x30, %[v]\n"                      \
         "       add     %[i], %[i], x30")                               \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                         \
@@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)  \
         /* LSE atomics */                                               \
         "       neg     %[i], %[i]\n"                                   \
         "       ldadd" #mb "    %[i], %[i], %[v]")                      \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
         : "r" (x1)                                                      \
         : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                         \
@@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
         "       sub     x30, x30, %[ret]\n"
         "       cbnz    x30, 1b\n"
         "2:")
-       : [ret] "+r" (x0), [v] "+Q" (v->counter)
+       : [ret] "+&r" (x0), [v] "+Q" (v->counter)
         :
         : __LL_SC_CLOBBERS, "cc", "memory");
  
@@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,               \
         "       eor     %[old1], %[old1], %[oldval1]\n"                 \
         "       eor     %[old2], %[old2], %[oldval2]\n"                 \
         "       orr     %[old1], %[old1], %[old2]")                     \
-       : [old1] "+r" (x0), [old2] "+r" (x1),                           \
+       : [old1] "+&r" (x0), [old2] "+&r" (x1),                         \
           [v] "+Q" (*(unsigned long *)ptr)                              \
         : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),             \
           [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)              \
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h

index 082110993647b9b7e30cf18da5d9d447c58d362f..6128992c2ded6b7e1aa4ba835474f6f45f6fdc5e 100644 (file)
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
         return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
  }
  
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+                                     gpa_t gpa, void *data, unsigned long len)
+{
+       int srcu_idx = srcu_read_lock(&kvm->srcu);
+       int ret = kvm_read_guest(kvm, gpa, data, len);
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       return ret;
+}
+
  #ifdef CONFIG_KVM_INDIRECT_VECTORS
  /*
   * EL2 vectors can be mapped and rerouted in a number of ways,
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c

index 66be504edb6cf5be422afa59d82aa2db4fd3ed7f..d894a20b70b28f709f776d0330edb598283aecad 100644 (file)
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount);
         /* arm-smccc */
  EXPORT_SYMBOL(__arm_smccc_smc);
  EXPORT_SYMBOL(__arm_smccc_hvc);
+
+       /* tishift.S */
+extern long long __ashlti3(long long a, int b);
+EXPORT_SYMBOL(__ashlti3);
+extern long long __ashrti3(long long a, int b);
+EXPORT_SYMBOL(__ashrti3);
+extern long long __lshrti3(long long a, int b);
+EXPORT_SYMBOL(__lshrti3);
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S

index d3db9b2cd479bebb64e0dd0b3c18edfef3d552b0..0fdff97794debbdfaae4a146a99b4550e670ab5e 100644 (file)
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -1,17 +1,6 @@
-/*
- * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
   */
  
  #include <linux/linkage.h>
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c

index 4165485e8b6ecbc60f161d98c20139992877c416..2af3dd89bcdbed669238b10defa7fc7deb1e2640 100644 (file)
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
  static void __do_user_fault(struct siginfo *info, unsigned int esr)
  {
         current->thread.fault_address = (unsigned long)info->si_addr;
+
+       /*
+        * If the faulting address is in the kernel, we must sanitize the ESR.
+        * From userspace's point of view, kernel-only mappings don't exist
+        * at all, so we report them as level 0 translation faults.
+        * (This is not quite the way that "no mapping there at all" behaves:
+        * an alignment fault not caused by the memory type would take
+        * precedence over translation fault for a real access to empty
+        * space. Unfortunately we can't easily distinguish "alignment fault
+        * not caused by memory type" from "alignment fault caused by memory
+        * type", so we ignore this wrinkle and just return the translation
+        * fault.)
+        */
+       if (current->thread.fault_address >= TASK_SIZE) {
+               switch (ESR_ELx_EC(esr)) {
+               case ESR_ELx_EC_DABT_LOW:
+                       /*
+                        * These bits provide only information about the
+                        * faulting instruction, which userspace knows already.
+                        * We explicitly clear bits which are architecturally
+                        * RES0 in case they are given meanings in future.
+                        * We always report the ESR as if the fault was taken
+                        * to EL1 and so ISV and the bits in ISS[23:14] are
+                        * clear. (In fact it always will be a fault to EL1.)
+                        */
+                       esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
+                               ESR_ELx_CM | ESR_ELx_WNR;
+                       esr |= ESR_ELx_FSC_FAULT;
+                       break;
+               case ESR_ELx_EC_IABT_LOW:
+                       /*
+                        * Claim a level 0 translation fault.
+                        * All other bits are architecturally RES0 for faults
+                        * reported with that IFSC value, so we clear them.
+                        */
+                       esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
+                       esr |= ESR_ELx_FSC_FAULT;
+                       break;
+               default:
+                       /*
+                        * This should never happen (entry.S only brings us
+                        * into this code for insn and data aborts from a lower
+                        * exception level). Fail safe by not providing an ESR
+                        * context record at all.
+                        */
+                       WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+                       esr = 0;
+                       break;
+               }
+       }
+
         current->thread.fault_code = esr;
         arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
  }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c

index 2dbb2c9f1ec1770e7f9f5aca7176eac2cc153d32..493ff75670ffd98a1dc344a133f0f31a634f93ff 100644 (file)
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
  {
         pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                         pgprot_val(mk_sect_prot(prot)));
+       pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
  
-       /* ioremap_page_range doesn't honour BBM */
-       if (pud_present(READ_ONCE(*pudp)))
+       /* Only allow permission changes for now */
+       if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
+                                  pud_val(new_pud)))
                 return 0;
  
         BUG_ON(phys & ~PUD_MASK);
-       set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
+       set_pud(pudp, new_pud);
         return 1;
  }
  
@@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
  {
         pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                         pgprot_val(mk_sect_prot(prot)));
+       pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
  
-       /* ioremap_page_range doesn't honour BBM */
-       if (pmd_present(READ_ONCE(*pmdp)))
+       /* Only allow permission changes for now */
+       if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
+                                  pmd_val(new_pmd)))
                 return 0;
  
         BUG_ON(phys & ~PMD_MASK);
-       set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+       set_pmd(pmdp, new_pmd);
         return 1;
  }
  
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c

index b3043c08f7694244604af5a09065800e7a97f587..aee8d7b8f09143fd8e4ce30a9552bf827ec357e4 100644 (file)
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -18,9 +18,9 @@
  #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
  #endif
  
-#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
-#include <asm/mach-jz4740/base.h>
-#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
+#ifdef CONFIG_MACH_INGENIC
+#define INGENIC_UART0_BASE_ADDR        0x10030000
+#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset))
  #endif
  
  #ifdef CONFIG_CPU_XLR
diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile

index 9987e0e378c50c6f19eb0457eae914f827f9688b..69ca00590b8de6cbf1b3fb3397e8e4116c690ebf 100644 (file)
--- a/arch/mips/boot/dts/xilfpga/Makefile
+++ b/arch/mips/boot/dts/xilfpga/Makefile
@@ -1,4 +1,2 @@
  # SPDX-License-Identifier: GPL-2.0
  dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)    += nexys4ddr.dtb
-
-obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform

index b51432dd10b6fa3789ff01b1d0b902ac200fe89d..0dd0d5d460a5fc7988b03f856f47563589d7e995 100644 (file)
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC)    := vmlinux.gz.itb
  its-y                                  := vmlinux.its.S
  its-$(CONFIG_FIT_IMAGE_FDT_BOSTON)     += board-boston.its.S
  its-$(CONFIG_FIT_IMAGE_FDT_NI169445)   += board-ni169445.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)    += board-xilfpga.its.S
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c

index 0b23b1ad99e65f1e21d1810340f9dd306483b8d3..8d098b9f395c13746a4f0855f54a6303bfc21098 100644 (file)
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target,
  /*
   * Copy the floating-point context to the supplied NT_PRFPREG buffer.
   * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR and FIR registers separately.
   */
  static int fpr_get(struct task_struct *target,
                    const struct user_regset *regset,
@@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target,
                    void *kbuf, void __user *ubuf)
  {
         const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+       const int fir_pos = fcr31_pos + sizeof(u32);
         int err;
  
         if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
@@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target,
         err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                   &target->thread.fpu.fcr31,
                                   fcr31_pos, fcr31_pos + sizeof(u32));
+       if (err)
+               return err;
+
+       err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &boot_cpu_data.fpu_id,
+                                 fir_pos, fir_pos + sizeof(u32));
  
         return err;
  }
@@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target,
  /*
   * Copy the supplied NT_PRFPREG buffer to the floating-point context.
   * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR register separately.  Ignore the incoming FIR register
+ * contents though, as the register is read-only.
   *
   * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
   * which is supposed to have been guaranteed by the kernel before
@@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target,
                    const void *kbuf, const void __user *ubuf)
  {
         const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+       const int fir_pos = fcr31_pos + sizeof(u32);
         u32 fcr31;
         int err;
  
@@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target,
                 ptrace_setfcr31(target, fcr31);
         }
  
+       if (count > 0)
+               err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                               fir_pos,
+                                               fir_pos + sizeof(u32));
+
         return err;
  }
  
@@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request,
                         fregs = get_fpu_regs(child);
  
  #ifdef CONFIG_32BIT
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                 /*
                                  * The odd registers are actually the high
                                  * order bits of the values stored in the even
@@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request,
  
                         init_fp_ctx(child);
  #ifdef CONFIG_32BIT
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                 /*
                                  * The odd registers are actually the high
                                  * order bits of the values stored in the even
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c

index 2b9260f92ccd3019fe3d733c96a631faa7f59e2b..656a137c1fe2c4dcaa9fb410d5787c9e9b39ce1f 100644 (file)
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                                 break;
                         }
                         fregs = get_fpu_regs(child);
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                 /*
                                  * The odd registers are actually the high
                                  * order bits of the values stored in the even
@@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                                        sizeof(child->thread.fpu));
                                 child->thread.fpu.fcr31 = 0;
                         }
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                 /*
                                  * The odd registers are actually the high
                                  * order bits of the values stored in the even
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c

index 2549fdd27ee16842c1ce7dd2bd422f27a2d3a769..0f725e9cee8f69230ca7ddff5f6023c30294395c 100644 (file)
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
         { "cache",        VCPU_STAT(cache_exits),        KVM_STAT_VCPU },
         { "signal",       VCPU_STAT(signal_exits),       KVM_STAT_VCPU },
         { "interrupt",    VCPU_STAT(int_exits),          KVM_STAT_VCPU },
-       { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+       { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
         { "tlbmod",       VCPU_STAT(tlbmod_exits),       KVM_STAT_VCPU },
         { "tlbmiss_ld",   VCPU_STAT(tlbmiss_ld_exits),   KVM_STAT_VCPU },
         { "tlbmiss_st",   VCPU_STAT(tlbmiss_st_exits),   KVM_STAT_VCPU },
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c

index 6f534b2099717da8c2d7be70bfa035a05ed5aede..e12dfa48b478dd3ec51369236bb84040c044bd82 100644 (file)
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
         /*
          * Either no secondary cache or the available caches don't have the
          * subset property so we have to flush the primary caches
-        * explicitly
+        * explicitly.
+        * If we would need IPI to perform an INDEX-type operation, then
+        * we have to use the HIT-type alternative as IPI cannot be used
+        * here due to interrupts possibly being disabled.
          */
-       if (size >= dcache_size) {
+       if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
                 r4k_blast_dcache();
         } else {
                 R4600_HIT_CACHEOP_WAR_IMPL;
@@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
                 return;
         }
  
-       if (size >= dcache_size) {
+       if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
                 r4k_blast_dcache();
         } else {
                 R4600_HIT_CACHEOP_WAR_IMPL;
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c

index ee5a78a151a6acac9b4520892fac57ba4c6e5f71..e0e1c9775c320b46d85da0f2e6ce22bc2275b9fb 100644 (file)
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -268,7 +268,7 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
   * Walks up the device tree looking for a device of the specified type.
   * If it finds it, it returns it.  If not, it returns NULL.
   */
-const struct parisc_device * __init
+const struct parisc_device *
  find_pa_parent_type(const struct parisc_device *padev, int type)
  {
         const struct device *dev = &padev->dev;
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c

index 4065b5e48c9d68e70b38da3743e219e02934fe5b..5e26dbede5fc23d37f734e1511bc405207d37266 100644 (file)
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -423,8 +423,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
  }
  
  #ifdef CONFIG_PROC_FS
-int __init
-setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
  {
         return -EINVAL;
  }
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h

index 471b2274fbeba815f04c1957d975f0f1a74bcdbe..c40b4380951cb45518656a0e1030280d9253d852 100644 (file)
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,27 @@
   */
  #define EX_R3          EX_DAR
  
+#define STF_ENTRY_BARRIER_SLOT                                         \
+       STF_ENTRY_BARRIER_FIXUP_SECTION;                                \
+       nop;                                                            \
+       nop;                                                            \
+       nop
+
+#define STF_EXIT_BARRIER_SLOT                                          \
+       STF_EXIT_BARRIER_FIXUP_SECTION;                                 \
+       nop;                                                            \
+       nop;                                                            \
+       nop;                                                            \
+       nop;                                                            \
+       nop;                                                            \
+       nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL                                            \
+       STF_ENTRY_BARRIER_SLOT
+
  /*
   * Macros for annotating the expected destination of (h)rfid
   *
@@ -90,16 +111,19 @@
         rfid
  
  #define RFI_TO_USER                                                    \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         rfid;                                                           \
         b       rfi_flush_fallback
  
  #define RFI_TO_USER_OR_KERNEL                                          \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         rfid;                                                           \
         b       rfi_flush_fallback
  
  #define RFI_TO_GUEST                                                   \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         rfid;                                                           \
         b       rfi_flush_fallback
@@ -108,21 +132,25 @@
         hrfid
  
  #define HRFI_TO_USER                                                   \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         hrfid;                                                          \
         b       hrfi_flush_fallback
  
  #define HRFI_TO_USER_OR_KERNEL                                         \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         hrfid;                                                          \
         b       hrfi_flush_fallback
  
  #define HRFI_TO_GUEST                                                  \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         hrfid;                                                          \
         b       hrfi_flush_fallback
  
  #define HRFI_TO_UNKNOWN                                                        \
+       STF_EXIT_BARRIER_SLOT;                                          \
         RFI_FLUSH_SLOT;                                                 \
         hrfid;                                                          \
         b       hrfi_flush_fallback
@@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
  #define __EXCEPTION_PROLOG_1_PRE(area)                                 \
         OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);         \
         OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);          \
+       INTERRUPT_TO_KERNEL;                                            \
         SAVE_CTR(r10, area);                                            \
         mfcr    r9;
  
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h

index 1e82eb3caabd19c69289957da188b563d0bcd0d6..a9b64df34e2a365a6916c89786d3398f4311413b 100644 (file)
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,6 +187,22 @@ label##3:                                          \
         FTR_ENTRY_OFFSET label##1b-label##3b;           \
         .popsection;
  
+#define STF_ENTRY_BARRIER_FIXUP_SECTION                        \
+953:                                                   \
+       .pushsection __stf_entry_barrier_fixup,"a";     \
+       .align 2;                                       \
+954:                                                   \
+       FTR_ENTRY_OFFSET 953b-954b;                     \
+       .popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION                 \
+955:                                                   \
+       .pushsection __stf_exit_barrier_fixup,"a";      \
+       .align 2;                                       \
+956:                                                   \
+       FTR_ENTRY_OFFSET 955b-956b;                     \
+       .popsection;
+
  #define RFI_FLUSH_FIXUP_SECTION                                \
  951:                                                   \
         .pushsection __rfi_flush_fixup,"a";             \
@@ -199,6 +215,9 @@ label##3:                                           \
  #ifndef __ASSEMBLY__
  #include <linux/types.h>
  
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
  extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
  
  void apply_feature_fixups(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h

index 4c02a7378d067e6dd5afc12b7336f90353879abc..e7377b73cfecaa2874fe240ee861e472cfa9309d 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
         struct kvm_vcpu *runner;
         struct kvm *kvm;
         u64 tb_offset;          /* guest timebase - host timebase */
+       u64 tb_offset_applied;  /* timebase offset currently in force */
         ulong lpcr;
         u32 arch_compat;
         ulong pcr;
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h

index fa4d2e1cf772c883ec4bf77822c660bfe167711a..44989b22383c24b92caaf3dbb3d9831c79cd967f 100644 (file)
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -12,6 +12,17 @@
  extern unsigned long powerpc_security_features;
  extern bool rfi_flush;
  
+/*
+ * Store-forwarding barrier flavours. These are bit flags; the fixup code
+ * tests them with '&' to pick the instruction sequence it patches in.
+ */
+enum stf_barrier_type {
+       STF_BARRIER_NONE        = 0x1,  /* patch sites are filled with nops */
+       STF_BARRIER_FALLBACK    = 0x2,  /* entry branches to stf_barrier_fallback */
+       STF_BARRIER_EIEIO       = 0x4,  /* eieio with the bit 6 hint */
+       STF_BARRIER_SYNC_ORI    = 0x8,  /* hwsync; ld; ori 31,31,0 */
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
  static inline void security_ftr_set(unsigned long feature)
  {
         powerpc_security_features |= feature;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c

index 6bee65f3cfd34bf896ef770f805ec62d417ae9db..373dc1d6ef44e99854200208466f741489944423 100644 (file)
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -562,6 +562,7 @@ int main(void)
         OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
         OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
         OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+       OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
         OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
         OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
         OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S

index 3f30c994e9316a1476086334de4fa84edba9de0e..458b928dbd8447008a7f6c83ff9db27d06d7508d 100644 (file)
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7)
         beqlr
         li      r0,0
         mtspr   SPRN_LPID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
         bl      __init_LPCR_ISA206
@@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7)
         beqlr
         li      r0,0
         mtspr   SPRN_LPID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
         bl      __init_LPCR_ISA206
@@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8)
         beqlr
         li      r0,0
         mtspr   SPRN_LPID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         ori     r3, r3, LPCR_PECEDH
         li      r4,0 /* LPES = 0 */
@@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8)
         beqlr
         li      r0,0
         mtspr   SPRN_LPID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         ori     r3, r3, LPCR_PECEDH
         li      r4,0 /* LPES = 0 */
@@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9)
         mtspr   SPRN_PSSCR,r0
         mtspr   SPRN_LPID,r0
         mtspr   SPRN_PID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE  | LPCR_HEIC)
         or      r3, r3, r4
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
         mtspr   SPRN_PSSCR,r0
         mtspr   SPRN_LPID,r0
         mtspr   SPRN_PID,r0
+       mtspr   SPRN_PCR,r0
         mfspr   r3,SPRN_LPCR
         LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
         or      r3, r3, r4
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c

index 8ab51f6ca03af52c4d9cff18e552b456022e459e..c904477abaf38d33c63ee5d0822714f1d36f9b85 100644 (file)
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void)
         if (hv_mode) {
                 mtspr(SPRN_LPID, 0);
                 mtspr(SPRN_HFSCR, system_registers.hfscr);
+               mtspr(SPRN_PCR, 0);
         }
         mtspr(SPRN_FSCR, system_registers.fscr);
  
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S

index ae6a849db60b1ae8440abcc776b8a5b59e57a641..f283958129f27165b1f1c72219479652941268a1 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
  #endif
  
  
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
  EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
  TRAMP_KVM(PACA_EXGEN, 0x900)
  EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
         mtctr   r13;                                                    \
         GET_PACA(r13);                                                  \
         std     r10,PACA_EXGEN+EX_R10(r13);                             \
+       INTERRUPT_TO_KERNEL;                                            \
         KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
         HMT_MEDIUM;                                                     \
         mfctr   r9;
@@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  #define SYSCALL_KVMTEST                                                        \
         HMT_MEDIUM;                                                     \
         mr      r9,r13;                                                 \
-       GET_PACA(r13);
+       GET_PACA(r13);                                                  \
+       INTERRUPT_TO_KERNEL;
  #endif
         
  #define LOAD_SYSCALL_HANDLER(reg)                                      \
@@ -1507,6 +1509,19 @@ masked_##_H##interrupt:                                  \
         b       .;                                      \
         MASKED_DEC_HANDLER(_H)
  
+/*
+ * Out-of-line store-forwarding barrier used by the "fallback" entry fixup,
+ * which reaches it through a patched-in branch-and-link; it returns with blr.
+ * r9 and r10 are stored to the PACA_EXRFI save area and reloaded around a
+ * sync, followed by ori 31,31,0 and a run of 14 branches, each to the
+ * instruction immediately after it.
+ */
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       sync
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ori     31,31,0
+       .rept 14
+       b       1f
+1:
+       .endr
+       blr
+
  TRAMP_REAL_BEGIN(rfi_flush_fallback)
         SET_SCRATCH0(r13);
         GET_PACA(r13);
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c

index bab5a27ea8056c8317340716d33ff084e08d3b2b..b98a722da9151bd41351de9448b21bfbc417cd6b 100644 (file)
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,7 @@
  #include <linux/device.h>
  #include <linux/seq_buf.h>
  
+#include <asm/debugfs.h>
  #include <asm/security_features.h>
  
  
@@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
  
         return s.len;
  }
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+/*
+ * early_param handler for "no_stf_barrier": record that the store-forwarding
+ * barrier must not be enabled by setup_stf_barrier().
+ *
+ * @p is unused; the other command-line handlers in this file call this
+ * function with NULL. Always returns 0.
+ */
+static int __init handle_no_stf_barrier(char *p)
+{
+       /* Terminate the message so it is not run together with the next one */
+       pr_info("stf-barrier: disabled on command line.\n");
+       no_stf_barrier = true;
+       return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/*
+ * early_param handler for "spec_store_bypass_disable=", the generic flag used
+ * by other architectures.
+ *
+ * No value, "auto" and "on" leave the default behaviour alone; "off" disables
+ * the barrier exactly as "no_stf_barrier" does. Every value is matched in
+ * full, so strings that merely start with "on" or "off" are rejected.
+ *
+ * Returns 0 if the value was recognised, 1 otherwise.
+ */
+static int __init handle_ssbd(char *p)
+{
+       /* Until firmware tells us, we have the barrier with auto */
+       if (!p || strcmp(p, "auto") == 0 || strcmp(p, "on") == 0)
+               return 0;
+
+       if (strcmp(p, "off") == 0) {
+               handle_no_stf_barrier(NULL);
+               return 0;
+       }
+
+       return 1;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/*
+ * early_param handler for "nospec_store_bypass_disable", the generic flag
+ * used by other architectures. It is handled the same way as
+ * "no_stf_barrier"; the argument is ignored and 0 is always returned.
+ */
+static int __init handle_no_ssbd(char *p)
+{
+       handle_no_stf_barrier(NULL);
+       return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+/*
+ * Patch the barrier sites with the detected barrier type, or with
+ * STF_BARRIER_NONE when disabling, then record the new state in stf_barrier.
+ */
+static void stf_barrier_enable(bool enable)
+{
+       enum stf_barrier_type wanted = STF_BARRIER_NONE;
+
+       if (enable)
+               wanted = stf_enabled_flush_types;
+
+       do_stf_barrier_fixups(wanted);
+       stf_barrier = enable;
+}
+
+/*
+ * Choose the store-forwarding barrier type for this CPU, remember it in
+ * stf_enabled_flush_types and, unless the barrier was disabled on the
+ * command line, enable or disable it according to the security features.
+ */
+void setup_stf_barrier(void)
+{
+       bool in_hv = cpu_has_feature(CPU_FTR_HVMODE);
+       enum stf_barrier_type chosen = STF_BARRIER_NONE;
+       bool want;
+
+       /* Pick the barrier from the newest ISA level the CPU reports */
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               chosen = STF_BARRIER_EIEIO;
+       else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+               chosen = STF_BARRIER_SYNC_ORI;
+       else if (cpu_has_feature(CPU_FTR_ARCH_206))
+               chosen = STF_BARRIER_FALLBACK;
+
+       /*
+        * The barrier is wanted only when security is favoured and an L1D
+        * flush is required on return to userspace, or on return from the
+        * hypervisor when running in hypervisor mode.
+        */
+       want = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY);
+       if (want)
+               want = security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+                      (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && in_hv);
+
+       switch (chosen) {
+       case STF_BARRIER_FALLBACK:
+               pr_info("stf-barrier: fallback barrier available\n");
+               break;
+       case STF_BARRIER_SYNC_ORI:
+               pr_info("stf-barrier: hwsync barrier available\n");
+               break;
+       case STF_BARRIER_EIEIO:
+               pr_info("stf-barrier: eieio barrier available\n");
+               break;
+       default:
+               break;
+       }
+
+       stf_enabled_flush_types = chosen;
+
+       if (no_stf_barrier)
+               return;
+
+       stf_barrier_enable(want);
+}
+
+/*
+ * Device-attribute show routine: format the store-bypass mitigation state
+ * into @buf and return the length written by sprintf().
+ *
+ * Reports the active barrier type when the barrier is enabled, "Not affected"
+ * when neither L1D flush security feature is set, and "Vulnerable" otherwise.
+ */
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       const char *name = "unknown";
+
+       if (!stf_barrier || stf_enabled_flush_types == STF_BARRIER_NONE) {
+               if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+                   security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+                       return sprintf(buf, "Vulnerable\n");
+
+               return sprintf(buf, "Not affected\n");
+       }
+
+       if (stf_enabled_flush_types == STF_BARRIER_EIEIO)
+               name = "eieio";
+       else if (stf_enabled_flush_types == STF_BARRIER_SYNC_ORI)
+               name = "hwsync";
+       else if (stf_enabled_flush_types == STF_BARRIER_FALLBACK)
+               name = "fallback";
+
+       return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", name);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler: 1 enables the barrier, 0 disables it, and any other
+ * value is rejected with -EINVAL. The patch sites are only rewritten when the
+ * requested state differs from the current one.
+ */
+static int stf_barrier_set(void *data, u64 val)
+{
+       bool want;
+
+       if (val > 1)
+               return -EINVAL;
+
+       want = (val == 1);
+
+       /* Nothing to do unless the state actually changes */
+       if (want == stf_barrier)
+               return 0;
+
+       stf_barrier_enable(want);
+       return 0;
+}
+
+/* debugfs read handler: report the barrier state as 1 (on) or 0 (off). */
+static int stf_barrier_get(void *data, u64 *val)
+{
+       /* A bool converts to exactly 0 or 1 */
+       *val = stf_barrier;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+/*
+ * Create the "stf_barrier" file (mode 0600) under powerpc_debugfs_root,
+ * backed by fops_stf_barrier. The result of debugfs_create_file() is not
+ * checked; 0 is always returned.
+ */
+static __init int stf_barrier_debugfs_init(void)
+{
+       debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+       return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S

index c8af90ff49f0526630ffb938c9c5d48cd0279933..b8d82678f8b41b04027df27642f15a01be7321e2 100644 (file)
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -133,6 +133,20 @@ SECTIONS
         RO_DATA(PAGE_SIZE)
  
  #ifdef CONFIG_PPC64
+       . = ALIGN(8);
+       __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+               __start___stf_entry_barrier_fixup = .;
+               *(__stf_entry_barrier_fixup)
+               __stop___stf_entry_barrier_fixup = .;
+       }
+
+       . = ALIGN(8);
+       __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+               __start___stf_exit_barrier_fixup = .;
+               *(__stf_exit_barrier_fixup)
+               __stop___stf_exit_barrier_fixup = .;
+       }
+
         . = ALIGN(8);
         __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
                 __start___rfi_flush_fixup = .;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c

index a57eafec4dc2eedb779182e5a12dd1bd6c0bbcd9..361f42c8c73e02f95a4617b967174f37b774504c 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
         if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
                 asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
                              : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-       asm volatile("ptesync": : :"memory");
+       asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
  }
  
  static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
         /* RIC=1 PRS=0 R=1 IS=2 */
         asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
                      : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-       asm volatile("ptesync": : :"memory");
+       asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
  }
  
  unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
  
         ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
         if (ptep && pte_present(*ptep)) {
-               old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+               old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
                                               gpa, shift);
                 kvmppc_radix_tlbie_page(kvm, gpa, shift);
                 if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 4d07fca5121c56270aafd6f820009561d399d628..9963f65c212b8cad5a348830651df52b2b7e0c0f 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
         vc->in_guest = 0;
         vc->napping_threads = 0;
         vc->conferring_threads = 0;
+       vc->tb_offset_applied = 0;
  }
  
  static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index bd63fa8a08b5dd3edd0800ac34e483a8501f401c..07ca1b2a7966b5f58188b8b0b685eb581cc2b695 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  22:    ld      r8,VCORE_TB_OFFSET(r5)
         cmpdi   r8,0
         beq     37f
+       std     r8, VCORE_TB_OFFSET_APPL(r5)
         mftb    r6              /* current host timebase */
         add     r8,r8,r6
         mtspr   SPRN_TBU40,r8   /* update upper 40 bits */
@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
  8:
  
-       /*
-        * Set the decrementer to the guest decrementer.
-        */
-       ld      r8,VCPU_DEC_EXPIRES(r4)
-       /* r8 is a host timebase value here, convert to guest TB */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r6,VCORE_TB_OFFSET(r5)
-       add     r8,r8,r6
-       mftb    r7
-       subf    r3,r7,r8
-       mtspr   SPRN_DEC,r3
-
         ld      r5, VCPU_SPRG0(r4)
         ld      r6, VCPU_SPRG1(r4)
         ld      r7, VCPU_SPRG2(r4)
@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
         mtspr   SPRN_LPCR,r8
         isync
  
+       /*
+        * Set the decrementer to the guest decrementer.
+        */
+       ld      r8,VCPU_DEC_EXPIRES(r4)
+       /* r8 is a host timebase value here, convert to guest TB */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r6,VCORE_TB_OFFSET_APPL(r5)
+       add     r8,r8,r6
+       mftb    r7
+       subf    r3,r7,r8
+       mtspr   SPRN_DEC,r3
+
         /* Check if HDEC expires soon */
         mfspr   r3, SPRN_HDEC
         EXTEND_HDEC(r3)
@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
  
  guest_bypass:
         stw     r12, STACK_SLOT_TRAP(r1)
-       mr      r3, r12
+
+       /* Save DEC */
+       /* Do this before kvmhv_commence_exit so we know TB is guest TB */
+       ld      r3, HSTATE_KVM_VCORE(r13)
+       mfspr   r5,SPRN_DEC
+       mftb    r6
+       /* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+       ld      r4, VCORE_LPCR(r3)
+       andis.  r4, r4, LPCR_LD@h
+       bne     16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+       extsw   r5,r5
+16:    add     r5,r5,r6
+       /* r5 is a guest timebase value here, convert to host TB */
+       ld      r4,VCORE_TB_OFFSET_APPL(r3)
+       subf    r5,r4,r5
+       std     r5,VCPU_DEC_EXPIRES(r9)
+
         /* Increment exit count, poke other threads to exit */
+       mr      r3, r12
         bl      kvmhv_commence_exit
         nop
         ld      r9, HSTATE_KVM_VCPU(r13)
@@ -1639,23 +1659,6 @@ guest_bypass:
         mtspr   SPRN_PURR,r3
         mtspr   SPRN_SPURR,r4
  
-       /* Save DEC */
-       ld      r3, HSTATE_KVM_VCORE(r13)
-       mfspr   r5,SPRN_DEC
-       mftb    r6
-       /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
-       ld      r4, VCORE_LPCR(r3)
-       andis.  r4, r4, LPCR_LD@h
-       bne     16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-       extsw   r5,r5
-16:    add     r5,r5,r6
-       /* r5 is a guest timebase value here, convert to host TB */
-       ld      r4,VCORE_TB_OFFSET(r3)
-       subf    r5,r4,r5
-       std     r5,VCPU_DEC_EXPIRES(r9)
-
  BEGIN_FTR_SECTION
         b       8f
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         cmpwi   cr2, r0, 0
         beq     cr2, 4f
  
+       /*
+        * Radix: do eieio; tlbsync; ptesync sequence in case we
+        * interrupted the guest between a tlbie and a ptesync.
+        */
+       eieio
+       tlbsync
+       ptesync
+
         /* Radix: Handle the case where the guest used an illegal PID */
         LOAD_REG_ADDR(r4, mmu_base_pid)
         lwz     r3, VCPU_GUEST_PID(r9)
@@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
  27:
         /* Subtract timebase offset from timebase */
-       ld      r8,VCORE_TB_OFFSET(r5)
+       ld      r8, VCORE_TB_OFFSET_APPL(r5)
         cmpdi   r8,0
         beq     17f
+       li      r0, 0
+       std     r0, VCORE_TB_OFFSET_APPL(r5)
         mftb    r6                      /* current guest timebase */
         subf    r8,r8,r6
         mtspr   SPRN_TBU40,r8           /* update upper 40 bits */
@@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         add     r3, r3, r5
         ld      r4, HSTATE_KVM_VCPU(r13)
         ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET(r5)
+       ld      r6, VCORE_TB_OFFSET_APPL(r5)
         subf    r3, r6, r3      /* convert to host TB value */
         std     r3, VCPU_DEC_EXPIRES(r4)
  
@@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
         /* Restore guest decrementer */
         ld      r3, VCPU_DEC_EXPIRES(r4)
         ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET(r5)
+       ld      r6, VCORE_TB_OFFSET_APPL(r5)
         add     r3, r3, r6      /* convert host TB to guest TB value */
         mftb    r7
         subf    r3, r7, r3
@@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
   */
  kvmhv_start_timing:
         ld      r5, HSTATE_KVM_VCORE(r13)
-       lbz     r6, VCORE_IN_GUEST(r5)
-       cmpwi   r6, 0
-       beq     5f                              /* if in guest, need to */
-       ld      r6, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
-5:     mftb    r5
-       subf    r5, r6, r5
+       ld      r6, VCORE_TB_OFFSET_APPL(r5)
+       mftb    r5
+       subf    r5, r6, r5      /* subtract current timebase offset */
         std     r3, VCPU_CUR_ACTIVITY(r4)
         std     r5, VCPU_ACTIVITY_START(r4)
         blr
@@ -3622,15 +3632,12 @@ kvmhv_start_timing:
   */
  kvmhv_accumulate_time:
         ld      r5, HSTATE_KVM_VCORE(r13)
-       lbz     r8, VCORE_IN_GUEST(r5)
-       cmpwi   r8, 0
-       beq     4f                              /* if in guest, need to */
-       ld      r8, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
-4:     ld      r5, VCPU_CUR_ACTIVITY(r4)
+       ld      r8, VCORE_TB_OFFSET_APPL(r5)
+       ld      r5, VCPU_CUR_ACTIVITY(r4)
         ld      r6, VCPU_ACTIVITY_START(r4)
         std     r3, VCPU_CUR_ACTIVITY(r4)
         mftb    r7
-       subf    r7, r8, r7
+       subf    r7, r8, r7      /* subtract current timebase offset */
         std     r7, VCPU_ACTIVITY_START(r4)
         cmpdi   r5, 0
         beqlr
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c

index c7a5deadd1cc782ddd45667c9fc29f96166e85e7..99c3620b40d95b91481a5485ab6e2649ea40c37c 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
  #define XGLUE(a,b) a##b
  #define GLUE(a,b) XGLUE(a,b)
  
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY     1
+
  static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
  {
         u8 cppr;
@@ -205,6 +208,10 @@ skip_ipi:
                                 goto skip_ipi;
                 }
  
+               /* If it's the dummy interrupt, continue searching */
+               if (hirq == XICS_DUMMY)
+                       goto skip_ipi;
+
                 /* If fetching, update queue pointers */
                 if (scan_type == scan_fetch) {
                         q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
         __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
  }
  
+/*
+ * Walk the queues of priority xc->cppr and above looking for interrupts
+ * whose source is no longer targeted at this vcpu (state->act_server differs
+ * from xc->server_num). Each such queue entry is overwritten with XICS_DUMMY,
+ * keeping its top bit, and the source is EOI'd; non-LSI sources first have
+ * PQ set to 11.
+ *
+ * The walk uses local copies of the queue index and toggle, so q->idx and
+ * q->toggle are not modified here.
+ */
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+                                              struct kvmppc_xive_vcpu *xc)
+{
+       unsigned int prio;
+
+       /* For each priority that is now masked */
+       for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+               struct xive_q *q = &xc->queues[prio];
+               struct kvmppc_xive_irq_state *state;
+               struct kvmppc_xive_src_block *sb;
+               u32 idx, toggle, entry, irq, hw_num;
+               struct xive_irq_data *xd;
+               __be32 *qpage;
+               u16 src;
+
+               idx = q->idx;
+               toggle = q->toggle;
+               qpage = READ_ONCE(q->qpage);
+               if (!qpage)
+                       continue;
+
+               /* For each interrupt in the queue */
+               for (;;) {
+                       entry = be32_to_cpup(qpage + idx);
+
+                       /* No more ? (top bit equal to the toggle ends the scan) */
+                       if ((entry >> 31) == toggle)
+                               break;
+                       irq = entry & 0x7fffffff;
+
+                       /* Skip dummies and IPIs */
+                       if (irq == XICS_DUMMY || irq == XICS_IPI)
+                               goto next;
+                       sb = kvmppc_xive_find_source(xive, irq, &src);
+                       if (!sb)
+                               goto next;
+                       state = &sb->irq_state[src];
+
+                       /* Has it been rerouted ? */
+                       if (xc->server_num == state->act_server)
+                               goto next;
+
+                       /*
+                        * All right, it *has* been re-routed, kill it from
+                        * the queue.
+                        */
+                       qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+                       /* Find the HW interrupt */
+                       kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+                       /* If it's not an LSI, set PQ to 11; the EOI will force a resend */
+                       if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+                               GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+                       /* EOI the source */
+                       GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+               next:
+                       /* Advance, flipping the expected toggle on wrap-around */
+                       idx = (idx + 1) & q->msk;
+                       if (idx == 0)
+                               toggle ^= 1;
+               }
+       }
+}
+
  X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
  {
         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
         u8 old_cppr;
  
         pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
          */
         smp_mb();
  
-       /*
-        * We are masking less, we need to look for pending things
-        * to deliver and set VP pending bits accordingly to trigger
-        * a new interrupt otherwise we might miss MFRR changes for
-        * which we have optimized out sending an IPI signal.
-        */
-       if (cppr > old_cppr)
+       if (cppr > old_cppr) {
+               /*
+                * We are masking less, we need to look for pending things
+                * to deliver and set VP pending bits accordingly to trigger
+                * a new interrupt otherwise we might miss MFRR changes for
+                * which we have optimized out sending an IPI signal.
+                */
                 GLUE(X_PFX,push_pending_to_hw)(xc);
+       } else {
+               /*
+                * We are masking more, we need to check the queue for any
+                * interrupt that has been routed to another CPU, take
+                * it out (replace it with the dummy) and retrigger it.
+                *
+                * This is necessary since those interrupts may otherwise
+                * never be processed, at least not until this CPU restores
+                * its CPPR.
+                *
+                * This is in theory racy vs. HW adding new interrupts to
+                * the queue. In practice this works because the interesting
+                * cases are when the guest has done a set_xive() to move the
+                * interrupt away, which flushes the xive, followed by the
+                * target CPU doing a H_CPPR. So any new interrupt coming into
+                * the queue must still be routed to us and isn't a source
+                * of concern.
+                */
+               GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+       }
  
         /* Apply new CPPR */
         xc->hw_cppr = cppr;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c

index 288fe4f0db4ea326321915e1904da3771380978d..e1bcdc32a851cf6439e9cba8aa47d7cb2bf31475 100644 (file)
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
  #include <asm/page.h>
  #include <asm/sections.h>
  #include <asm/setup.h>
+#include <asm/security_features.h>
  #include <asm/firmware.h>
  
  struct fixup_entry {
@@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
  }
  
  #ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Patch every site recorded in the __stf_entry_barrier_fixup section with the
+ * three-instruction entry barrier selected by @types. Slots the selected
+ * sequence does not use are left as nops, so STF_BARRIER_NONE removes the
+ * barrier.
+ */
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i, patched;
+
+       start = PTRRELOC(&__start___stf_entry_barrier_fixup);
+       end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       i = 0;
+       if (types & STF_BARRIER_FALLBACK) {
+               instrs[i++] = 0x7d4802a6; /* mflr r10           */
+               instrs[i++] = 0x60000000; /* branch patched below */
+               instrs[i++] = 0x7d4803a6; /* mtlr r10           */
+       } else if (types & STF_BARRIER_EIEIO) {
+               instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+       } else if (types & STF_BARRIER_SYNC_ORI) {
+               instrs[i++] = 0x7c0004ac; /* hwsync             */
+               instrs[i++] = 0xe94d0000; /* ld r10,0(r13)      */
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+       }
+
+       for (patched = 0; start < end; start++, patched++) {
+               /* Each table entry holds the site's offset from the entry itself */
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+
+               /* The fallback's middle slot is a bl whose target depends on dest */
+               if (types & STF_BARRIER_FALLBACK)
+                       patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+                                    BRANCH_SET_LINK);
+               else
+                       patch_instruction(dest + 1, instrs[1]);
+
+               patch_instruction(dest + 2, instrs[2]);
+       }
+
+       printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", patched,
+               (types == STF_BARRIER_NONE)                  ? "no" :
+               (types == STF_BARRIER_FALLBACK)              ? "fallback" :
+               (types == STF_BARRIER_EIEIO)                 ? "eieio" :
+               (types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+                                                          : "unknown");
+}
+
+/*
+ * Patch every site recorded in the __stf_exit_barrier_fixup section with the
+ * six-instruction exit barrier selected by @types. Slots the selected
+ * sequence does not use are left as nops, so STF_BARRIER_NONE removes the
+ * barrier.
+ */
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+       unsigned int instrs[6], *dest;
+       long *start, *end;
+       int i, patched;
+
+       start = PTRRELOC(&__start___stf_exit_barrier_fixup);
+       end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+       instrs[3] = 0x60000000; /* nop */
+       instrs[4] = 0x60000000; /* nop */
+       instrs[5] = 0x60000000; /* nop */
+
+       i = 0;
+       if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+               /* Stash r13 in a scratch SPR and load the other SPRG into it */
+               if (cpu_has_feature(CPU_FTR_HVMODE)) {
+                       instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+                       instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+               } else {
+                       instrs[i++] = 0x7db243a6; /* mtsprg 2,r13       */
+                       instrs[i++] = 0x7db142a6; /* mfsprg r13,1    */
+               }
+               instrs[i++] = 0x7c0004ac; /* hwsync             */
+               instrs[i++] = 0xe9ad0000; /* ld r13,0(r13)      */
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               /* Restore r13 from the scratch SPR */
+               if (cpu_has_feature(CPU_FTR_HVMODE)) {
+                       instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+               } else {
+                       instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+               }
+       } else if (types & STF_BARRIER_EIEIO) {
+               instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+       }
+
+       for (patched = 0; start < end; start++, patched++) {
+               /* Each table entry holds the site's offset from the entry itself */
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+               patch_instruction(dest + 1, instrs[1]);
+               patch_instruction(dest + 2, instrs[2]);
+               patch_instruction(dest + 3, instrs[3]);
+               patch_instruction(dest + 4, instrs[4]);
+               patch_instruction(dest + 5, instrs[5]);
+       }
+       printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", patched,
+               (types == STF_BARRIER_NONE)                  ? "no" :
+               (types == STF_BARRIER_FALLBACK)              ? "fallback" :
+               (types == STF_BARRIER_EIEIO)                 ? "eieio" :
+               (types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+                                                          : "unknown");
+}
+
+
+/*
+ * Apply the store-forwarding barrier selected by @types to all recorded
+ * sites: the entry sites first, then the exit sites.
+ */
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+       do_stf_entry_barrier_fixups(types);
+       do_stf_exit_barrier_fixups(types);
+}
+
  void do_rfi_flush_fixups(enum l1d_flush_type types)
  {
         unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c

index 1bceb95f422d0f828017128580695c0d4c87ba47..5584247f502929de6e13df0ca5127a1ab92cdbe8 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
         return count;
  }
  
+/*
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
  static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
  {
         s64 rc = OPAL_BUSY;
@@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
         while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                 rc = opal_write_nvram(__pa(buf), count, off);
                 if (rc == OPAL_BUSY_EVENT) {
-                       msleep(OPAL_BUSY_DELAY_MS);
+                       if (in_interrupt() || irqs_disabled())
+                               mdelay(OPAL_BUSY_DELAY_MS);
+                       else
+                               msleep(OPAL_BUSY_DELAY_MS);
                         opal_poll_events(NULL);
                 } else if (rc == OPAL_BUSY) {
-                       msleep(OPAL_BUSY_DELAY_MS);
+                       if (in_interrupt() || irqs_disabled())
+                               mdelay(OPAL_BUSY_DELAY_MS);
+                       else
+                               msleep(OPAL_BUSY_DELAY_MS);
                 }
         }
  
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c

index ef8c9ce53a616910d264f4875b9ec72311a110a8..a6648ec99ca76ce30e9e34cb70e0bde63fe93fbc 100644 (file)
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void)
         set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
  
         pnv_setup_rfi_flush();
+       setup_stf_barrier();
  
         /* Initialize SMP */
         pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c

index b55ad4286dc7f81f0c9d2e7d130860757773c3b3..fdb32e056ef42a66553387f127e20d19c2e7a666 100644 (file)
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void)
         fwnmi_init();
  
         pseries_setup_rfi_flush();
+       setup_stf_barrier();
  
         /* By default, only probe PCI (can be overridden by rtas_pci) */
         pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig

index 6176fe9795caf995daeb986948ee2c1375d8ee10..941d8cc6c9f5990ffe5aa39aa16bd79ff70ed9fc 100644 (file)
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m
  CONFIG_IP_VS_FTP=m
  CONFIG_IP_VS_PE_SIP=m
  CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
  CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
  CONFIG_NFT_CHAIN_NAT_IPV4=m
  CONFIG_IP_NF_IPTABLES=m
  CONFIG_IP_NF_MATCH_AH=m
@@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m
  CONFIG_IP_NF_ARPFILTER=m
  CONFIG_IP_NF_ARP_MANGLE=m
  CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
  CONFIG_NFT_CHAIN_ROUTE_IPV6=m
  CONFIG_NFT_CHAIN_NAT_IPV6=m
  CONFIG_IP6_NF_IPTABLES=m
@@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m
  CONFIG_IP6_NF_SECURITY=m
  CONFIG_IP6_NF_NAT=m
  CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
  CONFIG_RDS=m
  CONFIG_RDS_RDMA=m
  CONFIG_RDS_TCP=m
@@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y
  CONFIG_WQ_WATCHDOG=y
  CONFIG_PANIC_ON_OOPS=y
  CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
  CONFIG_PROVE_LOCKING=y
  CONFIG_LOCK_STAT=y
  CONFIG_DEBUG_LOCKDEP=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig

index c105bcc6d7a6fc0f932ba7537f85fe6e1a171b1a..eb6f75f242089b6f67115bbbd74c7e8a4f2fbeac 100644 (file)
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m
  CONFIG_IP_VS_FTP=m
  CONFIG_IP_VS_PE_SIP=m
  CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
  CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
  CONFIG_NFT_CHAIN_NAT_IPV4=m
  CONFIG_IP_NF_IPTABLES=m
  CONFIG_IP_NF_MATCH_AH=m
@@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m
  CONFIG_IP_NF_ARPFILTER=m
  CONFIG_IP_NF_ARP_MANGLE=m
  CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
  CONFIG_NFT_CHAIN_ROUTE_IPV6=m
  CONFIG_NFT_CHAIN_NAT_IPV6=m
  CONFIG_IP6_NF_IPTABLES=m
@@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m
  CONFIG_IP6_NF_SECURITY=m
  CONFIG_IP6_NF_NAT=m
  CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
  CONFIG_RDS=m
  CONFIG_RDS_RDMA=m
  CONFIG_RDS_TCP=m
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S

index e8077f0971f89f1695d5b5637d9c89126cf1e9cc..2bf01ba44107cd678e1fd38b31e5be18080f80b0 100644 (file)
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -13,6 +13,7 @@
   */
  
  #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
  #include <asm/vx-insn.h>
  
  /* Vector register range containing CRC-32 constants */
@@ -67,6 +68,8 @@
  
  .previous
  
+       GEN_BR_THUNK %r14
+
  .text
  /*
   * The CRC-32 function(s) use these calling conventions:
@@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16)
  
  .Ldone:
         VLGVF   %r2,%v2,3
-       br      %r14
+       BR_EX   %r14
  
  .previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S

index d8c67a58c0c53b620c4a8f1837ce9bf3c0db5207..7d6f568bd3ad1fe19586e7597ae127b519c7709f 100644 (file)
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -14,6 +14,7 @@
   */
  
  #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
  #include <asm/vx-insn.h>
  
  /* Vector register range containing CRC-32 constants */
@@ -76,6 +77,7 @@
  
  .previous
  
+       GEN_BR_THUNK %r14
  
  .text
  
@@ -264,6 +266,6 @@ crc32_le_vgfm_generic:
  
  .Ldone:
         VLGVF   %r2,%v2,2
-       br      %r14
+       BR_EX   %r14
  
  .previous
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h

new file mode 100644 (file)

index 0000000..a01f811
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_EXPOLINE
+
+_LC_BR_R1 = __LC_BR_R1
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+       .macro __THUNK_PROLOG_NAME name /* open the definition of the thunk function called name */
+       .pushsection .text.\name,"axG",@progbits,\name,comdat /* per-thunk comdat section so identical thunks are merged */
+       .globl \name
+       .hidden \name /* global symbol with hidden visibility */
+       .type \name,@function
+\name:
+       CFI_STARTPROC /* matched by CFI_ENDPROC in __THUNK_EPILOG */
+       .endm
+
+       .macro __THUNK_EPILOG /* close a thunk opened through __THUNK_PROLOG_NAME */
+       CFI_ENDPROC
+       .popsection /* undo the .pushsection done by the prolog */
+       .endm
+
+       .macro __THUNK_PROLOG_BR r1,r2 /* r1: branch target register number, r2: scratch register number */
+       __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 /* r1=14, r2=1 yields __s390x_indirect_jump_r1use_r14 */
+       .endm
+
+       .macro __THUNK_PROLOG_BC d0,r1,r2 /* d0: displacement, r1: base register number, r2: scratch register number */
+       __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 /* thunk name encodes displacement, scratch and base */
+       .endm
+
+       .macro __THUNK_BR r1,r2 /* r1: branch target register number, r2: scratch register number */
+       jg      __s390x_indirect_jump_r\r2\()use_r\r1 /* jump to the thunk named by __THUNK_PROLOG_BR for the same pair */
+       .endm
+
+       .macro __THUNK_BC d0,r1,r2 /* d0: displacement, r1: base register number, r2: scratch register number */
+       jg      __s390x_indirect_branch_\d0\()_\r2\()use_\r1 /* jump to the thunk named by __THUNK_PROLOG_BC for the same triple */
+       .endm
+
+       .macro __THUNK_BRASL r1,r2,r3 /* r1: link register number, r2: call target register number, r3: scratch register number */
+       brasl   \r1,__s390x_indirect_jump_r\r3\()use_r\r2 /* call through the jump thunk; the return address lands in register r1 */
+       .endm
+
+       .macro  __DECODE_RR expand,reg,ruse /* turn two %rN operands into bare numbers and pass them to the macro named by expand */
+       .set __decode_fail,1 /* assume failure until a register pair matches */
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \reg,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r2
+       \expand \r1,\r2
+       .set __decode_fail,0 /* both operands were recognised */
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1 /* an operand was not written as %r0 .. %r15 */
+       .error "__DECODE_RR failed"
+       .endif
+       .endm
+
+       .macro  __DECODE_RRR expand,rsave,rtarget,ruse /* three-register variant: pass the bare numbers of rsave, rtarget, ruse to expand */
+       .set __decode_fail,1 /* assume failure until all three registers match */
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \rsave,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \rtarget,%r\r2
+       .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r3
+       \expand \r1,\r2,\r3
+       .set __decode_fail,0 /* all three operands were recognised */
+       .endif
+       .endr
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1 /* an operand was not written as %r0 .. %r15 */
+       .error "__DECODE_RRR failed"
+       .endif
+       .endm
+
+       .macro  __DECODE_DRR expand,disp,reg,ruse /* like __DECODE_RR, but hands disp through unchanged as the first argument */
+       .set __decode_fail,1 /* assume failure until a register pair matches */
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \reg,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r2
+       \expand \disp,\r1,\r2
+       .set __decode_fail,0 /* both register operands were recognised */
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1 /* a register operand was not written as %r0 .. %r15 */
+       .error "__DECODE_DRR failed"
+       .endif
+       .endm
+
+       .macro __THUNK_EX_BR reg,ruse /* thunk body: run the br at label 555 as the target of an execute instruction */
+       # Be very careful when adding instructions to this macro!
+       # The ALTERNATIVE replacement code has a .+10 which targets
+       # the "br \reg" after the code has been patched.
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+       exrl    0,555f /* execute the br at 555; no scratch register is needed on this path */
+       j       . /* branch to self; the executed br has already left, so this is not reached in normal flow */
+#else
+       .ifc \reg,%r1
+       ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 /* default executes the br_r1_trampoline slot in lowcore; NOTE(review): replacement looks like a hand-encoded exrl of 555, selected by facility 35 - confirm */
+       j       .
+       .else
+       larl    \ruse,555f /* clobbers the scratch register with the address of 555 */
+       ex      0,0(\ruse) /* execute the br at 555 through the scratch register */
+       j       .
+       .endif
+#endif
+555:   br      \reg /* the real indirect branch; the j . above keeps control from falling into it */
+       .endm
+
+       .macro __THUNK_EX_BC disp,reg,ruse /* thunk body: run the b at label 556 as the target of an execute instruction */
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+       exrl    0,556f /* execute the b at 556; no scratch register is needed on this path */
+       j       . /* branch to self; the executed b has already left, so this is not reached in normal flow */
+#else
+       larl    \ruse,556f /* clobbers the scratch register with the address of 556 */
+       ex      0,0(\ruse) /* execute the b at 556 through the scratch register */
+       j       .
+#endif
+556:   b       \disp(\reg) /* the real indirect branch; the j . above keeps control from falling into it */
+       .endm
+
+       .macro GEN_BR_THUNK reg,ruse=%r1 /* emit the jump thunk for target reg with scratch ruse (default %r1) */
+       __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse /* section, symbol and CFI start, named after both registers */
+       __THUNK_EX_BR \reg,\ruse /* the execute + br sequence */
+       __THUNK_EPILOG
+       .endm
+
+       .macro GEN_B_THUNK disp,reg,ruse=%r1 /* emit the branch thunk for disp(reg) with scratch ruse (default %r1) */
+       __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse /* section, symbol and CFI start, named after disp and both registers */
+       __THUNK_EX_BC \disp,\reg,\ruse /* the execute + b sequence */
+       __THUNK_EPILOG
+       .endm
+
+       .macro BR_EX reg,ruse=%r1 /* call-site form of br reg: jump to the matching thunk, which the file must emit with GEN_BR_THUNK */
+557:   __DECODE_RR __THUNK_BR,\reg,\ruse /* label 557 marks the jg emitted here */
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   557b-. /* relative offset back to the jg; NOTE(review): presumably the table __nospec_revert walks - confirm */
+       .popsection
+       .endm
+
+        .macro B_EX disp,reg,ruse=%r1 /* call-site form of b disp(reg): jump to the matching thunk, which the file must emit with GEN_B_THUNK */
+558:   __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse /* label 558 marks the jg emitted here */
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   558b-. /* relative offset back to the jg, recorded the same way as in BR_EX */
+       .popsection
+       .endm
+
+       .macro BASR_EX rsave,rtarget,ruse=%r1 /* call-site form of basr rsave,rtarget: brasl to the jump thunk for rtarget */
+559:   __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse /* label 559 marks the brasl emitted here */
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   559b-. /* relative offset back to the brasl, recorded the same way as in BR_EX */
+       .popsection
+       .endm
+
+#else
+       .macro GEN_BR_THUNK reg,ruse=%r1 /* CONFIG_EXPOLINE off: no thunk is generated */
+       .endm
+
+       .macro GEN_B_THUNK disp,reg,ruse=%r1 /* CONFIG_EXPOLINE off: no thunk is generated */
+       .endm
+
+        .macro BR_EX reg,ruse=%r1 /* CONFIG_EXPOLINE off: plain indirect branch, ruse is ignored */
+       br      \reg
+       .endm
+
+        .macro B_EX disp,reg,ruse=%r1 /* CONFIG_EXPOLINE off: plain branch to disp(reg), ruse is ignored */
+       b       \disp(\reg)
+       .endm
+
+       .macro BASR_EX rsave,rtarget,ruse=%r1 /* CONFIG_EXPOLINE off: plain basr, ruse is ignored */
+       basr    \rsave,\rtarget
+       .endm
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h

index e297bcfc476f65d0c41e591b9df9f43754b03aa7..6090670df51fcee21b2523fc388ba93ca5510cb0 100644 (file)
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -13,5 +13,11 @@
  
  int verify_sha256_digest(void);
  
+extern u64 kernel_entry;
+extern u64 kernel_type;
+
+extern u64 crash_start;
+extern u64 crash_size;
+
  #endif /* __ASSEMBLY__ */
  #endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile

index 84ea6225efb4be999ca6c92343d69f0b138447d6..f92dd8ed3884ae4dbd4bc2f47d30248e794241e1 100644 (file)
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -65,6 +65,7 @@ obj-y += nospec-branch.o
  
  extra-y                                += head.o head64.o vmlinux.lds
  
+obj-$(CONFIG_SYSFS)            += nospec-sysfs.o
  CFLAGS_REMOVE_nospec-branch.o  += $(CC_FLAGS_EXPOLINE)
  
  obj-$(CONFIG_MODULES)          += module.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c

index eb2a5c0443cd9c4fb6e0cfd4aa71ebdf14e163fa..11aea745a2a6ebce7ea29a894b822d3bcc560c7b 100644 (file)
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -181,6 +181,7 @@ int main(void)
         OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
         OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
         OFFSET(__LC_GMAP, lowcore, gmap);
+       OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
         /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
         OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
         /* hardware defined lowcore locations 0x1000 - 0x18ff */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S

index f6c56009e822473d701cecbc896d3693b080d6c2..b65874b0b412e40ea1baea814fb1169d04f02104 100644 (file)
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -9,18 +9,22 @@
  
  #include <linux/linkage.h>
  #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
  #include <asm/ptrace.h>
  #include <asm/sigp.h>
  
+       GEN_BR_THUNK %r9
+       GEN_BR_THUNK %r14
+
  ENTRY(s390_base_mcck_handler)
         basr    %r13,0
  0:     lg      %r15,__LC_PANIC_STACK   # load panic stack
         aghi    %r15,-STACK_FRAME_OVERHEAD
         larl    %r1,s390_base_mcck_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
         jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
  1:     la      %r1,4095
         lmg     %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
         lpswe   __LC_MCK_OLD_PSW
@@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler)
         basr    %r13,0
  0:     aghi    %r15,-STACK_FRAME_OVERHEAD
         larl    %r1,s390_base_ext_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
         jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
  1:     lmg     %r0,%r15,__LC_SAVE_AREA_ASYNC
         ni      __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
         lpswe   __LC_EXT_OLD_PSW
@@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler)
         basr    %r13,0
  0:     aghi    %r15,-STACK_FRAME_OVERHEAD
         larl    %r1,s390_base_pgm_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
         jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
         lmg     %r0,%r15,__LC_SAVE_AREA_SYNC
         lpswe   __LC_PGM_OLD_PSW
  1:     lpswe   disabled_wait_psw-0b(%r13)
@@ -117,7 +121,7 @@ ENTRY(diag308_reset)
         larl    %r4,.Lcontinue_psw      # Restore PSW flags
         lpswe   0(%r4)
  .Lcontinue:
-       br      %r14
+       BR_EX   %r14
  .align 16
  .Lrestart_psw:
         .long   0x00080000,0x80000000 + .Lrestart_part2
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S

index 3f22f139a0413f6880118998bbbb48f280bdad5f..f03402efab4b414eefdfd59135f4ee89dda68e8a 100644 (file)
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -28,6 +28,7 @@
  #include <asm/setup.h>
  #include <asm/nmi.h>
  #include <asm/export.h>
+#include <asm/nospec-insn.h>
  
  __PT_R0      = __PT_GPRS
  __PT_R1      = __PT_GPRS + 8
@@ -183,67 +184,9 @@ _LPP_OFFSET        = __LC_LPP
                     "jnz .+8; .long 0xb2e8d000", 82
         .endm
  
-#ifdef CONFIG_EXPOLINE
-
-       .macro GEN_BR_THUNK name,reg,tmp
-       .section .text.\name,"axG",@progbits,\name,comdat
-       .globl \name
-       .hidden \name
-       .type \name,@function
-\name:
-       CFI_STARTPROC
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-       exrl    0,0f
-#else
-       larl    \tmp,0f
-       ex      0,0(\tmp)
-#endif
-       j       .
-0:     br      \reg
-       CFI_ENDPROC
-       .endm
-
-       GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
-       GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
-       GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
-
-       .macro BASR_R14_R9
-0:     brasl   %r14,__s390x_indirect_jump_r1use_r9
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-       .macro BR_R1USE_R14
-0:     jg      __s390x_indirect_jump_r1use_r14
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-       .macro BR_R11USE_R14
-0:     jg      __s390x_indirect_jump_r11use_r14
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-#else  /* CONFIG_EXPOLINE */
-
-       .macro BASR_R14_R9
-       basr    %r14,%r9
-       .endm
-
-       .macro BR_R1USE_R14
-       br      %r14
-       .endm
-
-       .macro BR_R11USE_R14
-       br      %r14
-       .endm
-
-#endif /* CONFIG_EXPOLINE */
-
+       GEN_BR_THUNK %r9
+       GEN_BR_THUNK %r14
+       GEN_BR_THUNK %r14,%r11
  
         .section .kprobes.text, "ax"
  .Ldummy:
@@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP
  ENTRY(__bpon)
         .globl __bpon
         BPON
-       BR_R1USE_R14
+       BR_EX   %r14
  
  /*
   * Scheduler resume function, called by switch_to
@@ -284,7 +227,7 @@ ENTRY(__switch_to)
         mvc     __LC_CURRENT_PID(4,%r0),0(%r3)  # store pid of next
         lmg     %r6,%r15,__SF_GPRS(%r15)        # load gprs of next task
         ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
-       BR_R1USE_R14
+       BR_EX   %r14
  
  .L__critical_start:
  
@@ -351,7 +294,7 @@ sie_exit:
         xgr     %r5,%r5
         lmg     %r6,%r14,__SF_GPRS(%r15)        # restore kernel registers
         lg      %r2,__SF_SIE_REASON(%r15)       # return exit reason code
-       BR_R1USE_R14
+       BR_EX   %r14
  .Lsie_fault:
         lghi    %r14,-EFAULT
         stg     %r14,__SF_SIE_REASON(%r15)      # set exit reason code
@@ -410,7 +353,7 @@ ENTRY(system_call)
         lgf     %r9,0(%r8,%r10)                 # get system call add.
         TSTMSK  __TI_flags(%r12),_TIF_TRACE
         jnz     .Lsysc_tracesys
-       BASR_R14_R9                             # call sys_xxxx
+       BASR_EX %r14,%r9                        # call sys_xxxx
         stg     %r2,__PT_R2(%r11)               # store return value
  
  .Lsysc_return:
@@ -595,7 +538,7 @@ ENTRY(system_call)
         lmg     %r3,%r7,__PT_R3(%r11)
         stg     %r7,STACK_FRAME_OVERHEAD(%r15)
         lg      %r2,__PT_ORIG_GPR2(%r11)
-       BASR_R14_R9                     # call sys_xxx
+       BASR_EX %r14,%r9                # call sys_xxx
         stg     %r2,__PT_R2(%r11)       # store return value
  .Lsysc_tracenogo:
         TSTMSK  __TI_flags(%r12),_TIF_TRACE
@@ -619,7 +562,7 @@ ENTRY(ret_from_fork)
         lmg     %r9,%r10,__PT_R9(%r11)  # load gprs
  ENTRY(kernel_thread_starter)
         la      %r2,0(%r10)
-       BASR_R14_R9
+       BASR_EX %r14,%r9
         j       .Lsysc_tracenogo
  
  /*
@@ -701,7 +644,7 @@ ENTRY(pgm_check_handler)
         je      .Lpgm_return
         lgf     %r9,0(%r10,%r1)         # load address of handler routine
         lgr     %r2,%r11                # pass pointer to pt_regs
-       BASR_R14_R9                     # branch to interrupt-handler
+       BASR_EX %r14,%r9                # branch to interrupt-handler
  .Lpgm_return:
         LOCKDEP_SYS_EXIT
         tm      __PT_PSW+1(%r11),0x01   # returning to user ?
@@ -1019,7 +962,7 @@ ENTRY(psw_idle)
         stpt    __TIMER_IDLE_ENTER(%r2)
  .Lpsw_idle_lpsw:
         lpswe   __SF_EMPTY(%r15)
-       BR_R1USE_R14
+       BR_EX   %r14
  .Lpsw_idle_end:
  
  /*
@@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs)
  .Lsave_fpu_regs_done:
         oi      __LC_CPU_FLAGS+7,_CIF_FPU
  .Lsave_fpu_regs_exit:
-       BR_R1USE_R14
+       BR_EX   %r14
  .Lsave_fpu_regs_end:
  EXPORT_SYMBOL(save_fpu_regs)
  
@@ -1107,7 +1050,7 @@ load_fpu_regs:
  .Lload_fpu_regs_done:
         ni      __LC_CPU_FLAGS+7,255-_CIF_FPU
  .Lload_fpu_regs_exit:
-       BR_R1USE_R14
+       BR_EX   %r14
  .Lload_fpu_regs_end:
  
  .L__critical_end:
@@ -1322,7 +1265,7 @@ cleanup_critical:
         jl      0f
         clg     %r9,BASED(.Lcleanup_table+104)  # .Lload_fpu_regs_end
         jl      .Lcleanup_load_fpu_regs
-0:     BR_R11USE_R14
+0:     BR_EX   %r14,%r11                       # %r11 is the scratch register here, not the default %r1
  
         .align  8
  .Lcleanup_table:
@@ -1358,7 +1301,7 @@ cleanup_critical:
         ni      __SIE_PROG0C+3(%r9),0xfe        # no longer in SIE
         lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
         larl    %r9,sie_exit                    # skip forward to sie_exit
-       BR_R11USE_R14
+       BR_EX   %r14,%r11                       # %r11 is the scratch register here, not the default %r1
  #endif
  
  .Lcleanup_system_call:
@@ -1412,7 +1355,7 @@ cleanup_critical:
         stg     %r15,56(%r11)           # r15 stack pointer
         # set new psw address and exit
         larl    %r9,.Lsysc_do_svc
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  .Lcleanup_system_call_insn:
         .quad   system_call
         .quad   .Lsysc_stmg
@@ -1424,7 +1367,7 @@ cleanup_critical:
  
  .Lcleanup_sysc_tif:
         larl    %r9,.Lsysc_tif
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  
  .Lcleanup_sysc_restore:
         # check if stpt has been executed
@@ -1441,14 +1384,14 @@ cleanup_critical:
         mvc     0(64,%r11),__PT_R8(%r9)
         lmg     %r0,%r7,__PT_R0(%r9)
  1:     lmg     %r8,%r9,__LC_RETURN_PSW
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  .Lcleanup_sysc_restore_insn:
         .quad   .Lsysc_exit_timer
         .quad   .Lsysc_done - 4
  
  .Lcleanup_io_tif:
         larl    %r9,.Lio_tif
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  
  .Lcleanup_io_restore:
         # check if stpt has been executed
@@ -1462,7 +1405,7 @@ cleanup_critical:
         mvc     0(64,%r11),__PT_R8(%r9)
         lmg     %r0,%r7,__PT_R0(%r9)
  1:     lmg     %r8,%r9,__LC_RETURN_PSW
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  .Lcleanup_io_restore_insn:
         .quad   .Lio_exit_timer
         .quad   .Lio_done - 4
@@ -1515,17 +1458,17 @@ cleanup_critical:
         # prepare return psw
         nihh    %r8,0xfcfd              # clear irq & wait state bits
         lg      %r9,48(%r11)            # return from psw_idle
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  .Lcleanup_idle_insn:
         .quad   .Lpsw_idle_lpsw
  
  .Lcleanup_save_fpu_regs:
         larl    %r9,save_fpu_regs
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  
  .Lcleanup_load_fpu_regs:
         larl    %r9,load_fpu_regs
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
  
  /*
   * Integer constants
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c

index 94f2099bceb04cbfdc73d8f498a42e777a4b4e07..3d17c41074ca55d59fbe156c5967605912af9734 100644 (file)
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -176,10 +176,9 @@ void do_softirq_own_stack(void)
                 new -= STACK_FRAME_OVERHEAD;
                 ((struct stack_frame *) new)->back_chain = old;
                 asm volatile("   la    15,0(%0)\n"
-                            "   basr  14,%2\n"
+                            "   brasl 14,__do_softirq\n"
                              "   la    15,0(%1)\n"
-                            : : "a" (new), "a" (old),
-                                "a" (__do_softirq)
+                            : : "a" (new), "a" (old)
                              : "0", "1", "2", "3", "4", "5", "14",
                                "cc", "memory" );
         } else {
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S

index 82df7d80fab22090cb943e1d54562ee00acfaec2..27110f3294edcdf30935048d5553f712caf44116 100644 (file)
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,13 +9,17 @@
  #include <linux/linkage.h>
  #include <asm/asm-offsets.h>
  #include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
  #include <asm/ptrace.h>
  #include <asm/export.h>
  
+       GEN_BR_THUNK %r1
+       GEN_BR_THUNK %r14
+
         .section .kprobes.text, "ax"
  
  ENTRY(ftrace_stub)
-       br      %r14
+       BR_EX   %r14
  
  #define STACK_FRAME_SIZE  (STACK_FRAME_OVERHEAD + __PT_SIZE)
  #define STACK_PTREGS     (STACK_FRAME_OVERHEAD)
@@ -23,7 +27,7 @@ ENTRY(ftrace_stub)
  #define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
  
  ENTRY(_mcount)
-       br      %r14
+       BR_EX   %r14
  
  EXPORT_SYMBOL(_mcount)
  
@@ -53,7 +57,7 @@ ENTRY(ftrace_caller)
  #endif
         lgr     %r3,%r14
         la      %r5,STACK_PTREGS(%r15)
-       basr    %r14,%r1
+       BASR_EX %r14,%r1
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  # The j instruction gets runtime patched to a nop instruction.
  # See ftrace_enable_ftrace_graph_caller.
@@ -68,7 +72,7 @@ ftrace_graph_caller_end:
  #endif
         lg      %r1,(STACK_PTREGS_PSW+8)(%r15)
         lmg     %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
-       br      %r1
+       BR_EX   %r1
  
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  
@@ -81,6 +85,6 @@ ENTRY(return_to_handler)
         aghi    %r15,STACK_FRAME_OVERHEAD
         lgr     %r14,%r2
         lmg     %r2,%r5,32(%r15)
-       br      %r14
+       BR_EX   %r14
  
  #endif
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c

index 46d49a11663f5915753c42ece6c0f56870889b80..8ad6a7128b3a5eba73345e2365613284466e7489 100644 (file)
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,7 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0
  #include <linux/module.h>
  #include <linux/device.h>
-#include <linux/cpu.h>
  #include <asm/nospec-branch.h>
  
  static int __init nobp_setup_early(char *str)
@@ -44,24 +43,6 @@ static int __init nospec_report(void)
  }
  arch_initcall(nospec_report);
  
-#ifdef CONFIG_SYSFS
-ssize_t cpu_show_spectre_v1(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
-               return sprintf(buf, "Mitigation: execute trampolines\n");
-       if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
-               return sprintf(buf, "Mitigation: limited branch prediction.\n");
-       return sprintf(buf, "Vulnerable\n");
-}
-#endif
-
  #ifdef CONFIG_EXPOLINE
  
  int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
@@ -112,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
         s32 *epo;
  
         /* Second part of the instruction replace is always a nop */
-       memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
         for (epo = start; epo < end; epo++) {
                 instr = (u8 *) epo + *epo;
                 if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -133,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
                         br = thunk + (*(int *)(thunk + 2)) * 2;
                 else
                         continue;
-               if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+               /* Check for unconditional branch 0x07f? or 0x47f???? */
+               if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
                         continue;
+
+               memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
                 switch (type) {
                 case BRCL_EXPOLINE:
-                       /* brcl to thunk, replace with br + nop */
                         insnbuf[0] = br[0];
                         insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+                       if (br[0] == 0x47) {
+                               /* brcl to b, replace with bc + nopr */
+                               insnbuf[2] = br[2];
+                               insnbuf[3] = br[3];
+                       } else {
+                               /* brcl to br, replace with bcr + nop */
+                       }
                         break;
                 case BRASL_EXPOLINE:
-                       /* brasl to thunk, replace with basr + nop */
-                       insnbuf[0] = 0x0d;
                         insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+                       if (br[0] == 0x47) {
+                               /* brasl to b, replace with bas + nopr */
+                               insnbuf[0] = 0x4d;
+                               insnbuf[2] = br[2];
+                               insnbuf[3] = br[3];
+                       } else {
+                               /* brasl to br, replace with basr + nop */
+                               insnbuf[0] = 0x0d;
+                       }
                         break;
                 }
  
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c

new file mode 100644 (file)

index 0000000..8affad5
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{ /* writes a fixed status line into buf; dev and attr are not used */
+       return sprintf(buf, "Mitigation: __user pointer sanitization\n"); /* return value is the sprintf result */
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{ /* writes one of three status lines into buf, first matching check wins; dev and attr are not used */
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) /* this file was compiled with CC_USING_EXPOLINE and nospec_disable is clear */
+               return sprintf(buf, "Mitigation: execute trampolines\n");
+       if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) /* otherwise: facility bit 82 is set in the alternate facility list */
+               return sprintf(buf, "Mitigation: limited branch prediction\n");
+       return sprintf(buf, "Vulnerable\n"); /* neither check matched */
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c

index 1c9ddd7aa5ec8fd32ee626d036a3c3ea6ed79362..0292d68e7dded707496b7090c9b2d7377aab8b2b 100644 (file)
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event)
          */
         rate = 0;
         if (attr->freq) {
+               if (!attr->sample_freq) {
+                       err = -EINVAL;
+                       goto out;
+               }
                 rate = freq_to_sample_rate(&si, attr->sample_freq);
                 rate = hw_limit_rate(&si, rate);
                 attr->freq = 0;
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S

index 73cc3750f0d3414c56c464585866dbcf738d88f7..7f14adf512c6d229cd4d68dac2c51c8c3f1fe643 100644 (file)
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -7,8 +7,11 @@
  
  #include <linux/linkage.h>
  #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
  #include <asm/sigp.h>
  
+       GEN_BR_THUNK %r9
+
  #
  # Issue "store status" for the current CPU to its prefix page
  # and call passed function afterwards
@@ -67,9 +70,9 @@ ENTRY(store_status)
         st      %r4,0(%r1)
         st      %r5,4(%r1)
         stg     %r2,8(%r1)
-       lgr     %r1,%r2
+       lgr     %r9,%r2
         lgr     %r2,%r3
-       br      %r1
+       BR_EX   %r9
  
         .section .bss
         .align  8
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S

index e99187149f1717f1ec81c94ea12cc77fa964c2fd..a049a7b9d6e893801a1ecd79d9332d3faea8d0ba 100644 (file)
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -13,6 +13,7 @@
  #include <asm/ptrace.h>
  #include <asm/thread_info.h>
  #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
  #include <asm/sigp.h>
  
  /*
@@ -24,6 +25,8 @@
   * (see below) in the resume process.
   * This function runs with disabled interrupts.
   */
+       GEN_BR_THUNK %r14
+
         .section .text
  ENTRY(swsusp_arch_suspend)
         stmg    %r6,%r15,__SF_GPRS(%r15)
@@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend)
         spx     0x318(%r1)
         lmg     %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
         lghi    %r2,0
-       br      %r14
+       BR_EX   %r14
  
  /*
   * Restore saved memory image to correct place and restore register context.
@@ -197,11 +200,10 @@ pgm_check_entry:
         larl    %r15,init_thread_union
         ahi     %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
         larl    %r2,.Lpanic_string
-       larl    %r3,sclp_early_printk
         lghi    %r1,0
         sam31
         sigp    %r1,%r0,SIGP_SET_ARCHITECTURE
-       basr    %r14,%r3
+       brasl   %r14,sclp_early_printk
         larl    %r3,.Ldisabled_wait_31
         lpsw    0(%r3)
  4:
@@ -267,7 +269,7 @@ restore_registers:
         /* Return 0 */
         lmg     %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
         lghi    %r2,0
-       br      %r14
+       BR_EX   %r14
  
         .section .data..nosave,"aw",@progbits
         .align  8
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c

index 8961e3970901d4b06c87b20f147115b683ad5170..969882b542669be1648093224c0ef802d027a161 100644 (file)
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  
         gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
         if (gpa && (scb_s->ecb & ECB_TE)) {
-               if (!(gpa & ~0x1fffU)) {
+               if (!(gpa & ~0x1fffUL)) {
                         rc = set_validity_icpt(scb_s, 0x0080U);
                         goto unpin;
                 }
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S

index 495c9c4bacc7b34bcfe4d966adeb28431b1dc032..2311f15be9cf04b3bad1f766453e6ff970d53a83 100644 (file)
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -7,6 +7,9 @@
  
  #include <linux/linkage.h>
  #include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+       GEN_BR_THUNK %r14
  
  /*
   * void *memmove(void *dest, const void *src, size_t n)
@@ -33,14 +36,14 @@ ENTRY(memmove)
  .Lmemmove_forward_remainder:
         larl    %r5,.Lmemmove_mvc
         ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
  .Lmemmove_reverse:
         ic      %r0,0(%r4,%r3)
         stc     %r0,0(%r4,%r1)
         brctg   %r4,.Lmemmove_reverse
         ic      %r0,0(%r4,%r3)
         stc     %r0,0(%r4,%r1)
-       br      %r14
+       BR_EX   %r14
  .Lmemmove_mvc:
         mvc     0(1,%r1),0(%r3)
  EXPORT_SYMBOL(memmove)
@@ -77,7 +80,7 @@ ENTRY(memset)
  .Lmemset_clear_remainder:
         larl    %r3,.Lmemset_xc
         ex      %r4,0(%r3)
-       br      %r14
+       BR_EX   %r14
  .Lmemset_fill:
         cghi    %r4,1
         lgr     %r1,%r2
@@ -95,10 +98,10 @@ ENTRY(memset)
         stc     %r3,0(%r1)
         larl    %r5,.Lmemset_mvc
         ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
  .Lmemset_fill_exit:
         stc     %r3,0(%r1)
-       br      %r14
+       BR_EX   %r14
  .Lmemset_xc:
         xc      0(1,%r1),0(%r1)
  .Lmemset_mvc:
@@ -121,7 +124,7 @@ ENTRY(memcpy)
  .Lmemcpy_remainder:
         larl    %r5,.Lmemcpy_mvc
         ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
  .Lmemcpy_loop:
         mvc     0(256,%r1),0(%r3)
         la      %r1,256(%r1)
@@ -159,10 +162,10 @@ ENTRY(__memset\bits)
         \insn   %r3,0(%r1)
         larl    %r5,.L__memset_mvc\bits
         ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
  .L__memset_exit\bits:
         \insn   %r3,0(%r2)
-       br      %r14
+       BR_EX   %r14
  .L__memset_mvc\bits:
         mvc     \bytes(1,%r1),0(%r1)
  .endm
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S

index 25bb4643c4f46cc3bcbf904e9fc257a3145f4221..9f794869c1b090a9a6589d573e77739562265cf5 100644 (file)
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -9,6 +9,7 @@
   */
  
  #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
  #include "bpf_jit.h"
  
  /*
@@ -54,7 +55,7 @@ ENTRY(sk_load_##NAME##_pos);                                          \
         clg     %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */     \
         jh      sk_load_##NAME##_slow;                                  \
         LOAD    %r14,-SIZE(%r3,%r12);   /* Get data from skb */         \
-       b       OFF_OK(%r6);            /* Return */                    \
+       B_EX    OFF_OK,%r6;             /* Return */                    \
                                                                         \
  sk_load_##NAME##_slow:;                                                        \
         lgr     %r2,%r7;                /* Arg1 = skb pointer */        \
@@ -64,11 +65,14 @@ sk_load_##NAME##_slow:;                                                     \
         brasl   %r14,skb_copy_bits;     /* Get data from skb */         \
         LOAD    %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */    \
         ltgr    %r2,%r2;                /* Set cc to (%r2 != 0) */      \
-       br      %r6;                    /* Return */
+       BR_EX   %r6;                    /* Return */
  
  sk_load_common(word, 4, llgf)  /* r14 = *(u32 *) (skb->data+offset) */
  sk_load_common(half, 2, llgh)  /* r14 = *(u16 *) (skb->data+offset) */
  
+       GEN_BR_THUNK %r6
+       GEN_B_THUNK OFF_OK,%r6
+
  /*
   * Load 1 byte from SKB (optimized version)
   */
@@ -80,7 +84,7 @@ ENTRY(sk_load_byte_pos)
         clg     %r3,STK_OFF_HLEN(%r15)  # Offset >= hlen?
         jnl     sk_load_byte_slow
         llgc    %r14,0(%r3,%r12)        # Get byte from skb
-       b       OFF_OK(%r6)             # Return OK
+       B_EX    OFF_OK,%r6              # Return OK
  
  sk_load_byte_slow:
         lgr     %r2,%r7                 # Arg1 = skb pointer
@@ -90,7 +94,7 @@ sk_load_byte_slow:
         brasl   %r14,skb_copy_bits      # Get data from skb
         llgc    %r14,STK_OFF_TMP(%r15)  # Load result from temp buffer
         ltgr    %r2,%r2                 # Set cc to (%r2 != 0)
-       br      %r6                     # Return cc
+       BR_EX   %r6                     # Return cc
  
  #define sk_negative_common(NAME, SIZE, LOAD)                           \
  sk_load_##NAME##_slow_neg:;                                            \
@@ -104,7 +108,7 @@ sk_load_##NAME##_slow_neg:;                                         \
         jz      bpf_error;                                              \
         LOAD    %r14,0(%r2);            /* Get data from pointer */     \
         xr      %r3,%r3;                /* Set cc to zero */            \
-       br      %r6;                    /* Return cc */
+       BR_EX   %r6;                    /* Return cc */
  
  sk_negative_common(word, 4, llgf)
  sk_negative_common(half, 2, llgh)
@@ -113,4 +117,4 @@ sk_negative_common(byte, 1, llgc)
  bpf_error:
  # force a return 0 from jit handler
         ltgr    %r15,%r15       # Set condition code
-       br      %r6
+       BR_EX   %r6
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c

index 78a19c93b3802ff48b4c27a90d6428100fc5566f..dd2bcf0e7d00d45406c83e1254879e4f16db36fb 100644 (file)
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -25,6 +25,8 @@
  #include <linux/bpf.h>
  #include <asm/cacheflush.h>
  #include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
  #include <asm/set_memory.h>
  #include "bpf_jit.h"
  
@@ -41,6 +43,8 @@ struct bpf_jit {
         int base_ip;            /* Base address for literal pool */
         int ret0_ip;            /* Address of return 0 */
         int exit_ip;            /* Address of exit */
+       int r1_thunk_ip;        /* Address of expoline thunk for 'br %r1' */
+       int r14_thunk_ip;       /* Address of expoline thunk for 'br %r14' */
         int tail_call_start;    /* Tail call start offset */
         int labels[1];          /* Labels for local jumps */
  };
@@ -250,6 +254,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
         REG_SET_SEEN(b2);                                       \
  })
  
+#define EMIT6_PCREL_RILB(op, b, target)                                \
+({ /* rel is unsigned so 'rel >> 16' cannot sign-extend over op */ \
+       unsigned int rel = (int)((target) - jit->prg) / 2;      \
+       _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);     \
+       REG_SET_SEEN(b);                                        \
+})
+
+#define EMIT6_PCREL_RIL(op, target)                            \
+({ /* rel is unsigned so 'rel >> 16' cannot sign-extend over op */ \
+       unsigned int rel = (int)((target) - jit->prg) / 2;      \
+       _EMIT6((op) | rel >> 16, rel & 0xffff);                 \
+})
+
  #define _EMIT6_IMM(op, imm)                                    \
  ({                                                             \
         unsigned int __imm = (imm);                             \
@@ -469,8 +486,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
         EMIT4(0xb9040000, REG_2, BPF_REG_0);
         /* Restore registers */
         save_restore_regs(jit, REGS_RESTORE, stack_depth);
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+               jit->r14_thunk_ip = jit->prg;
+               /* Generate __s390_indirect_jump_r14 thunk */
+               if (test_facility(35)) {
+                       /* exrl %r0,.+10 */
+                       EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+               } else {
+                       /* larl %r1,.+14 */
+                       EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+                       /* ex 0,0(%r1) */
+                       EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+               }
+               /* j . */
+               EMIT4_PCREL(0xa7f40000, 0);
+       }
         /* br %r14 */
         _EMIT2(0x07fe);
+
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+           (jit->seen & SEEN_FUNC)) {
+               jit->r1_thunk_ip = jit->prg;
+               /* Generate __s390_indirect_jump_r1 thunk */
+               if (test_facility(35)) {
+                       /* exrl %r0,.+10 */
+                       EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+                       /* j . */
+                       EMIT4_PCREL(0xa7f40000, 0);
+                       /* br %r1 */
+                       _EMIT2(0x07f1);
+               } else {
+                       /* No larl here: %r1 carries the branch target */
+                       /* used by the 'br %r1' that ex runs from lowcore. */
+                       /* ex 0,S390_lowcore.br_r1_trampoline */
+                       EMIT4_DISP(0x44000000, REG_0, REG_0,
+                                  offsetof(struct lowcore, br_r1_trampoline));
+                       /* j . */
+                       EMIT4_PCREL(0xa7f40000, 0);
+               }
+       }
  }
  
  /*
@@ -966,8 +1020,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                 /* lg %w1,<d(imm)>(%l) */
                 EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
                               EMIT_CONST_U64(func));
-               /* basr %r14,%w1 */
-               EMIT2(0x0d00, REG_14, REG_W1);
+               if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+                       /* brasl %r14,__s390_indirect_jump_r1 */
+                       EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+               } else {
+                       /* basr %r14,%w1 */
+                       EMIT2(0x0d00, REG_14, REG_W1);
+               }
                 /* lgr %b0,%r2: load return value into %b0 */
                 EMIT4(0xb9040000, BPF_REG_0, REG_2);
                 if ((jit->seen & SEEN_SKB) &&
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c

index 47d3efff6805b9787d4be1721cd8cb790818e10f..09f36c0d9d4fe187886bd6bb49892c7de1a434c2 100644 (file)
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
         if (status != EFI_SUCCESS)
                 goto free_struct;
  
-       memcpy(rom->romdata, pci->romimage, pci->romsize);
+       memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+              pci->romsize);
         return status;
  
  free_struct:
@@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
         if (status != EFI_SUCCESS)
                 goto free_struct;
  
-       memcpy(rom->romdata, pci->romimage, pci->romsize);
+       memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+              pci->romsize);
         return status;
  
  free_struct:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S

index fca012baba19fff5cb9aacad170b7e2d9b5cb2d9..8169e8b7a4dc1280f40e835085b3a5e502c11466 100644 (file)
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,6 +305,25 @@ ENTRY(startup_64)
         /* Set up the stack */
         leaq    boot_stack_end(%rbx), %rsp
  
+       /*
+        * paging_prepare() and cleanup_trampoline() below can have GOT
+        * references. Adjust the table with the address we are running at.
+        *
+        * Zero RAX for adjust_got: the GOT was not adjusted before;
+        * there's no adjustment to undo.
+        */
+       xorq    %rax, %rax
+
+       /*
+        * Calculate the address the binary is loaded at and use it as
+        * a GOT adjustment.
+        */
+       call    1f
+1:     popq    %rdi
+       subq    $1b, %rdi
+
+       call    adjust_got
+
         /*
          * At this point we are in long mode with 4-level paging enabled,
          * but we might want to enable 5-level paging or vice versa.
@@ -370,10 +389,14 @@ trampoline_return:
         /*
          * cleanup_trampoline() would restore trampoline memory.
          *
+        * RDI is the address of the page table to use instead of the page
+        * table in trampoline memory (if required).
+        *
          * RSI holds real mode data and needs to be preserved across
          * this function call.
          */
         pushq   %rsi
+       leaq    top_pgtable(%rbx), %rdi
         call    cleanup_trampoline
         popq    %rsi
  
@@ -381,6 +404,21 @@ trampoline_return:
         pushq   $0
         popfq
  
+       /*
+        * Previously we've adjusted the GOT with the address the binary was
+        * loaded at. Now we need to re-adjust for relocation address.
+        *
+        * Calculate the address the binary is loaded at, so that we can
+        * undo the previous GOT adjustment.
+        */
+       call    1f
+1:     popq    %rax
+       subq    $1b, %rax
+
+       /* The new adjustment is the relocation address */
+       movq    %rbx, %rdi
+       call    adjust_got
+
  /*
   * Copy the compressed kernel to the end of our buffer
   * where decompression in place becomes safe.
@@ -481,19 +519,6 @@ relocated:
         shrq    $3, %rcx
         rep     stosq
  
-/*
- * Adjust our own GOT
- */
-       leaq    _got(%rip), %rdx
-       leaq    _egot(%rip), %rcx
-1:
-       cmpq    %rcx, %rdx
-       jae     2f
-       addq    %rbx, (%rdx)
-       addq    $8, %rdx
-       jmp     1b
-2:
-       
  /*
   * Do the extraction, and jump to the new kernel..
   */
@@ -512,6 +537,27 @@ relocated:
   */
         jmp     *%rax
  
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+       /* Walk through the GOT adding the address to the entries */
+       leaq    _got(%rip), %rdx
+       leaq    _egot(%rip), %rcx
+1:
+       cmpq    %rcx, %rdx
+       jae     2f
+       subq    %rax, (%rdx)    /* Undo previous adjustment */
+       addq    %rdi, (%rdx)    /* Apply the new adjustment */
+       addq    $8, %rdx
+       jmp     1b
+2:
+       ret
+
         .code32
  /*
   * This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +695,10 @@ boot_stack_end:
         .balign 4096
  pgtable:
         .fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+       .fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c

index 32af1cbcd9030f64df39554d430d9be9196a53ec..a362fa0b849c70c3da1e290ea9bf536272ca62c7 100644 (file)
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -22,14 +22,6 @@ struct paging_config {
  /* Buffer to preserve trampoline memory */
  static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
  
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
  /*
   * Trampoline address will be printed by extract_kernel() for debugging
   * purposes.
@@ -134,7 +126,7 @@ out:
         return paging_config;
  }
  
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
  {
         void *trampoline_pgtable;
  
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
          * if it's there.
          */
         if ((void *)__native_read_cr3() == trampoline_pgtable) {
-               memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-               native_write_cr3((unsigned long)top_pgtable);
+               memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+               native_write_cr3((unsigned long)pgtable);
         }
  
         /* Restore trampoline memory */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h

index b27da9602a6dfdf406180214b669a712a126f701..aced6c9290d6f96cdaf4eaadab3dd3835d80b94a 100644 (file)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  
  #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
  
+#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO)
+
+/*
+ * Workaround for the sake of BPF compilation which utilizes kernel
+ * headers, but clang does not support ASM GOTO and fails the build.
+ */
+#ifndef __BPF_TRACING__
+#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
+#endif
+
+#define static_cpu_has(bit)            boot_cpu_has(bit)
+
+#else
+
  /*
   * Static testing of CPU features.  Used the same as boot_cpu_has().
   * These will statically patch the target code for additional
@@ -195,6 +209,7 @@ t_no:
                 boot_cpu_has(bit) :                             \
                 _static_cpu_has(bit)                            \
  )
+#endif
  
  #define cpu_has_bug(c, bit)            cpu_has(c, (bit))
  #define set_cpu_bug(c, bit)            set_cpu_cap(c, (bit))
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h

index 578793e97431da25b0d5f3cbc20ae4c0655db075..fb00a2fca9901eb02ea7b730ddbac957e8ecc947 100644 (file)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
  #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
  #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
  #define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
  #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
  #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
  #define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
  #define X86_FEATURE_RETPOLINE_AMD      ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
  #define X86_FEATURE_INTEL_PPIN         ( 7*32+14) /* Intel Processor Inventory Number */
  #define X86_FEATURE_CDP_L2             ( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL      ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD               ( 7*32+17) /* Speculative Store Bypass Disable */
  #define X86_FEATURE_MBA                        ( 7*32+18) /* Memory Bandwidth Allocation */
  #define X86_FEATURE_RSB_CTXSW          ( 7*32+19) /* "" Fill RSB on context switches */
  #define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
  #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
  #define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE  ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD                ( 7*32+24)  /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS               ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB               ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP              ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN                        ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
  
  /* Virtualization flags: Linux defined, word 8 */
  #define X86_FEATURE_TPR_SHADOW         ( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
  #define X86_FEATURE_CLZERO             (13*32+ 0) /* CLZERO instruction */
  #define X86_FEATURE_IRPERF             (13*32+ 1) /* Instructions Retired Count */
  #define X86_FEATURE_XSAVEERPTR         (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB               (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS               (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP              (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB           (13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS           (13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP          (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD          (13*32+25) /* Virtualized Speculative Store Bypass Disable */
  
  /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
  #define X86_FEATURE_DTHERM             (14*32+ 0) /* Digital Thermal Sensor */
@@ -334,6 +340,7 @@
  #define X86_FEATURE_SPEC_CTRL          (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
  #define X86_FEATURE_INTEL_STIBP                (18*32+27) /* "" Single Thread Indirect Branch Predictors */
  #define X86_FEATURE_ARCH_CAPABILITIES  (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD     (18*32+31) /* "" Speculative Store Bypass Disable */
  
  /*
   * BUG word(s)
@@ -363,5 +370,6 @@
  #define X86_BUG_CPU_MELTDOWN           X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
  #define X86_BUG_SPECTRE_V1             X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
  #define X86_BUG_SPECTRE_V2             X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS      X86_BUG(17) /* CPU is affected by speculative store bypass attack */
  
  #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h

index b3e32b010ab194ed613034234c403c4067502776..c2c01f84df75f1f9b35a3c898686a82973026d88 100644 (file)
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
         return insn_offset_displacement(insn) + insn->displacement.nbytes;
  }
  
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+       return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+               (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+                X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
  #endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index c25775fad4ed19d73452f530f4fecfb8d2373176..f4b2588865e9f7ad16696d3e70255a2b794d26b3 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -924,7 +924,7 @@ struct kvm_x86_ops {
         int (*hardware_setup)(void);               /* __init */
         void (*hardware_unsetup)(void);            /* __exit */
         bool (*cpu_has_accelerated_tpr)(void);
-       bool (*cpu_has_high_real_mode_segbase)(void);
+       bool (*has_emulated_msr)(int index);
         void (*cpuid_update)(struct kvm_vcpu *vcpu);
  
         struct kvm *(*vm_alloc)(void);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h

index 57e3785d0d26e19f9134485399d1b3aac01b3a2b..cf9911b5a53cb1de017a27cf20f866e42d2199fd 100644 (file)
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
  
  #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
         if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-               /* pkey 0 is the default and always allocated */
+               /* pkey 0 is the default and allocated implicitly */
                 mm->context.pkey_allocation_map = 0x1;
                 /* -1 means unallocated or invalid */
                 mm->context.execute_only_pkey = -1;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h

index 53d5b1b9255eb8c924b5b1e650d740b0a574f68f..fda2114197b36935558f0a376644dcd0a0c268ce 100644 (file)
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,6 +42,8 @@
  #define MSR_IA32_SPEC_CTRL             0x00000048 /* Speculation Control */
  #define SPEC_CTRL_IBRS                 (1 << 0)   /* Indirect Branch Restricted Speculation */
  #define SPEC_CTRL_STIBP                        (1 << 1)   /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT           2          /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD                 (1 << SPEC_CTRL_SSBD_SHIFT)   /* Speculative Store Bypass Disable */
  
  #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
  #define PRED_CMD_IBPB                  (1 << 0)   /* Indirect Branch Prediction Barrier */
@@ -68,6 +70,11 @@
  #define MSR_IA32_ARCH_CAPABILITIES     0x0000010a
  #define ARCH_CAP_RDCL_NO               (1 << 0)   /* Not susceptible to Meltdown */
  #define ARCH_CAP_IBRS_ALL              (1 << 1)   /* Enhanced IBRS support */
+#define ARCH_CAP_SSB_NO                        (1 << 4)   /*
+                                                   * Not susceptible to Speculative Store Bypass
+                                                   * attack, so no Speculative Store Bypass
+                                                   * control required.
+                                                   */
  
  #define MSR_IA32_BBL_CR_CTL            0x00000119
  #define MSR_IA32_BBL_CR_CTL3           0x0000011e
@@ -340,6 +347,8 @@
  #define MSR_AMD64_SEV_ENABLED_BIT      0
  #define MSR_AMD64_SEV_ENABLED          BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
  
+#define MSR_AMD64_VIRT_SPEC_CTRL       0xc001011f
+
  /* Fam 17h MSRs */
  #define MSR_F17H_IRPERF                        0xc00000e9
  
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h

index f928ad9b143fedea1085dedc508658fa745b4ceb..8b38df98548e8dfd1176f564b02379f6a728da49 100644 (file)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -217,6 +217,14 @@ enum spectre_v2_mitigation {
         SPECTRE_V2_IBRS,
  };
  
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+       SPEC_STORE_BYPASS_NONE,
+       SPEC_STORE_BYPASS_DISABLE,
+       SPEC_STORE_BYPASS_PRCTL,
+       SPEC_STORE_BYPASS_SECCOMP,
+};
+
  extern char __indirect_thunk_start[];
  extern char __indirect_thunk_end[];
  
@@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void)
  #endif
  }
  
-#define alternative_msr_write(_msr, _val, _feature)            \
-       asm volatile(ALTERNATIVE("",                            \
-                                "movl %[msr], %%ecx\n\t"       \
-                                "movl %[val], %%eax\n\t"       \
-                                "movl $0, %%edx\n\t"           \
-                                "wrmsr",                       \
-                                _feature)                      \
-                    : : [msr] "i" (_msr), [val] "i" (_val)     \
-                    : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+       asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+               : : "c" (msr),
+                   "a" ((u32)val),
+                   "d" ((u32)(val >> 32)),
+                   [feature] "i" (feature)
+               : "memory");
+}
  
  static inline void indirect_branch_prediction_barrier(void)
  {
-       alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
-                             X86_FEATURE_USE_IBPB);
+       u64 val = PRED_CMD_IBPB;
+
+       alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
  }
  
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
  /*
   * With retpoline, we must use IBRS to restrict branch prediction
   * before calling into firmware.
@@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void)
   */
  #define firmware_restrict_branch_speculation_start()                   \
  do {                                                                   \
+       u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;                  \
+                                                                       \
         preempt_disable();                                              \
-       alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,       \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
                               X86_FEATURE_USE_IBRS_FW);                 \
  } while (0)
  
  #define firmware_restrict_branch_speculation_end()                     \
  do {                                                                   \
-       alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,                    \
+       u64 val = x86_spec_ctrl_base;                                   \
+                                                                       \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, val,                  \
                               X86_FEATURE_USE_IBRS_FW);                 \
         preempt_enable();                                               \
  } while (0)
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h

index a0ba1ffda0dfd3dc9ee335f04cee58aafdea6fb8..851c04b7a0922cd3ea79f3a574246b6a94262d40 100644 (file)
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,6 +2,8 @@
  #ifndef _ASM_X86_PKEYS_H
  #define _ASM_X86_PKEYS_H
  
+#define ARCH_DEFAULT_PKEY      0
+
  #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
  
  extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
  static inline int execute_only_pkey(struct mm_struct *mm)
  {
         if (!boot_cpu_has(X86_FEATURE_OSPKE))
-               return 0;
+               return ARCH_DEFAULT_PKEY;
  
         return __execute_only_pkey(mm);
  }
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
  {
         /*
          * "Allocated" pkeys are those that have been returned
-        * from pkey_alloc().  pkey 0 is special, and never
-        * returned from pkey_alloc().
+        * from pkey_alloc() or pkey 0 which is allocated
+        * implicitly when the mm is created.
          */
-       if (pkey <= 0)
+       if (pkey < 0)
                 return false;
         if (pkey >= arch_max_pkey())
                 return false;
+       /*
+        * The exec-only pkey is set in the allocation map, but
+        * is not available to any of the user interfaces like
+        * mprotect_pkey().
+        */
+       if (pkey == mm->context.execute_only_pkey)
+               return false;
+
         return mm_pkey_allocation_map(mm) & (1U << pkey);
  }
  
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h

new file mode 100644 (file)

index 0000000..ae7c2c5
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
+ */
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl:           The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:      The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *                             (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+       x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl:           The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:      The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *                             (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+       x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+       BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+       return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+       BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+       return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+       return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+       speculative_store_bypass_update(current_thread_info()->flags);
+}
+
+#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h

index a5d9521bb2cbaa1732bf8d377c2efe04b3764405..2ff2a30a264f4c5f02a01b3b87e4148e8992dc5a 100644 (file)
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,6 +79,7 @@ struct thread_info {
  #define TIF_SIGPENDING         2       /* signal pending */
  #define TIF_NEED_RESCHED       3       /* rescheduling necessary */
  #define TIF_SINGLESTEP         4       /* reenable singlestep on user return*/
+#define TIF_SSBD                       5       /* Reduced data speculation */
  #define TIF_SYSCALL_EMU                6       /* syscall emulation active */
  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
  #define TIF_SECCOMP            8       /* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
  #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
  #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
  #define _TIF_SINGLESTEP                (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD              (1 << TIF_SSBD)
  #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
  #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
@@ -144,7 +146,7 @@ struct thread_info {
  
  /* flags to check in __switch_to() */
  #define _TIF_WORK_CTXSW                                                        \
-       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
  
  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h

index 4c851ebb3cebd4654f9dabed1e9c19520b072c0d..0ede697c396119be61eb223a40607ff6e6e985ef 100644 (file)
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,7 +29,7 @@
  #define KVM_FEATURE_PV_TLB_FLUSH       9
  #define KVM_FEATURE_ASYNC_PF_VMEXIT    10
  
-#define KVM_HINTS_DEDICATED      0
+#define KVM_HINTS_REALTIME      0
  
  /* The last 8 bits are used to indicate how to interpret the flags field
   * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c

index c88e0b127810f22b15b53eb150d11e9584201885..b481b95bd8f6b9e439c5d72e42af21b18d250af4 100644 (file)
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -14,8 +14,11 @@
  #include <asm/amd_nb.h>
  
  #define PCI_DEVICE_ID_AMD_17H_ROOT     0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT        0x15d0
  #define PCI_DEVICE_ID_AMD_17H_DF_F3    0x1463
  #define PCI_DEVICE_ID_AMD_17H_DF_F4    0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
  
  /* Protect the PCI config register pairs used for SMN and DF indirect access. */
  static DEFINE_MUTEX(smn_mutex);
@@ -24,6 +27,7 @@ static u32 *flush_words;
  
  static const struct pci_device_id amd_root_ids[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
         {}
  };
  
@@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
         {}
  };
@@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
         {}
  };
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c

index 8b04234e010b2616e42bff6cbeabdc3c2087c13c..7685444a106bb29a3994a5d85066e60b2b4c0d09 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
                         goto update;
         }
         cmsk = cluster_hotplug_mask;
+       cmsk->clusterid = cluster;
         cluster_hotplug_mask = NULL;
  update:
         this_cpu_write(cluster_masks, cmsk);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c

index 12bc0a1139dac57fefe8b299d1ef9685fa0e3147..1b18be3f35a8e8fdb34db6feff1ee8e416a73c96 100644 (file)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -10,6 +10,7 @@
  #include <asm/processor.h>
  #include <asm/apic.h>
  #include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
  #include <asm/smp.h>
  #include <asm/pci-direct.h>
  #include <asm/delay.h>
@@ -554,6 +555,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
                 rdmsrl(MSR_FAM10H_NODE_ID, value);
                 nodes_per_socket = ((value >> 3) & 7) + 1;
         }
+
+       if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+               unsigned int bit;
+
+               switch (c->x86) {
+               case 0x15: bit = 54; break;
+               case 0x16: bit = 33; break;
+               case 0x17: bit = 10; break;
+               default: return;
+               }
+               /*
+                * Try to cache the base value so further operations can
+                * avoid RMW. If that faults, do not enable SSBD.
+                */
+               if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+                       setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+                       setup_force_cpu_cap(X86_FEATURE_SSBD);
+                       x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+               }
+       }
  }
  
  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -791,6 +812,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
  
  static void init_amd_zn(struct cpuinfo_x86 *c)
  {
+       set_cpu_cap(c, X86_FEATURE_ZEN);
         /*
          * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
          * all up to and including B1.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c

index bfca937bdcc36ce8d9523f03dcc92e93d3c39d5c..7416fc206b4a0e3f17be821e932d9fd840c03079 100644 (file)
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,8 +12,10 @@
  #include <linux/utsname.h>
  #include <linux/cpu.h>
  #include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
  
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
  #include <asm/cmdline.h>
  #include <asm/bugs.h>
  #include <asm/processor.h>
@@ -27,6 +29,27 @@
  #include <asm/intel-family.h>
  
  static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
  
  void __init check_bugs(void)
  {
@@ -37,9 +60,27 @@ void __init check_bugs(void)
                 print_cpu_info(&boot_cpu_data);
         }
  
+       /*
+        * Read the SPEC_CTRL MSR to account for reserved bits which may
+        * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+        * init code as it is not enumerated and depends on the family.
+        */
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+               rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+       /* Allow STIBP in MSR_SPEC_CTRL if supported */
+       if (boot_cpu_has(X86_FEATURE_STIBP))
+               x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
         /* Select the proper spectre mitigation before patching alternatives */
         spectre_v2_select_mitigation();
  
+       /*
+        * Select proper mitigation for any exposure to the Speculative Store
+        * Bypass vulnerability.
+        */
+       ssb_select_mitigation();
+
  #ifdef CONFIG_X86_32
         /*
          * Check whether we are able to run this kernel safely on SMP.
@@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = {
  #undef pr_fmt
  #define pr_fmt(fmt)     "Spectre V2 : " fmt
  
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+       SPECTRE_V2_NONE;
+
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+{
+       u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+       struct thread_info *ti = current_thread_info();
+
+       /* Is MSR_SPEC_CTRL implemented ? */
+       if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+               /*
+                * Restrict guest_spec_ctrl to supported values. Clear the
+                * modifiable bits in the host base value and OR in the
+                * modifiable bits from the guest value.
+                */
+               guestval = hostval & ~x86_spec_ctrl_mask;
+               guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+               /* SSBD controlled in MSR_SPEC_CTRL */
+               if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+                       hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
+               if (hostval != guestval) {
+                       msrval = setguest ? guestval : hostval;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+               }
+       }
+
+       /*
+        * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+        * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
+        */
+       if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+           !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+               return;
+
+       /*
+        * If the host has SSBD mitigation enabled, force it in the host's
+        * virtual MSR value. If it's not permanently enabled, evaluate
+        * current's TIF_SSBD thread flag.
+        */
+       if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+               hostval = SPEC_CTRL_SSBD;
+       else
+               hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+       /* Sanitize the guest value */
+       guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+       if (hostval != guestval) {
+               unsigned long tif;
+
+               tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+                                ssbd_spec_ctrl_to_tif(hostval);
+
+               speculative_store_bypass_update(tif);
+       }
+}
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+
+static void x86_amd_ssb_disable(void)
+{
+       u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+       if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+               wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+       else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+               wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
  
  #ifdef RETPOLINE
  static bool spectre_v2_bad_module;
@@ -312,32 +422,289 @@ retpoline_auto:
  }
  
  #undef pr_fmt
+#define pr_fmt(fmt)    "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+       SPEC_STORE_BYPASS_CMD_NONE,
+       SPEC_STORE_BYPASS_CMD_AUTO,
+       SPEC_STORE_BYPASS_CMD_ON,
+       SPEC_STORE_BYPASS_CMD_PRCTL,
+       SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+       [SPEC_STORE_BYPASS_NONE]        = "Vulnerable",
+       [SPEC_STORE_BYPASS_DISABLE]     = "Mitigation: Speculative Store Bypass disabled",
+       [SPEC_STORE_BYPASS_PRCTL]       = "Mitigation: Speculative Store Bypass disabled via prctl",
+       [SPEC_STORE_BYPASS_SECCOMP]     = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+       const char *option;
+       enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+       { "auto",       SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
+       { "on",         SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
+       { "off",        SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
+       { "prctl",      SPEC_STORE_BYPASS_CMD_PRCTL },   /* Disable Speculative Store Bypass via prctl */
+       { "seccomp",    SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+       enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+       char arg[20];
+       int ret, i;
+
+       if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+               return SPEC_STORE_BYPASS_CMD_NONE;
+       } else {
+               ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+                                         arg, sizeof(arg));
+               if (ret < 0)
+                       return SPEC_STORE_BYPASS_CMD_AUTO;
+
+               for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+                       if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+                               continue;
+
+                       cmd = ssb_mitigation_options[i].cmd;
+                       break;
+               }
+
+               if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+                       pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+                       return SPEC_STORE_BYPASS_CMD_AUTO;
+               }
+       }
+
+       return cmd;
+}
+
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
+{
+       enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+       enum ssb_mitigation_cmd cmd;
+
+       if (!boot_cpu_has(X86_FEATURE_SSBD))
+               return mode;
+
+       cmd = ssb_parse_cmdline();
+       if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+           (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+            cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+               return mode;
+
+       switch (cmd) {
+       case SPEC_STORE_BYPASS_CMD_AUTO:
+       case SPEC_STORE_BYPASS_CMD_SECCOMP:
+               /*
+                * Choose prctl+seccomp as the default mode if seccomp is
+                * enabled.
+                */
+               if (IS_ENABLED(CONFIG_SECCOMP))
+                       mode = SPEC_STORE_BYPASS_SECCOMP;
+               else
+                       mode = SPEC_STORE_BYPASS_PRCTL;
+               break;
+       case SPEC_STORE_BYPASS_CMD_ON:
+               mode = SPEC_STORE_BYPASS_DISABLE;
+               break;
+       case SPEC_STORE_BYPASS_CMD_PRCTL:
+               mode = SPEC_STORE_BYPASS_PRCTL;
+               break;
+       case SPEC_STORE_BYPASS_CMD_NONE:
+               break;
+       }
+
+       /*
+        * We have three CPU feature flags that are in play here:
+        *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+        *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+        *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+        */
+       if (mode == SPEC_STORE_BYPASS_DISABLE) {
+               setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+               /*
+                * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+                * a completely different MSR and bit dependent on family.
+                */
+               switch (boot_cpu_data.x86_vendor) {
+               case X86_VENDOR_INTEL:
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+                       x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+                       break;
+               case X86_VENDOR_AMD:
+                       x86_amd_ssb_disable();
+                       break;
+               }
+       }
+
+       return mode;
+}
+
+static void ssb_select_mitigation(void)
+{
+       ssb_mode = __ssb_select_mitigation();
+
+       if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+               pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+       bool update;
+
+       if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+           ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+               return -ENXIO;
+
+       switch (ctrl) {
+       case PR_SPEC_ENABLE:
+               /* If speculation is force disabled, enable is not allowed */
+               if (task_spec_ssb_force_disable(task))
+                       return -EPERM;
+               task_clear_spec_ssb_disable(task);
+               update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+               break;
+       case PR_SPEC_DISABLE:
+               task_set_spec_ssb_disable(task);
+               update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+               break;
+       case PR_SPEC_FORCE_DISABLE:
+               task_set_spec_ssb_disable(task);
+               task_set_spec_ssb_force_disable(task);
+               update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       /*
+        * If being set on non-current task, delay setting the CPU
+        * mitigation until it is next scheduled.
+        */
+       if (task == current && update)
+               speculative_store_bypass_update_current();
+
+       return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+                            unsigned long ctrl)
+{
+       switch (which) {
+       case PR_SPEC_STORE_BYPASS:
+               return ssb_prctl_set(task, ctrl);
+       default:
+               return -ENODEV;
+       }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+       if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+               ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+       switch (ssb_mode) {
+       case SPEC_STORE_BYPASS_DISABLE:
+               return PR_SPEC_DISABLE;
+       case SPEC_STORE_BYPASS_SECCOMP:
+       case SPEC_STORE_BYPASS_PRCTL:
+               if (task_spec_ssb_force_disable(task))
+                       return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+               if (task_spec_ssb_disable(task))
+                       return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+               return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+       default:
+               if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+                       return PR_SPEC_ENABLE;
+               return PR_SPEC_NOT_AFFECTED;
+       }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+       switch (which) {
+       case PR_SPEC_STORE_BYPASS:
+               return ssb_prctl_get(task);
+       default:
+               return -ENODEV;
+       }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+       if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+               wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+       if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+               x86_amd_ssb_disable();
+}
  
  #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+                              char *buf, unsigned int bug)
  {
-       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+       if (!boot_cpu_has_bug(bug))
                 return sprintf(buf, "Not affected\n");
-       if (boot_cpu_has(X86_FEATURE_PTI))
-               return sprintf(buf, "Mitigation: PTI\n");
+
+       switch (bug) {
+       case X86_BUG_CPU_MELTDOWN:
+               if (boot_cpu_has(X86_FEATURE_PTI))
+                       return sprintf(buf, "Mitigation: PTI\n");
+
+               break;
+
+       case X86_BUG_SPECTRE_V1:
+               return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+       case X86_BUG_SPECTRE_V2:
+               return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+                              boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+                              boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+                              spectre_v2_module_string());
+
+       case X86_BUG_SPEC_STORE_BYPASS:
+               return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+       default:
+               break;
+       }
+
         return sprintf(buf, "Vulnerable\n");
  }
  
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
  ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
  {
-       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
-               return sprintf(buf, "Not affected\n");
-       return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+       return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
  }
  
  ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
  {
-       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-               return sprintf(buf, "Not affected\n");
+       return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
  
-       return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-                      boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
-                      boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
-                      spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
  }
  #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index ce243f7d2d4e0879c9cbff297ff112e236a6802c..38276f58d3bfe2ce17d51450360be69c5ab70e43 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -757,17 +757,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
          * and they also have a different bit for STIBP support. Also,
          * a hypervisor might have set the individual AMD bits even on
          * Intel CPUs, for finer-grained selection of what's available.
-        *
-        * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
-        * features, which are visible in /proc/cpuinfo and used by the
-        * kernel. So set those accordingly from the Intel bits.
          */
         if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
                 set_cpu_cap(c, X86_FEATURE_IBRS);
                 set_cpu_cap(c, X86_FEATURE_IBPB);
+               set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
         }
+
         if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
                 set_cpu_cap(c, X86_FEATURE_STIBP);
+
+       if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
+           cpu_has(c, X86_FEATURE_VIRT_SSBD))
+               set_cpu_cap(c, X86_FEATURE_SSBD);
+
+       if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
+               set_cpu_cap(c, X86_FEATURE_IBRS);
+               set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+       }
+
+       if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+               set_cpu_cap(c, X86_FEATURE_IBPB);
+
+       if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+               set_cpu_cap(c, X86_FEATURE_STIBP);
+               set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+       }
  }
  
  void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -927,21 +942,47 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
         {}
  };
  
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+/* Only list CPUs which speculate but are not susceptible to SSB */
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_ATOM_SILVERMONT1     },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_ATOM_AIRMONT         },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_ATOM_SILVERMONT2     },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_ATOM_MERRIFIELD      },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_CORE_YONAH           },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_XEON_PHI_KNL         },
+       { X86_VENDOR_INTEL,     6,      INTEL_FAM6_XEON_PHI_KNM         },
+       { X86_VENDOR_AMD,       0x12,                                   },
+       { X86_VENDOR_AMD,       0x11,                                   },
+       { X86_VENDOR_AMD,       0x10,                                   },
+       { X86_VENDOR_AMD,       0xf,                                    },
+       {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
  {
         u64 ia32_cap = 0;
  
-       if (x86_match_cpu(cpu_no_meltdown))
-               return false;
+       if (x86_match_cpu(cpu_no_speculation))
+               return;
+
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
  
         if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
                 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
  
+       if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+          !(ia32_cap & ARCH_CAP_SSB_NO))
+               setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+       if (x86_match_cpu(cpu_no_meltdown))
+               return;
+
         /* Rogue Data Cache Load? No! */
         if (ia32_cap & ARCH_CAP_RDCL_NO)
-               return false;
+               return;
  
-       return true;
+       setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
  }
  
  /*
@@ -992,12 +1033,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
  
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
  
-       if (!x86_match_cpu(cpu_no_speculation)) {
-               if (cpu_vulnerable_to_meltdown(c))
-                       setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-               setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-               setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
-       }
+       cpu_set_bug_bits(c);
  
         fpu__init_system(c);
  
@@ -1359,6 +1395,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
  #endif
         mtrr_ap_init();
         validate_apic_and_package_id(c);
+       x86_spec_ctrl_setup_ap();
  }
  
  static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h

index e806b11a99af4c72c5868731c7a8555cfb3957d9..37672d299e357430f2d16941905e352e9e89f648 100644 (file)
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -50,4 +50,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
  
  unsigned int aperfmperf_get_khz(int cpu);
  
+extern void x86_spec_ctrl_setup_ap(void);
+
  #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c

index 60d1897041da89c19b97f03ca2c577102ec3186c..577e7f7ae2733f293107f18b0673ed233850c914 100644 (file)
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
                 setup_clear_cpu_cap(X86_FEATURE_IBPB);
                 setup_clear_cpu_cap(X86_FEATURE_STIBP);
                 setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+               setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
                 setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+               setup_clear_cpu_cap(X86_FEATURE_SSBD);
+               setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
         }
  
         /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c

index f7666eef4a879b8e4b4da6a33ad941bc371babd4..c8e038800591674021c84bc66cae2fe4f02fcfc9 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
         [SMCA_SMU]      = { "smu",              "System Management Unit" },
  };
  
+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+       [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
  const char *smca_get_name(enum smca_bank_types t)
  {
         if (t >= N_SMCA_BANK_TYPES)
@@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
         if (!block)
                 return MSR_AMD64_SMCA_MCx_MISC(bank);
  
+       /* Check our cache first: */
+       if (smca_bank_addrs[bank][block] != -1)
+               return smca_bank_addrs[bank][block];
+
         /*
          * For SMCA enabled processors, BLKPTR field of the first MISC register
          * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
          */
         if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-               return addr;
+               goto out;
  
         if (!(low & MCI_CONFIG_MCAX))
-               return addr;
+               goto out;
  
         if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
             (low & MASK_BLKPTR_LO))
-               return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+               addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
  
+out:
+       smca_bank_addrs[bank][block] = addr;
         return addr;
  }
  
@@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
         if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                 return addr;
  
-       /* Get address from already initialized block. */
-       if (per_cpu(threshold_banks, cpu)) {
-               struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
-               if (bankp && bankp->blocks) {
-                       struct threshold_block *blockp = &bankp->blocks[block];
-
-                       if (blockp)
-                               return blockp->address;
-               }
-       }
-
         if (mce_flags.smca)
                 return smca_get_block_address(cpu, bank, block);
  
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c

index 0c408f8c4ed465722ed2411a25e451a439f4fcfa..2d29e47c056ea17d7c24103f41187b494a611da7 100644 (file)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
  }
  #endif
  
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
  unsigned long __head __startup_64(unsigned long physaddr,
                                   struct boot_params *bp)
  {
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
         p4dval_t *p4d;
         pudval_t *pud;
         pmdval_t *pmd, pmd_entry;
+       pteval_t *mask_ptr;
         bool la57;
         int i;
         unsigned int *next_pgt_ptr;
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
  
         pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
         /* Filter out unsupported __PAGE_KERNEL_* bits: */
-       pmd_entry &= __supported_pte_mask;
+       mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+       pmd_entry &= *mask_ptr;
         pmd_entry += sme_get_me_mask();
         pmd_entry +=  physaddr;
  
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c

index 0715f827607c4a2742e140f8d9a656ed4514d226..6f4d42377fe520c52a1c67ea0f25b27fc3eae2e1 100644 (file)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
         if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                 return 0;
  
+       /* We should not singlestep on the exception masking instructions */
+       if (insn_masking_exception(insn))
+               return 0;
+
  #ifdef CONFIG_X86_64
         /* Only x86_64 has RIP relative instructions */
         if (insn_rip_relative(insn)) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c

index 7867417cfaff2b59ee2c2531072a632a9e810f41..5b2300b818af9333f8d57f6b082f426b8556b606 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
  static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
  {
         native_smp_prepare_cpus(max_cpus);
-       if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+       if (kvm_para_has_hint(KVM_HINTS_REALTIME))
                 static_branch_disable(&virt_spin_lock_key);
  }
  
@@ -553,7 +553,7 @@ static void __init kvm_guest_init(void)
         }
  
         if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-           !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+           !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
             kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
  
@@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
         int cpu;
  
         if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-           !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+           !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
             kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                 for_each_possible_cpu(cpu) {
                         zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void)
         if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                 return;
  
-       if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+       if (kvm_para_has_hint(KVM_HINTS_REALTIME))
                 return;
  
         __pv_init_lock_hash();
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c

index 60cdec6628b0d33a9ef87abb6a575a17f2263718..d1ab07ec8c9aca2090f42153efd1b6ad93d68ffe 100644 (file)
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -57,12 +57,17 @@ static void load_segments(void)
  static void machine_kexec_free_page_tables(struct kimage *image)
  {
         free_page((unsigned long)image->arch.pgd);
+       image->arch.pgd = NULL;
  #ifdef CONFIG_X86_PAE
         free_page((unsigned long)image->arch.pmd0);
+       image->arch.pmd0 = NULL;
         free_page((unsigned long)image->arch.pmd1);
+       image->arch.pmd1 = NULL;
  #endif
         free_page((unsigned long)image->arch.pte0);
+       image->arch.pte0 = NULL;
         free_page((unsigned long)image->arch.pte1);
+       image->arch.pte1 = NULL;
  }
  
  static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
             !image->arch.pmd0 || !image->arch.pmd1 ||
  #endif
             !image->arch.pte0 || !image->arch.pte1) {
-               machine_kexec_free_page_tables(image);
                 return -ENOMEM;
         }
         return 0;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c

index a5e55d832d0a4d25309766057ee86ee9f1d282da..6010449ca6d2951197c533e35c50a36a2d27f99b 100644 (file)
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
  static void free_transition_pgtable(struct kimage *image)
  {
         free_page((unsigned long)image->arch.p4d);
+       image->arch.p4d = NULL;
         free_page((unsigned long)image->arch.pud);
+       image->arch.pud = NULL;
         free_page((unsigned long)image->arch.pmd);
+       image->arch.pmd = NULL;
         free_page((unsigned long)image->arch.pte);
+       image->arch.pte = NULL;
  }
  
  static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
         set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
         return 0;
  err:
-       free_transition_pgtable(image);
         return result;
  }
  
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c

index 03408b942adbad2bd1bca42b89e559e2d198d9da..30ca2d1a92319726ff31d3ddb8264140bcec17cf 100644 (file)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
  #include <asm/switch_to.h>
  #include <asm/desc.h>
  #include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
  
  /*
   * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
         }
  }
  
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+       struct ssb_state        *shared_state;
+       raw_spinlock_t          lock;
+       unsigned int            disable_state;
+       unsigned long           local_state;
+};
+
+#define LSTATE_SSB     0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+       struct ssb_state *st = this_cpu_ptr(&ssb_state);
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       st->local_state = 0;
+
+       /*
+        * Shared state setup happens once on the first bringup
+        * of the CPU. It's not destroyed on CPU hotunplug.
+        */
+       if (st->shared_state)
+               return;
+
+       raw_spin_lock_init(&st->lock);
+
+       /*
+        * Go over HT siblings and check whether one of them has set up the
+        * shared state pointer already.
+        */
+       for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+               if (cpu == this_cpu)
+                       continue;
+
+               if (!per_cpu(ssb_state, cpu).shared_state)
+                       continue;
+
+               /* Link it to the state of the sibling: */
+               st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+               return;
+       }
+
+       /*
+        * First HT sibling to come up on the core.  Link shared state of
+        * the first HT sibling to itself. The siblings on the same core
+        * which come up later will see the shared state pointer and link
+        * themselves to the state of this CPU.
+        */
+       st->shared_state = st;
+}
+
+/*
+ * Logic is: First HT sibling enables SSBD for both siblings in the core
+ * and last sibling to disable it, disables it for the whole core. This is how
+ * MSR_SPEC_CTRL works in "hardware":
+ *
+ *  CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+       struct ssb_state *st = this_cpu_ptr(&ssb_state);
+       u64 msr = x86_amd_ls_cfg_base;
+
+       if (!static_cpu_has(X86_FEATURE_ZEN)) {
+               msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+               wrmsrl(MSR_AMD64_LS_CFG, msr);
+               return;
+       }
+
+       if (tifn & _TIF_SSBD) {
+               /*
+                * Since this can race with prctl(), block reentry on the
+                * same CPU.
+                */
+               if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+                       return;
+
+               msr |= x86_amd_ls_cfg_ssbd_mask;
+
+               raw_spin_lock(&st->shared_state->lock);
+               /* First sibling enables SSBD: */
+               if (!st->shared_state->disable_state)
+                       wrmsrl(MSR_AMD64_LS_CFG, msr);
+               st->shared_state->disable_state++;
+               raw_spin_unlock(&st->shared_state->lock);
+       } else {
+               if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+                       return;
+
+               raw_spin_lock(&st->shared_state->lock);
+               st->shared_state->disable_state--;
+               if (!st->shared_state->disable_state)
+                       wrmsrl(MSR_AMD64_LS_CFG, msr);
+               raw_spin_unlock(&st->shared_state->lock);
+       }
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+       u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+       wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+       /*
+        * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+        * so ssbd_tif_to_spec_ctrl() just works.
+        */
+       wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+       u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+       wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+       if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+               amd_set_ssb_virt_state(tifn);
+       else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+               amd_set_core_ssb_state(tifn);
+       else
+               intel_set_ssb_state(tifn);
+}
+
+void speculative_store_bypass_update(unsigned long tif)
+{
+       preempt_disable();
+       __speculative_store_bypass_update(tif);
+       preempt_enable();
+}
+
  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                       struct tss_struct *tss)
  {
@@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  
         if ((tifp ^ tifn) & _TIF_NOCPUID)
                 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+       if ((tifp ^ tifn) & _TIF_SSBD)
+               __speculative_store_bypass_update(tifn);
  }
  
  /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c

index 4b100fe0f5087f3f1a2364c9ffd48e5c1327ab6d..12bb445fb98d6618013be3b78a07aee02ac4d01a 100644 (file)
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
         clear_thread_flag(TIF_X32);
         /* Pretend that this comes from a 64bit execve */
         task_pt_regs(current)->orig_ax = __NR_execve;
+       current_thread_info()->status &= ~TS_COMPAT;
  
         /* Ensure the corresponding mm is not marked. */
         if (current->mm)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c

index 0f1cbb042f49b82e7a01b7ace0bce08061a8e6f1..9dd324ae4832914e5368911da65fc29cbce7f14e 100644 (file)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -79,6 +79,7 @@
  #include <asm/qspinlock.h>
  #include <asm/intel-family.h>
  #include <asm/cpu_device_id.h>
+#include <asm/spec-ctrl.h>
  
  /* Number of siblings per CPU package */
  int smp_num_siblings = 1;
@@ -244,6 +245,8 @@ static void notrace start_secondary(void *unused)
          */
         check_tsc_sync_target();
  
+       speculative_store_bypass_ht_init();
+
         /*
          * Lock vector_lock, set CPU online and bring the vector
          * allocator online. Online must be set with vector_lock held
@@ -1292,6 +1295,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
         set_mtrr_aps_delayed_init();
  
         smp_quirk_init_udelay();
+
+       speculative_store_bypass_ht_init();
  }
  
  void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c

index 85c7ef23d99f7f9b7373e54a6af423aab38ed445..c84bb539695828328f728ffc42aecf0a23f0c06a 100644 (file)
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
         if (is_prefix_bad(insn))
                 return -ENOTSUPP;
  
+       /* We should not singlestep on the exception masking instructions */
+       if (insn_masking_exception(insn))
+               return -ENOTSUPP;
+
         if (x86_64)
                 good_insns = good_insns_64;
         else
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c

index 82055b90a8b31480e50eb6fe5ee57febc641d32c..92bf2f2e7cdd241d1b4019c0ed72ce72fbb95d11 100644 (file)
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
  
         /* cpuid 0x80000008.ebx */
         const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-               F(IBPB) | F(IBRS);
+               F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
  
         /* cpuid 0xC0000001.edx */
         const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         /* cpuid 7.0.edx*/
         const u32 kvm_cpuid_7_0_edx_x86_features =
                 F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-               F(ARCH_CAPABILITIES);
+               F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
  
         /* all calls to cpuid_count() should be made on the same cpu */
         get_cpu();
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                                 entry->ecx &= ~F(PKU);
                         entry->edx &= kvm_cpuid_7_0_edx_x86_features;
                         cpuid_mask(&entry->edx, CPUID_7_EDX);
+                       /*
+                        * We emulate ARCH_CAPABILITIES in software even
+                        * if the host doesn't support it.
+                        */
+                       entry->edx |= F(ARCH_CAPABILITIES);
                 } else {
                         entry->ebx = 0;
                         entry->ecx = 0;
@@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                         g_phys_as = phys_as;
                 entry->eax = g_phys_as | (virt_as << 8);
                 entry->edx = 0;
-               /* IBRS and IBPB aren't necessarily present in hardware cpuid */
-               if (boot_cpu_has(X86_FEATURE_IBPB))
-                       entry->ebx |= F(IBPB);
-               if (boot_cpu_has(X86_FEATURE_IBRS))
-                       entry->ebx |= F(IBRS);
+               /*
+                * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+                * hardware cpuid
+                */
+               if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+                       entry->ebx |= F(AMD_IBPB);
+               if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+                       entry->ebx |= F(AMD_IBRS);
+               if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+                       entry->ebx |= F(VIRT_SSBD);
                 entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
                 cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+               if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+                       entry->ebx |= F(VIRT_SSBD);
                 break;
         }
         case 0x80000019:
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c

index 98618e397342297cdff01a94975147ee543419c0..46ff64da44cab46d637facafffb10ba1a1269a1f 100644 (file)
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1260,12 +1260,16 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
         }
  }
  
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
  {
-       struct kvm_run *run = vcpu->run;
+       kvm_hv_hypercall_set_result(vcpu, result);
+       ++vcpu->stat.hypercalls;
+       return kvm_skip_emulated_instruction(vcpu);
+}
  
-       kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
-       return 1;
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+       return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
  }
  
  static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
@@ -1296,8 +1300,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
         if (param & ~KVM_HYPERV_CONN_ID_MASK)
                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
  
-       /* conn_to_evt is protected by vcpu->kvm->srcu */
+       /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
+       rcu_read_lock();
         eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+       rcu_read_unlock();
         if (!eventfd)
                 return HV_STATUS_INVALID_PORT_ID;
  
@@ -1348,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
         /* Hypercall continuation is not supported yet */
         if (rep_cnt || rep_idx) {
                 ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-               goto set_result;
+               goto out;
         }
  
         switch (code) {
@@ -1379,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
                 break;
         }
  
-set_result:
-       kvm_hv_hypercall_set_result(vcpu, ret);
-       return 1;
+out:
+       return kvm_hv_hypercall_complete(vcpu, ret);
  }
  
  void kvm_hv_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index b74c9c1405b9978331653daa9093e95c36be0252..3773c462511404bcc94ad69742b16fdfdc26a504 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic)
  
  static void advance_periodic_target_expiration(struct kvm_lapic *apic)
  {
-       apic->lapic_timer.tscdeadline +=
-               nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+       ktime_t now = ktime_get();
+       u64 tscl = rdtsc();
+       ktime_t delta;
+
+       /*
+        * Synchronize both deadlines to the same time source or
+        * differences in the periods (caused by differences in the
+        * underlying clocks or numerical approximation errors) will
+        * cause the two to drift apart over time as the errors
+        * accumulate.
+        */
         apic->lapic_timer.target_expiration =
                 ktime_add_ns(apic->lapic_timer.target_expiration,
                                 apic->lapic_timer.period);
+       delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+       apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+               nsec_to_cycles(apic->vcpu, delta);
  }
  
  static void start_sw_period(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index 1fc05e428aba824e252bc3c2dcb9e1b6f4fb2e69..26110c202b19c44f6b52f4a105646b49ee7a23b5 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,7 +49,7 @@
  #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  #include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
  
  #include <asm/virtext.h>
  #include "trace.h"
@@ -213,6 +213,12 @@ struct vcpu_svm {
         } host;
  
         u64 spec_ctrl;
+       /*
+        * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+        * translated into the appropriate LS_CFG bits on the host to
+        * perform speculative control.
+        */
+       u64 virt_spec_ctrl;
  
         u32 *msrpm;
  
@@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  
         vcpu->arch.microcode_version = 0x01000065;
         svm->spec_ctrl = 0;
+       svm->virt_spec_ctrl = 0;
  
         if (!init_event) {
                 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 break;
         case MSR_IA32_SPEC_CTRL:
                 if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
                         return 1;
  
                 msr_info->data = svm->spec_ctrl;
                 break;
+       case MSR_AMD64_VIRT_SPEC_CTRL:
+               if (!msr_info->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+                       return 1;
+
+               msr_info->data = svm->virt_spec_ctrl;
+               break;
         case MSR_F15H_IC_CFG: {
  
                 int family, model;
@@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 break;
         case MSR_IA32_SPEC_CTRL:
                 if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
                         return 1;
  
                 /* The STIBP bit doesn't fault even if it's not advertised */
@@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 break;
         case MSR_IA32_PRED_CMD:
                 if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
                         return 1;
  
                 if (data & ~PRED_CMD_IBPB)
@@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                         break;
                 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
                 break;
+       case MSR_AMD64_VIRT_SPEC_CTRL:
+               if (!msr->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+                       return 1;
+
+               if (data & ~SPEC_CTRL_SSBD)
+                       return 1;
+
+               svm->virt_spec_ctrl = data;
+               break;
         case MSR_STAR:
                 svm->vmcb->save.star = data;
                 break;
@@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
          * is no need to worry about the conditional branch over the wrmsr
          * being speculatively taken.
          */
-       if (svm->spec_ctrl)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+       x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
  
         asm volatile (
                 "push %%" _ASM_BP "; \n\t"
@@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
  #endif
                 );
  
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+       loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+       loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
         /*
          * We do not use IBRS in the kernel. If this vCPU has used the
          * SPEC_CTRL MSR it may have left it on; save the value and
@@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
-       if (svm->spec_ctrl)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
-       /* Eliminate branch target predictions from guest mode */
-       vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
-       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
-       loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
-       loadsegment(gs, svm->host.gs);
-#endif
-#endif
+       x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
  
         reload_tss(vcpu);
  
@@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
         return false;
  }
  
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
  {
         return true;
  }
@@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
         .hardware_enable = svm_hardware_enable,
         .hardware_disable = svm_hardware_disable,
         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
-       .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+       .has_emulated_msr = svm_has_emulated_msr,
  
         .vcpu_create = svm_create_vcpu,
         .vcpu_free = svm_free_vcpu,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index c7668806163fd5ac493f59b6d9f08823828c7758..40aa29204baf80aee54056dffb69519cc6cb5f89 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,7 +51,7 @@
  #include <asm/apic.h>
  #include <asm/irq_remapping.h>
  #include <asm/mmu_context.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
  #include <asm/mshyperv.h>
  
  #include "trace.h"
@@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void)
                 SECONDARY_EXEC_ENABLE_VMFUNC;
  }
  
+static bool vmx_umip_emulated(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_DESC;
+}
+
  static inline bool report_flexpriority(void)
  {
         return flexpriority_enabled;
@@ -3523,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 return kvm_get_msr_common(vcpu, msr_info);
         case MSR_IA32_SPEC_CTRL:
                 if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                         return 1;
  
@@ -3642,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 break;
         case MSR_IA32_SPEC_CTRL:
                 if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                         return 1;
  
                 /* The STIBP bit doesn't fault even if it's not advertised */
-               if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+               if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
                         return 1;
  
                 vmx->spec_ctrl = data;
@@ -3673,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 break;
         case MSR_IA32_PRED_CMD:
                 if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                         return 1;
  
@@ -4761,14 +4764,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         else
                 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
  
-       if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
-               vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
-                             SECONDARY_EXEC_DESC);
-               hw_cr4 &= ~X86_CR4_UMIP;
-       } else if (!is_guest_mode(vcpu) ||
-                  !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
-               vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+       if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+               if (cr4 & X86_CR4_UMIP) {
+                       vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
                                 SECONDARY_EXEC_DESC);
+                       hw_cr4 &= ~X86_CR4_UMIP;
+               } else if (!is_guest_mode(vcpu) ||
+                       !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
+                       vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+                                       SECONDARY_EXEC_DESC);
+       }
  
         if (cr4 & X86_CR4_VMXE) {
                 /*
@@ -9480,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
  }
  STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
  
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
  {
-       return enable_unrestricted_guest || emulate_invalid_guest_state;
+       switch (index) {
+       case MSR_IA32_SMBASE:
+               /*
+                * We cannot do SMM unless we can run the guest in big
+                * real mode.
+                */
+               return enable_unrestricted_guest || emulate_invalid_guest_state;
+       case MSR_AMD64_VIRT_SPEC_CTRL:
+               /* This is AMD only.  */
+               return false;
+       default:
+               return true;
+       }
  }
  
  static bool vmx_mpx_supported(void)
@@ -9497,12 +9514,6 @@ static bool vmx_xsaves_supported(void)
                 SECONDARY_EXEC_XSAVES;
  }
  
-static bool vmx_umip_emulated(void)
-{
-       return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_DESC;
-}
-
  static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
  {
         u32 exit_intr_info;
@@ -9720,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
          * is no need to worry about the conditional branch over the wrmsr
          * being speculatively taken.
          */
-       if (vmx->spec_ctrl)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+       x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
  
         vmx->__launched = vmx->loaded_vmcs->launched;
  
@@ -9869,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
-       if (vmx->spec_ctrl)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+       x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
  
         /* Eliminate branch target predictions from guest mode */
         vmexit_fill_RSB();
@@ -12630,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
         .hardware_enable = hardware_enable,
         .hardware_disable = hardware_disable,
         .cpu_has_accelerated_tpr = report_flexpriority,
-       .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+       .has_emulated_msr = vmx_has_emulated_msr,
  
         .vm_init = vmx_vm_init,
         .vm_alloc = vmx_vm_alloc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 51ecd381793b4f281563779b97a04e5063572fdf..71e7cda6d01430bca8ef226238589ab0e830d6c9 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
  static bool __read_mostly report_ignored_msrs = true;
  module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
  
-unsigned int min_timer_period_us = 500;
+unsigned int min_timer_period_us = 200;
  module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
  
  static bool __read_mostly kvmclock_periodic_sync = true;
@@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
  int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  {
  #ifdef CONFIG_X86_64
-       cr3 &= ~CR3_PCID_INVD;
+       bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+       if (pcid_enabled)
+               cr3 &= ~CR3_PCID_INVD;
  #endif
  
         if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
@@ -1058,6 +1061,7 @@ static u32 emulated_msrs[] = {
         MSR_SMI_COUNT,
         MSR_PLATFORM_INFO,
         MSR_MISC_FEATURES_ENABLES,
+       MSR_AMD64_VIRT_SPEC_CTRL,
  };
  
  static unsigned num_emulated_msrs;
@@ -2903,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                  * fringe case that is not enabled except via specific settings
                  * of the module parameters.
                  */
-               r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+               r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
                 break;
         case KVM_CAP_VAPIC:
                 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@ -4603,14 +4607,8 @@ static void kvm_init_msr_list(void)
         num_msrs_to_save = j;
  
         for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
-               switch (emulated_msrs[i]) {
-               case MSR_IA32_SMBASE:
-                       if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
-                               continue;
-                       break;
-               default:
-                       break;
-               }
+               if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+                       continue;
  
                 if (j < i)
                         emulated_msrs[j] = emulated_msrs[i];
@@ -6671,9 +6669,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
  {
         unsigned long nr, a0, a1, a2, a3, ret;
-       int op_64_bit, r;
-
-       r = kvm_skip_emulated_instruction(vcpu);
+       int op_64_bit;
  
         if (kvm_hv_hypercall_enabled(vcpu->kvm))
                 return kvm_hv_hypercall(vcpu);
@@ -6721,8 +6717,9 @@ out:
         if (!op_64_bit)
                 ret = (u32)ret;
         kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+
         ++vcpu->stat.hypercalls;
-       return r;
+       return kvm_skip_emulated_instruction(vcpu);
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
  
@@ -7979,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
  {
         struct msr_data apic_base_msr;
         int mmu_reset_needed = 0;
+       int cpuid_update_needed = 0;
         int pending_vec, max_bits, idx;
         struct desc_ptr dt;
         int ret = -EINVAL;
@@ -8017,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
         vcpu->arch.cr0 = sregs->cr0;
  
         mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+       cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+                               (X86_CR4_OSXSAVE | X86_CR4_PKE));
         kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-       if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+       if (cpuid_update_needed)
                 kvm_update_cpuid(vcpu);
  
         idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c

index d7bc0eea20a5ed2fc8ec43ebc06429517cbb362b..6e98e0a7c92315c2a819ee396bf78ae3104688c4 100644 (file)
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
          */
         if (pkey != -1)
                 return pkey;
-       /*
-        * Look for a protection-key-drive execute-only mapping
-        * which is now being given permissions that are not
-        * execute-only.  Move it back to the default pkey.
-        */
-       if (vma_is_pkey_exec_only(vma) &&
-           (prot & (PROT_READ|PROT_WRITE))) {
-               return 0;
-       }
+
         /*
          * The mapping is execute-only.  Go try to get the
          * execute-only protection key.  If we fail to do that,
          * fall through as if we do not have execute-only
-        * support.
+        * support in this mm.
          */
         if (prot == PROT_EXEC) {
                 pkey = execute_only_pkey(vma->vm_mm);
                 if (pkey > 0)
                         return pkey;
+       } else if (vma_is_pkey_exec_only(vma)) {
+               /*
+                * Protections are *not* PROT_EXEC, but the mapping
+                * is using the exec-only pkey.  This mapping was
+                * PROT_EXEC and will no longer be.  Move back to
+                * the default pkey.
+                */
+               return ARCH_DEFAULT_PKEY;
         }
+
         /*
          * This is a vanilla, non-pkey mprotect (or we failed to
          * setup execute-only), inherit the pkey from the VMA we
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c

index d33e7dbe3129fc327fe7f24fb0fb84c802b11edf..2d76106788a31c5638459bc35306445e59f2c02d 100644 (file)
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
  }
  EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
  
-static void xen_flush_tlb_all(void)
+static noinline void xen_flush_tlb_all(void)
  {
         struct mmuext_op *op;
         struct multicall_space mcs;
  
-       trace_xen_mmu_flush_tlb_all(0);
-
         preempt_disable();
  
         mcs = xen_mc_entry(sizeof(*op));
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c

index 486c0a34d00b2b75a27467aa46910142c108e373..2c30cabfda90fb94b36c88f5f35aef91d8f119d0 100644 (file)
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void)
         return this_cpu_read(xen_vcpu_info.arch.cr2);
  }
  
-static void xen_flush_tlb(void)
+static noinline void xen_flush_tlb(void)
  {
         struct mmuext_op *op;
         struct multicall_space mcs;
  
-       trace_xen_mmu_flush_tlb(0);
-
         preempt_disable();
  
         mcs = xen_mc_entry(sizeof(*op));
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h

index 514aaf948ea900a584b0bb9293ac45b9f5759d04..3825df9234803a84cd5bc9ddd6892452eb87160f 100644 (file)
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -56,6 +56,10 @@ acpi_status acpi_ns_initialize_objects(void);
  
  acpi_status acpi_ns_initialize_devices(u32 flags);
  
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+                        u32 level, void *context, void **return_value);
+
  /*
   * nsload -  Namespace loading
   */
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c

index 99d92cb3280372bae6bb92d2077a90071a8cd9cb..f85c6f3271f64fb060fbf5029a4b41ee151389d5 100644 (file)
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -174,6 +174,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
                 return_ACPI_STATUS(status);
         }
  
+       /* Complete the initialization/resolution of package objects */
+
+       status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+                                       ACPI_UINT32_MAX, 0,
+                                       acpi_ns_init_one_package, NULL, NULL,
+                                       NULL);
+
         /* Parameter Data (optional) */
  
         if (parameter_node) {
@@ -430,6 +437,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
                 return_ACPI_STATUS(status);
         }
  
+       /* Complete the initialization/resolution of package objects */
+
+       status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+                                       ACPI_UINT32_MAX, 0,
+                                       acpi_ns_init_one_package, NULL, NULL,
+                                       NULL);
+
         /* Store the ddb_handle into the Target operand */
  
         status = acpi_ex_store(ddb_handle, target, walk_state);
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c

index 77f2b5f4948ad3aa9d2567b010291549f1a81789..d77257d1c827b39f2efc0576ac8a08b433881d83 100644 (file)
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -240,6 +240,58 @@ error_exit:
         return_ACPI_STATUS(status);
  }
  
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_init_one_package
+ *
+ * PARAMETERS:  obj_handle      - Node (used as a struct acpi_namespace_node *)
+ *              level           - Current nesting level (not read by the body)
+ *              context         - Not used
+ *              return_value    - Not used
+ *
+ * RETURN:      Status - always AE_OK; helper failures are not propagated
+ *
+ * DESCRIPTION: Callback from acpi_walk_namespace. Invoked for every package
+ *              within the namespace. Used during dynamic load of an SSDT.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+                        u32 level, void *context, void **return_value)
+{
+       acpi_status status;
+       union acpi_operand_object *obj_desc;
+       struct acpi_namespace_node *node =
+           (struct acpi_namespace_node *)obj_handle;
+
+       obj_desc = acpi_ns_get_attached_object(node);
+       if (!obj_desc) {
+               return (AE_OK); /* no object attached to this node: nothing to do */
+       }
+
+       /* Exit if package is already initialized */
+
+       if (obj_desc->package.flags & AOPOBJ_DATA_VALID) {
+               return (AE_OK);
+       }
+
+       status = acpi_ds_get_package_arguments(obj_desc);
+       if (ACPI_FAILURE(status)) {
+               return (AE_OK); /* failure is dropped; AOPOBJ_DATA_VALID stays unset */
+       }
+
+       status =
+           acpi_ut_walk_package_tree(obj_desc, NULL,
+                                     acpi_ds_init_package_element, NULL);
+       if (ACPI_FAILURE(status)) {
+               return (AE_OK); /* failure is dropped; AOPOBJ_DATA_VALID stays unset */
+       }
+
+       obj_desc->package.flags |= AOPOBJ_DATA_VALID; /* set only after both helper calls succeeded */
+       return (AE_OK);
+}
+
  /*******************************************************************************
   *
   * FUNCTION:    acpi_ns_init_one_object
@@ -360,27 +412,11 @@ acpi_ns_init_one_object(acpi_handle obj_handle,
  
         case ACPI_TYPE_PACKAGE:
  
-               info->package_init++;
-               status = acpi_ds_get_package_arguments(obj_desc);
-               if (ACPI_FAILURE(status)) {
-                       break;
-               }
-
-               ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE,
-                                     "%s: Completing resolution of Package elements\n",
-                                     ACPI_GET_FUNCTION_NAME));
+               /* Complete the initialization/resolution of the package object */
  
-               /*
-                * Resolve all named references in package objects (and all
-                * sub-packages). This action has been deferred until the entire
-                * namespace has been loaded, in order to support external and
-                * forward references from individual package elements (05/2017).
-                */
-               status = acpi_ut_walk_package_tree(obj_desc, NULL,
-                                                  acpi_ds_init_package_element,
-                                                  NULL);
-
-               obj_desc->package.flags |= AOPOBJ_DATA_VALID;
+               info->package_init++;
+               status =
+                   acpi_ns_init_one_package(obj_handle, level, NULL, NULL);
                 break;
  
         default:
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index e2235ed3e4beb266f225625093448d6874d083be..b87252bf4571775ba8344376e7a2c985b91e0487 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1978,19 +1978,8 @@ static ssize_t range_index_show(struct device *dev,
  }
  static DEVICE_ATTR_RO(range_index);
  
-static ssize_t ecc_unit_size_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nd_region *nd_region = to_nd_region(dev);
-       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
-
-       return sprintf(buf, "%d\n", nfit_spa->clear_err_unit);
-}
-static DEVICE_ATTR_RO(ecc_unit_size);
-
  static struct attribute *acpi_nfit_region_attributes[] = {
         &dev_attr_range_index.attr,
-       &dev_attr_ecc_unit_size.attr,
         NULL,
  };
  
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c

index 6389c88b3500a0bb220b92efa85531cf564a974c..738fb22978ddcd14ad1956c5119972f19b17d2a6 100644 (file)
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -334,6 +334,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
         { PCI_VDEVICE(INTEL, 0x9c07), board_ahci_mobile }, /* Lynx LP RAID */
         { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_mobile }, /* Lynx LP RAID */
         { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_mobile }, /* Lynx LP RAID */
+       { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_mobile }, /* Cannon Lake PCH-LP AHCI */
         { PCI_VDEVICE(INTEL, 0x1f22), board_ahci }, /* Avoton AHCI */
         { PCI_VDEVICE(INTEL, 0x1f23), board_ahci }, /* Avoton AHCI */
         { PCI_VDEVICE(INTEL, 0x1f24), board_ahci }, /* Avoton RAID */
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c

index 68596bd4cf06c2362692f7ded557b669efc7f983..346b163f6e89eacc24cfe0a205fa2f35f7d7cce6 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4493,6 +4493,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
         { "C300-CTFDDAC128MAG", "0001",         ATA_HORKAGE_NONCQ, },
  
+       /* Some Sandisk SSDs lock up hard with NCQ enabled.  Reported on
+          SD7SN6S256G and SD8SN8U256G */
+       { "SanDisk SD[78]SN*G", NULL,           ATA_HORKAGE_NONCQ, },
+
         /* devices which puke on READ_NATIVE_MAX */
         { "HDS724040KLSA80",    "KFAOA20N",     ATA_HORKAGE_BROKEN_HPA, },
         { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },
@@ -4549,13 +4553,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                 ATA_HORKAGE_ZERO_AFTER_TRIM |
                                                 ATA_HORKAGE_NOLPM, },
  
-       /* This specific Samsung model/firmware-rev does not handle LPM well */
+       /* These specific Samsung models/firmware-revs do not handle LPM well */
         { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
+       { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
  
         /* Sandisk devices which are known to not handle LPM well */
         { "SanDisk SD7UB3Q*G1001",      NULL,   ATA_HORKAGE_NOLPM, },
  
         /* devices that don't properly handle queued TRIM commands */
+       { "Micron_M500IT_*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
         { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                 ATA_HORKAGE_ZERO_AFTER_TRIM, },
         { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c

index 2da998baa75cb26714783bcce909332f8ae8e75e..30cc9c877ebb64c3e384f395daee6ebdc750ddef 100644 (file)
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -534,14 +534,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
         return sprintf(buf, "Not affected\n");
  }
  
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, /* weak definition; wired to the spec_store_bypass DEVICE_ATTR as its show callback */
+                                         struct device_attribute *attr, char *buf) /* dev and attr are not read by the body */
+{
+       return sprintf(buf, "Not affected\n"); /* writes the fixed string into buf and returns sprintf()'s count */
+}
+
  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
  
  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
         &dev_attr_meltdown.attr,
         &dev_attr_spectre_v1.attr,
         &dev_attr_spectre_v2.attr,
+       &dev_attr_spec_store_bypass.attr,
         NULL
  };
  
diff --git a/drivers/base/node.c b/drivers/base/node.c

index 7a3a580821e076741228473138cdae171fbbfaac..a5e821d09656bfae5df645988001ebec48fe0e3e 100644 (file)
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -490,7 +490,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
         return 0;
  }
  
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages,
+                     bool check_nid)
  {
         unsigned long end_pfn = start_pfn + nr_pages;
         unsigned long pfn;
@@ -514,7 +515,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
  
                 mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
  
-               ret = register_mem_sect_under_node(mem_blk, nid, true);
+               ret = register_mem_sect_under_node(mem_blk, nid, check_nid);
                 if (!err)
                         err = ret;
  
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c

index 02a497e7c78549a633d1079212da0936033075d1..e5e067091572e02243fde97f710c0a2d07a4d0b4 100644 (file)
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1923,10 +1923,8 @@ static int device_prepare(struct device *dev, pm_message_t state)
  
         dev->power.wakeup_path = false;
  
-       if (dev->power.no_pm_callbacks) {
-               ret = 1;        /* Let device go direct_complete */
+       if (dev->power.no_pm_callbacks)
                 goto unlock;
-       }
  
         if (dev->pm_domain)
                 callback = dev->pm_domain->ops.prepare;
@@ -1960,7 +1958,8 @@ unlock:
          */
         spin_lock_irq(&dev->power.lock);
         dev->power.direct_complete = state.event == PM_EVENT_SUSPEND &&
-               pm_runtime_suspended(dev) && ret > 0 &&
+               ((pm_runtime_suspended(dev) && ret > 0) ||
+                dev->power.no_pm_callbacks) &&
                 !dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP);
         spin_unlock_irq(&dev->power.lock);
         return 0;
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c

index f040aba48d5056c6e735ec4851a82f712dfdfa2c..27e9686b6d3a3bb499da4e724ce05d344ab24e86 100644 (file)
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -184,7 +184,7 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq)
  {
         int i;
         static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"};
-       char interrupts[20];
+       char interrupts[25];
         char *ints = interrupts;
  
         for (i = 0; i < ARRAY_SIZE(irq_name); i++)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 5d4e31655d9629732c42b02a602ebdc20bec2cda..55cf554bc91451d1ffe0eb0af50079de6b7fe13a 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1068,6 +1068,7 @@ static int loop_clr_fd(struct loop_device *lo)
         if (bdev) {
                 bdput(bdev);
                 invalidate_bdev(bdev);
+               bdev->bd_inode->i_mapping->wb_err = 0;
         }
         set_capacity(lo->lo_disk, 0);
         loop_sysfs_exit(lo);
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig

index 41492e980ef4c0deb4fd6f62020d76137c912b42..34968a381d0fdc878d98e7fab60762f2f6588ec5 100644 (file)
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -266,15 +266,13 @@ config COMMON_CLK_STM32MP157
           Support for stm32mp157 SoC family clocks
  
  config COMMON_CLK_STM32F
-       bool "Clock driver for stm32f4 and stm32f7 SoC families"
-       depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746
+       def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746)
         help
         ---help---
           Support for stm32f4 and stm32f7 SoC families clocks
  
  config COMMON_CLK_STM32H7
-       bool "Clock driver for stm32h7 SoC family"
-       depends on MACH_STM32H743
+       def_bool COMMON_CLK && MACH_STM32H743
         help
         ---help---
           Support for stm32h7 SoC family clocks
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c

index 114ecbb94ec5ed8d5e364c981d740ecc305d553a..12320118f8deea7dabba7a4d7af975645fb1d48f 100644 (file)
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -464,7 +464,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
         clk_set_rate(clks[IMX6UL_CLK_AHB], 99000000);
  
         /* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */
-       clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_PLL3_USB_OTG]);
+       clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_OSC]);
         clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_CLK2]);
         clk_set_parent(clks[IMX6UL_CLK_PERIPH_PRE], clks[IMX6UL_CLK_PLL2_BUS]);
         clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_PRE]);
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm

index de55c7d57438c8e094a49d2d1d0b9240c20971ab..96b35b8b36060f4af9eed2ac82c46aa5d2ea08be 100644 (file)
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -20,7 +20,7 @@ config ACPI_CPPC_CPUFREQ
  
  config ARM_ARMADA_37XX_CPUFREQ
         tristate "Armada 37xx CPUFreq support"
-       depends on ARCH_MVEBU
+       depends on ARCH_MVEBU && CPUFREQ_DT
         help
           This adds the CPUFreq driver support for Marvell Armada 37xx SoCs.
           The Armada 37xx PMU supports 4 frequency and VDD levels.
diff --git a/drivers/dax/super.c b/drivers/dax/super.c

index 31b8391133996aae5992e6b2195dc07573801719..88e77b7f0c4bbcc9703791d9b04144bf4c67ce4c 100644 (file)
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
  {
         struct block_device *bdev = sb->s_bdev;
         struct dax_device *dax_dev;
+       bool dax_enabled = false;
         pgoff_t pgoff;
         int err, id;
         void *kaddr;
@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
                  * on being able to do (page_address(pfn_to_page())).
                  */
                 WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+               dax_enabled = true;
         } else if (pfn_t_devmap(pfn)) {
-               /* pass */;
-       } else {
+               struct dev_pagemap *pgmap;
+
+               pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+               if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+                       dax_enabled = true;
+               put_dev_pagemap(pgmap);
+       }
+
+       if (!dax_enabled) {
                 pr_debug("VFS (%s): error: dax support not enabled\n",
                                 sb->s_id);
                 return -EOPNOTSUPP;
         }
-
         return 0;
  }
  EXPORT_SYMBOL_GPL(__bdev_dax_supported);
@@ -182,8 +190,7 @@ static ssize_t write_cache_show(struct device *dev,
         if (!dax_dev)
                 return -ENXIO;
  
-       rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
-                               &dax_dev->flags));
+       rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
         put_dax(dax_dev);
         return rc;
  }
@@ -201,10 +208,8 @@ static ssize_t write_cache_store(struct device *dev,
  
         if (rc)
                 len = rc;
-       else if (write_cache)
-               set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
         else
-               clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+               dax_write_cache(dax_dev, write_cache);
  
         put_dax(dax_dev);
         return len;
@@ -296,7 +301,7 @@ EXPORT_SYMBOL_GPL(dax_copy_to_iter);
  void arch_wb_cache_pmem(void *addr, size_t size);
  void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
  {
-       if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
+       if (unlikely(!dax_write_cache_enabled(dax_dev)))
                 return;
  
         arch_wb_cache_pmem(addr, size);
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c

index d29275b97e8453a1fd7731fe4564bf0a61b9460c..4a828c18099a6e411ff928256992c836f93ebdcb 100644 (file)
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -524,6 +524,14 @@ static int bam_alloc_chan(struct dma_chan *chan)
         return 0;
  }
  
+static int bam_pm_runtime_get_sync(struct device *dev) /* guarded wrapper used in place of direct pm_runtime_get_sync() calls in this driver */
+{
+       if (pm_runtime_enabled(dev))
+               return pm_runtime_get_sync(dev); /* pass the callee's result straight back; callers test ret < 0 */
+
+       return 0; /* pm_runtime_enabled(dev) was false: skip the get and report success */
+}
+
  /**
   * bam_free_chan - Frees dma resources associated with specific channel
   * @chan: specified channel
@@ -539,7 +547,7 @@ static void bam_free_chan(struct dma_chan *chan)
         unsigned long flags;
         int ret;
  
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return;
  
@@ -720,7 +728,7 @@ static int bam_pause(struct dma_chan *chan)
         unsigned long flag;
         int ret;
  
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return ret;
  
@@ -746,7 +754,7 @@ static int bam_resume(struct dma_chan *chan)
         unsigned long flag;
         int ret;
  
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return ret;
  
@@ -852,7 +860,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data)
         if (srcs & P_IRQ)
                 tasklet_schedule(&bdev->task);
  
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return ret;
  
@@ -969,7 +977,7 @@ static void bam_start_dma(struct bam_chan *bchan)
         if (!vd)
                 return;
  
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return;
  
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c

index 14b147135a0cbf31be2d5506790bd55fd25f43c9..2455be8cbc4f75b1d8ebe12044d341c23bf85331 100644 (file)
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -778,6 +778,7 @@ scmi_create_protocol_device(struct device_node *np, struct scmi_info *info,
         if (scmi_mbox_chan_setup(info, &sdev->dev, prot_id)) {
                 dev_err(&sdev->dev, "failed to setup transport\n");
                 scmi_device_destroy(sdev);
+               return;
         }
  
         /* setup handle now as the transport is ready */
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c

index b9bd827caa22ca29fe5303bfdf33395831d4128a..1b4d465cc5d9f9f998869f80d3895095ea86c087 100644 (file)
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -97,6 +97,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
                 u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ?
                              (phys_seed >> 32) & mask : TEXT_OFFSET;
  
+               /*
+                * With CONFIG_RANDOMIZE_TEXT_OFFSET=y, TEXT_OFFSET may not
+                * be a multiple of EFI_KIMG_ALIGN, and we must ensure that
+                * we preserve the misalignment of 'offset' relative to
+                * EFI_KIMG_ALIGN so that statically allocated objects whose
+                * alignment exceeds PAGE_SIZE appear correctly aligned in
+                * memory.
+                */
+               offset |= TEXT_OFFSET % EFI_KIMG_ALIGN;
+
                 /*
                  * If KASLR is enabled, and we have some randomness available,
                  * locate the kernel at a randomized offset in physical memory.
diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c

index dfbd894d5bb712d0df01fc37cc99dc199e095de0..4e24e591ae74106b35d7afee11c184961c02c4cd 100644 (file)
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -147,7 +147,7 @@ static u32 smc(u32 cmd_addr)
                         "smc    #0      @ switch to secure world\n"
                         : "=r" (r0)
                         : "r" (r0), "r" (r1), "r" (r2)
-                       : "r3");
+                       : "r3", "r12");
         } while (r0 == QCOM_SCM_INTERRUPTED);
  
         return r0;
@@ -263,7 +263,7 @@ static s32 qcom_scm_call_atomic1(u32 svc, u32 cmd, u32 arg1)
                         "smc    #0      @ switch to secure world\n"
                         : "=r" (r0)
                         : "r" (r0), "r" (r1), "r" (r2)
-                       : "r3");
+                       : "r3", "r12");
         return r0;
  }
  
@@ -298,7 +298,7 @@ static s32 qcom_scm_call_atomic2(u32 svc, u32 cmd, u32 arg1, u32 arg2)
                         "smc    #0      @ switch to secure world\n"
                         : "=r" (r0)
                         : "r" (r0), "r" (r1), "r" (r2), "r" (r3)
-                       );
+                       : "r12");
         return r0;
  }
  
@@ -328,7 +328,7 @@ u32 qcom_scm_get_version(void)
                         "smc    #0      @ switch to secure world\n"
                         : "=r" (r0), "=r" (r1)
                         : "r" (r0), "r" (r1)
-                       : "r2", "r3");
+                       : "r2", "r3", "r12");
         } while (r0 == QCOM_SCM_INTERRUPTED);
  
         version = r1;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c

index a1b9338736e3be4aafc5522c217f75ce3dcea832..c2c21d839727869ef30a59245b75708d7c6fbcf7 100644 (file)
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -716,7 +716,7 @@ static void remove_compat_control_link(struct drm_device *dev)
         if (!minor)
                 return;
  
-       name = kasprintf(GFP_KERNEL, "controlD%d", minor->index);
+       name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64);
         if (!name)
                 return;
  
diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c

index 39ac15ce47023055f5a2badb7e5a99c59e19b3dd..9e2ae02f31e08fbad87669a126761c015d25db44 100644 (file)
--- a/drivers/gpu/drm/drm_dumb_buffers.c
+++ b/drivers/gpu/drm/drm_dumb_buffers.c
@@ -65,12 +65,13 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev,
                 return -EINVAL;
  
         /* overflow checks for 32bit size calculations */
-       /* NOTE: DIV_ROUND_UP() can overflow */
+       if (args->bpp > U32_MAX - 8)
+               return -EINVAL;
         cpp = DIV_ROUND_UP(args->bpp, 8);
-       if (!cpp || cpp > 0xffffffffU / args->width)
+       if (cpp > U32_MAX / args->width)
                 return -EINVAL;
         stride = cpp * args->width;
-       if (args->height > 0xffffffffU / stride)
+       if (args->height > U32_MAX / stride)
                 return -EINVAL;
  
         /* test for wrap-around */
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c

index e394799979a6eabce5c91a7a6b87773dd4b45186..6d9b9453707c5af4cbad984c790ce9538daa5d82 100644 (file)
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
                 return -ENOMEM;
  
         filp->private_data = priv;
+       filp->f_mode |= FMODE_UNSIGNED_OFFSET;
         priv->filp = filp;
         priv->pid = get_pid(task_pid(current));
         priv->minor = minor;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c

index d596a8302ca3cca529977a10ebc02965855525af..854bd51b9478a59d6b0c18820c85498674842a66 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
                             I915_USERPTR_UNSYNCHRONIZED))
                 return -EINVAL;
  
+       if (!args->user_size)
+               return -EINVAL;
+
         if (offset_in_page(args->user_ptr | args->user_size))
                 return -EINVAL;
  
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index e6a8c0ee7df113704d103eab2b1f7586253970f7..8a69a9275e28d3c87a4443bb950d7ab2d6351a9c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7326,6 +7326,9 @@ enum {
  #define SLICE_ECO_CHICKEN0                     _MMIO(0x7308)
  #define   PIXEL_MASK_CAMMING_DISABLE           (1 << 14)
  
+#define GEN9_WM_CHICKEN3                       _MMIO(0x5588)
+#define   GEN9_FACTOR_IN_CLR_VAL_HIZ           (1 << 9)
+
  /* WaCatErrorRejectionIssue */
  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG         _MMIO(0x9030)
  #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB      (1<<11)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c

index 4ba139c27fbad43cb81435229e92bab7eba6a1e1..f7c25828d3bbacb743ecd468177cc510a5f16d4c 100644 (file)
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1149,6 +1149,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
  
+       /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+       if (IS_GEN9_LP(dev_priv))
+               WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+
         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
         ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
         if (ret)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index e3a5f673ff673d2784bfc2bff7ca14c0a959578e..8704f7f8d0725664d41140dc2a3cd5e35d1aa612 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -884,6 +884,7 @@ static void execlists_submission_tasklet(unsigned long data)
  
                         head = execlists->csb_head;
                         tail = READ_ONCE(buf[write_idx]);
+                       rmb(); /* Hopefully paired with a wmb() in HW */
                 }
                 GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
                           engine->name,
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c

index 3d2d3bbd1342052b2ddedcc0e95d0b4b84e6880c..155ad840f3c59d6cfae28caaba6a863ceb3880a9 100644 (file)
--- a/drivers/gpu/drm/rcar-du/rcar_lvds.c
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c
@@ -88,6 +88,9 @@ static int rcar_lvds_connector_atomic_check(struct drm_connector *connector,
         const struct drm_display_mode *panel_mode;
         struct drm_crtc_state *crtc_state;
  
+       if (!state->crtc)
+               return 0;
+
         if (list_empty(&connector->modes)) {
                 dev_dbg(lvds->dev, "connector: empty modes list\n");
                 return -EINVAL;
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c

index 94b99c90425a488cfaec255b1f2030f9b31242b3..7c95ed5c5cac0a6fc3358b7491f7e4e8e42eb9d2 100644 (file)
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file)
         struct vc4_file *vc4file = file->driver_priv;
  
         vc4_perfmon_close_file(vc4file);
+       kfree(vc4file);
  }
  
  static const struct vm_operations_struct vc4_vm_ops = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c

index 70e1a8820a7c01644cc1f5016d0a878b88c9dc23..8b770a8e02cdda09caee81be15ebc27434b4b67e 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1278,8 +1278,6 @@ static void vmw_master_drop(struct drm_device *dev,
         dev_priv->active_master = &dev_priv->fbdev_master;
         ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
         ttm_vt_unlock(&dev_priv->fbdev_master.lock);
-
-       vmw_fb_refresh(dev_priv);
  }
  
  /**
@@ -1483,7 +1481,6 @@ static int vmw_pm_freeze(struct device *kdev)
                         vmw_kms_resume(dev);
                 if (dev_priv->enable_fb)
                         vmw_fb_on(dev_priv);
-               vmw_fb_refresh(dev_priv);
                 return -EBUSY;
         }
  
@@ -1523,8 +1520,6 @@ static int vmw_pm_restore(struct device *kdev)
         if (dev_priv->enable_fb)
                 vmw_fb_on(dev_priv);
  
-       vmw_fb_refresh(dev_priv);
-
         return 0;
  }
  
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h

index f34f368c1a2ebc1713f35a7aab0f94c83061cbfa..5fcbe1620d50b34898815be80ec7560115c68997 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -910,7 +910,6 @@ int vmw_fb_init(struct vmw_private *vmw_priv);
  int vmw_fb_close(struct vmw_private *dev_priv);
  int vmw_fb_off(struct vmw_private *vmw_priv);
  int vmw_fb_on(struct vmw_private *vmw_priv);
-void vmw_fb_refresh(struct vmw_private *vmw_priv);
  
  /**
   * Kernel modesetting - vmwgfx_kms.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c

index ba0cdb743c3e50d664848c021a0f63bc72953aa2..54e300365a5ccd04b2d5d6173fbb379bde860322 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -866,21 +866,13 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
         spin_lock_irqsave(&par->dirty.lock, flags);
         par->dirty.active = true;
         spin_unlock_irqrestore(&par->dirty.lock, flags);
- 
-       return 0;
-}
  
-/**
- * vmw_fb_refresh - Refresh fb display
- *
- * @vmw_priv: Pointer to device private
- *
- * Call into kms to show the fbdev display(s).
- */
-void vmw_fb_refresh(struct vmw_private *vmw_priv)
-{
-       if (!vmw_priv->fb_info)
-               return;
+       /*
+        * Need to reschedule a dirty update, because otherwise that's
+        * only done in dirty_mark() if the previous coalesced
+        * dirty region was empty.
+        */
+       schedule_delayed_work(&par->local_work, 0);
  
-       vmw_fb_set_par(vmw_priv->fb_info);
+       return 0;
  }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c

index cdff99211602cc44901135f6dc0cdd781e849a21..21d746bdc922bc55ce27d25cd4d5c2ad935d595e 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -329,8 +329,6 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
         struct rpc_channel channel;
         char *msg, *reply = NULL;
         size_t reply_len = 0;
-       int ret = 0;
-
  
         if (!vmw_msg_enabled)
                 return -ENODEV;
@@ -344,15 +342,14 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
                 return -ENOMEM;
         }
  
-       if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) ||
-           vmw_send_msg(&channel, msg) ||
-           vmw_recv_msg(&channel, (void *) &reply, &reply_len) ||
-           vmw_close_channel(&channel)) {
-               DRM_ERROR("Failed to get %s", guest_info_param);
+       if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))
+               goto out_open;
  
-               ret = -EINVAL;
-       }
+       if (vmw_send_msg(&channel, msg) ||
+           vmw_recv_msg(&channel, (void *) &reply, &reply_len))
+               goto out_msg;
  
+       vmw_close_channel(&channel);
         if (buffer && reply && reply_len > 0) {
                 /* Remove reply code, which are the first 2 characters of
                  * the reply
@@ -369,7 +366,17 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
         kfree(reply);
         kfree(msg);
  
-       return ret;
+       return 0;
+
+out_msg:
+       vmw_close_channel(&channel);
+       kfree(reply);
+out_open:
+       *length = 0;
+       kfree(msg);
+       DRM_ERROR("Failed to get %s", guest_info_param);
+
+       return -EINVAL;
  }
  
  
@@ -400,15 +407,22 @@ int vmw_host_log(const char *log)
                 return -ENOMEM;
         }
  
-       if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) ||
-           vmw_send_msg(&channel, msg) ||
-           vmw_close_channel(&channel)) {
-               DRM_ERROR("Failed to send log\n");
+       if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))
+               goto out_open;
  
-               ret = -EINVAL;
-       }
+       if (vmw_send_msg(&channel, msg))
+               goto out_msg;
  
+       vmw_close_channel(&channel);
         kfree(msg);
  
-       return ret;
+       return 0;
+
+out_msg:
+       vmw_close_channel(&channel);
+out_open:
+       kfree(msg);
+       DRM_ERROR("Failed to send log\n");
+
+       return -EINVAL;
  }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h

index 557a033fb610f1dfcb3568aceebd5592c97b3c60..8545488aa0cfbe1bf1b1d14514d6794c0077834a 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
@@ -135,17 +135,24 @@
  
  #else
  
-/* In the 32-bit version of this macro, we use "m" because there is no
- * more register left for bp
+/*
+ * In the 32-bit version of this macro, we store bp in a memory location
+ * because we've run out of registers.
+ * Now we can't reference that memory location while we've modified
+ * %esp or %ebp, so we first push it on the stack, just before we push
+ * %ebp, and then when we need it we read it from the stack where we
+ * just pushed it.
   */
  #define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di,     \
                         port_num, magic, bp,            \
                         eax, ebx, ecx, edx, si, di)     \
  ({                                                     \
-       asm volatile ("push %%ebp;"                     \
-               "mov %12, %%ebp;"                       \
+       asm volatile ("push %12;"                       \
+               "push %%ebp;"                           \
+               "mov 0x04(%%esp), %%ebp;"               \
                 "rep outsb;"                            \
-               "pop %%ebp;" :                          \
+               "pop %%ebp;"                            \
+               "add $0x04, %%esp;" :                   \
                 "=a"(eax),                              \
                 "=b"(ebx),                              \
                 "=c"(ecx),                              \
@@ -167,10 +174,12 @@
                        port_num, magic, bp,             \
                        eax, ebx, ecx, edx, si, di)      \
  ({                                                     \
-       asm volatile ("push %%ebp;"                     \
-               "mov %12, %%ebp;"                       \
+       asm volatile ("push %12;"                       \
+               "push %%ebp;"                           \
+               "mov 0x04(%%esp), %%ebp;"               \
                 "rep insb;"                             \
-               "pop %%ebp" :                           \
+               "pop %%ebp;"                            \
+               "add $0x04, %%esp;" :                   \
                 "=a"(eax),                              \
                 "=b"(ebx),                              \
                 "=c"(ecx),                              \
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c

index 648f8127f65ae099baf3492031d30cd2c7280a64..3d667e903beb7bd76d13d8048a26a1763e835709 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -482,6 +482,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
                 return ret;
         }
  
+       vps->dmabuf_size = size;
+
         /*
          * TTM already thinks the buffer is pinned, but make sure the
          * pin_count is upped.
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig

index f249a442845804d8f22cdab29ec7e4a708874566..6ec307c93ecef937dbd47b1865df009b028c8d49 100644 (file)
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -272,7 +272,7 @@ config SENSORS_K8TEMP
  
  config SENSORS_K10TEMP
         tristate "AMD Family 10h+ temperature sensor"
-       depends on X86 && PCI
+       depends on X86 && PCI && AMD_NB
         help
           If you say yes here you get support for the temperature
           sensor(s) inside your CPU. Supported are later revisions of
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c

index d2cc55e213747963de9cf5e061896032f1609fd5..3b73dee6fdc68ba6aa2619b083fb4c2dffbf063a 100644 (file)
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -23,6 +23,7 @@
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/pci.h>
+#include <asm/amd_nb.h>
  #include <asm/processor.h>
  
  MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
@@ -40,8 +41,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
  #define PCI_DEVICE_ID_AMD_17H_DF_F3    0x1463
  #endif
  
-#ifndef PCI_DEVICE_ID_AMD_17H_RR_NB
-#define PCI_DEVICE_ID_AMD_17H_RR_NB    0x15d0
+#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3       0x15eb
  #endif
  
  /* CPUID function 0x80000001, ebx */
@@ -63,10 +64,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
  #define  NB_CAP_HTC                    0x00000400
  
  /*
- * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE
- * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature
- * Control]
+ * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
+ * and REG_REPORTED_TEMPERATURE have been moved to
+ * D0F0xBC_xD820_0C64 [Hardware Temperature Control]
+ * D0F0xBC_xD820_0CA4 [Reported Temperature Control]
   */
+#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET    0xd8200c64
  #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET    0xd8200ca4
  
  /* F17h M01h Access througn SMN */
@@ -74,6 +77,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
  
  struct k10temp_data {
         struct pci_dev *pdev;
+       void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
         void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
         int temp_offset;
         u32 temp_adjust_mask;
@@ -98,6 +102,11 @@ static const struct tctl_offset tctl_offset_table[] = {
         { 0x17, "AMD Ryzen Threadripper 1910", 10000 },
  };
  
+static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
+{
+       pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
+}
+
  static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
  {
         pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
@@ -114,6 +123,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
         mutex_unlock(&nb_smu_ind_mutex);
  }
  
+static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
+{
+       amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
+                         F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
+}
+
  static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
  {
         amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
@@ -122,8 +137,8 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
  
  static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
  {
-       amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0x60,
-                         F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
+       amd_smn_read(amd_pci_dev_to_node_id(pdev),
+                    F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
  }
  
  static ssize_t temp1_input_show(struct device *dev,
@@ -160,8 +175,7 @@ static ssize_t show_temp_crit(struct device *dev,
         u32 regval;
         int value;
  
-       pci_read_config_dword(data->pdev,
-                             REG_HARDWARE_THERMAL_CONTROL, &regval);
+       data->read_htcreg(data->pdev, &regval);
         value = ((regval >> 16) & 0x7f) * 500 + 52000;
         if (show_hyst)
                 value -= ((regval >> 24) & 0xf) * 500;
@@ -181,13 +195,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
         struct pci_dev *pdev = data->pdev;
  
         if (index >= 2) {
-               u32 reg_caps, reg_htc;
+               u32 reg;
+
+               if (!data->read_htcreg)
+                       return 0;
  
                 pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
-                                     &reg_caps);
-               pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL,
-                                     &reg_htc);
-               if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE))
+                                     &reg);
+               if (!(reg & NB_CAP_HTC))
+                       return 0;
+
+               data->read_htcreg(data->pdev, &reg);
+               if (!(reg & HTC_ENABLE))
                         return 0;
         }
         return attr->mode;
@@ -268,11 +287,13 @@ static int k10temp_probe(struct pci_dev *pdev,
  
         if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 ||
                                           boot_cpu_data.x86_model == 0x70)) {
+               data->read_htcreg = read_htcreg_nb_f15;
                 data->read_tempreg = read_tempreg_nb_f15;
         } else if (boot_cpu_data.x86 == 0x17) {
                 data->temp_adjust_mask = 0x80000;
                 data->read_tempreg = read_tempreg_nb_f17;
         } else {
+               data->read_htcreg = read_htcreg_pci;
                 data->read_tempreg = read_tempreg_pci;
         }
  
@@ -302,7 +323,7 @@ static const struct pci_device_id k10temp_id_table[] = {
         { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
         { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
         { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
-       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_RR_NB) },
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
         {}
  };
  MODULE_DEVICE_TABLE(pci, k10temp_id_table);
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c

index fd36c39ddf4e86efd4ae8f12bd9f4ab9df4da860..0cdba29ae0a9ad25212f21a56acf76f46c1b5213 100644 (file)
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -209,7 +209,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
         i2c_dw_disable_int(dev);
  
         /* Enable the adapter */
-       __i2c_dw_enable_and_wait(dev, true);
+       __i2c_dw_enable(dev, true);
+
+       /* Dummy read to avoid the register getting stuck on Bay Trail */
+       dw_readl(dev, DW_IC_ENABLE_STATUS);
  
         /* Clear and enable interrupts */
         dw_readl(dev, DW_IC_CLR_INTR);
diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c

index 2aa0e83174c52895a0fb1416e8a17a00d31b24c6..dae8ac618a5221fdd886afab417f88945af7f143 100644 (file)
--- a/drivers/i2c/busses/i2c-pmcmsp.c
+++ b/drivers/i2c/busses/i2c-pmcmsp.c
@@ -564,10 +564,10 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap,
                  * TODO: We could potentially loop and retry in the case
                  * of MSP_TWI_XFER_TIMEOUT.
                  */
-               return -1;
+               return -EIO;
         }
  
-       return 0;
+       return num;
  }
  
  static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter)
diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c

index e4be86b3de9a28b201bcd197470ab3a3a78ee683..7235c7302bb7cd000db814ec12de03193a22ef01 100644 (file)
--- a/drivers/i2c/busses/i2c-viperboard.c
+++ b/drivers/i2c/busses/i2c-viperboard.c
@@ -337,7 +337,7 @@ static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
                 }
                 mutex_unlock(&vb->lock);
         }
-       return 0;
+       return num;
  error:
         mutex_unlock(&vb->lock);
         return error;
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c

index a9126b3cda61bc95f6a9d1282821ab7552484534..7c3b4740b94b644509ae3658fa2eda4885086bba 100644 (file)
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -445,10 +445,17 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
         msgs[1].buf = buffer;
  
         ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-       if (ret < 0)
-               dev_err(&client->adapter->dev, "i2c read failed\n");
-       else
+       if (ret < 0) {
+               /* Getting a NACK is unfortunately normal with some DSDTs */
+               if (ret == -EREMOTEIO)
+                       dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+                               data_len, client->addr, cmd, ret);
+               else
+                       dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+                               data_len, client->addr, cmd, ret);
+       } else {
                 memcpy(data, buffer, data_len);
+       }
  
         kfree(buffer);
         return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c

index 9a4e899d94b30a843e54f3a06e975ad632428a26..2b6c9b5160705a95d779b22aec4292904ec3b040 100644 (file)
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -119,7 +119,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
         umem->length     = size;
         umem->address    = addr;
         umem->page_shift = PAGE_SHIFT;
-       umem->pid        = get_task_pid(current, PIDTYPE_PID);
         /*
          * We ask for writable memory if any of the following
          * access flags are set.  "Local write" and "remote write"
@@ -132,7 +131,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                  IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
  
         if (access & IB_ACCESS_ON_DEMAND) {
-               put_pid(umem->pid);
                 ret = ib_umem_odp_get(context, umem, access);
                 if (ret) {
                         kfree(umem);
@@ -148,7 +146,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
  
         page_list = (struct page **) __get_free_page(GFP_KERNEL);
         if (!page_list) {
-               put_pid(umem->pid);
                 kfree(umem);
                 return ERR_PTR(-ENOMEM);
         }
@@ -231,7 +228,6 @@ out:
         if (ret < 0) {
                 if (need_release)
                         __ib_umem_release(context->device, umem, 0);
-               put_pid(umem->pid);
                 kfree(umem);
         } else
                 current->mm->pinned_vm = locked;
@@ -274,8 +270,7 @@ void ib_umem_release(struct ib_umem *umem)
  
         __ib_umem_release(umem->context->device, umem, 1);
  
-       task = get_pid_task(umem->pid, PIDTYPE_PID);
-       put_pid(umem->pid);
+       task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
         if (!task)
                 goto out;
         mm = get_task_mm(task);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c

index e90f2fd8dc16dfd0d5e1b0365fe15514b1f8dbe0..1445918e32392f28ae4ce9ea74e7df0feeddf371 100644 (file)
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
  err_dereg_mem:
         dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
                   mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
-err_free_wr_wait:
-       c4iw_put_wr_wait(mhp->wr_waitp);
  err_free_skb:
         kfree_skb(mhp->dereg_skb);
+err_free_wr_wait:
+       c4iw_put_wr_wait(mhp->wr_waitp);
  err_free_mhp:
         kfree(mhp);
         return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c

index e6a60fa59f2b5e32f240374e980c68558bdf3f86..e6bdd0c1e80a9b35fb5d0690b1ba198d628a30ab 100644 (file)
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5944,6 +5944,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
         u64 status;
         u32 sw_index;
         int i = 0;
+       unsigned long irq_flags;
  
         sw_index = dd->hw_to_sw[hw_context];
         if (sw_index >= dd->num_send_contexts) {
@@ -5953,10 +5954,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
                 return;
         }
         sci = &dd->send_contexts[sw_index];
+       spin_lock_irqsave(&dd->sc_lock, irq_flags);
         sc = sci->sc;
         if (!sc) {
                 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
                            sw_index, hw_context);
+               spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
                 return;
         }
  
@@ -5978,6 +5981,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
          */
         if (sc->type != SC_USER)
                 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+       spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
  
         /*
          * Update the counters for the corresponding status bits.
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c

index 14734d0d0b76086dfc5ea76e84ae3dc27a2a58b9..3a485f50fede1d59d30bd73240c3517c50dd9450 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
  
                         hr_cq->set_ci_db = hr_cq->db.db_record;
                         *hr_cq->set_ci_db = 0;
+                       hr_cq->db_en = 1;
                 }
  
                 /* Init mmt table and write buff address to mtt table */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c

index 47e1b6ac1e1acdd38e9f88cfec179a50471fee29..8013d69c5ac496fe5ddaa10b9e48ef80f5db3c5e 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
         free_mr->mr_free_pd = to_hr_pd(pd);
         free_mr->mr_free_pd->ibpd.device  = &hr_dev->ib_dev;
         free_mr->mr_free_pd->ibpd.uobject = NULL;
+       free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
         atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
  
         attr.qp_access_flags    = IB_ACCESS_REMOTE_WRITE;
@@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
  
         do {
                 ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
-               if (ret < 0) {
+               if (ret < 0 && hr_qp) {
                         dev_err(dev,
                            "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
                            hr_qp->qpn, ret, hr_mr->key, ne);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c

index 25916e8522eda4e061c63e2b652db1fa8fdf5e41..1f0965bb64eedab6057ed511636ed4777c1fed30 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         unsigned long flags;
         unsigned int ind;
         void *wqe = NULL;
-       u32 tmp_len = 0;
         bool loopback;
+       u32 tmp_len;
         int ret = 0;
         u8 *smac;
         int nreq;
@@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  
                 owner_bit =
                        ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+               tmp_len = 0;
  
                 /* Corresponding to the QP type, wqe process separately */
                 if (ibqp->qp_type == IB_QPT_GSI) {
@@ -547,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                 }
  
                 if (i < hr_qp->rq.max_gs) {
-                       dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-                       dseg[i].addr = 0;
+                       dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+                       dseg->addr = 0;
                 }
  
                 /* rq support inline data */
-               sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
-               hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++) {
-                       sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
-                       sge_list[i].len = wr->sg_list[i].length;
+               if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+                       sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
+                       hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+                                                              (u32)wr->num_sge;
+                       for (i = 0; i < wr->num_sge; i++) {
+                               sge_list[i].addr =
+                                              (void *)(u64)wr->sg_list[i].addr;
+                               sge_list[i].len = wr->sg_list[i].length;
+                       }
                 }
  
                 hr_qp->rq.wrid[ind] = wr->wr_id;
@@ -613,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
         dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
                          ring->desc_num * sizeof(struct hns_roce_cmq_desc),
                          DMA_BIDIRECTIONAL);
+
+       ring->desc_dma_addr = 0;
         kfree(ring->desc);
  }
  
@@ -1081,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
         if (ret) {
                 dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
                         ret);
+               return ret;
         }
  
         /* Get pf resource owned by every pf */
@@ -1372,6 +1380,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
  
         roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
                      mr->type == MR_TYPE_MR ? 0 : 1);
+       roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
+                    1);
         mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
  
         mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
@@ -2169,6 +2179,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                                     struct hns_roce_v2_qp_context *context,
                                     struct hns_roce_v2_qp_context *qpc_mask)
  {
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
         struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
  
         /*
@@ -2281,7 +2292,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
         context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
         qpc_mask->rq_db_record_addr = 0;
  
-       roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
+       roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
+                   (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
         roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
  
         roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -4703,6 +4715,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
         {0, }
  };
  
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
  static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
                                   struct hnae3_handle *handle)
  {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c

index 9d48bc07a9e683a22f91c8cac02a273e7f768a9c..96fb6a9ed93c4eb5948629f21d85f3f9b3c40672 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -199,7 +199,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
  
         memset(props, 0, sizeof(*props));
  
-       props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid);
+       props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
         props->max_mr_size = (u64)(~(0ULL));
         props->page_size_cap = hr_dev->caps.page_size_cap;
         props->vendor_id = hr_dev->vendor_id;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c

index d4aad34c21e2ca8a9bc36db6ba1f682ac1f1972f..baaf906f7c2e4d2546c4d53333123eca3350c38b 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                 goto err_rq_sge_list;
                         }
                         *hr_qp->rdb.db_record = 0;
+                       hr_qp->rdb_en = 1;
                 }
  
                 /* Allocate QP buf */
@@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
         }
  
         if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-               ret = 0;
+               if (hr_dev->caps.min_wqes) {
+                       ret = -EPERM;
+                       dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+                               new_state);
+               } else {
+                       ret = 0;
+               }
+
                 goto out;
         }
  
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h

index d5d8c1be345a64ec0de46ddac9e54bc7d13fa95e..2f2b4426ded77569f563da934f9bce10fc67194b 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -207,6 +207,7 @@ struct i40iw_msix_vector {
         u32 irq;
         u32 cpu_affinity;
         u32 ceq_id;
+       cpumask_t mask;
  };
  
  struct l2params_work {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c

index 4cfa8f4647e222ea3ef35fdc699fbe3e0ad9281e..f7c6fd9ff6e2c53101a15a8a81cdec94044afbb6 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2093,7 +2093,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
         if (netif_is_bond_slave(netdev))
                 netdev = netdev_master_upper_dev_get(netdev);
  
-       neigh = dst_neigh_lookup(dst, &dst_addr);
+       neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
  
         rcu_read_lock();
         if (neigh) {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c

index 6139836fb533adf194ede3ee693aefcbe55471d8..c9f62ca7643c85ac1a076c6486c19f17ff3ea80a 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                 switch (info->ae_id) {
                 case I40IW_AE_LLP_FIN_RECEIVED:
                         if (qp->term_flags)
-                               continue;
+                               break;
                         if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
                                 iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
                                 if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
@@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                         break;
                 case I40IW_AE_LLP_CONNECTION_RESET:
                         if (atomic_read(&iwqp->close_timer_started))
-                               continue;
+                               break;
                         i40iw_cm_disconn(iwqp);
                         break;
                 case I40IW_AE_QP_SUSPEND_COMPLETE:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c

index 9cd0d3ef90575adff6103e9e3a7cf544d986fe79..05001e6da1f8fd473598e37d9ce7011eb2a19f19 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
                                                          struct i40iw_msix_vector *msix_vec)
  {
         enum i40iw_status_code status;
-       cpumask_t mask;
  
         if (iwdev->msix_shared && !ceq_id) {
                 tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
                 status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
         }
  
-       cpumask_clear(&mask);
-       cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
-       irq_set_affinity_hint(msix_vec->irq, &mask);
+       cpumask_clear(&msix_vec->mask);
+       cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+       irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
  
         if (status) {
                 i40iw_pr_err("ceq irq config fail\n");
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c

index 40e4f5ab2b46b15abd21081b3bc4f9d2145f4c63..68679ad4c6daf7b83b98d2d1113df522260e0d52 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
  
         list_for_each_entry(iwpbl, pbl_list, list) {
                 if (iwpbl->user_base == va) {
+                       iwpbl->on_list = false;
                         list_del(&iwpbl->list);
                         return iwpbl;
                 }
@@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                 return ERR_PTR(-ENOMEM);
  
         iwqp = (struct i40iw_qp *)mem;
+       iwqp->allocated_buffer = mem;
         qp = &iwqp->sc_qp;
         qp->back_qp = (void *)iwqp;
         qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
@@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                 goto error;
         }
  
-       iwqp->allocated_buffer = mem;
         iwqp->iwdev = iwdev;
         iwqp->iwpd = iwpd;
         iwqp->ibqp.qp_num = qp_num;
@@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
                         goto error;
                 spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
                 list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+               iwpbl->on_list = true;
                 spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
                 break;
         case IW_MEMREG_TYPE_CQ:
@@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
  
                 spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
                 list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+               iwpbl->on_list = true;
                 spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
                 break;
         case IW_MEMREG_TYPE_MEM:
@@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr,
         switch (iwmr->type) {
         case IW_MEMREG_TYPE_CQ:
                 spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-               if (!list_empty(&ucontext->cq_reg_mem_list))
+               if (iwpbl->on_list) {
+                       iwpbl->on_list = false;
                         list_del(&iwpbl->list);
+               }
                 spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
                 break;
         case IW_MEMREG_TYPE_QP:
                 spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
-               if (!list_empty(&ucontext->qp_reg_mem_list))
+               if (iwpbl->on_list) {
+                       iwpbl->on_list = false;
                         list_del(&iwpbl->list);
+               }
                 spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
                 break;
         default:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h

index 9067443cd31151bb501a505e74b06bb56265f1b5..76cf173377ab24c9ff716ca31484b71d25652c80 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -78,6 +78,7 @@ struct i40iw_pbl {
         };
  
         bool pbl_allocated;
+       bool on_list;
         u64 user_base;
         struct i40iw_pble_alloc pble_alloc;
         struct i40iw_mr *iwmr;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c

index b4d8ff8ab807a445ad93cc532ef1f4fe5d4242fb..69716a7ea9934a414594ffe2b1b86fdcb41a411d 100644 (file)
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2416,7 +2416,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  }
  
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
                            bool inner)
  {
         if (inner) {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c

index 87b7c1be2a117b0a0d7840ce13d36dabb5c4a13c..2193dc1765fb2581ceacc89300e4369e0765eb36 100644 (file)
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -484,11 +484,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
         return 1;
  }
  
-static int first_med_bfreg(void)
-{
-       return 1;
-}
-
  enum {
         /* this is the first blue flame register in the array of bfregs assigned
          * to a processes. Since we do not use it for blue flame but rather
@@ -514,6 +509,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev,
         return n >= 0 ? n : 0;
  }
  
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+                          struct mlx5_bfreg_info *bfregi)
+{
+       return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
+}
+
  static int first_hi_bfreg(struct mlx5_ib_dev *dev,
                           struct mlx5_bfreg_info *bfregi)
  {
@@ -541,10 +542,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
  static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
                                  struct mlx5_bfreg_info *bfregi)
  {
-       int minidx = first_med_bfreg();
+       int minidx = first_med_bfreg(dev, bfregi);
         int i;
  
-       for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+       if (minidx < 0)
+               return minidx;
+
+       for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
                 if (bfregi->count[i] < bfregi->count[minidx])
                         minidx = i;
                 if (!bfregi->count[minidx])
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c

index 7d3763b2e01c99fd7be7d4c72db653c1365e088c..3f9afc02d166b6bce9a2c4db87ad85878c33b3e6 100644 (file)
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
  {
         struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
         struct qedr_dev *dev = get_qedr_dev(context->device);
-       unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
-       u64 unmapped_db = dev->db_phys_addr;
+       unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
         unsigned long len = (vma->vm_end - vma->vm_start);
-       int rc = 0;
-       bool found;
+       unsigned long dpi_start;
+
+       dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
  
         DP_DEBUG(dev, QEDR_MSG_INIT,
-                "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
-                vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
-       if (vma->vm_start & (PAGE_SIZE - 1)) {
-               DP_ERR(dev, "Vma_start not page aligned = %ld\n",
-                      vma->vm_start);
+                "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
+                (void *)vma->vm_start, (void *)vma->vm_end,
+                (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+
+       if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
+               DP_ERR(dev,
+                      "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n",
+                      (void *)vma->vm_start, (void *)vma->vm_end);
                 return -EINVAL;
         }
  
-       found = qedr_search_mmap(ucontext, vm_page, len);
-       if (!found) {
-               DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+       if (!qedr_search_mmap(ucontext, phys_addr, len)) {
+               DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
                        vma->vm_pgoff);
                 return -EINVAL;
         }
  
-       DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-
-       if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
-                                                    dev->db_size))) {
-               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-               if (vma->vm_flags & VM_READ) {
-                       DP_ERR(dev, "Trying to map doorbell bar for read\n");
-                       return -EPERM;
-               }
-
-               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       if (phys_addr < dpi_start ||
+           ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
+               DP_ERR(dev,
+                      "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
+                      (void *)phys_addr, (void *)dpi_start,
+                      ucontext->dpi_size);
+               return -EINVAL;
+       }
  
-               rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-                                       PAGE_SIZE, vma->vm_page_prot);
-       } else {
-               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
-               rc = remap_pfn_range(vma, vma->vm_start,
-                                    vma->vm_pgoff, len, vma->vm_page_prot);
+       if (vma->vm_flags & VM_READ) {
+               DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
+               return -EINVAL;
         }
-       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
-       return rc;
+
+       vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+                                 vma->vm_page_prot);
  }
  
  struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c

index 2cb52fd48cf12bdace50806228ec2c8c502cf1ae..73a00a1c06f62a80fa7522763ba4669f2c4e1c28 100644 (file)
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
         unsigned int mask;
         unsigned int length = 0;
         int i;
-       int must_sched;
  
         while (wr) {
                 mask = wr_opcode_mask(wr->opcode, qp);
@@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
                 wr = wr->next;
         }
  
-       /*
-        * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
-        * and the requester call ip_local_out_sk() that takes spin_lock_bh.
-        */
-       must_sched = (qp_type(qp) == IB_QPT_GSI) ||
-                       (queue_count(qp->sq.queue) > 1);
-
-       rxe_run_task(&qp->req.task, must_sched);
+       rxe_run_task(&qp->req.task, 1);
         if (unlikely(qp->req.state == QP_STATE_ERROR))
                 rxe_run_task(&qp->comp.task, 1);
  
diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig

index fb8b7182f05ebd7413058d54e8be7fca974dcc44..25bf6955b6d0273b7ed4978954274cad20c2512d 100644 (file)
--- a/drivers/infiniband/ulp/srpt/Kconfig
+++ b/drivers/infiniband/ulp/srpt/Kconfig
@@ -1,6 +1,6 @@
  config INFINIBAND_SRPT
         tristate "InfiniBand SCSI RDMA Protocol target support"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
+       depends on INFINIBAND_ADDR_TRANS && TARGET_CORE
         ---help---
  
           Support for the SCSI RDMA Protocol (SRP) Target driver. The
diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c

index 944a7f3380991d107b0a3f50a9dfd100ba0719a8..1b25d8bc153aec16ea8e9ee0cd47de0604a2a393 100644 (file)
--- a/drivers/isdn/hardware/eicon/diva.c
+++ b/drivers/isdn/hardware/eicon/diva.c
@@ -388,10 +388,10 @@ void divasa_xdi_driver_unload(void)
  **  Receive and process command from user mode utility
  */
  void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-                           int length,
+                           int length, void *mptr,
                             divas_xdi_copy_from_user_fn_t cp_fn)
  {
-       diva_xdi_um_cfg_cmd_t msg;
+       diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
         diva_os_xdi_adapter_t *a = NULL;
         diva_os_spin_lock_magic_t old_irql;
         struct list_head *tmp;
@@ -401,21 +401,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
                          length, sizeof(diva_xdi_um_cfg_cmd_t)))
                         return NULL;
         }
-       if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) {
+       if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) {
                 DBG_ERR(("A: A(?) open, write error"))
                         return NULL;
         }
         diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter");
         list_for_each(tmp, &adapter_queue) {
                 a = list_entry(tmp, diva_os_xdi_adapter_t, link);
-               if (a->controller == (int)msg.adapter)
+               if (a->controller == (int)msg->adapter)
                         break;
                 a = NULL;
         }
         diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter");
  
         if (!a) {
-               DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter))
+               DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter))
                         }
  
         return (a);
@@ -437,8 +437,10 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle)
  
  int
  diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-              int length, divas_xdi_copy_from_user_fn_t cp_fn)
+              int length, void *mptr,
+              divas_xdi_copy_from_user_fn_t cp_fn)
  {
+       diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
         diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter;
         void *data;
  
@@ -459,7 +461,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
                         return (-2);
         }
  
-       length = (*cp_fn) (os_handle, data, src, length);
+       if (msg) {
+               *(diva_xdi_um_cfg_cmd_t *)data = *msg;
+               length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg),
+                                  src + sizeof(*msg), length - sizeof(*msg));
+       } else {
+               length = (*cp_fn) (os_handle, data, src, length);
+       }
         if (length > 0) {
                 if ((*(a->interface.cmd_proc))
                     (a, (diva_xdi_um_cfg_cmd_t *) data, length)) {
diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h

index b067032093a8a2bb656f3e8c58fa91bccad7b056..1ad76650fbf984b3fe8b0385205005a57de52c0d 100644 (file)
--- a/drivers/isdn/hardware/eicon/diva.h
+++ b/drivers/isdn/hardware/eicon/diva.h
@@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst,
                   int max_length, divas_xdi_copy_to_user_fn_t cp_fn);
  
  int diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-                  int length, divas_xdi_copy_from_user_fn_t cp_fn);
+                  int length, void *msg,
+                  divas_xdi_copy_from_user_fn_t cp_fn);
  
  void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-                           int length,
+                           int length, void *msg,
                             divas_xdi_copy_from_user_fn_t cp_fn);
  
  void diva_xdi_close_adapter(void *adapter, void *os_handle);
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c

index b9980e84f9db56570ef9c2dbc8b007e8818978d3..b6a3950b2564f9142f6e19b188becec62d01d3df 100644 (file)
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file)
  static ssize_t divas_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *ppos)
  {
+       diva_xdi_um_cfg_cmd_t msg;
         int ret = -EINVAL;
  
         if (!file->private_data) {
                 file->private_data = diva_xdi_open_adapter(file, buf,
-                                                          count,
+                                                          count, &msg,
                                                            xdi_copy_from_user);
-       }
-       if (!file->private_data) {
-               return (-ENODEV);
+               if (!file->private_data)
+                       return (-ENODEV);
+               ret = diva_xdi_write(file->private_data, file,
+                                    buf, count, &msg, xdi_copy_from_user);
+       } else {
+               ret = diva_xdi_write(file->private_data, file,
+                                    buf, count, NULL, xdi_copy_from_user);
         }
  
-       ret = diva_xdi_write(file->private_data, file,
-                            buf, count, xdi_copy_from_user);
         switch (ret) {
         case -1:                /* Message should be removed from rx mailbox first */
                 ret = -EBUSY;
@@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf,
  static ssize_t divas_read(struct file *file, char __user *buf,
                           size_t count, loff_t *ppos)
  {
+       diva_xdi_um_cfg_cmd_t msg;
         int ret = -EINVAL;
  
         if (!file->private_data) {
                 file->private_data = diva_xdi_open_adapter(file, buf,
-                                                          count,
+                                                          count, &msg,
                                                            xdi_copy_from_user);
         }
         if (!file->private_data) {
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c

index 4e63c6f6c04dfed91f94627dcd6a484a1de1652f..d030ce3025a6a6f365cb55c740e436ba43fd52bf 100644 (file)
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -250,7 +250,9 @@ void bch_debug_exit(void)
  
  int __init bch_debug_init(struct kobject *kobj)
  {
-       bcache_debug = debugfs_create_dir("bcache", NULL);
+       if (!IS_ENABLED(CONFIG_DEBUG_FS))
+               return 0;
  
+       bcache_debug = debugfs_create_dir("bcache", NULL);
         return IS_ERR_OR_NULL(bcache_debug);
  }
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c

index 1b52b8557034bcab08f46c85efa37eee9b3d6f34..2060d1483043d8ee52dba6e4a65d20ba18a2b97a 100644 (file)
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -419,10 +419,25 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
                 /* Verify that EC can process command */
                 for (i = 0; i < len; i++) {
                         rx_byte = rx_buf[i];
+                       /*
+                        * Seeing the PAST_END, RX_BAD_DATA, or NOT_READY
+                        * markers are all signs that the EC didn't fully
+                        * receive our command. e.g., if the EC is flashing
+                        * itself, it can't respond to any commands and instead
+                        * clocks out EC_SPI_PAST_END from its SPI hardware
+                        * buffer. Similar occurrences can happen if the AP is
+                        * too slow to clock out data after asserting CS -- the
+                        * EC will abort and fill its buffer with
+                        * EC_SPI_RX_BAD_DATA.
+                        *
+                        * In all cases, these errors should be safe to retry.
+                        * Report -EAGAIN and let the caller decide what to do
+                        * about that.
+                        */
                         if (rx_byte == EC_SPI_PAST_END  ||
                             rx_byte == EC_SPI_RX_BAD_DATA ||
                             rx_byte == EC_SPI_NOT_READY) {
-                               ret = -EREMOTEIO;
+                               ret = -EAGAIN;
                                 break;
                         }
                 }
@@ -431,7 +446,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
         if (!ret)
                 ret = cros_ec_spi_receive_packet(ec_dev,
                                 ec_msg->insize + sizeof(*response));
-       else
+       else if (ret != -EAGAIN)
                 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
  
         final_ret = terminate_request(ec_dev);
@@ -537,10 +552,11 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
                 /* Verify that EC can process command */
                 for (i = 0; i < len; i++) {
                         rx_byte = rx_buf[i];
+                       /* See comments in cros_ec_pkt_xfer_spi() */
                         if (rx_byte == EC_SPI_PAST_END  ||
                             rx_byte == EC_SPI_RX_BAD_DATA ||
                             rx_byte == EC_SPI_NOT_READY) {
-                               ret = -EREMOTEIO;
+                               ret = -EAGAIN;
                                 break;
                         }
                 }
@@ -549,7 +565,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
         if (!ret)
                 ret = cros_ec_spi_receive_response(ec_dev,
                                 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-       else
+       else if (ret != -EAGAIN)
                 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
  
         final_ret = terminate_request(ec_dev);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h

index a4c9c8297a6d825db6321032f7125aa01c9ca751..918d4fb742d1dd98dc3475adf4eef4feeafa7fdf 100644 (file)
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -717,6 +717,7 @@ struct cxl {
         bool perst_select_user;
         bool perst_same_image;
         bool psl_timebase_synced;
+       bool tunneled_ops_supported;
  
         /*
          * number of contexts mapped on to this card. Possible values are:
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c

index 83f1d08058fc234dc9b141b543084ece35c53b0f..4d6736f9d46399b0ea2f1336404acd041af91c86 100644 (file)
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,15 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
         /* Required for devices using CAPP DMA mode, harmless for others */
         pci_set_master(dev);
  
+       adapter->tunneled_ops_supported = false;
+
+       if (cxl_is_power9()) {
+               if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1))
+                       dev_info(&dev->dev, "Tunneled operations unsupported\n");
+               else
+                       adapter->tunneled_ops_supported = true;
+       }
+
         if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
                 goto err;
  
@@ -1768,6 +1777,9 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
  {
         struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
  
+       if (cxl_is_power9())
+               pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0);
+
         cxl_native_release_psl_err_irq(adapter);
         cxl_unmap_adapter_regs(adapter);
  
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c

index 95285b7f636ff3f854876e5743658f91d88c3e3d..4b5a4c5d3c012dff77508b9f0b8b9883671e594c 100644 (file)
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device *device,
         return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
  }
  
+static ssize_t tunneled_ops_supported_show(struct device *device,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct cxl *adapter = to_cxl_adapter(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported);
+}
+
  static ssize_t reset_adapter_store(struct device *device,
                                    struct device_attribute *attr,
                                    const char *buf, size_t count)
@@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = {
         __ATTR_RO(base_image),
         __ATTR_RO(image_loaded),
         __ATTR_RO(psl_timebase_synced),
+       __ATTR_RO(tunneled_ops_supported),
         __ATTR_RW(load_image_on_perst),
         __ATTR_RW(perst_reloads_same_image),
         __ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c

index 0c125f207aea82c394f989f5033fccda9cd7e0b1..33053b0d1fdf65c2d590598cd361b7869e663522 100644 (file)
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -518,7 +518,7 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
         if (of_node && of_match_device(at24_of_match, dev))
                 cdata = of_device_get_match_data(dev);
         else if (id)
-               cdata = (void *)&id->driver_data;
+               cdata = (void *)id->driver_data;
         else
                 cdata = acpi_device_get_match_data(dev);
  
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c

index 9e923cd1d80ebee9c96d70aceabb87b2a0935581..38a7586b00ccafd6bb0eed4307f9a40f91fee90a 100644 (file)
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2485,7 +2485,7 @@ static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd,
                 break;
         }
  
-       return 0;
+       return ret;
  }
  
  #ifdef CONFIG_COMPAT
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c

index 0ef741bc515d9f354e03576bc2eb079b40a1d7e5..d0e83db42ae52614b5ab9f03d211290f7c100937 100644 (file)
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -33,6 +33,8 @@ struct sdhci_iproc_host {
         const struct sdhci_iproc_data *data;
         u32 shadow_cmd;
         u32 shadow_blk;
+       bool is_cmd_shadowed;
+       bool is_blk_shadowed;
  };
  
  #define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18)
@@ -48,8 +50,22 @@ static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg)
  
  static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg)
  {
-       u32 val = sdhci_iproc_readl(host, (reg & ~3));
-       u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host);
+       u32 val;
+       u16 word;
+
+       if ((reg == SDHCI_TRANSFER_MODE) && iproc_host->is_cmd_shadowed) {
+               /* Get the saved transfer mode */
+               val = iproc_host->shadow_cmd;
+       } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+                  iproc_host->is_blk_shadowed) {
+               /* Get the saved block info */
+               val = iproc_host->shadow_blk;
+       } else {
+               val = sdhci_iproc_readl(host, (reg & ~3));
+       }
+       word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
         return word;
  }
  
@@ -105,13 +121,15 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
  
         if (reg == SDHCI_COMMAND) {
                 /* Write the block now as we are issuing a command */
-               if (iproc_host->shadow_blk != 0) {
+               if (iproc_host->is_blk_shadowed) {
                         sdhci_iproc_writel(host, iproc_host->shadow_blk,
                                 SDHCI_BLOCK_SIZE);
-                       iproc_host->shadow_blk = 0;
+                       iproc_host->is_blk_shadowed = false;
                 }
                 oldval = iproc_host->shadow_cmd;
-       } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+               iproc_host->is_cmd_shadowed = false;
+       } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+                  iproc_host->is_blk_shadowed) {
                 /* Block size and count are stored in shadow reg */
                 oldval = iproc_host->shadow_blk;
         } else {
@@ -123,9 +141,11 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
         if (reg == SDHCI_TRANSFER_MODE) {
                 /* Save the transfer mode until the command is issued */
                 iproc_host->shadow_cmd = newval;
+               iproc_host->is_cmd_shadowed = true;
         } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
                 /* Save the block info until the command is issued */
                 iproc_host->shadow_blk = newval;
+               iproc_host->is_blk_shadowed = true;
         } else {
                 /* Command or other regular 32-bit write */
                 sdhci_iproc_writel(host, newval, reg & ~3);
@@ -166,7 +186,7 @@ static const struct sdhci_ops sdhci_iproc_32only_ops = {
  
  static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = {
         .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
-       .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
+       .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON,
         .ops = &sdhci_iproc_32only_ops,
  };
  
@@ -206,7 +226,6 @@ static const struct sdhci_iproc_data iproc_data = {
         .caps1 = SDHCI_DRIVER_TYPE_C |
                  SDHCI_DRIVER_TYPE_D |
                  SDHCI_SUPPORT_DDR50,
-       .mmc_caps = MMC_CAP_1_8V_DDR,
  };
  
  static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = {
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c

index db5ec4e8bde912ff482ac9b2194394bfcec6b5e5..ebb1d141b90000c069b0634fe0a3c4d5d5f842d4 100644 (file)
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1194,11 +1194,13 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk,
                                   NDCB0_CMD2(NAND_CMD_READSTART);
  
         /*
-        * Trigger the naked read operation only on the last chunk.
-        * Otherwise, use monolithic read.
+        * Trigger the monolithic read on the first chunk, then naked read on
+        * intermediate chunks and finally a last naked read on the last chunk.
          */
-       if (lt->nchunks == 1 || (chunk < lt->nchunks - 1))
+       if (chunk == 0)
                 nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW);
+       else if (chunk < lt->nchunks - 1)
+               nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW);
         else
                 nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW);
  
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c

index 23b45da784cb601a7abf84b212717aee7dc64403..b89acaee12d4364247a694ad989d2ae06ca640aa 100644 (file)
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
         /* Locate the first rule available */
         if (fs->location == RX_CLS_LOC_ANY)
                 rule_index = find_first_zero_bit(priv->cfp.used,
-                                                bcm_sf2_cfp_rule_size(priv));
+                                                priv->num_cfp_rules);
         else
                 rule_index = fs->location;
  
+       if (rule_index > bcm_sf2_cfp_rule_size(priv))
+               return -ENOSPC;
+
         layout = &udf_tcpip4_layout;
         /* We only use one UDF slice for now */
         slice_num = bcm_sf2_get_slice_number(layout, 0);
@@ -562,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
          * first half because the HW search is by incrementing addresses.
          */
         if (fs->location == RX_CLS_LOC_ANY)
-               rule_index[0] = find_first_zero_bit(priv->cfp.used,
-                                                   bcm_sf2_cfp_rule_size(priv));
+               rule_index[1] = find_first_zero_bit(priv->cfp.used,
+                                                   priv->num_cfp_rules);
         else
-               rule_index[0] = fs->location;
+               rule_index[1] = fs->location;
+       if (rule_index[1] > bcm_sf2_cfp_rule_size(priv))
+               return -ENOSPC;
  
         /* Flag it as used (cleared on error path) such that we can immediately
          * obtain a second one to chain from.
          */
-       set_bit(rule_index[0], priv->cfp.used);
+       set_bit(rule_index[1], priv->cfp.used);
  
-       rule_index[1] = find_first_zero_bit(priv->cfp.used,
-                                           bcm_sf2_cfp_rule_size(priv));
-       if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) {
+       rule_index[0] = find_first_zero_bit(priv->cfp.used,
+                                           priv->num_cfp_rules);
+       if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) {
                 ret = -ENOSPC;
                 goto out_err;
         }
@@ -712,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
         /* Flag the second half rule as being used now, return it as the
          * location, and flag it as unique while dumping rules
          */
-       set_bit(rule_index[1], priv->cfp.used);
+       set_bit(rule_index[0], priv->cfp.used);
         set_bit(rule_index[1], priv->cfp.unique);
         fs->location = rule_index[1];
  
         return ret;
  
  out_err:
-       clear_bit(rule_index[0], priv->cfp.used);
+       clear_bit(rule_index[1], priv->cfp.used);
         return ret;
  }
  
@@ -785,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port,
         int ret;
         u32 reg;
  
-       /* Refuse deletion of unused rules, and the default reserved rule */
-       if (!test_bit(loc, priv->cfp.used) || loc == 0)
-               return -EINVAL;
-
         /* Indicate which rule we want to read */
         bcm_sf2_cfp_rule_addr_set(priv, loc);
  
@@ -826,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port,
         u32 next_loc = 0;
         int ret;
  
+       /* Refuse deleting unused rules, and those that are not unique since
+        * that could leave IPv6 rules with one of the chained rule in the
+        * table.
+        */
+       if (!test_bit(loc, priv->cfp.unique) || loc == 0)
+               return -EINVAL;
+
         ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc);
         if (ret)
                 return ret;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c

index 36c8950dbd2d80699f396f217f0f438479f68355..176861bd225258d6df955196d94966cfe5253cdd 100644 (file)
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1212,9 +1212,9 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
         vp->mii.reg_num_mask = 0x1f;
  
         /* Makes sure rings are at least 16 byte aligned. */
-       vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+       vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
                                            + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                          &vp->rx_ring_dma);
+                                          &vp->rx_ring_dma, GFP_KERNEL);
         retval = -ENOMEM;
         if (!vp->rx_ring)
                 goto free_device;
@@ -1476,11 +1476,10 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
                 return 0;
  
  free_ring:
-       pci_free_consistent(pdev,
-                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
-                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                               vp->rx_ring,
-                                               vp->rx_ring_dma);
+       dma_free_coherent(&pdev->dev,
+               sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+               sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+               vp->rx_ring, vp->rx_ring_dma);
  free_device:
         free_netdev(dev);
         pr_err(PFX "vortex_probe1 fails.  Returns %d\n", retval);
@@ -1751,9 +1750,9 @@ vortex_open(struct net_device *dev)
                                 break;                  /* Bad news!  */
  
                         skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
-                       dma = pci_map_single(VORTEX_PCI(vp), skb->data,
-                                            PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
-                       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+                       dma = dma_map_single(vp->gendev, skb->data,
+                                            PKT_BUF_SZ, DMA_FROM_DEVICE);
+                       if (dma_mapping_error(vp->gendev, dma))
                                 break;
                         vp->rx_ring[i].addr = cpu_to_le32(dma);
                 }
@@ -2067,9 +2066,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
         if (vp->bus_master) {
                 /* Set the bus-master controller to transfer the packet. */
                 int len = (skb->len + 3) & ~3;
-               vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
-                                               PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+               vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len,
+                                               DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) {
                         dev_kfree_skb_any(skb);
                         dev->stats.tx_dropped++;
                         return NETDEV_TX_OK;
@@ -2168,9 +2167,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                         vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
  
         if (!skb_shinfo(skb)->nr_frags) {
-               dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len,
-                                         PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+               dma_addr = dma_map_single(vp->gendev, skb->data, skb->len,
+                                         DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, dma_addr))
                         goto out_dma_err;
  
                 vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2178,9 +2177,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
         } else {
                 int i;
  
-               dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data,
-                                         skb_headlen(skb), PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+               dma_addr = dma_map_single(vp->gendev, skb->data,
+                                         skb_headlen(skb), DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, dma_addr))
                         goto out_dma_err;
  
                 vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2189,21 +2188,21 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  
-                       dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag,
+                       dma_addr = skb_frag_dma_map(vp->gendev, frag,
                                                     0,
                                                     frag->size,
                                                     DMA_TO_DEVICE);
-                       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) {
+                       if (dma_mapping_error(vp->gendev, dma_addr)) {
                                 for(i = i-1; i >= 0; i--)
-                                       dma_unmap_page(&VORTEX_PCI(vp)->dev,
+                                       dma_unmap_page(vp->gendev,
                                                        le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr),
                                                        le32_to_cpu(vp->tx_ring[entry].frag[i+1].length),
                                                        DMA_TO_DEVICE);
  
-                               pci_unmap_single(VORTEX_PCI(vp),
+                               dma_unmap_single(vp->gendev,
                                                  le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
                                                  le32_to_cpu(vp->tx_ring[entry].frag[0].length),
-                                                PCI_DMA_TODEVICE);
+                                                DMA_TO_DEVICE);
  
                                 goto out_dma_err;
                         }
@@ -2218,8 +2217,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 }
         }
  #else
-       dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE);
-       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+       dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE);
+       if (dma_mapping_error(vp->gendev, dma_addr))
                 goto out_dma_err;
         vp->tx_ring[entry].addr = cpu_to_le32(dma_addr);
         vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
@@ -2254,7 +2253,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
  out:
         return NETDEV_TX_OK;
  out_dma_err:
-       dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n");
+       dev_err(vp->gendev, "Error mapping dma buffer\n");
         goto out;
  }
  
@@ -2322,7 +2321,7 @@ vortex_interrupt(int irq, void *dev_id)
                 if (status & DMADone) {
                         if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) {
                                 iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
-                               pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+                               dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
                                 pkts_compl++;
                                 bytes_compl += vp->tx_skb->len;
                                 dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
@@ -2459,19 +2458,19 @@ boomerang_interrupt(int irq, void *dev_id)
                                         struct sk_buff *skb = vp->tx_skbuff[entry];
  #if DO_ZEROCOPY
                                         int i;
-                                       pci_unmap_single(VORTEX_PCI(vp),
+                                       dma_unmap_single(vp->gendev,
                                                         le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
                                                         le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
-                                                       PCI_DMA_TODEVICE);
+                                                       DMA_TO_DEVICE);
  
                                         for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
-                                                       pci_unmap_page(VORTEX_PCI(vp),
+                                                       dma_unmap_page(vp->gendev,
                                                                                          le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
                                                                                          le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
-                                                                                        PCI_DMA_TODEVICE);
+                                                                                        DMA_TO_DEVICE);
  #else
-                                       pci_unmap_single(VORTEX_PCI(vp),
-                                               le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+                                       dma_unmap_single(vp->gendev,
+                                               le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE);
  #endif
                                         pkts_compl++;
                                         bytes_compl += skb->len;
@@ -2561,14 +2560,14 @@ static int vortex_rx(struct net_device *dev)
                                 /* 'skb_put()' points to the start of sk_buff data area. */
                                 if (vp->bus_master &&
                                         ! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) {
-                                       dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len),
-                                                                          pkt_len, PCI_DMA_FROMDEVICE);
+                                       dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len),
+                                                                          pkt_len, DMA_FROM_DEVICE);
                                         iowrite32(dma, ioaddr + Wn7_MasterAddr);
                                         iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
                                         iowrite16(StartDMAUp, ioaddr + EL3_CMD);
                                         while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)
                                                 ;
-                                       pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE);
+                                       dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE);
                                 } else {
                                         ioread32_rep(ioaddr + RX_FIFO,
                                                      skb_put(skb, pkt_len),
@@ -2635,11 +2634,11 @@ boomerang_rx(struct net_device *dev)
                         if (pkt_len < rx_copybreak &&
                             (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
                                 skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
-                               pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                 /* 'skb_put()' points to the start of sk_buff data area. */
                                 skb_put_data(skb, vp->rx_skbuff[entry]->data,
                                              pkt_len);
-                               pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                 vp->rx_copy++;
                         } else {
                                 /* Pre-allocate the replacement skb.  If it or its
@@ -2651,9 +2650,9 @@ boomerang_rx(struct net_device *dev)
                                         dev->stats.rx_dropped++;
                                         goto clear_complete;
                                 }
-                               newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
-                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
-                               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+                               newdma = dma_map_single(vp->gendev, newskb->data,
+                                                       PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               if (dma_mapping_error(vp->gendev, newdma)) {
                                         dev->stats.rx_dropped++;
                                         consume_skb(newskb);
                                         goto clear_complete;
@@ -2664,7 +2663,7 @@ boomerang_rx(struct net_device *dev)
                                 vp->rx_skbuff[entry] = newskb;
                                 vp->rx_ring[entry].addr = cpu_to_le32(newdma);
                                 skb_put(skb, pkt_len);
-                               pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                 vp->rx_nocopy++;
                         }
                         skb->protocol = eth_type_trans(skb, dev);
@@ -2761,8 +2760,8 @@ vortex_close(struct net_device *dev)
         if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
                 for (i = 0; i < RX_RING_SIZE; i++)
                         if (vp->rx_skbuff[i]) {
-                               pci_unmap_single(       VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr),
-                                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr),
+                                                                       PKT_BUF_SZ, DMA_FROM_DEVICE);
                                 dev_kfree_skb(vp->rx_skbuff[i]);
                                 vp->rx_skbuff[i] = NULL;
                         }
@@ -2775,12 +2774,12 @@ vortex_close(struct net_device *dev)
                                 int k;
  
                                 for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
-                                               pci_unmap_single(VORTEX_PCI(vp),
+                                               dma_unmap_single(vp->gendev,
                                                                                  le32_to_cpu(vp->tx_ring[i].frag[k].addr),
                                                                                  le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
-                                                                                PCI_DMA_TODEVICE);
+                                                                                DMA_TO_DEVICE);
  #else
-                               pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+                               dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE);
  #endif
                                 dev_kfree_skb(skb);
                                 vp->tx_skbuff[i] = NULL;
@@ -3288,11 +3287,10 @@ static void vortex_remove_one(struct pci_dev *pdev)
  
         pci_iounmap(pdev, vp->ioaddr);
  
-       pci_free_consistent(pdev,
-                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
-                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                               vp->rx_ring,
-                                               vp->rx_ring_dma);
+       dma_free_coherent(&pdev->dev,
+                       sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+                       sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+                       vp->rx_ring, vp->rx_ring_dma);
  
         pci_release_regions(pdev);
  
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c

index ac99d089ac7266c349fa974e4edabab1e127b90b..1c97e39b478e9f8957ff76f75bc47d819b248c03 100644 (file)
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -164,7 +164,9 @@ bad_clone_list[] __initdata = {
  #define NESM_START_PG  0x40    /* First page of TX buffer */
  #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
  
-#if defined(CONFIG_ATARI)      /* 8-bit mode on Atari, normal on Q40 */
+#if defined(CONFIG_MACH_TX49XX)
+#  define DCR_VAL 0x48         /* 8-bit mode */
+#elif defined(CONFIG_ATARI)    /* 8-bit mode on Atari, normal on Q40 */
  #  define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49)
  #else
  #  define DCR_VAL 0x49
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c

index a561705f232cd89dbf26c8907a7de31591907a50..be198cc0b10c9c7664bdf80e0f93bb3db69e8cf9 100644 (file)
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
         if (!ioaddr) {
                 if (pcnet32_debug & NETIF_MSG_PROBE)
                         pr_err("card has no PCI IO resources, aborting\n");
-               return -ENODEV;
+               err = -ENODEV;
+               goto err_disable_dev;
         }
  
         err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
         if (err) {
                 if (pcnet32_debug & NETIF_MSG_PROBE)
                         pr_err("architecture does not support 32bit PCI busmaster DMA\n");
-               return err;
+               goto err_disable_dev;
         }
         if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
                 if (pcnet32_debug & NETIF_MSG_PROBE)
                         pr_err("io address range already allocated\n");
-               return -EBUSY;
+               err = -EBUSY;
+               goto err_disable_dev;
         }
  
         err = pcnet32_probe1(ioaddr, 1, pdev);
+
+err_disable_dev:
         if (err < 0)
                 pci_disable_device(pdev);
  
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h

index b57acb8dc35bd1325d2e1932b9280d49c61d243f..dc25066c59a1052abe53491832a90e96ac3b26b8 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -419,15 +419,15 @@ static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
         {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
         {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
         {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
-       {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
-       {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
-       {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
-       {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
-       {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
-       {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */
-       {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */
-       {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
-       {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
+       {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */
+       {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */
+       {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */
+       {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */
+       {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */
+       {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */
+       {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */
+       {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */
+       {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */
  };
  
  static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
@@ -444,16 +444,6 @@ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
         {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
         {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
         {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
-       {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
-       {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
-       {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
-       {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
-       {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
-       {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */
-       {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */
-       {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */
-       {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
-       {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
  };
  
  static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c

index db92f1858060ec685d7b59740ada8422097f178f..b76447baccaf35968156747d662ade1d71c98dc5 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -836,7 +836,7 @@ bool is_filter_exact_match(struct adapter *adap,
  {
         struct tp_params *tp = &adap->params.tp;
         u64 hash_filter_mask = tp->hash_filter_mask;
-       u32 mask;
+       u64 ntuple_mask = 0;
  
         if (!is_hashfilter(adap))
                 return false;
@@ -865,73 +865,45 @@ bool is_filter_exact_match(struct adapter *adap,
         if (!fs->val.fport || fs->mask.fport != 0xffff)
                 return false;
  
-       if (tp->fcoe_shift >= 0) {
-               mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W;
-               if (mask && !fs->mask.fcoe)
-                       return false;
-       }
+       /* calculate tuple mask and compare with mask configured in hw */
+       if (tp->fcoe_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift;
  
-       if (tp->port_shift >= 0) {
-               mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W;
-               if (mask && !fs->mask.iport)
-                       return false;
-       }
+       if (tp->port_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.iport << tp->port_shift;
  
         if (tp->vnic_shift >= 0) {
-               mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W;
-
-               if ((adap->params.tp.ingress_config & VNIC_F)) {
-                       if (mask && !fs->mask.pfvf_vld)
-                               return false;
-               } else {
-                       if (mask && !fs->mask.ovlan_vld)
-                               return false;
-               }
+               if ((adap->params.tp.ingress_config & VNIC_F))
+                       ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift;
+               else
+                       ntuple_mask |= (u64)fs->mask.ovlan_vld <<
+                               tp->vnic_shift;
         }
  
-       if (tp->vlan_shift >= 0) {
-               mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W;
-               if (mask && !fs->mask.ivlan)
-                       return false;
-       }
+       if (tp->vlan_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift;
  
-       if (tp->tos_shift >= 0) {
-               mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W;
-               if (mask && !fs->mask.tos)
-                       return false;
-       }
+       if (tp->tos_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift;
  
-       if (tp->protocol_shift >= 0) {
-               mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W;
-               if (mask && !fs->mask.proto)
-                       return false;
-       }
+       if (tp->protocol_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift;
  
-       if (tp->ethertype_shift >= 0) {
-               mask = (hash_filter_mask >> tp->ethertype_shift) &
-                       FT_ETHERTYPE_W;
-               if (mask && !fs->mask.ethtype)
-                       return false;
-       }
+       if (tp->ethertype_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift;
  
-       if (tp->macmatch_shift >= 0) {
-               mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W;
-               if (mask && !fs->mask.macidx)
-                       return false;
-       }
+       if (tp->macmatch_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift;
+
+       if (tp->matchtype_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift;
+
+       if (tp->frag_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift;
+
+       if (ntuple_mask != hash_filter_mask)
+               return false;
  
-       if (tp->matchtype_shift >= 0) {
-               mask = (hash_filter_mask >> tp->matchtype_shift) &
-                       FT_MPSHITTYPE_W;
-               if (mask && !fs->mask.matchtype)
-                       return false;
-       }
-       if (tp->frag_shift >= 0) {
-               mask = (hash_filter_mask >> tp->frag_shift) &
-                       FT_FRAGMENTATION_W;
-               if (mask && !fs->mask.frag)
-                       return false;
-       }
         return true;
  }
  
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c

index 81684acf52afa576bda05d9c535a7890c5ee8deb..8a8b12b720ef99bb923a3a441c933ee36e596e80 100644 (file)
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2747,11 +2747,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         pci_set_master(pdev);
  
         /* Query PCI controller on system for DMA addressing
-        * limitation for the device.  Try 64-bit first, and
+        * limitation for the device.  Try 47-bit first, and
          * fail to 32-bit.
          */
  
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47));
         if (err) {
                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                 if (err) {
@@ -2765,10 +2765,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                         goto err_out_release_regions;
                 }
         } else {
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47));
                 if (err) {
                         dev_err(dev, "Unable to obtain %u-bit DMA "
-                               "for consistent allocations, aborting\n", 64);
+                               "for consistent allocations, aborting\n", 47);
                         goto err_out_release_regions;
                 }
                 using_dac = 1;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

index d4604bc8eb5b04742534100c4c285065bda2021e..9d3eed46830d73041b53c12411bf68d8939041e2 100644 (file)
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
  /*
   * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx.
   * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c

index f81439796ac77dfdd47e9a350b576122bfc0b1d9..43d9732150404100fc09be91ebfa3e750b419577 100644 (file)
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Fast Ethernet Controller (ENET) PTP driver for MX6x.
   *
   * Copyright (C) 2012 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
   */
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c

index 6e8d6a6f6aaf2c809a92349f64c83f406bd35e9a..5ec1185808e5d13e82990732299c413f7d9120a1 100644 (file)
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -192,6 +192,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
         if (adapter->fw_done_rc) {
                 dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
                         adapter->fw_done_rc);
+               dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
                 return -1;
         }
         return 0;
@@ -795,9 +796,11 @@ static int ibmvnic_login(struct net_device *netdev)
         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
         unsigned long timeout = msecs_to_jiffies(30000);
         int retry_count = 0;
+       bool retry;
         int rc;
  
         do {
+               retry = false;
                 if (retry_count > IBMVNIC_MAX_QUEUES) {
                         netdev_warn(netdev, "Login attempts exceeded\n");
                         return -1;
@@ -821,6 +824,9 @@ static int ibmvnic_login(struct net_device *netdev)
                         retry_count++;
                         release_sub_crqs(adapter, 1);
  
+                       retry = true;
+                       netdev_dbg(netdev,
+                                  "Received partial success, retrying...\n");
                         adapter->init_done_rc = 0;
                         reinit_completion(&adapter->init_done);
                         send_cap_queries(adapter);
@@ -848,7 +854,7 @@ static int ibmvnic_login(struct net_device *netdev)
                         netdev_warn(netdev, "Adapter login failed\n");
                         return -1;
                 }
-       } while (adapter->init_done_rc == PARTIALSUCCESS);
+       } while (retry);
  
         /* handle pending MAC address changes after successful login */
         if (adapter->mac_change_pending) {
@@ -1821,9 +1827,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                         if (rc)
                                 return rc;
                 }
+               ibmvnic_disable_irqs(adapter);
         }
-
-       ibmvnic_disable_irqs(adapter);
         adapter->state = VNIC_CLOSED;
  
         if (reset_state == VNIC_CLOSED)
@@ -2617,18 +2622,21 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
  {
         struct device *dev = &adapter->vdev->dev;
         unsigned long rc;
-       u64 val;
  
         if (scrq->hw_irq > 0x100000000ULL) {
                 dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq);
                 return 1;
         }
  
-       val = (0xff000000) | scrq->hw_irq;
-       rc = plpar_hcall_norets(H_EOI, val);
-       if (rc)
-               dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
-                       val, rc);
+       if (adapter->resetting &&
+           adapter->reset_reason == VNIC_RESET_MOBILITY) {
+               u64 val = (0xff000000) | scrq->hw_irq;
+
+               rc = plpar_hcall_norets(H_EOI, val);
+               if (rc)
+                       dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
+                               val, rc);
+       }
  
         rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
                                 H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
@@ -4586,14 +4594,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                 release_crq_queue(adapter);
         }
  
-       rc = init_stats_buffers(adapter);
-       if (rc)
-               return rc;
-
-       rc = init_stats_token(adapter);
-       if (rc)
-               return rc;
-
         return rc;
  }
  
@@ -4662,13 +4662,21 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
                         goto ibmvnic_init_fail;
         } while (rc == EAGAIN);
  
+       rc = init_stats_buffers(adapter);
+       if (rc)
+               goto ibmvnic_init_fail;
+
+       rc = init_stats_token(adapter);
+       if (rc)
+               goto ibmvnic_stats_fail;
+
         netdev->mtu = adapter->req_mtu - ETH_HLEN;
         netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
         netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
  
         rc = device_create_file(&dev->dev, &dev_attr_failover);
         if (rc)
-               goto ibmvnic_init_fail;
+               goto ibmvnic_dev_file_err;
  
         netif_carrier_off(netdev);
         rc = register_netdev(netdev);
@@ -4687,6 +4695,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
  ibmvnic_register_fail:
         device_remove_file(&dev->dev, &dev_attr_failover);
  
+ibmvnic_dev_file_err:
+       release_stats_token(adapter);
+
+ibmvnic_stats_fail:
+       release_stats_buffers(adapter);
+
  ibmvnic_init_fail:
         release_sub_crqs(adapter, 1);
         release_crq_queue(adapter);
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c

index a822f7a56bc5bc7727bfcff9c148053776a9855e..685337d58276fc91baeeb64387c52985e1bc6dda 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -43,12 +43,12 @@
  #include "fw.h"
  
  /*
- * We allocate in as big chunks as we can, up to a maximum of 256 KB
- * per chunk.
+ * We allocate in page size (default 4KB on many archs) chunks to avoid high
+ * order memory allocations in fragmented/high usage memory situation.
   */
  enum {
-       MLX4_ICM_ALLOC_SIZE     = 1 << 18,
-       MLX4_TABLE_CHUNK_SIZE   = 1 << 18
+       MLX4_ICM_ALLOC_SIZE     = PAGE_SIZE,
+       MLX4_TABLE_CHUNK_SIZE   = PAGE_SIZE,
  };
  
  static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
@@ -398,9 +398,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
         u64 size;
  
         obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
+       if (WARN_ON(!obj_per_chunk))
+               return -EINVAL;
         num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
  
-       table->icm      = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
+       table->icm      = kvzalloc(num_icm * sizeof(*table->icm), GFP_KERNEL);
         if (!table->icm)
                 return -ENOMEM;
         table->virt     = virt;
@@ -446,7 +448,7 @@ err:
                         mlx4_free_icm(dev, table->icm[i], use_coherent);
                 }
  
-       kfree(table->icm);
+       kvfree(table->icm);
  
         return -ENOMEM;
  }
@@ -462,5 +464,5 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
                         mlx4_free_icm(dev, table->icm[i], table->coherent);
                 }
  
-       kfree(table->icm);
+       kvfree(table->icm);
  }
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c

index 2edcce98ab2d6a33599ca874b5a7d9b2359f6c52..65482f004e50a11bc3f13b7253716c89a0a02338 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -172,7 +172,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
                 list_add_tail(&dev_ctx->list, &priv->ctx_list);
                 spin_unlock_irqrestore(&priv->ctx_lock, flags);
  
-               mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n",
+               mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n",
                          dev_ctx->intf->protocol, enable ?
                          "enabled" : "disabled");
         }
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c

index 211578ffc70d2ba12105e6f20d9ab7ecf7efd0ac..60172a38c4a43abfc0856b3159f9caeac452cf6c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2929,6 +2929,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
                 mlx4_err(dev, "Failed to create file for port %d\n", port);
                 devlink_port_unregister(&info->devlink_port);
                 info->port = -1;
+               return err;
         }
  
         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
@@ -2950,9 +2951,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
                                    &info->port_attr);
                 devlink_port_unregister(&info->devlink_port);
                 info->port = -1;
+               return err;
         }
  
-       return err;
+       return 0;
  }
  
  static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c

index 3aaf4bad6c5a5bd7cbdf111a8f6411697a3b4289..427e7a31862c2a000bc51d068c7254e8e72a9e6a 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -393,11 +393,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
         struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
         struct mlx4_qp *qp;
  
-       spin_lock(&qp_table->lock);
+       spin_lock_irq(&qp_table->lock);
  
         qp = __mlx4_qp_lookup(dev, qpn);
  
-       spin_unlock(&qp_table->lock);
+       spin_unlock_irq(&qp_table->lock);
         return qp;
  }
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 176645762e491687dae7ae0fb28da282fa29c49f..1ff0b0e9380410510c094ea1cb5699545ef9fc4f 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -615,6 +615,45 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
         return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
  }
  
+static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+{
+       int last_frag_sz, bytes_in_prev, nr_frags;
+       u8 *fcs_p1, *fcs_p2;
+       skb_frag_t *last_frag;
+       __be32 fcs_bytes;
+
+       if (!skb_is_nonlinear(skb))
+               return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
+       last_frag_sz = skb_frag_size(last_frag);
+
+       /* If all FCS data is in last frag */
+       if (last_frag_sz >= ETH_FCS_LEN)
+               return *(__be32 *)(skb_frag_address(last_frag) +
+                                  last_frag_sz - ETH_FCS_LEN);
+
+       fcs_p2 = (u8 *)skb_frag_address(last_frag);
+       bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
+
+       /* Find where the other part of the FCS is - Linear or another frag */
+       if (nr_frags == 1) {
+               fcs_p1 = skb_tail_pointer(skb);
+       } else {
+               skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
+
+               fcs_p1 = skb_frag_address(prev_frag) +
+                           skb_frag_size(prev_frag);
+       }
+       fcs_p1 -= bytes_in_prev;
+
+       memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
+       memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
+
+       return fcs_bytes;
+}
+
  static inline void mlx5e_handle_csum(struct net_device *netdev,
                                      struct mlx5_cqe64 *cqe,
                                      struct mlx5e_rq *rq,
@@ -643,6 +682,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                         skb->csum = csum_partial(skb->data + ETH_HLEN,
                                                  network_depth - ETH_HLEN,
                                                  skb->csum);
+               if (unlikely(netdev->features & NETIF_F_RXFCS))
+                       skb->csum = csum_add(skb->csum,
+                                            (__force __wsum)mlx5e_get_fcs(skb));
                 rq->stats.csum_complete++;
                 return;
         }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c

index 0f5da499a22339fa11eb30fa73334b5d8ec4c039..fad8c2e3804e4f49da4461976da18c3e6316264c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -237,19 +237,17 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
         context->buf.sg[0].data = &context->command;
  
         spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
-       list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+       res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+       if (!res)
+               list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
         spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
  
-       res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
         if (res) {
-               mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
-                              res);
-               spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
-               list_del(&context->list);
-               spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+               mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
                 kfree(context);
                 return ERR_PTR(res);
         }
+
         /* Context will be freed by wait func after completion */
         return context;
  }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c

index 1dc424685f4e772966ea9aa0bfc757cc0f41b18b..35fb31f682af4db9bae2efd946d7d2596fbd7eed 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -335,7 +335,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
                 return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
  
         start = mem;
-       while (mem - start + 8 < nfp_cpp_area_size(area)) {
+       while (mem - start + 8 <= nfp_cpp_area_size(area)) {
                 u8 __iomem *value;
                 u32 type, length;
  
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c

index 00f41c145d4d01674d146fe1eda41a346b3cc5a1..820b226d6ff8c91952b266e585a5c2ea91b625cb 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -77,7 +77,7 @@
  #define ILT_CFG_REG(cli, reg)  PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
  
  /* ILT entry structure */
-#define ILT_ENTRY_PHY_ADDR_MASK                0x000FFFFFFFFFFFULL
+#define ILT_ENTRY_PHY_ADDR_MASK                (~0ULL >> 12)
  #define ILT_ENTRY_PHY_ADDR_SHIFT       0
  #define ILT_ENTRY_VALID_MASK           0x1ULL
  #define ILT_ENTRY_VALID_SHIFT          52
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c

index 38502815d681d08086f02bbb49e340c06658766d..468c59d2e491aa0fd6616bd624c2cc956c5759b6 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
         struct qed_ll2_tx_packet *p_pkt = NULL;
         struct qed_ll2_info *p_ll2_conn;
         struct qed_ll2_tx_queue *p_tx;
+       unsigned long flags = 0;
         dma_addr_t tx_frag;
  
         p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
@@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
  
         p_tx = &p_ll2_conn->tx_queue;
  
+       spin_lock_irqsave(&p_tx->lock, flags);
         while (!list_empty(&p_tx->active_descq)) {
                 p_pkt = list_first_entry(&p_tx->active_descq,
                                          struct qed_ll2_tx_packet, list_entry);
@@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                 list_del(&p_pkt->list_entry);
                 b_last_packet = list_empty(&p_tx->active_descq);
                 list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+               spin_unlock_irqrestore(&p_tx->lock, flags);
                 if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                         struct qed_ooo_buffer *p_buffer;
  
@@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                                                       b_last_frag,
                                                       b_last_packet);
                 }
+               spin_lock_irqsave(&p_tx->lock, flags);
         }
+       spin_unlock_irqrestore(&p_tx->lock, flags);
  }
  
  static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
@@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
         struct qed_ll2_info *p_ll2_conn = NULL;
         struct qed_ll2_rx_packet *p_pkt = NULL;
         struct qed_ll2_rx_queue *p_rx;
+       unsigned long flags = 0;
  
         p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
         if (!p_ll2_conn)
@@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
  
         p_rx = &p_ll2_conn->rx_queue;
  
+       spin_lock_irqsave(&p_rx->lock, flags);
         while (!list_empty(&p_rx->active_descq)) {
                 p_pkt = list_first_entry(&p_rx->active_descq,
                                          struct qed_ll2_rx_packet, list_entry);
                 if (!p_pkt)
                         break;
-
                 list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
+               spin_unlock_irqrestore(&p_rx->lock, flags);
  
                 if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                         struct qed_ooo_buffer *p_buffer;
@@ -588,7 +595,30 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                                                       cookie,
                                                       rx_buf_addr, b_last);
                 }
+               spin_lock_irqsave(&p_rx->lock, flags);
         }
+       spin_unlock_irqrestore(&p_rx->lock, flags);
+}
+
+static bool
+qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn,
+                               struct core_rx_slow_path_cqe *p_cqe)
+{
+       struct ooo_opaque *iscsi_ooo;
+       u32 cid;
+
+       if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH)
+               return false;
+
+       iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data;
+       if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES)
+               return false;
+
+       /* Need to make a flush */
+       cid = le32_to_cpu(iscsi_ooo->cid);
+       qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid);
+
+       return true;
  }
  
  static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
@@ -617,6 +647,11 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
                 cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
                 cqe_type = cqe->rx_cqe_sp.type;
  
+               if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH)
+                       if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn,
+                                                           &cqe->rx_cqe_sp))
+                               continue;
+
                 if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
                         DP_NOTICE(p_hwfn,
                                   "Got a non-regular LB LL2 completion [type 0x%02x]\n",
@@ -794,6 +829,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
         struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
         int rc;
  
+       if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
+               return 0;
+
         rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
         if (rc)
                 return rc;
@@ -814,6 +852,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
         u16 new_idx = 0, num_bds = 0;
         int rc;
  
+       if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
+               return 0;
+
         new_idx = le16_to_cpu(*p_tx->p_fw_cons);
         num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
  
@@ -1867,17 +1908,25 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
  
         /* Stop Tx & Rx of connection, if needed */
         if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+               p_ll2_conn->tx_queue.b_cb_registred = false;
+               smp_wmb(); /* Make sure this is seen by ll2_lb_txq_completion */
                 rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
                 if (rc)
                         goto out;
+
                 qed_ll2_txq_flush(p_hwfn, connection_handle);
+               qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
         }
  
         if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+               p_ll2_conn->rx_queue.b_cb_registred = false;
+               smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
                 rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
                 if (rc)
                         goto out;
+
                 qed_ll2_rxq_flush(p_hwfn, connection_handle);
+               qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
         }
  
         if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
@@ -1925,16 +1974,6 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
         if (!p_ll2_conn)
                 return;
  
-       if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
-               p_ll2_conn->rx_queue.b_cb_registred = false;
-               qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
-       }
-
-       if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
-               p_ll2_conn->tx_queue.b_cb_registred = false;
-               qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
-       }
-
         kfree(p_ll2_conn->tx_queue.descq_mem);
         qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
  
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c

index a01e7d6e5442f079e9006811b82b4feb02dc23bc..f6655e251bbd71c5bec0404062612738122f8f17 100644 (file)
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1066,13 +1066,12 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
  
         DP_INFO(edev, "Starting qede_remove\n");
  
+       qede_rdma_dev_remove(edev);
         unregister_netdev(ndev);
         cancel_delayed_work_sync(&edev->sp_task);
  
         qede_ptp_disable(edev);
  
-       qede_rdma_dev_remove(edev);
-
         edev->ops->common->set_power_state(cdev, PCI_D0);
  
         pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h

index a5b792ce2ae7d046e78ec4c7bfa886a805bc00e8..1bf930d4a1e52c1891953f8c709355eb0e6a6be9 100644 (file)
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -163,7 +163,7 @@ enum {
  };
  
  /* Driver's parameters */
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_RENESAS)
  #define SH_ETH_RX_ALIGN                32
  #else
  #define SH_ETH_RX_ALIGN                2
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c

index 450eec264a5ea53fe0e592e467de3626321a9f6d..4377c26f714d0522ebf5d1de6ac774b6e42024ea 100644 (file)
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -792,8 +792,10 @@ static int ipvlan_device_event(struct notifier_block *unused,
                 break;
  
         case NETDEV_CHANGEADDR:
-               list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+               list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
                         ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
+                       call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
+               }
                 break;
  
         case NETDEV_PRE_TYPE_CHANGE:
diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c

index 6838129839ca457b7c06ddfbddeb8936f60b4f19..e757b09f188944befe4f769b8637c98adbf3f627 100644 (file)
--- a/drivers/net/phy/bcm-cygnus.c
+++ b/drivers/net/phy/bcm-cygnus.c
@@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct phy_device *phydev)
                 return rc;
  
         /* make rcal=100, since rdb default is 000 */
-       rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10);
+       rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10);
         if (rc < 0)
                 return rc;
  
         /* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */
-       rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10);
+       rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10);
         if (rc < 0)
                 return rc;
  
         /* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */
-       rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00);
+       rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00);
  
         return 0;
  }
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c

index 5ad130c3da43c869b39dc8ec83ec6795aa82be7d..d5e0833d69b9b8a27c36842286d29029402a3156 100644 (file)
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -56,7 +56,7 @@ int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
         /* The register must be written to both the Shadow Register Select and
          * the Shadow Read Register Selector
          */
-       phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum |
+       phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MASK |
                   regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT);
         return phy_read(phydev, MII_BCM54XX_AUX_CTL);
  }
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h

index 7c73808cbbded22bb01b60ef2b616456267f99cc..81cceaa412fe32439a31561416610198c6c6c3e5 100644 (file)
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -14,11 +14,18 @@
  #ifndef _LINUX_BCM_PHY_LIB_H
  #define _LINUX_BCM_PHY_LIB_H
  
+#include <linux/brcmphy.h>
  #include <linux/phy.h>
  
  int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
  int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
  
+static inline int bcm_phy_write_exp_sel(struct phy_device *phydev,
+                                       u16 reg, u16 val)
+{
+       return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val);
+}
+
  int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val);
  int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum);
  
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c

index 29b1c88b55cc494bae079da9b2283170508b4e9d..01d2ff2f62413226caecebe5e18b45ccfb12c808 100644 (file)
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -65,10 +65,10 @@ struct bcm7xxx_phy_priv {
  static void r_rc_cal_reset(struct phy_device *phydev)
  {
         /* Reset R_CAL/RC_CAL Engine */
-       bcm_phy_write_exp(phydev, 0x00b0, 0x0010);
+       bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010);
  
         /* Disable Reset R_AL/RC_CAL Engine */
-       bcm_phy_write_exp(phydev, 0x00b0, 0x0000);
+       bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000);
  }
  
  static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c

index f41b224a9cdbf49ccf82d72b5052686548c005a7..ab195f0916d69e49719b24319821a0e908f31064 100644 (file)
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -573,9 +573,40 @@ static int ksz9031_config_init(struct phy_device *phydev)
                 ksz9031_of_load_skew_values(phydev, of_node,
                                 MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
                                 tx_data_skews, 4);
+
+               /* Silicon Errata Sheet (DS80000691D or DS80000692D):
+                * When the device links in the 1000BASE-T slave mode only,
+                * the optional 125MHz reference output clock (CLK125_NDO)
+                * has wide duty cycle variation.
+                *
+                * The optional CLK125_NDO clock does not meet the RGMII
+                * 45/55 percent (min/max) duty cycle requirement and therefore
+                * cannot be used directly by the MAC side for clocking
+                * applications that have setup/hold time requirements on
+                * rising and falling clock edges.
+                *
+                * Workaround:
+                * Force the phy to be the master to receive a stable clock
+                * which meets the duty cycle requirement.
+                */
+               if (of_property_read_bool(of_node, "micrel,force-master")) {
+                       result = phy_read(phydev, MII_CTRL1000);
+                       if (result < 0)
+                               goto err_force_master;
+
+                       /* enable master mode, config & prefer master */
+                       result |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER;
+                       result = phy_write(phydev, MII_CTRL1000, result);
+                       if (result < 0)
+                               goto err_force_master;
+               }
         }
  
         return ksz9031_center_flp_timing(phydev);
+
+err_force_master:
+       phydev_err(phydev, "failed to force the phy to master mode\n");
+       return result;
  }
  
  #define KSZ8873MLL_GLOBAL_CONTROL_4    0x06
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c

index dc7c7ec432023a7ac8e7a63410f7ec04437ea7b7..02ad03a2fab773cd36707e50434cc559fc639f7c 100644 (file)
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -605,30 +605,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  
         if (cmd == PPPIOCDETACH) {
                 /*
-                * We have to be careful here... if the file descriptor
-                * has been dup'd, we could have another process in the
-                * middle of a poll using the same file *, so we had
-                * better not free the interface data structures -
-                * instead we fail the ioctl.  Even in this case, we
-                * shut down the interface if we are the owner of it.
-                * Actually, we should get rid of PPPIOCDETACH, userland
-                * (i.e. pppd) could achieve the same effect by closing
-                * this fd and reopening /dev/ppp.
+                * PPPIOCDETACH is no longer supported as it was heavily broken,
+                * and is only known to have been used by pppd older than
+                * ppp-2.4.2 (released November 2003).
                  */
+               pr_warn_once("%s (%d) used obsolete PPPIOCDETACH ioctl\n",
+                            current->comm, current->pid);
                 err = -EINVAL;
-               if (pf->kind == INTERFACE) {
-                       ppp = PF_TO_PPP(pf);
-                       rtnl_lock();
-                       if (file == ppp->owner)
-                               unregister_netdevice(ppp->dev);
-                       rtnl_unlock();
-               }
-               if (atomic_long_read(&file->f_count) < 2) {
-                       ppp_release(NULL, file);
-                       err = 0;
-               } else
-                       pr_warn("PPPIOCDETACH file->f_count=%ld\n",
-                               atomic_long_read(&file->f_count));
                 goto out;
         }
  
diff --git a/drivers/net/tun.c b/drivers/net/tun.c

index ef33950a45d909b34dfe937396873ece728314f6..45d807796a18a1584ea7af3d43cb0b8daab9ca8f 100644 (file)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -681,15 +681,6 @@ static void tun_queue_purge(struct tun_file *tfile)
         skb_queue_purge(&tfile->sk.sk_error_queue);
  }
  
-static void tun_cleanup_tx_ring(struct tun_file *tfile)
-{
-       if (tfile->tx_ring.queue) {
-               ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
-               xdp_rxq_info_unreg(&tfile->xdp_rxq);
-               memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-       }
-}
-
  static void __tun_detach(struct tun_file *tfile, bool clean)
  {
         struct tun_file *ntfile;
@@ -736,7 +727,9 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                             tun->dev->reg_state == NETREG_REGISTERED)
                                 unregister_netdevice(tun->dev);
                 }
-               tun_cleanup_tx_ring(tfile);
+               if (tun)
+                       xdp_rxq_info_unreg(&tfile->xdp_rxq);
+               ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
                 sock_put(&tfile->sk);
         }
  }
@@ -783,14 +776,14 @@ static void tun_detach_all(struct net_device *dev)
                 tun_napi_del(tun, tfile);
                 /* Drop read queue */
                 tun_queue_purge(tfile);
+               xdp_rxq_info_unreg(&tfile->xdp_rxq);
                 sock_put(&tfile->sk);
-               tun_cleanup_tx_ring(tfile);
         }
         list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
                 tun_enable_queue(tfile);
                 tun_queue_purge(tfile);
+               xdp_rxq_info_unreg(&tfile->xdp_rxq);
                 sock_put(&tfile->sk);
-               tun_cleanup_tx_ring(tfile);
         }
         BUG_ON(tun->numdisabled != 0);
  
@@ -834,7 +827,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
         }
  
         if (!tfile->detached &&
-           ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
+           ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
+                           GFP_KERNEL, tun_ptr_free)) {
                 err = -ENOMEM;
                 goto out;
         }
@@ -1429,6 +1423,13 @@ static void tun_net_init(struct net_device *dev)
         dev->max_mtu = MAX_MTU - dev->hard_header_len;
  }
  
+static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile)
+{
+       struct sock *sk = tfile->socket.sk;
+
+       return (tun->dev->flags & IFF_UP) && sock_writeable(sk);
+}
+
  /* Character device part */
  
  /* Poll */
@@ -1451,10 +1452,14 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
         if (!ptr_ring_empty(&tfile->tx_ring))
                 mask |= EPOLLIN | EPOLLRDNORM;
  
-       if (tun->dev->flags & IFF_UP &&
-           (sock_writeable(sk) ||
-            (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
-             sock_writeable(sk))))
+       /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable, to
+        * guarantee that EPOLLOUT is raised either here or by
+        * tun_sock_write_space(). The process can then get a notification
+        * after it writes to a down device and gets -EIO.
+        */
+       if (tun_sock_writeable(tun, tfile) ||
+           (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+            tun_sock_writeable(tun, tfile)))
                 mask |= EPOLLOUT | EPOLLWRNORM;
  
         if (tun->dev->reg_state != NETREG_REGISTERED)
@@ -3219,6 +3224,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
                                             &tun_proto, 0);
         if (!tfile)
                 return -ENOMEM;
+       if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
+               sk_free(&tfile->sk);
+               return -ENOMEM;
+       }
+
         RCU_INIT_POINTER(tfile->tun, NULL);
         tfile->flags = 0;
         tfile->ifindex = 0;
@@ -3239,8 +3249,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
  
         sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
  
-       memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-
         return 0;
  }
  
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

index 770422e953f75e1e9a1bb3a317d83f8070e6ae36..032e1ac10a3091ef48346c48756d18fd9c038246 100644 (file)
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -707,6 +707,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 void *data;
                 u32 act;
  
+               /* Transient failure which in theory could occur if
+                * in-flight packets from before XDP was enabled reach
+                * the receive path after XDP is loaded.
+                */
+               if (unlikely(hdr->hdr.gso_type))
+                       goto err_xdp;
+
                 /* This happens when rx buffer size is underestimated
                  * or headroom is not enough because of the buffer
                  * was refilled before XDP is set. This should only
@@ -727,14 +734,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         xdp_page = page;
                 }
  
-               /* Transient failure which in theory could occur if
-                * in-flight packets from before XDP was enabled reach
-                * the receive path after XDP is loaded. In practice I
-                * was not able to create this condition.
-                */
-               if (unlikely(hdr->hdr.gso_type))
-                       goto err_xdp;
-
                 /* Allow consuming headroom but reserve enough space to push
                  * the descriptor on if we get an XDP_TX return code.
                  */
@@ -775,7 +774,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         }
                         *xdp_xmit = true;
                         if (unlikely(xdp_page != page))
-                               goto err_xdp;
+                               put_page(page);
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
@@ -787,7 +786,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         }
                         *xdp_xmit = true;
                         if (unlikely(xdp_page != page))
-                               goto err_xdp;
+                               put_page(page);
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
@@ -875,7 +874,7 @@ err_xdp:
         rcu_read_unlock();
  err_skb:
         put_page(page);
-       while (--num_buf) {
+       while (num_buf-- > 1) {
                 buf = virtqueue_get_buf(rq->vq, &len);
                 if (unlikely(!buf)) {
                         pr_debug("%s: rx error: %d buffers missing\n",
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c

index 9ebe2a689966d056a1b9a94e5ea557f0d0f9a55e..27a9bb8c9611ce1bb44c3372923248c7b7ccee6a 100644 (file)
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
  
         gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
         while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
+               /* Prevent any &gdesc->tcd field from being (speculatively)
+                * read before (&gdesc->tcd)->gen is read.
+                */
+               dma_rmb();
+
                 completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
                                                &gdesc->tcd), tq, adapter->pdev,
                                                adapter);
@@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                 gdesc->txd.tci = skb_vlan_tag_get(skb);
         }
  
+       /* Ensure that the write to (&gdesc->txd)->gen will be observed after
+        * all other writes to &gdesc->txd.
+        */
+       dma_wmb();
+
         /* finally flips the GEN bit of the SOP desc. */
         gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
                                                   VMXNET3_TXD_GEN);
@@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                          */
                         break;
                 }
+
+               /* Prevent any rcd field from being (speculatively) read before
+                * rcd->gen is read.
+                */
+               dma_rmb();
+
                 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
                        rcd->rqID != rq->dataRingQid);
                 idx = rcd->rxdIdx;
@@ -1528,6 +1544,12 @@ rcd_done:
                 ring->next2comp = idx;
                 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
                 ring = rq->rx_ring + ring_idx;
+
+               /* Ensure that the writes to rxd->gen bits will be observed
+                * after all other writes to rxd objects.
+                */
+               dma_wmb();
+
                 while (num_to_alloc) {
                         vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
                                           &rxCmdDesc);
@@ -2688,7 +2710,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
  /* ==================== initialization and cleanup routines ============ */
  
  static int
-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
+vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
  {
         int err;
         unsigned long mmio_start, mmio_len;
@@ -2700,30 +2722,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
                 return err;
         }
  
-       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
-               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_consistent_dma_mask failed\n");
-                       err = -EIO;
-                       goto err_set_mask;
-               }
-               *dma64 = true;
-       } else {
-               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_dma_mask failed\n");
-                       err = -EIO;
-                       goto err_set_mask;
-               }
-               *dma64 = false;
-       }
-
         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
                                            vmxnet3_driver_name);
         if (err) {
                 dev_err(&pdev->dev,
                         "Failed to request region for adapter: error %d\n", err);
-               goto err_set_mask;
+               goto err_enable_device;
         }
  
         pci_set_master(pdev);
@@ -2751,7 +2755,7 @@ err_bar1:
         iounmap(adapter->hw_addr0);
  err_ioremap:
         pci_release_selected_regions(pdev, (1 << 2) - 1);
-err_set_mask:
+err_enable_device:
         pci_disable_device(pdev);
         return err;
  }
@@ -3254,7 +3258,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
  #endif
         };
         int err;
-       bool dma64 = false; /* stupid gcc */
+       bool dma64;
         u32 ver;
         struct net_device *netdev;
         struct vmxnet3_adapter *adapter;
@@ -3300,6 +3304,24 @@ vmxnet3_probe_device(struct pci_dev *pdev,
         adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
         adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
  
+       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+                       dev_err(&pdev->dev,
+                               "pci_set_consistent_dma_mask failed\n");
+                       err = -EIO;
+                       goto err_set_mask;
+               }
+               dma64 = true;
+       } else {
+               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+                       dev_err(&pdev->dev,
+                               "pci_set_dma_mask failed\n");
+                       err = -EIO;
+                       goto err_set_mask;
+               }
+               dma64 = false;
+       }
+
         spin_lock_init(&adapter->cmd_lock);
         adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
                                              sizeof(struct vmxnet3_adapter),
@@ -3307,7 +3329,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
         if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
                 dev_err(&pdev->dev, "Failed to map dma\n");
                 err = -EFAULT;
-               goto err_dma_map;
+               goto err_set_mask;
         }
         adapter->shared = dma_alloc_coherent(
                                 &adapter->pdev->dev,
@@ -3358,7 +3380,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
         }
  #endif /* VMXNET3_RSS */
  
-       err = vmxnet3_alloc_pci_resources(adapter, &dma64);
+       err = vmxnet3_alloc_pci_resources(adapter);
         if (err < 0)
                 goto err_alloc_pci;
  
@@ -3504,7 +3526,7 @@ err_alloc_queue_desc:
  err_alloc_shared:
         dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
                          sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
-err_dma_map:
+err_set_mask:
         free_netdev(netdev);
         return err;
  }
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h

index a3326463b71f1e26cd74b33b4480044f57ec4f31..a2c554f8a61bc3262823d9ffd551af6ccedfc299 100644 (file)
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,12 @@
  /*
   * Version numbers
   */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.14.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.16.0-k"
  
-/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040e00
+/* Each byte of this 32-bit integer encodes a version number in
+ * VMXNET3_DRIVER_VERSION_STRING.
+ */
+#define VMXNET3_DRIVER_VERSION_NUM      0x01041000
  
  #if defined(CONFIG_PCI_MSI)
         /* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c

index 4a017a0d71ea47ae270119593f4c9e1db3632513..920c23e542a57755bd29946a5b46492b0133251b 100644 (file)
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3340,7 +3340,7 @@ out_err:
  static int hwsim_dump_radio_nl(struct sk_buff *skb,
                                struct netlink_callback *cb)
  {
-       int last_idx = cb->args[0];
+       int last_idx = cb->args[0] - 1;
         struct mac80211_hwsim_data *data = NULL;
         int res = 0;
         void *hdr;
@@ -3368,7 +3368,7 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
                 last_idx = data->idx;
         }
  
-       cb->args[0] = last_idx;
+       cb->args[0] = last_idx + 1;
  
         /* list changed, but no new element sent, set interrupted flag */
         if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c

index a64023690cadeec34e251c38efeb4b036f0a91ac..27902a8799b1e022049078efc2ce14269dbea672 100644 (file)
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -100,6 +100,9 @@ static int nvdimm_bus_probe(struct device *dev)
         if (!try_module_get(provider))
                 return -ENXIO;
  
+       dev_dbg(&nvdimm_bus->dev, "START: %s.probe(%s)\n",
+                       dev->driver->name, dev_name(dev));
+
         nvdimm_bus_probe_start(nvdimm_bus);
         rc = nd_drv->probe(dev);
         if (rc == 0)
@@ -108,7 +111,7 @@ static int nvdimm_bus_probe(struct device *dev)
                 nd_region_disable(nvdimm_bus, dev);
         nvdimm_bus_probe_end(nvdimm_bus);
  
-       dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
+       dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
                         dev_name(dev), rc);
  
         if (rc != 0)
@@ -566,14 +569,18 @@ int nvdimm_revalidate_disk(struct gendisk *disk)
  {
         struct device *dev = disk_to_dev(disk)->parent;
         struct nd_region *nd_region = to_nd_region(dev->parent);
-       const char *pol = nd_region->ro ? "only" : "write";
+       int disk_ro = get_disk_ro(disk);
  
-       if (nd_region->ro == get_disk_ro(disk))
+       /*
+        * Upgrade to read-only if the region is read-only; preserve as
+        * read-only if the disk is already read-only.
+        */
+       if (disk_ro || nd_region->ro == disk_ro)
                 return 0;
  
-       dev_info(dev, "%s read-%s, marking %s read-%s\n",
-                       dev_name(&nd_region->dev), pol, disk->disk_name, pol);
-       set_disk_ro(disk, nd_region->ro);
+       dev_info(dev, "%s read-only, marking %s read-only\n",
+                       dev_name(&nd_region->dev), disk->disk_name);
+       set_disk_ro(disk, 1);
  
         return 0;
  
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c

index 6f9a6ffd7cde25f3e4714b2035eb519db1099fd8..521eaf53a52aada9c99e804971f7041fcc327563 100644 (file)
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -38,12 +38,27 @@ static int e820_range_to_nid(resource_size_t addr)
  }
  #endif
  
+static int e820_register_one(struct resource *res, void *data)
+{
+       struct nd_region_desc ndr_desc;
+       struct nvdimm_bus *nvdimm_bus = data;
+
+       memset(&ndr_desc, 0, sizeof(ndr_desc));
+       ndr_desc.res = res;
+       ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+       ndr_desc.numa_node = e820_range_to_nid(res->start);
+       set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+       if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+               return -ENXIO;
+       return 0;
+}
+
  static int e820_pmem_probe(struct platform_device *pdev)
  {
         static struct nvdimm_bus_descriptor nd_desc;
         struct device *dev = &pdev->dev;
         struct nvdimm_bus *nvdimm_bus;
-       struct resource *p;
+       int rc = -ENXIO;
  
         nd_desc.attr_groups = e820_pmem_attribute_groups;
         nd_desc.provider_name = "e820";
@@ -53,27 +68,15 @@ static int e820_pmem_probe(struct platform_device *pdev)
                 goto err;
         platform_set_drvdata(pdev, nvdimm_bus);
  
-       for (p = iomem_resource.child; p ; p = p->sibling) {
-               struct nd_region_desc ndr_desc;
-
-               if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY)
-                       continue;
-
-               memset(&ndr_desc, 0, sizeof(ndr_desc));
-               ndr_desc.res = p;
-               ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
-               ndr_desc.numa_node = e820_range_to_nid(p->start);
-               set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
-               if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
-                       goto err;
-       }
-
+       rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+                       IORESOURCE_MEM, 0, -1, nvdimm_bus, e820_register_one);
+       if (rc)
+               goto err;
         return 0;
-
- err:
+err:
         nvdimm_bus_unregister(nvdimm_bus);
         dev_err(dev, "failed to register legacy persistent memory ranges\n");
-       return -ENXIO;
+       return rc;
  }
  
  static struct platform_driver e820_pmem_driver = {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c

index 30b08791597d7f76175fec0f4432d8fda1b83109..3f7ad5bc443ee80416236143e0a0d58d8b138c04 100644 (file)
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
         res->start += start_pad;
         res->end -= end_trunc;
  
-       pgmap->type = MEMORY_DEVICE_HOST;
-
         if (nd_pfn->mode == PFN_MODE_RAM) {
                 if (offset < SZ_8K)
                         return -EINVAL;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c

index 6d3da8c92868bd3bc72768e934d15fa23ee72d2b..68940356cad3f100f4cfbdd325d42235ea3c5da4 100644 (file)
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -164,11 +164,6 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
         return rc;
  }
  
-/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
-#ifndef REQ_FLUSH
-#define REQ_FLUSH REQ_PREFLUSH
-#endif
-
  static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
  {
         blk_status_t rc = 0;
@@ -179,7 +174,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
         struct pmem_device *pmem = q->queuedata;
         struct nd_region *nd_region = to_region(pmem);
  
-       if (bio->bi_opf & REQ_FLUSH)
+       if (bio->bi_opf & REQ_PREFLUSH)
                 nvdimm_flush(nd_region);
  
         do_acct = nd_iostat_start(bio, &start);
@@ -301,12 +296,33 @@ static void pmem_release_disk(void *__pmem)
         put_disk(pmem->disk);
  }
  
+static void pmem_release_pgmap_ops(void *__pgmap)
+{
+       dev_pagemap_put_ops();
+}
+
+static void fsdax_pagefree(struct page *page, void *data)
+{
+       wake_up_var(&page->_refcount);
+}
+
+static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
+{
+       dev_pagemap_get_ops();
+       if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
+               return -ENOMEM;
+       pgmap->type = MEMORY_DEVICE_FS_DAX;
+       pgmap->page_free = fsdax_pagefree;
+
+       return 0;
+}
+
  static int pmem_attach_disk(struct device *dev,
                 struct nd_namespace_common *ndns)
  {
         struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
         struct nd_region *nd_region = to_nd_region(dev->parent);
-       int nid = dev_to_node(dev), fua, wbc;
+       int nid = dev_to_node(dev), fua;
         struct resource *res = &nsio->res;
         struct resource bb_res;
         struct nd_pfn *nd_pfn = NULL;
@@ -342,7 +358,6 @@ static int pmem_attach_disk(struct device *dev,
                 dev_warn(dev, "unable to guarantee persistence of writes\n");
                 fua = 0;
         }
-       wbc = nvdimm_has_cache(nd_region);
  
         if (!devm_request_mem_region(dev, res->start, resource_size(res),
                                 dev_name(&ndns->dev))) {
@@ -360,6 +375,8 @@ static int pmem_attach_disk(struct device *dev,
         pmem->pfn_flags = PFN_DEV;
         pmem->pgmap.ref = &q->q_usage_counter;
         if (is_nd_pfn(dev)) {
+               if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+                       return -ENOMEM;
                 addr = devm_memremap_pages(dev, &pmem->pgmap);
                 pfn_sb = nd_pfn->pfn_sb;
                 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -371,6 +388,8 @@ static int pmem_attach_disk(struct device *dev,
         } else if (pmem_should_map_pages(dev)) {
                 memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
                 pmem->pgmap.altmap_valid = false;
+               if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+                       return -ENOMEM;
                 addr = devm_memremap_pages(dev, &pmem->pgmap);
                 pmem->pfn_flags |= PFN_MAP;
                 memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
@@ -389,7 +408,7 @@ static int pmem_attach_disk(struct device *dev,
                 return PTR_ERR(addr);
         pmem->virt_addr = addr;
  
-       blk_queue_write_cache(q, wbc, fua);
+       blk_queue_write_cache(q, true, fua);
         blk_queue_make_request(q, pmem_make_request);
         blk_queue_physical_block_size(q, PAGE_SIZE);
         blk_queue_logical_block_size(q, pmem_sector_size(ndns));
@@ -420,7 +439,7 @@ static int pmem_attach_disk(struct device *dev,
                 put_disk(disk);
                 return -ENOMEM;
         }
-       dax_write_cache(dax_dev, wbc);
+       dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
         pmem->dax_dev = dax_dev;
  
         gendev = disk_to_dev(disk);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c

index a612be6f019d49b1ce08da99e981493b1f9e22d7..ec3543b83330f25e040894599c44b3898d6bd66b 100644 (file)
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1132,7 +1132,8 @@ EXPORT_SYMBOL_GPL(nvdimm_has_flush);
  
  int nvdimm_has_cache(struct nd_region *nd_region)
  {
-       return is_nd_pmem(&nd_region->dev);
+       return is_nd_pmem(&nd_region->dev) &&
+               !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
  }
  EXPORT_SYMBOL_GPL(nvdimm_has_cache);
  
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig

index 88a8b5916624ae6b0805366ee8ed8325a3191a08..dbb7464c018cac028c2601d76811fd302f34e117 100644 (file)
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -27,7 +27,7 @@ config NVME_FABRICS
  
  config NVME_RDMA
         tristate "NVM Express over Fabrics RDMA host driver"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
+       depends on INFINIBAND_ADDR_TRANS && BLOCK
         select NVME_CORE
         select NVME_FABRICS
         select SG_POOL
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig

index 3c7b61ddb0d186a017196d56378184a9988d525a..7595664ee7531d8b4856091eb6beec11aaded673 100644 (file)
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
  
  config NVME_TARGET_RDMA
         tristate "NVMe over Fabrics RDMA target support"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on INFINIBAND_ADDR_TRANS
         depends on NVME_TARGET
         select SGL_ALLOC
         help
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c

index 126cf19e869bad39d88ad6a769423d98dd3b706b..297599fcbc3251bf284512dae20727c188befa67 100644 (file)
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1195,7 +1195,7 @@ void * ccio_get_iommu(const struct parisc_device *dev)
   * to/from certain pages.  To avoid this happening, we mark these pages
   * as `used', and ensure that nothing will try to allocate from them.
   */
-void ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
+void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
  {
         unsigned int idx;
         struct parisc_device *dev = parisc_parent(cujo);
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c

index e7bbdf947bbcf93ce4acebbcecd13c17ff247d5c..8350ca2311c73c3af5dd52eb60c967737e44950d 100644 (file)
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -91,6 +91,8 @@ static int send_command(struct cros_ec_device *ec_dev,
                         usleep_range(10000, 11000);
  
                         ret = (*xfer_fxn)(ec_dev, status_msg);
+                       if (ret == -EAGAIN)
+                               continue;
                         if (ret < 0)
                                 break;
  
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig

index bc309c5327ffdbe544214bb020e5b76ada5965e3..566644bb496ac82bf3263bb6ea285c7ab1fb070f 100644 (file)
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -168,8 +168,8 @@ config DELL_WMI
         depends on DMI
         depends on INPUT
         depends on ACPI_VIDEO || ACPI_VIDEO = n
+       depends on DELL_SMBIOS
         select DELL_WMI_DESCRIPTOR
-       select DELL_SMBIOS
         select INPUT_SPARSEKMAP
         ---help---
           Say Y here if you want to support WMI-based hotkeys on Dell laptops.
diff --git a/drivers/reset/reset-uniphier.c b/drivers/reset/reset-uniphier.c

index 360e06b20c5345528aaf089cf358f761f57a2b9e..ac18f2f27881091d529b746c96c6dff2ab3f99d3 100644 (file)
--- a/drivers/reset/reset-uniphier.c
+++ b/drivers/reset/reset-uniphier.c
@@ -110,7 +110,7 @@ static const struct uniphier_reset_data uniphier_ld20_sys_reset_data[] = {
         UNIPHIER_RESETX(4, 0x200c, 2),          /* eMMC */
         UNIPHIER_RESETX(6, 0x200c, 6),          /* Ether */
         UNIPHIER_RESETX(8, 0x200c, 8),          /* STDMAC (HSC) */
-       UNIPHIER_RESETX(12, 0x200c, 5),         /* GIO (PCIe, USB3) */
+       UNIPHIER_RESETX(14, 0x200c, 5),         /* USB30 */
         UNIPHIER_RESETX(16, 0x200c, 12),        /* USB30-PHY0 */
         UNIPHIER_RESETX(17, 0x200c, 13),        /* USB30-PHY1 */
         UNIPHIER_RESETX(18, 0x200c, 14),        /* USB30-PHY2 */
@@ -127,8 +127,8 @@ static const struct uniphier_reset_data uniphier_pxs3_sys_reset_data[] = {
         UNIPHIER_RESETX(6, 0x200c, 9),          /* Ether0 */
         UNIPHIER_RESETX(7, 0x200c, 10),         /* Ether1 */
         UNIPHIER_RESETX(8, 0x200c, 12),         /* STDMAC */
-       UNIPHIER_RESETX(12, 0x200c, 4),         /* USB30 link (GIO0) */
-       UNIPHIER_RESETX(13, 0x200c, 5),         /* USB31 link (GIO1) */
+       UNIPHIER_RESETX(12, 0x200c, 4),         /* USB30 link */
+       UNIPHIER_RESETX(13, 0x200c, 5),         /* USB31 link */
         UNIPHIER_RESETX(16, 0x200c, 16),        /* USB30-PHY0 */
         UNIPHIER_RESETX(17, 0x200c, 18),        /* USB30-PHY1 */
         UNIPHIER_RESETX(18, 0x200c, 20),        /* USB30-PHY2 */
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c

index 439991d71b146698c90540c5633cfdd0fbe1bc16..4c14ce428e92d8927fedb8702d3d9dd26801ddad 100644 (file)
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -141,7 +141,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
         int i;
  
         for (i = 0; i < nr_queues; i++) {
-               q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
+               q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL);
                 if (!q)
                         return -ENOMEM;
  
@@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
  {
         struct ciw *ciw;
         struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data;
-       int rc;
  
         memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib));
         memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag));
@@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
         ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE);
         if (!ciw) {
                 DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no);
-               rc = -EINVAL;
-               goto out_err;
+               return -EINVAL;
         }
         irq_ptr->equeue = *ciw;
  
         ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE);
         if (!ciw) {
                 DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no);
-               rc = -EINVAL;
-               goto out_err;
+               return -EINVAL;
         }
         irq_ptr->aqueue = *ciw;
  
@@ -512,9 +509,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
         init_data->cdev->handler = qdio_int_handler;
         spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev));
         return 0;
-out_err:
-       qdio_release_memory(irq_ptr);
-       return rc;
  }
  
  void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c

index 2c7550797ec2f51130c6e580a7b10d2bfc1155c5..dce92b2a895d6ff3bbe38104ed08ea32c7979432 100644 (file)
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -715,6 +715,10 @@ void cp_free(struct channel_program *cp)
   * and stores the result to ccwchain list. @cp must have been
   * initialized by a previous call with cp_init(). Otherwise, undefined
   * behavior occurs.
+ * For each chain composing the channel program:
+ * - On entry ch_len holds the count of CCWs to be translated.
+ * - On exit ch_len is adjusted to the count of successfully translated CCWs.
+ * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
   *
   * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
   * as helpers to do ccw chain translation inside the kernel. Basically
@@ -749,11 +753,18 @@ int cp_prefetch(struct channel_program *cp)
                 for (idx = 0; idx < len; idx++) {
                         ret = ccwchain_fetch_one(chain, idx, cp);
                         if (ret)
-                               return ret;
+                               goto out_err;
                 }
         }
  
         return 0;
+out_err:
+       /* Only clean up the chain elements that were actually translated. */
+       chain->ch_len = idx;
+       list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
+               chain->ch_len = 0;
+       }
+       return ret;
  }
  
  /**
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c

index a8b831000b2d687b9608a9658ac90650c7131b8e..18c4f933e8b9a82c51fa20e113b6f6ca20566311 100644 (file)
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -4,7 +4,7 @@
   *
   * Debug traces for zfcp.
   *
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
   */
  
  #define KMSG_COMPONENT "zfcp"
@@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter,
         spin_unlock_irqrestore(&dbf->rec_lock, flags);
  }
  
+/**
+ * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock
+ * @tag: identifier for event
+ * @adapter: adapter on which the erp_action should run
+ * @port: remote port involved in the erp_action
+ * @sdev: scsi device involved in the erp_action
+ * @want: wanted erp_action
+ * @need: required erp_action
+ *
+ * The adapter->erp_lock must not be held.
+ */
+void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+                           struct zfcp_port *port, struct scsi_device *sdev,
+                           u8 want, u8 need)
+{
+       unsigned long flags;
+
+       read_lock_irqsave(&adapter->erp_lock, flags);
+       zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need);
+       read_unlock_irqrestore(&adapter->erp_lock, flags);
+}
  
  /**
   * zfcp_dbf_rec_run_lvl - trace event related to running recovery
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h

index bf8ea4df2bb8c9fa621da9061795dd99c56ee091..e5eed8aac0ce6ba1922e0585439f5d0c1426419a 100644 (file)
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -4,7 +4,7 @@
   *
   * External function declarations.
   *
- * Copyright IBM Corp. 2002, 2016
+ * Copyright IBM Corp. 2002, 2018
   */
  
  #ifndef ZFCP_EXT_H
@@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *);
  extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
  extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
                               struct zfcp_port *, struct scsi_device *, u8, u8);
+extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+                                  struct zfcp_port *port,
+                                  struct scsi_device *sdev, u8 want, u8 need);
  extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
  extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
                                  struct zfcp_erp_action *erp);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c

index 4d2ba5682493221bf32f0c4000021da54dc57044..22f9562f415cbb09a098a83318818c49217a8237 100644 (file)
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -4,7 +4,7 @@
   *
   * Interface to Linux SCSI midlayer.
   *
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
   */
  
  #define KMSG_COMPONENT "zfcp"
@@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port)
         ids.port_id = port->d_id;
         ids.roles = FC_RPORT_ROLE_FCP_TARGET;
  
-       zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL,
-                         ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
-                         ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
+       zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL,
+                              ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
+                              ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
         rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids);
         if (!rport) {
                 dev_err(&port->adapter->ccw_device->dev,
@@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port)
         struct fc_rport *rport = port->rport;
  
         if (rport) {
-               zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL,
-                                 ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
-                                 ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
+               zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL,
+                                      ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
+                                      ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
                 fc_remote_port_delete(rport);
                 port->rport = NULL;
         }
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile

index e29f9b8fd66db1b21167fd7d15eaf0723b59b826..56c940394729e896b46e504a14e5d0eadb324f0a 100644 (file)
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -182,7 +182,7 @@ zalon7xx-objs       := zalon.o ncr53c8xx.o
  NCR_Q720_mod-objs      := NCR_Q720.o ncr53c8xx.o
  
  # Files generated that shall be removed upon make clean
-clean-files := 53c700_d.h 53c700_u.h
+clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c
  
  $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
  
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c

index 0156c9623c35d55012f8c77b24efd19b0b42d980..d62ddd63f4fe115d849f5aea3e16a649fa0531eb 100644 (file)
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -724,6 +724,8 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
         int wait;
         unsigned long flags = 0;
         unsigned long mflags = 0;
+       struct aac_hba_cmd_req *hbacmd = (struct aac_hba_cmd_req *)
+                       fibptr->hw_fib_va;
  
         fibptr->flags = (FIB_CONTEXT_FLAG | FIB_CONTEXT_FLAG_NATIVE_HBA);
         if (callback) {
@@ -734,11 +736,9 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
                 wait = 1;
  
  
-       if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
-               struct aac_hba_cmd_req *hbacmd =
-                       (struct aac_hba_cmd_req *)fibptr->hw_fib_va;
+       hbacmd->iu_type = command;
  
-               hbacmd->iu_type = command;
+       if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
                 /* bit1 of request_id must be 0 */
                 hbacmd->request_id =
                         cpu_to_le32((((u32)(fibptr - dev->fibs)) << 2) + 1);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c

index c198b96368dd69beba2dcffbe16efe7a3c74ae1d..5c40d809830f85916a7ed210798e54b2522fcda7 100644 (file)
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1894,7 +1894,7 @@ retry:
                 num = (rem_sz > scatter_elem_sz_prev) ?
                         scatter_elem_sz_prev : rem_sz;
  
-               schp->pages[k] = alloc_pages(gfp_mask, order);
+               schp->pages[k] = alloc_pages(gfp_mask | __GFP_ZERO, order);
                 if (!schp->pages[k])
                         goto out;
  
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c

index 2a21f2d4859229693381e955f141beff3fa64fea..35fab1e18adc3414935b182fe1774c911d733291 100644 (file)
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -188,9 +188,13 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
         struct scsi_device *SDev;
         struct scsi_sense_hdr sshdr;
         int result, err = 0, retries = 0;
+       unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL;
  
         SDev = cd->device;
  
+       if (cgc->sense)
+               senseptr = sense_buffer;
+
        retry:
         if (!scsi_block_when_processing_errors(SDev)) {
                 err = -ENODEV;
@@ -198,10 +202,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
         }
  
         result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
-                             cgc->buffer, cgc->buflen,
-                             (unsigned char *)cgc->sense, &sshdr,
+                             cgc->buffer, cgc->buflen, senseptr, &sshdr,
                               cgc->timeout, IOCTL_RETRIES, 0, 0, NULL);
  
+       if (cgc->sense)
+               memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense));
+
         /* Minimal error checking.  Ignore cases we know about, and report the rest. */
         if (driver_byte(result) != 0) {
                 switch (sshdr.sense_key) {
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c

index c374e3b5c678d215bfa9e7ed33e2d033e5d4bfb3..777e5f1e52d10968d5f23e0e316db05b8209511d 100644 (file)
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -609,7 +609,7 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
                         break;
  
                 case BTSTAT_ABORTQUEUE:
-                       cmd->result = (DID_ABORT << 16);
+                       cmd->result = (DID_BUS_BUSY << 16);
                         break;
  
                 case BTSTAT_SCSIPARITY:
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c

index 1596d35498c5a5567bc844c11c2fd1463867dde5..6573152ce8936e728cfb86a0210ce33eeb4e81d8 100644 (file)
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -490,7 +490,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
  
  static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
  {
-       if (!has_bspi(qspi) || (qspi->bspi_enabled))
+       if (!has_bspi(qspi))
                 return;
  
         qspi->bspi_enabled = 1;
@@ -505,7 +505,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
  
  static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
  {
-       if (!has_bspi(qspi) || (!qspi->bspi_enabled))
+       if (!has_bspi(qspi))
                 return;
  
         qspi->bspi_enabled = 0;
@@ -519,16 +519,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
  
  static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
  {
-       u32 data = 0;
+       u32 rd = 0;
+       u32 wr = 0;
  
-       if (qspi->curr_cs == cs)
-               return;
         if (qspi->base[CHIP_SELECT]) {
-               data = bcm_qspi_read(qspi, CHIP_SELECT, 0);
-               data = (data & ~0xff) | (1 << cs);
-               bcm_qspi_write(qspi, CHIP_SELECT, 0, data);
+               rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
+               wr = (rd & ~0xff) | (1 << cs);
+               if (rd == wr)
+                       return;
+               bcm_qspi_write(qspi, CHIP_SELECT, 0, wr);
                 usleep_range(10, 20);
         }
+
+       dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs);
         qspi->curr_cs = cs;
  }
  
@@ -755,8 +758,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
                         dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
                 }
                 mspi_cdram = MSPI_CDRAM_CONT_BIT;
-               mspi_cdram |= (~(1 << spi->chip_select) &
-                              MSPI_CDRAM_PCS);
+
+               if (has_bspi(qspi))
+                       mspi_cdram &= ~1;
+               else
+                       mspi_cdram |= (~(1 << spi->chip_select) &
+                                      MSPI_CDRAM_PCS);
+
                 mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
                                 MSPI_CDRAM_BITSE_BIT);
  
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c

index 1431cb98fe403a4b7039195bf172bf47bceb6702..3094d818cf06d4751122611bc2eb807e71d965fd 100644 (file)
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -184,6 +184,11 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
         struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
         irqreturn_t ret = IRQ_NONE;
  
+       /* IRQ may be shared, so return if our interrupts are disabled */
+       if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
+             (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE)))
+               return ret;
+
         /* check if we have data to read */
         while (bs->rx_len &&
                (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c

index 5c9516ae4942e5cf8b2ef381d2ecd496803cbf14..4a001634023e09b8e83b8e6b82b5af557e2c0853 100644 (file)
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -313,6 +313,14 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi)
  
         while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) &&
                (xspi->tx_bytes > 0)) {
+
+               /* When xspi is busy (TX FIFO full), a byte may fail to be sent
+                * and the SPI controller then misbehaves; add a one-byte delay.
+                */
+               if (cdns_spi_read(xspi, CDNS_SPI_ISR) &
+                   CDNS_SPI_IXR_TXFULL)
+                       usleep_range(10, 20);
+
                 if (xspi->txbuf)
                         cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
                 else
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c

index 6f57592a7f95ad0980362dd69840d72846cbad77..a056ee88a960bf8e0cb33a871ef573d0d81e417e 100644 (file)
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1701,7 +1701,7 @@ static struct platform_driver spi_imx_driver = {
  };
  module_platform_driver(spi_imx_driver);
  
-MODULE_DESCRIPTION("SPI Master Controller driver");
+MODULE_DESCRIPTION("SPI Controller driver");
  MODULE_AUTHOR("Sascha Hauer, Pengutronix");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h

index 513ec6c6e25b310b62d0024cd6c71a23c4890b5e..0ae7defd3492001dd096d8772bfde53db63226e7 100644 (file)
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -38,7 +38,7 @@ struct driver_data {
  
         /* SSP register addresses */
         void __iomem *ioaddr;
-       u32 ssdr_physical;
+       phys_addr_t ssdr_physical;
  
         /* SSP masks*/
         u32 dma_cr1;
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c

index ae086aab57d51edc2c8014160f4abeab365ea9cc..8171eedbfc90033b5bf942667cfcfe5214453d4e 100644 (file)
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -283,6 +283,7 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
         }
  
         k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1);
+       brps = min_t(int, brps, 32);
  
         scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps);
         sh_msiof_write(p, TSCR, scr);
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig

index 9371651d801776702dd34cde1d04d3501f1c54ef..c574dd210500ad836a17cf89d7f2d70a5dc0effb 100644 (file)
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -117,7 +117,7 @@ config SSB_SERIAL
  
  config SSB_DRIVER_PCICORE_POSSIBLE
         bool
-       depends on SSB_PCIHOST && SSB = y
+       depends on SSB_PCIHOST
         default y
  
  config SSB_DRIVER_PCICORE
@@ -131,7 +131,7 @@ config SSB_DRIVER_PCICORE
  
  config SSB_PCICORE_HOSTMODE
         bool "Hostmode support for SSB PCI core"
-       depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS
+       depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS && SSB = y
         help
           PCIcore hostmode operation (external PCI bus).
  
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig

index ad049e6f24e47a73b6894340be2742d95b21f692..f3b1ad4bd3dc77b41c1e6725470639251ba4829f 100644 (file)
--- a/drivers/staging/lustre/lnet/Kconfig
+++ b/drivers/staging/lustre/lnet/Kconfig
@@ -34,7 +34,7 @@ config LNET_SELFTEST
  
  config LNET_XPRT_IB
         tristate "LNET infiniband support"
-       depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on LNET && PCI && INFINIBAND_ADDR_TRANS
         default LNET && INFINIBAND
         help
           This option allows the LNET users to use infiniband as an
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c

index 4ad89ea71a70118dad2e5d960f89f54431a1ad67..4f26bdc3d1dc5dcc98f1942bc9ab34ee814029f1 100644 (file)
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item,
  
         if (val >= 0) {
                 udev->qfull_time_out = val * MSEC_PER_SEC;
+       } else if (val == -1) {
+               udev->qfull_time_out = val;
         } else {
                 printk(KERN_ERR "Invalid qfull timeout value %d\n", val);
                 return -EINVAL;
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c

index 0124a91c8d7139cd6840cfffeea64adc1cd1825c..dd46b758852aa9ba2866348e6f973da3447b3623 100644 (file)
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -238,6 +238,17 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params,
                         if (IS_ERR(shm))
                                 return PTR_ERR(shm);
  
+                       /*
+                        * Ensure offset + size does not overflow offset
+                        * and does not overflow the size of the referred
+                        * shared memory object.
+                        */
+                       if ((ip.a + ip.b) < ip.a ||
+                           (ip.a + ip.b) > shm->size) {
+                               tee_shm_put(shm);
+                               return -EINVAL;
+                       }
+
                         params[n].u.memref.shm_offs = ip.a;
                         params[n].u.memref.size = ip.b;
                         params[n].u.memref.shm = shm;
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c

index 556960a1bab3b4631b10a2803ab54167b06c376b..07d3be6f0780db209ac2be07354ac390c31d6be8 100644 (file)
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -360,9 +360,10 @@ int tee_shm_get_fd(struct tee_shm *shm)
         if (!(shm->flags & TEE_SHM_DMA_BUF))
                 return -EINVAL;
  
+       get_dma_buf(shm->dmabuf);
         fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
-       if (fd >= 0)
-               get_dma_buf(shm->dmabuf);
+       if (fd < 0)
+               dma_buf_put(shm->dmabuf);
         return fd;
  }
  
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c

index 72ebbc908e19f7ea9701ca0e831ee703874d1908..32cd52ca8318b6bcb6a9e12f7dbc5c0b18185672 100644 (file)
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
  
         slot_id = 0;
         for (i = 0; i < MAX_HC_SLOTS; i++) {
-               if (!xhci->devs[i])
+               if (!xhci->devs[i] || !xhci->devs[i]->udev)
                         continue;
                 speed = xhci->devs[i]->udev->speed;
                 if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3))
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c

index e7f99d55922af9c82a19722f83794905879d24fc..15a42cee0a9c27ad930fd28ae637fb6d1d9262f9 100644 (file)
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2524,8 +2524,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
  {
         struct musb     *musb = hcd_to_musb(hcd);
         u8              devctl;
+       int             ret;
  
-       musb_port_suspend(musb, true);
+       ret = musb_port_suspend(musb, true);
+       if (ret)
+               return ret;
  
         if (!is_host_active(musb))
                 return 0;
diff --git a/drivers/usb/musb/musb_host.h b/drivers/usb/musb/musb_host.h

index 72392bbcd0a4b72bfead8dc2c26c7d40ea0947ee..2999845632cefb2fafa17d7da8622c06772dcda5 100644 (file)
--- a/drivers/usb/musb/musb_host.h
+++ b/drivers/usb/musb/musb_host.h
@@ -67,7 +67,7 @@ extern void musb_host_rx(struct musb *, u8);
  extern void musb_root_disconnect(struct musb *musb);
  extern void musb_host_resume_root_hub(struct musb *musb);
  extern void musb_host_poke_root_hub(struct musb *musb);
-extern void musb_port_suspend(struct musb *musb, bool do_suspend);
+extern int musb_port_suspend(struct musb *musb, bool do_suspend);
  extern void musb_port_reset(struct musb *musb, bool do_reset);
  extern void musb_host_finish_resume(struct work_struct *work);
  #else
@@ -99,7 +99,10 @@ static inline void musb_root_disconnect(struct musb *musb)   {}
  static inline void musb_host_resume_root_hub(struct musb *musb)        {}
  static inline void musb_host_poll_rh_status(struct musb *musb) {}
  static inline void musb_host_poke_root_hub(struct musb *musb)  {}
-static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {}
+static inline int musb_port_suspend(struct musb *musb, bool do_suspend)
+{
+       return 0;
+}
  static inline void musb_port_reset(struct musb *musb, bool do_reset) {}
  static inline void musb_host_finish_resume(struct work_struct *work) {}
  #endif
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c

index 5165d2b07ade01985d2e9b104552b33d92850f47..2f8dd9826e9481a99e28f16a65be89e1e13a8955 100644 (file)
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -48,14 +48,14 @@ void musb_host_finish_resume(struct work_struct *work)
         spin_unlock_irqrestore(&musb->lock, flags);
  }
  
-void musb_port_suspend(struct musb *musb, bool do_suspend)
+int musb_port_suspend(struct musb *musb, bool do_suspend)
  {
         struct usb_otg  *otg = musb->xceiv->otg;
         u8              power;
         void __iomem    *mbase = musb->mregs;
  
         if (!is_host_active(musb))
-               return;
+               return 0;
  
         /* NOTE:  this doesn't necessarily put PHY into low power mode,
          * turning off its clock; that's a function of PHY integration and
@@ -66,16 +66,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
         if (do_suspend) {
                 int retries = 10000;
  
-               power &= ~MUSB_POWER_RESUME;
-               power |= MUSB_POWER_SUSPENDM;
-               musb_writeb(mbase, MUSB_POWER, power);
+               if (power & MUSB_POWER_RESUME)
+                       return -EBUSY;
  
-               /* Needed for OPT A tests */
-               power = musb_readb(mbase, MUSB_POWER);
-               while (power & MUSB_POWER_SUSPENDM) {
+               if (!(power & MUSB_POWER_SUSPENDM)) {
+                       power |= MUSB_POWER_SUSPENDM;
+                       musb_writeb(mbase, MUSB_POWER, power);
+
+                       /* Needed for OPT A tests */
                         power = musb_readb(mbase, MUSB_POWER);
-                       if (retries-- < 1)
-                               break;
+                       while (power & MUSB_POWER_SUSPENDM) {
+                               power = musb_readb(mbase, MUSB_POWER);
+                               if (retries-- < 1)
+                                       break;
+                       }
                 }
  
                 musb_dbg(musb, "Root port suspended, power %02x", power);
@@ -111,6 +115,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
                 schedule_delayed_work(&musb->finish_resume_work,
                                       msecs_to_jiffies(USB_RESUME_TIMEOUT));
         }
+       return 0;
  }
  
  void musb_port_reset(struct musb *musb, bool do_reset)
diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h

index 14a72357800ac2b10d067da9b36b7180ba5f70df..35618ceb279134bc02c6d8ff83671f55b25c82a8 100644 (file)
--- a/drivers/usb/usbip/stub.h
+++ b/drivers/usb/usbip/stub.h
@@ -73,6 +73,7 @@ struct bus_id_priv {
         struct stub_device *sdev;
         struct usb_device *udev;
         char shutdown_busid;
+       spinlock_t busid_lock;
  };
  
  /* stub_priv is allocated from stub_priv_cache */
@@ -83,6 +84,7 @@ extern struct usb_device_driver stub_driver;
  
  /* stub_main.c */
  struct bus_id_priv *get_busid_priv(const char *busid);
+void put_busid_priv(struct bus_id_priv *bid);
  int del_match_busid(char *busid);
  void stub_device_cleanup_urbs(struct stub_device *sdev);
  
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c

index dd8ef36ab10ec7d612bdb2358017f142a43906d8..c0d6ff1baa721754d42d1cae3b076685dcd86fe2 100644 (file)
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -300,9 +300,9 @@ static int stub_probe(struct usb_device *udev)
         struct stub_device *sdev = NULL;
         const char *udev_busid = dev_name(&udev->dev);
         struct bus_id_priv *busid_priv;
-       int rc;
+       int rc = 0;
  
-       dev_dbg(&udev->dev, "Enter\n");
+       dev_dbg(&udev->dev, "Enter probe\n");
  
         /* check we should claim or not by busid_table */
         busid_priv = get_busid_priv(udev_busid);
@@ -317,13 +317,15 @@ static int stub_probe(struct usb_device *udev)
                  * other matched drivers by the driver core.
                  * See driver_probe_device() in driver/base/dd.c
                  */
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
         }
  
         if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) {
                 dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n",
                          udev_busid);
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
         }
  
         if (!strcmp(udev->bus->bus_name, "vhci_hcd")) {
@@ -331,13 +333,16 @@ static int stub_probe(struct usb_device *udev)
                         "%s is attached on vhci_hcd... skip!\n",
                         udev_busid);
  
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
         }
  
         /* ok, this is my device */
         sdev = stub_device_alloc(udev);
-       if (!sdev)
-               return -ENOMEM;
+       if (!sdev) {
+               rc = -ENOMEM;
+               goto call_put_busid_priv;
+       }
  
         dev_info(&udev->dev,
                 "usbip-host: register new device (bus %u dev %u)\n",
@@ -369,7 +374,9 @@ static int stub_probe(struct usb_device *udev)
         }
         busid_priv->status = STUB_BUSID_ALLOC;
  
-       return 0;
+       rc = 0;
+       goto call_put_busid_priv;
+
  err_files:
         usb_hub_release_port(udev->parent, udev->portnum,
                              (struct usb_dev_state *) udev);
@@ -379,6 +386,9 @@ err_port:
  
         busid_priv->sdev = NULL;
         stub_device_free(sdev);
+
+call_put_busid_priv:
+       put_busid_priv(busid_priv);
         return rc;
  }
  
@@ -404,7 +414,7 @@ static void stub_disconnect(struct usb_device *udev)
         struct bus_id_priv *busid_priv;
         int rc;
  
-       dev_dbg(&udev->dev, "Enter\n");
+       dev_dbg(&udev->dev, "Enter disconnect\n");
  
         busid_priv = get_busid_priv(udev_busid);
         if (!busid_priv) {
@@ -417,7 +427,7 @@ static void stub_disconnect(struct usb_device *udev)
         /* get stub_device */
         if (!sdev) {
                 dev_err(&udev->dev, "could not get device");
-               return;
+               goto call_put_busid_priv;
         }
  
         dev_set_drvdata(&udev->dev, NULL);
@@ -432,12 +442,12 @@ static void stub_disconnect(struct usb_device *udev)
                                   (struct usb_dev_state *) udev);
         if (rc) {
                 dev_dbg(&udev->dev, "unable to release port\n");
-               return;
+               goto call_put_busid_priv;
         }
  
         /* If usb reset is called from event handler */
         if (usbip_in_eh(current))
-               return;
+               goto call_put_busid_priv;
  
         /* shutdown the current connection */
         shutdown_busid(busid_priv);
@@ -448,12 +458,11 @@ static void stub_disconnect(struct usb_device *udev)
         busid_priv->sdev = NULL;
         stub_device_free(sdev);
  
-       if (busid_priv->status == STUB_BUSID_ALLOC) {
+       if (busid_priv->status == STUB_BUSID_ALLOC)
                 busid_priv->status = STUB_BUSID_ADDED;
-       } else {
-               busid_priv->status = STUB_BUSID_OTHER;
-               del_match_busid((char *)udev_busid);
-       }
+
+call_put_busid_priv:
+       put_busid_priv(busid_priv);
  }
  
  #ifdef CONFIG_PM
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c

index d41d0cdeec0f2a1861d8d42385a9719789cab558..bf8a5feb0ee937a35ccd7a478e7c3dd01770b4e1 100644 (file)
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -14,6 +14,7 @@
  #define DRIVER_DESC "USB/IP Host Driver"
  
  struct kmem_cache *stub_priv_cache;
+
  /*
   * busid_tables defines matching busids that usbip can grab. A user can change
   * dynamically what device is locally used and what device is exported to a
@@ -25,6 +26,8 @@ static spinlock_t busid_table_lock;
  
  static void init_busid_table(void)
  {
+       int i;
+
         /*
          * This also sets the bus_table[i].status to
          * STUB_BUSID_OTHER, which is 0.
@@ -32,6 +35,9 @@ static void init_busid_table(void)
         memset(busid_table, 0, sizeof(busid_table));
  
         spin_lock_init(&busid_table_lock);
+
+       for (i = 0; i < MAX_BUSID; i++)
+               spin_lock_init(&busid_table[i].busid_lock);
  }
  
  /*
@@ -43,15 +49,20 @@ static int get_busid_idx(const char *busid)
         int i;
         int idx = -1;
  
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                 if (busid_table[i].name[0])
                         if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) {
                                 idx = i;
+                               spin_unlock(&busid_table[i].busid_lock);
                                 break;
                         }
+               spin_unlock(&busid_table[i].busid_lock);
+       }
         return idx;
  }
  
+/* Returns holding busid_lock. Should call put_busid_priv() to unlock */
  struct bus_id_priv *get_busid_priv(const char *busid)
  {
         int idx;
@@ -59,13 +70,22 @@ struct bus_id_priv *get_busid_priv(const char *busid)
  
         spin_lock(&busid_table_lock);
         idx = get_busid_idx(busid);
-       if (idx >= 0)
+       if (idx >= 0) {
                 bid = &(busid_table[idx]);
+               /* get busid_lock before returning */
+               spin_lock(&bid->busid_lock);
+       }
         spin_unlock(&busid_table_lock);
  
         return bid;
  }
  
+void put_busid_priv(struct bus_id_priv *bid)
+{
+       if (bid)
+               spin_unlock(&bid->busid_lock);
+}
+
  static int add_match_busid(char *busid)
  {
         int i;
@@ -78,15 +98,19 @@ static int add_match_busid(char *busid)
                 goto out;
         }
  
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                 if (!busid_table[i].name[0]) {
                         strlcpy(busid_table[i].name, busid, BUSID_SIZE);
                         if ((busid_table[i].status != STUB_BUSID_ALLOC) &&
                             (busid_table[i].status != STUB_BUSID_REMOV))
                                 busid_table[i].status = STUB_BUSID_ADDED;
                         ret = 0;
+                       spin_unlock(&busid_table[i].busid_lock);
                         break;
                 }
+               spin_unlock(&busid_table[i].busid_lock);
+       }
  
  out:
         spin_unlock(&busid_table_lock);
@@ -107,6 +131,8 @@ int del_match_busid(char *busid)
         /* found */
         ret = 0;
  
+       spin_lock(&busid_table[idx].busid_lock);
+
         if (busid_table[idx].status == STUB_BUSID_OTHER)
                 memset(busid_table[idx].name, 0, BUSID_SIZE);
  
@@ -114,6 +140,7 @@ int del_match_busid(char *busid)
             (busid_table[idx].status != STUB_BUSID_ADDED))
                 busid_table[idx].status = STUB_BUSID_REMOV;
  
+       spin_unlock(&busid_table[idx].busid_lock);
  out:
         spin_unlock(&busid_table_lock);
  
@@ -126,9 +153,12 @@ static ssize_t match_busid_show(struct device_driver *drv, char *buf)
         char *out = buf;
  
         spin_lock(&busid_table_lock);
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                 if (busid_table[i].name[0])
                         out += sprintf(out, "%s ", busid_table[i].name);
+               spin_unlock(&busid_table[i].busid_lock);
+       }
         spin_unlock(&busid_table_lock);
         out += sprintf(out, "\n");
  
@@ -169,6 +199,51 @@ static ssize_t match_busid_store(struct device_driver *dev, const char *buf,
  }
  static DRIVER_ATTR_RW(match_busid);
  
+static int do_rebind(char *busid, struct bus_id_priv *busid_priv)
+{
+       int ret;
+
+       /* device_attach() callers should hold parent lock for USB */
+       if (busid_priv->udev->dev.parent)
+               device_lock(busid_priv->udev->dev.parent);
+       ret = device_attach(&busid_priv->udev->dev);
+       if (busid_priv->udev->dev.parent)
+               device_unlock(busid_priv->udev->dev.parent);
+       if (ret < 0) {
+               dev_err(&busid_priv->udev->dev, "rebind failed\n");
+               return ret;
+       }
+       return 0;
+}
+
+static void stub_device_rebind(void)
+{
+#if IS_MODULE(CONFIG_USBIP_HOST)
+       struct bus_id_priv *busid_priv;
+       int i;
+
+       /* update status to STUB_BUSID_OTHER so probe ignores the device */
+       spin_lock(&busid_table_lock);
+       for (i = 0; i < MAX_BUSID; i++) {
+               if (busid_table[i].name[0] &&
+                   busid_table[i].shutdown_busid) {
+                       busid_priv = &(busid_table[i]);
+                       busid_priv->status = STUB_BUSID_OTHER;
+               }
+       }
+       spin_unlock(&busid_table_lock);
+
+       /* now run rebind - no need to hold locks. driver files are removed */
+       for (i = 0; i < MAX_BUSID; i++) {
+               if (busid_table[i].name[0] &&
+                   busid_table[i].shutdown_busid) {
+                       busid_priv = &(busid_table[i]);
+                       do_rebind(busid_table[i].name, busid_priv);
+               }
+       }
+#endif
+}
+
  static ssize_t rebind_store(struct device_driver *dev, const char *buf,
                                  size_t count)
  {
@@ -186,16 +261,17 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf,
         if (!bid)
                 return -ENODEV;
  
-       /* device_attach() callers should hold parent lock for USB */
-       if (bid->udev->dev.parent)
-               device_lock(bid->udev->dev.parent);
-       ret = device_attach(&bid->udev->dev);
-       if (bid->udev->dev.parent)
-               device_unlock(bid->udev->dev.parent);
-       if (ret < 0) {
-               dev_err(&bid->udev->dev, "rebind failed\n");
+       /* mark the device for deletion so probe ignores it during rescan */
+       bid->status = STUB_BUSID_OTHER;
+       /* release the busid lock */
+       put_busid_priv(bid);
+
+       ret = do_rebind((char *) buf, bid);
+       if (ret < 0)
                 return ret;
-       }
+
+       /* delete device from busid_table */
+       del_match_busid((char *) buf);
  
         return count;
  }
@@ -317,6 +393,9 @@ static void __exit usbip_host_exit(void)
          */
         usb_deregister_device_driver(&stub_driver);
  
+       /* initiate scan to attach devices */
+       stub_device_rebind();
+
         kmem_cache_destroy(stub_priv_cache);
  }
  
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c

index f3bd8e9412245919ba3a73fb9d94677c280d1427..f0be5f35ab28f2a009a0e56d75bbb0c46fbcb4d9 100644 (file)
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -981,6 +981,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
  {
         int ret = 0;
  
+       mutex_lock(&dev->mutex);
         vhost_dev_lock_vqs(dev);
         switch (msg->type) {
         case VHOST_IOTLB_UPDATE:
@@ -1016,6 +1017,8 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
         }
  
         vhost_dev_unlock_vqs(dev);
+       mutex_unlock(&dev->mutex);
+
         return ret;
  }
  ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c

index e1c60899fdbc8de0b530078bb310f28bea33dd2c..a6f9ba85dc4ba8df4dd9519b317664b9e2ece94f 100644 (file)
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -351,7 +351,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
          * physical address */
         phys = xen_bus_to_phys(dev_addr);
  
-       if (((dev_addr + size - 1 > dma_mask)) ||
+       if (((dev_addr + size - 1 <= dma_mask)) ||
             range_straddles_page_boundary(phys, size))
                 xen_destroy_contiguous_region(phys, order);
  
diff --git a/fs/Kconfig b/fs/Kconfig

index bc821a86d965196e0952f452f23f2fa4bda8ee20..1e050e012eb97088146e9f18a0664385fc94fab2 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
         bool "Direct Access (DAX) support"
         depends on MMU
         depends on !(ARM || MIPS || SPARC)
+       select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
         select FS_IOMAP
         select DAX
         help
diff --git a/fs/affs/namei.c b/fs/affs/namei.c

index d8aa0ae3d037c8b91ab13f7d3f6d209bd2326797..41c5749f4db78078b2e1d5b65c31aa0b2c7724c1 100644 (file)
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
         struct super_block *sb = dir->i_sb;
         struct buffer_head *bh;
         struct inode *inode = NULL;
+       struct dentry *res;
  
         pr_debug("%s(\"%pd\")\n", __func__, dentry);
  
         affs_lock_dir(dir);
         bh = affs_find_entry(dir, dentry);
-       affs_unlock_dir(dir);
-       if (IS_ERR(bh))
+       if (IS_ERR(bh)) {
+               affs_unlock_dir(dir);
                 return ERR_CAST(bh);
+       }
         if (bh) {
                 u32 ino = bh->b_blocknr;
  
@@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
                 }
                 affs_brelse(bh);
                 inode = affs_iget(sb, ino);
-               if (IS_ERR(inode))
-                       return ERR_CAST(inode);
         }
-       d_add(dentry, inode);
-       return NULL;
+       res = d_splice_alias(inode, dentry);
+       if (!IS_ERR_OR_NULL(res))
+               res->d_fsdata = dentry->d_fsdata;
+       affs_unlock_dir(dir);
+       return res;
  }
  
  int
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c

index 3bedfed608a22eb77b1addd59f7a263f1bd722b8..7587fb665ff189b088b1a9bb9e8c1b7c7005c083 100644 (file)
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
         p = text;
         do {
                 struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
-               char tdelim = delim;
+               const char *q, *stop;
  
                 if (*p == delim) {
                         p++;
@@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
  
                 if (*p == '[') {
                         p++;
-                       tdelim = ']';
+                       q = memchr(p, ']', end - p);
+               } else {
+                       for (q = p; q < end; q++)
+                               if (*q == '+' || *q == delim)
+                                       break;
                 }
  
-               if (in4_pton(p, end - p,
+               if (in4_pton(p, q - p,
                              (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-                            tdelim, &p)) {
+                            -1, &stop)) {
                         srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
                         srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
                         srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-               } else if (in6_pton(p, end - p,
+               } else if (in6_pton(p, q - p,
                                     srx->transport.sin6.sin6_addr.s6_addr,
-                                   tdelim, &p)) {
+                                   -1, &stop)) {
                         /* Nothing to do */
                 } else {
                         goto bad_address;
                 }
  
-               if (tdelim == ']') {
-                       if (p == end || *p != ']')
-                               goto bad_address;
+               if (stop != q)
+                       goto bad_address;
+
+               p = q;
+               if (q < end && *q == ']')
                         p++;
-               }
  
                 if (p < end) {
                         if (*p == '+') {
diff --git a/fs/afs/callback.c b/fs/afs/callback.c

index abd9a84f4e88a6dbded5eb02aa75e46a239a4c56..571437dcb252842578b92a6f3b5b60a574703b94 100644 (file)
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -23,36 +23,55 @@
  /*
   * Set up an interest-in-callbacks record for a volume on a server and
   * register it with the server.
- * - Called with volume->server_sem held.
+ * - Called with vnode->io_lock held.
   */
  int afs_register_server_cb_interest(struct afs_vnode *vnode,
-                                   struct afs_server_entry *entry)
+                                   struct afs_server_list *slist,
+                                   unsigned int index)
  {
-       struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+       struct afs_server_entry *entry = &slist->servers[index];
+       struct afs_cb_interest *cbi, *vcbi, *new, *old;
         struct afs_server *server = entry->server;
  
  again:
+       if (vnode->cb_interest &&
+           likely(vnode->cb_interest == entry->cb_interest))
+               return 0;
+
+       read_lock(&slist->lock);
+       cbi = afs_get_cb_interest(entry->cb_interest);
+       read_unlock(&slist->lock);
+
         vcbi = vnode->cb_interest;
         if (vcbi) {
-               if (vcbi == cbi)
+               if (vcbi == cbi) {
+                       afs_put_cb_interest(afs_v2net(vnode), cbi);
                         return 0;
+               }
  
+               /* Use a new interest in the server list for the same server
+                * rather than an old one that's still attached to a vnode.
+                */
                 if (cbi && vcbi->server == cbi->server) {
                         write_seqlock(&vnode->cb_lock);
-                       vnode->cb_interest = afs_get_cb_interest(cbi);
+                       old = vnode->cb_interest;
+                       vnode->cb_interest = cbi;
                         write_sequnlock(&vnode->cb_lock);
-                       afs_put_cb_interest(afs_v2net(vnode), cbi);
+                       afs_put_cb_interest(afs_v2net(vnode), old);
                         return 0;
                 }
  
+               /* Re-use the one attached to the vnode. */
                 if (!cbi && vcbi->server == server) {
-                       afs_get_cb_interest(vcbi);
-                       x = cmpxchg(&entry->cb_interest, cbi, vcbi);
-                       if (x != cbi) {
-                               cbi = x;
-                               afs_put_cb_interest(afs_v2net(vnode), vcbi);
+                       write_lock(&slist->lock);
+                       if (entry->cb_interest) {
+                               write_unlock(&slist->lock);
+                               afs_put_cb_interest(afs_v2net(vnode), cbi);
                                 goto again;
                         }
+
+                       entry->cb_interest = cbi;
+                       write_unlock(&slist->lock);
                         return 0;
                 }
         }
@@ -72,13 +91,16 @@ again:
                 list_add_tail(&new->cb_link, &server->cb_interests);
                 write_unlock(&server->cb_break_lock);
  
-               x = cmpxchg(&entry->cb_interest, cbi, new);
-               if (x == cbi) {
+               write_lock(&slist->lock);
+               if (!entry->cb_interest) {
+                       entry->cb_interest = afs_get_cb_interest(new);
                         cbi = new;
+                       new = NULL;
                 } else {
-                       cbi = x;
-                       afs_put_cb_interest(afs_v2net(vnode), new);
+                       cbi = afs_get_cb_interest(entry->cb_interest);
                 }
+               write_unlock(&slist->lock);
+               afs_put_cb_interest(afs_v2net(vnode), new);
         }
  
         ASSERT(cbi);
@@ -88,11 +110,14 @@ again:
          */
         write_seqlock(&vnode->cb_lock);
  
-       vnode->cb_interest = afs_get_cb_interest(cbi);
+       old = vnode->cb_interest;
+       vnode->cb_interest = cbi;
         vnode->cb_s_break = cbi->server->cb_s_break;
+       vnode->cb_v_break = vnode->volume->cb_v_break;
         clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
  
         write_sequnlock(&vnode->cb_lock);
+       afs_put_cb_interest(afs_v2net(vnode), old);
         return 0;
  }
  
@@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server,
                 if (cbi->vid != fid->vid)
                         continue;
  
-               data.volume = NULL;
-               data.fid = *fid;
-               inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
-               if (inode) {
-                       vnode = AFS_FS_I(inode);
-                       afs_break_callback(vnode);
-                       iput(inode);
+               if (fid->vnode == 0 && fid->unique == 0) {
+                       /* The callback break applies to an entire volume. */
+                       struct afs_super_info *as = AFS_FS_S(cbi->sb);
+                       struct afs_volume *volume = as->volume;
+
+                       write_lock(&volume->cb_break_lock);
+                       volume->cb_v_break++;
+                       write_unlock(&volume->cb_break_lock);
+               } else {
+                       data.volume = NULL;
+                       data.fid = *fid;
+                       inode = ilookup5_nowait(cbi->sb, fid->vnode,
+                                               afs_iget5_test, &data);
+                       if (inode) {
+                               vnode = AFS_FS_I(inode);
+                               afs_break_callback(vnode);
+                               iput(inode);
+                       }
                 }
         }
  
@@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
         ASSERT(server != NULL);
         ASSERTCMP(count, <=, AFSCBMAX);
  
+       /* TODO: Sort the callback break list by volume ID */
+
         for (; count > 0; callbacks++, count--) {
                 _debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
                        callbacks->fid.vid,
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c

index 357de908df3ab258c6b0d2d3fbf8cdf94e7b1575..c332c95a6940f50fe3a9e220ebbb6928b0f5d27c 100644 (file)
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call)
  }
  
  /*
- * clean up a cache manager call
+ * Clean up a cache manager call.
   */
  static void afs_cm_destructor(struct afs_call *call)
  {
-       _enter("");
-
-       /* Break the callbacks here so that we do it after the final ACK is
-        * received.  The step number here must match the final number in
-        * afs_deliver_cb_callback().
-        */
-       if (call->unmarshall == 5) {
-               ASSERT(call->cm_server && call->count && call->request);
-               afs_break_callbacks(call->cm_server, call->count, call->request);
-       }
-
         kfree(call->buffer);
         call->buffer = NULL;
  }
@@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
  
         _enter("");
  
-       /* be sure to send the reply *before* attempting to spam the AFS server
-        * with FSFetchStatus requests on the vnodes with broken callbacks lest
-        * the AFS server get into a vicious cycle of trying to break further
-        * callbacks because it hadn't received completion of the CBCallBack op
-        * yet */
-       afs_send_empty_reply(call);
+       /* We need to break the callbacks before sending the reply as the
+        * server holds up change visibility till it receives our reply so as
+        * to maintain cache coherency.
+        */
+       if (call->cm_server)
+               afs_break_callbacks(call->cm_server, call->count, call->request);
  
-       afs_break_callbacks(call->cm_server, call->count, call->request);
+       afs_send_empty_reply(call);
         afs_put_call(call);
         _leave("");
  }
@@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
  {
         struct afs_callback_break *cb;
         struct sockaddr_rxrpc srx;
-       struct afs_server *server;
         __be32 *bp;
         int ret, loop;
  
@@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
  
                 call->offset = 0;
                 call->unmarshall++;
-
-               /* Record that the message was unmarshalled successfully so
-                * that the call destructor can know do the callback breaking
-                * work, even if the final ACK isn't received.
-                *
-                * If the step number changes, then afs_cm_destructor() must be
-                * updated also.
-                */
-               call->unmarshall++;
         case 5:
                 break;
         }
@@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
         /* we'll need the file server record as that tells us which set of
          * vnodes to operate upon */
         rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       call->cm_server = afs_find_server(call->net, &srx);
+       if (!call->cm_server)
+               trace_afs_cm_no_server(call, &srx);
  
         return afs_queue_call_work(call);
  }
@@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
  
         _enter("{%p}", call->cm_server);
  
-       afs_init_callback_state(call->cm_server);
+       if (call->cm_server)
+               afs_init_callback_state(call->cm_server);
         afs_send_empty_reply(call);
         afs_put_call(call);
         _leave("");
@@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
  static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
  {
         struct sockaddr_rxrpc srx;
-       struct afs_server *server;
         int ret;
  
         _enter("");
@@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
  
         /* we'll need the file server record as that tells us which set of
          * vnodes to operate upon */
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       call->cm_server = afs_find_server(call->net, &srx);
+       if (!call->cm_server)
+               trace_afs_cm_no_server(call, &srx);
  
         return afs_queue_call_work(call);
  }
@@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
   */
  static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
  {
-       struct sockaddr_rxrpc srx;
-       struct afs_server *server;
         struct afs_uuid *r;
         unsigned loop;
         __be32 *b;
@@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
  
         /* we'll need the file server record as that tells us which set of
          * vnodes to operate upon */
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       rcu_read_lock();
+       call->cm_server = afs_find_server_by_uuid(call->net, call->request);
+       rcu_read_unlock();
+       if (!call->cm_server)
+               trace_afs_cm_no_server_u(call, call->request);
  
         return afs_queue_call_work(call);
  }
diff --git a/fs/afs/dir.c b/fs/afs/dir.c

index 5889f70d4d273a8622aefbd32148e8ff8e1b7d55..7d623008157ffaf9c77ac6a614a0e329f769c35e 100644 (file)
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
   * get reclaimed during the iteration.
   */
  static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+       __acquires(&dvnode->validate_lock)
  {
         struct afs_read *req;
         loff_t i_size;
@@ -261,18 +262,21 @@ retry:
         /* If we're going to reload, we need to lock all the pages to prevent
          * races.
          */
-       if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
-               ret = -ERESTARTSYS;
-               for (i = 0; i < req->nr_pages; i++)
-                       if (lock_page_killable(req->pages[i]) < 0)
-                               goto error_unlock;
+       ret = -ERESTARTSYS;
+       if (down_read_killable(&dvnode->validate_lock) < 0)
+               goto error;
  
-               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-                       goto success;
+       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+               goto success;
+
+       up_read(&dvnode->validate_lock);
+       if (down_write_killable(&dvnode->validate_lock) < 0)
+               goto error;
  
+       if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
                 ret = afs_fetch_data(dvnode, key, req);
                 if (ret < 0)
-                       goto error_unlock_all;
+                       goto error_unlock;
  
                 task_io_account_read(PAGE_SIZE * req->nr_pages);
  
@@ -284,33 +288,26 @@ retry:
                 for (i = 0; i < req->nr_pages; i++)
                         if (!afs_dir_check_page(dvnode, req->pages[i],
                                                 req->actual_len))
-                               goto error_unlock_all;
+                               goto error_unlock;
  
                 // TODO: Trim excess pages
  
                 set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
         }
  
+       downgrade_write(&dvnode->validate_lock);
  success:
-       i = req->nr_pages;
-       while (i > 0)
-               unlock_page(req->pages[--i]);
         return req;
  
-error_unlock_all:
-       i = req->nr_pages;
  error_unlock:
-       while (i > 0)
-               unlock_page(req->pages[--i]);
+       up_write(&dvnode->validate_lock);
  error:
         afs_put_read(req);
         _leave(" = %d", ret);
         return ERR_PTR(ret);
  
  content_has_grown:
-       i = req->nr_pages;
-       while (i > 0)
-               unlock_page(req->pages[--i]);
+       up_write(&dvnode->validate_lock);
         afs_put_read(req);
         goto retry;
  }
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
         }
  
  out:
+       up_read(&dvnode->validate_lock);
         afs_put_read(req);
         _leave(" = %d", ret);
         return ret;
@@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                         afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
                                       &newfid, &newstatus, &newcb);
                 }
@@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                         afs_fs_remove(&fc, dentry->d_name.name, true,
                                       data_version);
                 }
@@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                         afs_fs_remove(&fc, dentry->d_name.name, false,
                                       data_version);
                 }
@@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                         afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
                                       &newfid, &newstatus, &newcb);
                 }
@@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
                 }
  
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-                       fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+                       fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
                         afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
                 }
  
@@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                         afs_fs_symlink(&fc, dentry->d_name.name,
                                        content, data_version,
                                        &newfid, &newstatus);
@@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         }
                 }
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
-                       fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
+                       fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
                         afs_fs_rename(&fc, old_dentry->d_name.name,
                                       new_dvnode, new_dentry->d_name.name,
                                       orig_data_version, new_data_version);
diff --git a/fs/afs/file.c b/fs/afs/file.c

index c24c08016dd96e2309ce9e8052d643b2af6da2a6..7d4f26198573d7f6a4dffb7ff4a82ee0f8fbb573 100644 (file)
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_fetch_data(&fc, desc);
                 }
  
diff --git a/fs/afs/flock.c b/fs/afs/flock.c

index 7a0e017070ecede45ed1eb622ac89f6f5a33a5d1..dc62d15a964b8809d7028d33a393c41b6963242b 100644 (file)
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_set_lock(&fc, type);
                 }
  
@@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_current_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_extend_lock(&fc);
                 }
  
@@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_current_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_release_lock(&fc);
                 }
  
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c

index efacdb7c1dee59ad78e0c600e73468ea90285638..b273e1d60478c3c9f89a84a7b85fb6325a9a2c84 100644 (file)
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                                      struct afs_read *read_req)
  {
         const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+       bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
         u64 data_version, size;
         u32 type, abort_code;
         u8 flags = 0;
@@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
         if (vnode)
                 write_seqlock(&vnode->cb_lock);
  
+       abort_code = ntohl(xdr->abort_code);
+
         if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+               if (xdr->if_version == htonl(0) &&
+                   abort_code != 0 &&
+                   inline_error) {
+                       /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+                        * whereby it doesn't set the interface version in the error
+                        * case.
+                        */
+                       status->abort_code = abort_code;
+                       ret = 0;
+                       goto out;
+               }
+
                 pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
                 goto bad;
         }
  
+       if (abort_code != 0 && inline_error) {
+               status->abort_code = abort_code;
+               ret = 0;
+               goto out;
+       }
+
         type = ntohl(xdr->type);
-       abort_code = ntohl(xdr->abort_code);
         switch (type) {
         case AFS_FTYPE_FILE:
         case AFS_FTYPE_DIR:
@@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                 }
                 status->type = type;
                 break;
-       case AFS_FTYPE_INVALID:
-               if (abort_code != 0) {
-                       status->abort_code = abort_code;
-                       ret = 0;
-                       goto out;
-               }
-               /* Fall through */
         default:
                 goto bad;
         }
@@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
  
         write_seqlock(&vnode->cb_lock);
  
-       if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+       if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
                 vnode->cb_version       = ntohl(*bp++);
                 cb_expiry               = ntohl(*bp++);
                 vnode->cb_type          = ntohl(*bp++);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c

index 06194cfe9724ca8bfa9ce7e0cad9d44bbe06a262..479b7fdda1244f5bf210694e275826cba99b5553 100644 (file)
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_fetch_file_status(&fc, NULL, new_inode);
                 }
  
@@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
         read_seqlock_excl(&vnode->cb_lock);
  
         if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-               if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+               if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
+                   vnode->cb_v_break != vnode->volume->cb_v_break) {
                         vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+                       vnode->cb_v_break = vnode->volume->cb_v_break;
+                       valid = false;
                 } else if (vnode->status.type == AFS_FTYPE_DIR &&
                            test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
                            vnode->cb_expires_at - 10 > now) {
-                               valid = true;
+                       valid = true;
                 } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
                            vnode->cb_expires_at - 10 > now) {
-                               valid = true;
+                       valid = true;
                 }
         } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
                 valid = true;
@@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
         if (valid)
                 goto valid;
  
-       mutex_lock(&vnode->validate_lock);
+       down_write(&vnode->validate_lock);
  
         /* if the promise has expired, we need to check the server again to get
          * a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
          * different */
         if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
                 afs_zap_data(vnode);
-       mutex_unlock(&vnode->validate_lock);
+       up_write(&vnode->validate_lock);
  valid:
         _leave(" = 0");
         return 0;
  
  error_unlock:
-       mutex_unlock(&vnode->validate_lock);
+       up_write(&vnode->validate_lock);
         _leave(" = %d", ret);
         return ret;
  }
@@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_setattr(&fc, attr);
                 }
  
diff --git a/fs/afs/internal.h b/fs/afs/internal.h

index f8086ec95e24161eb9b9900745275e1cc4c1cfc6..e3f8a46663dbade0149d44e0d543bb7f171170a7 100644 (file)
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -396,6 +396,7 @@ struct afs_server {
  #define AFS_SERVER_FL_PROBED   5               /* The fileserver has been probed */
  #define AFS_SERVER_FL_PROBING  6               /* Fileserver is being probed */
  #define AFS_SERVER_FL_NO_IBULK 7               /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8            /* May have callbacks on this fileserver */
         atomic_t                usage;
         u32                     addr_version;   /* Address list version */
  
@@ -433,6 +434,7 @@ struct afs_server_list {
         unsigned short          index;          /* Server currently in use */
         unsigned short          vnovol_mask;    /* Servers to be skipped due to VNOVOL */
         unsigned int            seq;            /* Set to ->servers_seq when installed */
+       rwlock_t                lock;
         struct afs_server_entry servers[];
  };
  
@@ -459,6 +461,9 @@ struct afs_volume {
         rwlock_t                servers_lock;   /* Lock for ->servers */
         unsigned int            servers_seq;    /* Incremented each time ->servers changes */
  
+       unsigned                cb_v_break;     /* Break-everything counter. */
+       rwlock_t                cb_break_lock;
+
         afs_voltype_t           type;           /* type of volume */
         short                   error;
         char                    type_force;     /* force volume type (suppress R/O -> R/W) */
@@ -494,7 +499,7 @@ struct afs_vnode {
  #endif
         struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
         struct mutex            io_lock;        /* Lock for serialising I/O on this mutex */
-       struct mutex            validate_lock;  /* lock for validating this vnode */
+       struct rw_semaphore     validate_lock;  /* lock for validating this vnode */
         spinlock_t              wb_lock;        /* lock for wb_keys */
         spinlock_t              lock;           /* waitqueue/flags lock */
         unsigned long           flags;
@@ -519,6 +524,7 @@ struct afs_vnode {
         /* outstanding callback notification on this file */
         struct afs_cb_interest  *cb_interest;   /* Server on which this resides */
         unsigned int            cb_s_break;     /* Mass break counter on ->server */
+       unsigned int            cb_v_break;     /* Mass break counter on ->volume */
         unsigned int            cb_break;       /* Break counter on vnode */
         seqlock_t               cb_lock;        /* Lock for ->cb_interest, ->status, ->cb_*break */
  
@@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *);
  extern void afs_break_callback(struct afs_vnode *);
  extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
  
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern int afs_register_server_cb_interest(struct afs_vnode *,
+                                          struct afs_server_list *, unsigned int);
  extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
  extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
  
  static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
  {
-       refcount_inc(&cbi->usage);
+       if (cbi)
+               refcount_inc(&cbi->usage);
         return cbi;
  }
  
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+       return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+}
+
+static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
+                                           struct afs_cb_interest *cbi)
+{
+       return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+}
+
  /*
   * cell.c
   */
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c

index ac0feac9d7468cfeb48b4161ed70961b2088b663..e065bc0768e6a5068ef762e3d3967fc54d50e2b2 100644 (file)
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                          */
                         if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
                                 fc->ac.error = -EREMOTEIO;
-                               goto failed;
+                               goto next_server;
                         }
  
                         write_lock(&vnode->volume->servers_lock);
@@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                          */
                         if (vnode->volume->servers == fc->server_list) {
                                 fc->ac.error = -EREMOTEIO;
-                               goto failed;
+                               goto next_server;
                         }
  
                         /* Try again */
@@ -350,8 +350,8 @@ use_server:
          * break request before we've finished decoding the reply and
          * installing the vnode.
          */
-       fc->ac.error = afs_register_server_cb_interest(
-               vnode, &fc->server_list->servers[fc->index]);
+       fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
+                                                      fc->index);
         if (fc->ac.error < 0)
                 goto failed;
  
@@ -369,8 +369,16 @@ use_server:
         if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
                 fc->ac.alist = afs_get_addrlist(alist);
  
-               if (!afs_probe_fileserver(fc))
-                       goto failed;
+               if (!afs_probe_fileserver(fc)) {
+                       switch (fc->ac.error) {
+                       case -ENOMEM:
+                       case -ERESTARTSYS:
+                       case -EINTR:
+                               goto failed;
+                       default:
+                               goto next_server;
+                       }
+               }
         }
  
         if (!fc->ac.alist)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c

index 5c6263972ec9a5651a28fd7ea7c80398e737a090..08735948f15d4caec78be59d5e1c4623591e92d4 100644 (file)
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net)
  {
         struct sockaddr_rxrpc srx;
         struct socket *socket;
+       unsigned int min_level;
         int ret;
  
         _enter("");
@@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net)
         srx.transport.sin6.sin6_family  = AF_INET6;
         srx.transport.sin6.sin6_port    = htons(AFS_CM_PORT);
  
+       min_level = RXRPC_SECURITY_ENCRYPT;
+       ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+                               (void *)&min_level, sizeof(min_level));
+       if (ret < 0)
+               goto error_2;
+
         ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
         if (ret == -EADDRINUSE) {
                 srx.transport.sin6.sin6_port = 0;
@@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call)
                 state = READ_ONCE(call->state);
                 switch (ret) {
                 case 0:
-                       if (state == AFS_CALL_CL_PROC_REPLY)
+                       if (state == AFS_CALL_CL_PROC_REPLY) {
+                               if (call->cbi)
+                                       set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+                                               &call->cbi->server->flags);
                                 goto call_complete;
+                       }
                         ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
                         goto done;
                 case -EINPROGRESS:
@@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call)
                 case -ECONNABORTED:
                         ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
                         goto done;
-               case -ENOTCONN:
-                       abort_code = RX_CALL_DEAD;
-                       rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-                                               abort_code, ret, "KNC");
-                       goto local_abort;
                 case -ENOTSUPP:
                         abort_code = RXGEN_OPCODE;
                         rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
diff --git a/fs/afs/security.c b/fs/afs/security.c

index cea2fff313dc6e3a1efb3c3030a8a59f92136933..1992b0ffa54374da648efbd7ab5b9a87ae3c1889 100644 (file)
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                                         break;
                                 }
  
-                               if (cb_break != (vnode->cb_break +
-                                                vnode->cb_interest->server->cb_s_break)) {
+                               if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
                                         changed = true;
                                         break;
                                 }
@@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                 }
         }
  
-       if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
+       if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
                 goto someone_else_changed_it;
  
         /* We need a ref on any permits list we want to copy as we'll have to
@@ -257,7 +256,7 @@ found:
  
         spin_lock(&vnode->lock);
         zap = rcu_access_pointer(vnode->permit_cache);
-       if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+       if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
             zap == permits)
                 rcu_assign_pointer(vnode->permit_cache, replacement);
         else
diff --git a/fs/afs/server.c b/fs/afs/server.c

index 629c74986cff4ad1535fcc3c814b6150011c5dec..3af4625e2f8cc7049185048a602826c53a169c03 100644 (file)
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
                                                               sizeof(struct in6_addr));
                                         if (diff == 0)
                                                 goto found;
-                                       if (diff < 0) {
-                                               // TODO: Sort the list
-                                               //if (i == alist->nr_ipv4)
-                                               //      goto not_found;
-                                               break;
-                                       }
                                 }
                         }
                 } else {
@@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
                                         if (diff == 0)
                                                 goto found;
-                                       if (diff < 0) {
-                                               // TODO: Sort the list
-                                               //if (i == 0)
-                                               //      goto not_found;
-                                               break;
-                                       }
                                 }
                         }
                 }
  
-       //not_found:
                 server = NULL;
         found:
                 if (server && !atomic_inc_not_zero(&server->usage))
@@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
         struct afs_addr_cursor ac = {
                 .alist  = alist,
-               .addr   = &alist->addrs[0],
                 .start  = alist->index,
-               .index  = alist->index,
+               .index  = 0,
+               .addr   = &alist->addrs[alist->index],
                 .error  = 0,
         };
         _enter("%p", server);
  
-       afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+       if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+               afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+
         call_rcu(&server->rcu, afs_server_rcu);
         afs_dec_servers_outstanding(net);
  }
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c

index 0f8dc4c8f07c43b3efb0f899b8a40b8f1a697e6c..8a5760aa583213a608d686b60f0782ecbed648e1 100644 (file)
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
                 goto error;
  
         refcount_set(&slist->usage, 1);
+       rwlock_init(&slist->lock);
  
         /* Make sure a records exists for each server in the list. */
         for (i = 0; i < vldb->nr_servers; i++) {
@@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
                         goto error_2;
                 }
  
-               /* Insertion-sort by server pointer */
+               /* Insertion-sort by UUID */
                 for (j = 0; j < slist->nr_servers; j++)
-                       if (slist->servers[j].server >= server)
+                       if (memcmp(&slist->servers[j].server->uuid,
+                                  &server->uuid,
+                                  sizeof(server->uuid)) >= 0)
                                 break;
                 if (j < slist->nr_servers) {
                         if (slist->servers[j].server == server) {
diff --git a/fs/afs/super.c b/fs/afs/super.c

index 65081ec3c36e572c5822d756b86abb52ed757f90..9e5d7966621c4abaa5cdc51f278a1a05a7e6afac 100644 (file)
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
         memset(vnode, 0, sizeof(*vnode));
         inode_init_once(&vnode->vfs_inode);
         mutex_init(&vnode->io_lock);
-       mutex_init(&vnode->validate_lock);
+       init_rwsem(&vnode->validate_lock);
         spin_lock_init(&vnode->wb_lock);
         spin_lock_init(&vnode->lock);
         INIT_LIST_HEAD(&vnode->wb_keys);
@@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
         if (afs_begin_vnode_operation(&fc, vnode, key)) {
                 fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_get_volume_status(&fc, &vs);
                 }
  
diff --git a/fs/afs/write.c b/fs/afs/write.c

index c164698dc30481156eb9738e0c3f1b91d5ab5108..8b39e6ebb40bc17905cf2eca24b1eacadf933584 100644 (file)
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -351,7 +351,7 @@ found_key:
         ret = -ERESTARTSYS;
         if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
                 while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                         afs_fs_store_data(&fc, mapping, first, last, offset, to);
                 }
  
diff --git a/fs/aio.c b/fs/aio.c

index 88d7927ffbc61910c8c47948157fccfa8e6f10ed..8061d9787e547d305ea6953a092d61df933b332a 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1078,8 +1078,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
  
         ctx = rcu_dereference(table->table[id]);
         if (ctx && ctx->user_id == ctx_id) {
-               percpu_ref_get(&ctx->users);
-               ret = ctx;
+               if (percpu_ref_tryget_live(&ctx->users))
+                       ret = ctx;
         }
  out:
         rcu_read_unlock();
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index af2832aaeec50ec4ba524612dcc9c1a346d6ab31..4700b45344397b26e385e38821217f9b861daee6 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  
         if (ret == BEFS_BT_NOT_FOUND) {
                 befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
-               d_add(dentry, NULL);
-               return ERR_PTR(-ENOENT);
-
+               inode = NULL;
         } else if (ret != BEFS_OK || offset == 0) {
                 befs_error(sb, "<--- %s Error", __func__);
-               return ERR_PTR(-ENODATA);
+               inode = ERR_PTR(-ENODATA);
+       } else {
+               inode = befs_iget(dir->i_sb, (ino_t) offset);
         }
-
-       inode = befs_iget(dir->i_sb, (ino_t) offset);
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
-
-       d_add(dentry, inode);
-
         befs_debug(sb, "<--- %s", __func__);
  
-       return NULL;
+       return d_splice_alias(inode, dentry);
  }
  
  static int
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c

index 3fd44835b3869effbfe6ced6c3b8b373617698e5..8c68961925b1482517bfe2c96635f5ee1cc79694 100644 (file)
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
         if (p->reada != READA_NONE)
                 reada_for_search(fs_info, p, level, slot, key->objectid);
  
-       btrfs_release_path(p);
-
         ret = -EAGAIN;
-       tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+       tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
                               &first_key);
         if (!IS_ERR(tmp)) {
                 /*
@@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
         } else {
                 ret = PTR_ERR(tmp);
         }
+
+       btrfs_release_path(p);
         return ret;
  }
  
@@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
         down_read(&fs_info->commit_root_sem);
         left_level = btrfs_header_level(left_root->commit_root);
         left_root_level = left_level;
-       left_path->nodes[left_level] = left_root->commit_root;
+       left_path->nodes[left_level] =
+                       btrfs_clone_extent_buffer(left_root->commit_root);
+       if (!left_path->nodes[left_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
         extent_buffer_get(left_path->nodes[left_level]);
  
         right_level = btrfs_header_level(right_root->commit_root);
         right_root_level = right_level;
-       right_path->nodes[right_level] = right_root->commit_root;
+       right_path->nodes[right_level] =
+                       btrfs_clone_extent_buffer(right_root->commit_root);
+       if (!right_path->nodes[right_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
         extent_buffer_get(right_path->nodes[right_level]);
         up_read(&fs_info->commit_root_sem);
  
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index 2771cc56a622f50cdf834a291aa4f43a86798cb7..0d422c9908b8085f531a752a4bd6d5bf1b430e02 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3182,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                               u64 *orig_start, u64 *orig_block_len,
                               u64 *ram_bytes);
  
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode);
  struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
  int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
  int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 60caa68c3618d5a072f332be97e7db4ea7ba67b0..c3504b4d281b5cd76bb0861b781e228bcec4b3bb 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3818,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
         set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
  
         btrfs_free_qgroup_config(fs_info);
+       ASSERT(list_empty(&fs_info->delalloc_roots));
  
         if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
                 btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -4125,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
  
  static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
  {
+       /* cleanup FS via transaction */
+       btrfs_cleanup_transaction(fs_info);
+
         mutex_lock(&fs_info->cleaner_mutex);
         btrfs_run_delayed_iputs(fs_info);
         mutex_unlock(&fs_info->cleaner_mutex);
  
         down_write(&fs_info->cleanup_work_sem);
         up_write(&fs_info->cleanup_work_sem);
-
-       /* cleanup FS via transaction */
-       btrfs_cleanup_transaction(fs_info);
  }
  
  static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4258,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
         list_splice_init(&root->delalloc_inodes, &splice);
  
         while (!list_empty(&splice)) {
+               struct inode *inode = NULL;
                 btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
                                                delalloc_inodes);
-
-               list_del_init(&btrfs_inode->delalloc_inodes);
-               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                         &btrfs_inode->runtime_flags);
+               __btrfs_del_delalloc_inode(root, btrfs_inode);
                 spin_unlock(&root->delalloc_lock);
  
-               btrfs_invalidate_inodes(btrfs_inode->root);
-
+               /*
+                * Make sure we get a live inode and that it'll not disappear
+                * meanwhile.
+                */
+               inode = igrab(&btrfs_inode->vfs_inode);
+               if (inode) {
+                       invalidate_inode_pages2(inode->i_mapping);
+                       iput(inode);
+               }
                 spin_lock(&root->delalloc_lock);
         }
-
         spin_unlock(&root->delalloc_lock);
  }
  
@@ -4286,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
         while (!list_empty(&splice)) {
                 root = list_first_entry(&splice, struct btrfs_root,
                                          delalloc_root);
-               list_del_init(&root->delalloc_root);
                 root = btrfs_grab_fs_root(root);
                 BUG_ON(!root);
                 spin_unlock(&fs_info->delalloc_root_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index d241285a0d2a8dea0ff858f58a5590c18298f2b0..0b86cf10cf2ac79732ea548bbe970819ea758d4e 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
         spin_unlock(&root->delalloc_lock);
  }
  
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
-                                    struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode)
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
  
-       spin_lock(&root->delalloc_lock);
         if (!list_empty(&inode->delalloc_inodes)) {
                 list_del_init(&inode->delalloc_inodes);
                 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
@@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
                         spin_unlock(&fs_info->delalloc_root_lock);
                 }
         }
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+                                    struct btrfs_inode *inode)
+{
+       spin_lock(&root->delalloc_lock);
+       __btrfs_del_delalloc_inode(root, inode);
         spin_unlock(&root->delalloc_lock);
  }
  
@@ -6579,8 +6586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                 goto out_unlock_inode;
         } else {
                 btrfs_update_inode(trans, root, inode);
-               unlock_new_inode(inode);
-               d_instantiate(dentry, inode);
+               d_instantiate_new(dentry, inode);
         }
  
  out_unlock:
@@ -6656,8 +6662,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                 goto out_unlock_inode;
  
         BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
  
  out_unlock:
         btrfs_end_transaction(trans);
@@ -6802,12 +6807,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         if (err)
                 goto out_fail_inode;
  
-       d_instantiate(dentry, inode);
-       /*
-        * mkdir is special.  We're unlocking after we call d_instantiate
-        * to avoid a race with nfsd calling d_instantiate.
-        */
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
         drop_on_err = 0;
  
  out_fail:
@@ -9117,7 +9117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
                                                  BTRFS_EXTENT_DATA_KEY);
                 trans->block_rsv = &fs_info->trans_block_rsv;
                 if (ret != -ENOSPC && ret != -EAGAIN) {
-                       err = ret;
+                       if (ret < 0)
+                               err = ret;
                         break;
                 }
  
@@ -10250,8 +10251,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                 goto out_unlock_inode;
         }
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
  
  out_unlock:
         btrfs_end_transaction(trans);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c

index 53a8c95828e33a537525a193dfaf20e030505bbc..dc6140013ae8194739a8aa6a387f12c35794bdf9 100644 (file)
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
                                   const char *value,
                                   size_t len)
  {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
         int type;
  
         if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
                 return 0;
         }
  
-       if (!strncmp("lzo", value, 3))
+       if (!strncmp("lzo", value, 3)) {
                 type = BTRFS_COMPRESS_LZO;
-       else if (!strncmp("zlib", value, 4))
+               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+       } else if (!strncmp("zlib", value, 4)) {
                 type = BTRFS_COMPRESS_ZLIB;
-       else if (!strncmp("zstd", value, len))
+       } else if (!strncmp("zstd", value, len)) {
                 type = BTRFS_COMPRESS_ZSTD;
-       else
+               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+       } else {
                 return -EINVAL;
+       }
  
         BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
         BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 43758e30aa7a97cb240c7b12146f040f5ce8d3d9..8f23a94dab770c00933f39a4ed1fb845edac22dd 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
         return ret;
  }
  
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+ * are not in the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+                                     struct btrfs_inode *inode,
+                                     struct btrfs_path *path)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key;
+       const u64 i_size = i_size_read(&inode->vfs_inode);
+       const u64 ino = btrfs_ino(inode);
+       struct btrfs_path *dst_path = NULL;
+       u64 last_extent = (u64)-1;
+       int ins_nr = 0;
+       int start_slot;
+       int ret;
+
+       if (!(inode->flags & BTRFS_INODE_PREALLOC))
+               return 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = i_size;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       if (ins_nr > 0) {
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               if (ret < 0)
+                                       goto out;
+                               ins_nr = 0;
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid > ino)
+                       break;
+               if (WARN_ON_ONCE(key.objectid < ino) ||
+                   key.type < BTRFS_EXTENT_DATA_KEY ||
+                   key.offset < i_size) {
+                       path->slots[0]++;
+                       continue;
+               }
+               if (last_extent == (u64)-1) {
+                       last_extent = key.offset;
+                       /*
+                        * Avoid logging extent items logged in past fsync calls
+                        * and leading to duplicate keys in the log tree.
+                        */
+                       do {
+                               ret = btrfs_truncate_inode_items(trans,
+                                                        root->log_root,
+                                                        &inode->vfs_inode,
+                                                        i_size,
+                                                        BTRFS_EXTENT_DATA_KEY);
+                       } while (ret == -EAGAIN);
+                       if (ret)
+                               goto out;
+               }
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               if (!dst_path) {
+                       dst_path = btrfs_alloc_path();
+                       if (!dst_path) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+               }
+       }
+       if (ins_nr > 0) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                start_slot, ins_nr, 1, 0);
+               if (ret > 0)
+                       ret = 0;
+       }
+out:
+       btrfs_release_path(path);
+       btrfs_free_path(dst_path);
+       return ret;
+}
+
  static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct btrfs_inode *inode,
@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                 if (em->generation <= test_gen)
                         continue;
  
+               /* We log prealloc extents beyond eof later. */
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+                   em->start >= i_size_read(&inode->vfs_inode))
+                       continue;
+
                 if (em->start < logged_start)
                         logged_start = em->start;
                 if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                 num++;
         }
  
-       /*
-        * Add all prealloc extents beyond the inode's i_size to make sure we
-        * don't lose them after doing a fast fsync and replaying the log.
-        */
-       if (inode->flags & BTRFS_INODE_PREALLOC) {
-               struct rb_node *node;
-
-               for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
-                       em = rb_entry(node, struct extent_map, rb_node);
-                       if (em->start < i_size_read(&inode->vfs_inode))
-                               break;
-                       if (!list_empty(&em->list))
-                               continue;
-                       /* Same as above loop. */
-                       if (++num > 32768) {
-                               list_del_init(&tree->modified_extents);
-                               ret = -EFBIG;
-                               goto process;
-                       }
-                       refcount_inc(&em->refs);
-                       set_bit(EXTENT_FLAG_LOGGING, &em->flags);
-                       list_add_tail(&em->list, &extents);
-               }
-       }
-
         list_sort(NULL, &extents, extent_cmp);
         btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
         /*
@@ -4443,6 +4527,9 @@ process:
         up_write(&inode->dio_sem);
  
         btrfs_release_path(path);
+       if (!ret)
+               ret = btrfs_log_prealloc_extents(trans, inode, path);
+
         return ret;
  }
  
@@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
         struct extent_map_tree *em_tree = &inode->extent_tree;
         u64 logged_isize = 0;
         bool need_log_inode_item = true;
+       bool xattrs_logged = false;
  
         path = btrfs_alloc_path();
         if (!path)
@@ -5128,6 +5216,7 @@ next_key:
         err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
         if (err)
                 goto out_unlock;
+       xattrs_logged = true;
         if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
                 btrfs_release_path(path);
                 btrfs_release_path(dst_path);
@@ -5140,6 +5229,11 @@ log_extents:
         btrfs_release_path(dst_path);
         if (need_log_inode_item) {
                 err = log_inode_item(trans, log, dst_path, inode);
+               if (!err && !xattrs_logged) {
+                       err = btrfs_log_all_xattrs(trans, root, inode, path,
+                                                  dst_path);
+                       btrfs_release_path(path);
+               }
                 if (err)
                         goto out_unlock;
         }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 292266f6ab9c9d8dfa18422998ec1884ae57369d..be3fc701f38948e37e5d776ee93413e0d06e85ff 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
                 return 0;
         }
  
+       /*
+        * A ro->rw remount sequence should continue with the paused balance
+        * regardless of who pauses it, system or the user as of now, so set
+        * the resume flag.
+        */
+       spin_lock(&fs_info->balance_lock);
+       fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+       spin_unlock(&fs_info->balance_lock);
+
         tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
         return PTR_ERR_OR_ZERO(tsk);
  }
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c

index 0daa1e3fe0df837e3cbbade70184287aa129c294..ab0bbe93b398ce68dd0dc04652a626635d2c7c23 100644 (file)
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -572,6 +572,11 @@ lookup_again:
                         if (ret < 0)
                                 goto create_error;
  
+                       if (unlikely(d_unhashed(next))) {
+                               dput(next);
+                               inode_unlock(d_inode(dir));
+                               goto lookup_again;
+                       }
                         ASSERT(d_backing_inode(next));
  
                         _debug("mkdir -> %p{%p{ino=%lu}}",
@@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
         /* search the current directory for the element name */
         inode_lock(d_inode(dir));
  
+retry:
         start = jiffies;
         subdir = lookup_one_len(dirname, dir, strlen(dirname));
         cachefiles_hist(cachefiles_lookup_histogram, start);
@@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                 if (ret < 0)
                         goto mkdir_error;
  
+               if (unlikely(d_unhashed(subdir))) {
+                       dput(subdir);
+                       goto retry;
+               }
                 ASSERT(d_backing_inode(subdir));
  
                 _debug("mkdir -> %p{%p{ino=%lu}}",
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig

index 5f132d59dfc2668cd0d737eae36b03f30226dc7a..d61e2de8d0eb5524768477e28c1da099976e879b 100644 (file)
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -197,7 +197,7 @@ config CIFS_SMB311
  
  config CIFS_SMB_DIRECT
         bool "SMB Direct support (Experimental)"
-       depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+       depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y
         help
           Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
           SMB Direct allows transferring SMB packets over RDMA. If unsure,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c

index 017b0ab19bc4d98625349ce65109f7f48f5551c8..124b093d14e5671e549c77fb9c2c5398f9e93fc1 100644 (file)
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb)
  {
         struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
  
-       if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) {
+       if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
                 if (sbi && sbi->mtd_point_size)
                         mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
                 kill_mtd_super(sb);
diff --git a/fs/dax.c b/fs/dax.c

index 34a2d435ae4bcaae5eff26445a6f598ff252d36d..1f5f14a2ce4c5bcac373b6c36998d043b88fee3b 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
         }
  }
  
+static struct page *dax_busy_page(void *entry)
+{
+       unsigned long pfn;
+
+       for_each_mapped_pfn(entry, pfn) {
+               struct page *page = pfn_to_page(pfn);
+
+               if (page_ref_count(page) > 1)
+                       return page;
+       }
+       return NULL;
+}
+
  /*
   * Find radix tree entry at given index. If it points to an exceptional entry,
   * return it with the radix tree entry locked. If the radix tree doesn't
@@ -492,6 +505,90 @@ restart:
         return entry;
  }
  
+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+       pgoff_t indices[PAGEVEC_SIZE];
+       struct page *page = NULL;
+       struct pagevec pvec;
+       pgoff_t index, end;
+       unsigned i;
+
+       /*
+        * In the 'limited' case get_user_pages() for dax is disabled.
+        */
+       if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+               return NULL;
+
+       if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+               return NULL;
+
+       pagevec_init(&pvec);
+       index = 0;
+       end = -1;
+
+       /*
+        * If we race get_user_pages_fast() here either we'll see the
+        * elevated page count in the pagevec_lookup and wait, or
+        * get_user_pages_fast() will see that the page it took a reference
+        * against is no longer mapped in the page tables and bail to the
+        * get_user_pages() slow path.  The slow path is protected by
+        * pte_lock() and pmd_lock(). New references are not taken without
+        * holding those locks, and unmap_mapping_range() will not zero the
+        * pte or pmd without holding the respective lock, so we are
+        * guaranteed to either see new references or prevent new
+        * references from being established.
+        */
+       unmap_mapping_range(mapping, 0, 0, 1);
+
+       while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
+                               indices)) {
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *pvec_ent = pvec.pages[i];
+                       void *entry;
+
+                       index = indices[i];
+                       if (index >= end)
+                               break;
+
+                       if (!radix_tree_exceptional_entry(pvec_ent))
+                               continue;
+
+                       xa_lock_irq(&mapping->i_pages);
+                       entry = get_unlocked_mapping_entry(mapping, index, NULL);
+                       if (entry)
+                               page = dax_busy_page(entry);
+                       put_unlocked_mapping_entry(mapping, index, entry);
+                       xa_unlock_irq(&mapping->i_pages);
+                       if (page)
+                               break;
+               }
+               pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               index++;
+
+               if (page)
+                       break;
+       }
+       return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
  static int __dax_invalidate_mapping_entry(struct address_space *mapping,
                                           pgoff_t index, bool trunc)
  {
@@ -912,7 +1009,6 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
         unsigned long vaddr = vmf->address;
         int ret = VM_FAULT_NOPAGE;
         struct page *zero_page;
-       void *entry2;
         pfn_t pfn;
  
         zero_page = ZERO_PAGE(0);
@@ -922,13 +1018,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
         }
  
         pfn = page_to_pfn_t(zero_page);
-       entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-                       RADIX_DAX_ZERO_PAGE, false);
-       if (IS_ERR(entry2)) {
-               ret = VM_FAULT_SIGBUS;
-               goto out;
-       }
-
+       dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
+                       false);
         vm_insert_mixed(vmf->vma, vaddr, pfn);
  out:
         trace_dax_load_hole(inode, vmf, ret);
@@ -1243,10 +1334,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
  
                 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                  0, write && !sync);
-               if (IS_ERR(entry)) {
-                       error = PTR_ERR(entry);
-                       goto error_finish_iomap;
-               }
  
                 /*
                  * If we are doing synchronous page fault and inode needs fsync,
@@ -1330,8 +1417,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
         pfn = page_to_pfn_t(zero_page);
         ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                         RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-       if (IS_ERR(ret))
-               goto fallback;
  
         ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
         if (!pmd_none(*(vmf->pmd))) {
@@ -1453,8 +1538,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
  
                 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                 RADIX_DAX_PMD, write && !sync);
-               if (IS_ERR(entry))
-                       goto finish_iomap;
  
                 /*
                  * If we are doing synchronous page fault and inode needs fsync,
diff --git a/fs/dcache.c b/fs/dcache.c

index 86d2de63461e1550efe205643d66800dd12f15ac..2acfc69878f55cc175d019156c15f031b3741c80 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1899,6 +1899,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  }
  EXPORT_SYMBOL(d_instantiate);
  
+/*
+ * This should be equivalent to d_instantiate() + unlock_new_inode(),
+ * with lockdep-related part of unlock_new_inode() done before
+ * anything else.  Use that instead of open-coding d_instantiate()/
+ * unlock_new_inode() combinations.
+ */
+void d_instantiate_new(struct dentry *entry, struct inode *inode)
+{
+       BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+       BUG_ON(!inode);
+       lockdep_annotate_inode_mutex_key(inode);
+       security_d_instantiate(entry, inode);
+       spin_lock(&inode->i_lock);
+       __d_instantiate(entry, inode);
+       WARN_ON(!(inode->i_state & I_NEW));
+       inode->i_state &= ~I_NEW;
+       smp_mb();
+       wake_up_bit(&inode->i_state, __I_NEW);
+       spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_instantiate_new);
+
  /**
   * d_instantiate_no_diralias - instantiate a non-aliased dentry
   * @entry: dentry to complete
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c

index 97d17eaeba0731fb5d755c7a3106af1abaf2cff0..49121e5a8de228acfb1ea126250e5ad94e4ec812 100644 (file)
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
                 iget_failed(ecryptfs_inode);
                 goto out;
         }
-       unlock_new_inode(ecryptfs_inode);
-       d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+       d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
  out:
         return rc;
  }
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 1e01fabef130a602ef82e17a6e77f158534c4bd7..71635909df3b55dfb8cfa3d4804e98542e7437d7 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1264,21 +1264,11 @@ do_indirects:
  
  static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
  {
-       /*
-        * XXX: it seems like a bug here that we don't allow
-        * IS_APPEND inode to have blocks-past-i_size trimmed off.
-        * review and fix this.
-        *
-        * Also would be nice to be able to handle IO errors and such,
-        * but that's probably too much to ask.
-        */
         if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
             S_ISLNK(inode->i_mode)))
                 return;
         if (ext2_inode_is_fast_symlink(inode))
                 return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
  
         dax_sem_down_write(EXT2_I(inode));
         __ext2_truncate_blocks(inode, offset);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c

index 55f7caadb09333a1d73603f839de3fa5df2f9e42..152453a9187763a7173c2d1c41f33714c5287db9 100644 (file)
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
  {
         int err = ext2_add_link(dentry, inode);
         if (!err) {
-               unlock_new_inode(inode);
-               d_instantiate(dentry, inode);
+               d_instantiate_new(dentry, inode);
                 return 0;
         }
         inode_dec_link_count(inode);
@@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
         if (err)
                 goto out_fail;
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
  out:
         return err;
  
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index b1f21e3a076327df20e5218451ff1feec7e3a13f..4a09063ce1d215492c313a2ce54a10418cf63d8b 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle,
         int err = ext4_add_entry(handle, dentry, inode);
         if (!err) {
                 ext4_mark_inode_dirty(handle, inode);
-               unlock_new_inode(inode);
-               d_instantiate(dentry, inode);
+               d_instantiate_new(dentry, inode);
                 return 0;
         }
         drop_nlink(inode);
@@ -2651,8 +2650,7 @@ out_clear_inode:
         err = ext4_mark_inode_dirty(handle, dir);
         if (err)
                 goto out_clear_inode;
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         if (IS_DIRSYNC(dir))
                 ext4_handle_sync(handle);
  
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c

index d5098efe577c0adfb5798498bbe561f0b637f628..75e37fd720b2ba999c05dbb0219f7f4850d6f25e 100644 (file)
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  
         alloc_nid_done(sbi, ino);
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
  
         if (IS_DIRSYNC(dir))
                 f2fs_sync_fs(sbi->sb, 1);
@@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
         err = page_symlink(inode, disk_link.name, disk_link.len);
  
  err_out:
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
  
         /*
          * Let's flush symlink data in order to avoid broken symlink as much as
@@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  
         alloc_nid_done(sbi, inode->i_ino);
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
  
         if (IS_DIRSYNC(dir))
                 f2fs_sync_fs(sbi->sb, 1);
@@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
  
         alloc_nid_done(sbi, inode->i_ino);
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
  
         if (IS_DIRSYNC(dir))
                 f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index 513c357c734b0d838f331bebc71b1deeb9f8f57f..a6c0f54c48c30f25315865f8080d412fb428c00a 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
         return 0;
  
  out_put_hidden_dir:
+       cancel_delayed_work_sync(&sbi->sync_work);
         iput(sbi->hidden_dir);
  out_put_root:
         dput(sb->s_root);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c

index 0a754f38462e9623e5608e65f95606abfef453be..e5a6deb38e1e1be47803250b3de5d115ce5c7e88 100644 (file)
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
                   __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
                   f->inocache->pino_nlink, inode->i_mapping->nrpages);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         return 0;
  
   fail:
@@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
         mutex_unlock(&dir_f->sem);
         jffs2_complete_reservation(c);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         return 0;
  
   fail:
@@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
         mutex_unlock(&dir_f->sem);
         jffs2_complete_reservation(c);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         return 0;
  
   fail:
@@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
         mutex_unlock(&dir_f->sem);
         jffs2_complete_reservation(c);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         return 0;
  
   fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c

index b41596d71858b4a1fcaf4b3e133ebc7b72d48afd..56c3fcbfe80ed0b69156bcab2f981b4d322b0aba 100644 (file)
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
                 unlock_new_inode(ip);
                 iput(ip);
         } else {
-               unlock_new_inode(ip);
-               d_instantiate(dentry, ip);
+               d_instantiate_new(dentry, ip);
         }
  
        out2:
@@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
                 unlock_new_inode(ip);
                 iput(ip);
         } else {
-               unlock_new_inode(ip);
-               d_instantiate(dentry, ip);
+               d_instantiate_new(dentry, ip);
         }
  
        out2:
@@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
                 unlock_new_inode(ip);
                 iput(ip);
         } else {
-               unlock_new_inode(ip);
-               d_instantiate(dentry, ip);
+               d_instantiate_new(dentry, ip);
         }
  
        out2:
@@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
                 unlock_new_inode(ip);
                 iput(ip);
         } else {
-               unlock_new_inode(ip);
-               d_instantiate(dentry, ip);
+               d_instantiate_new(dentry, ip);
         }
  
        out1:
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c

index 26dd9a50f38382a069c2680260d0a619d0db60e3..ff2716f9322e673d7a3e701c764c0f859ed84051 100644 (file)
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
  
         info->root = root;
         info->ns = ns;
+       INIT_LIST_HEAD(&info->node);
  
         sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
                          &init_user_ns, info);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 2410b093a2e61213a86dc7be29ca281ff77f0206..b0555d7d8200f237bd2feafa58bd5303494bd3b2 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 break;
         case S_IFDIR:
                 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+               if (!host_err && unlikely(d_unhashed(dchild))) {
+                       struct dentry *d;
+                       d = lookup_one_len(dchild->d_name.name,
+                                          dchild->d_parent,
+                                          dchild->d_name.len);
+                       if (IS_ERR(d)) {
+                               host_err = PTR_ERR(d);
+                               break;
+                       }
+                       if (unlikely(d_is_negative(d))) {
+                               dput(d);
+                               err = nfserr_serverfault;
+                               goto out;
+                       }
+                       dput(resfhp->fh_dentry);
+                       resfhp->fh_dentry = dget(d);
+                       err = fh_update(resfhp);
+                       dput(dchild);
+                       dchild = d;
+                       if (err)
+                               goto out;
+               }
                 break;
         case S_IFCHR:
         case S_IFBLK:
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c

index 1a2894aa01942597e05dba587d33d42bd2da7c0a..dd52d3f82e8d673bd43dd99dfdfff6658b959357 100644 (file)
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
         int err = nilfs_add_link(dentry, inode);
  
         if (!err) {
-               d_instantiate(dentry, inode);
-               unlock_new_inode(inode);
+               d_instantiate_new(dentry, inode);
                 return 0;
         }
         inode_dec_link_count(inode);
@@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                 goto out_fail;
  
         nilfs_mark_inode_dirty(inode);
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
  out:
         if (!err)
                 err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c

index 91a8889abf9ba9f4d3df399996346942f48c2555..ea8c551bcd7e4302f5b460a3c145ef2dbf0d4398 100644 (file)
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
                      current_page, vec_len, vec_start);
  
                 len = bio_add_page(bio, page, vec_len, vec_start);
-               if (len != vec_len) {
-                       mlog(ML_ERROR, "Adding page[%d] to bio failed, "
-                            "page %p, len %d, vec_len %u, vec_start %u, "
-                            "bi_sector %llu\n", current_page, page, len,
-                            vec_len, vec_start,
-                            (unsigned long long)bio->bi_iter.bi_sector);
-                       bio_put(bio);
-                       bio = ERR_PTR(-EIO);
-                       return bio;
-               }
+               if (len != vec_len) break;
  
                 cs += vec_len / (PAGE_SIZE/spp);
                 vec_start = 0;
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c

index 6e3134e6d98a51c14a7c170ec6b98fdd7b78ff8e..1b5707c44c3f61dcf54fc6f2bc7901b420d2e322 100644 (file)
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir,
                      get_khandle_from_ino(inode),
                      dentry);
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
         orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
         ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir,
                      "Assigned symlink inode new number of %pU\n",
                      get_khandle_from_ino(inode));
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
         orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
         ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
                      "Assigned dir inode new number of %pU\n",
                      get_khandle_from_ino(inode));
  
-       d_instantiate(dentry, inode);
-       unlock_new_inode(inode);
+       d_instantiate_new(dentry, inode);
         orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
         ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
diff --git a/fs/proc/array.c b/fs/proc/array.c

index ae2c807fd719540312d18a94c7676ceb51b10f12..72391b3f6927716ea15de244a6fc1d2cdea8b4a5 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
  #include <linux/delayacct.h>
  #include <linux/seq_file.h>
  #include <linux/pid_namespace.h>
+#include <linux/prctl.h>
  #include <linux/ptrace.h>
  #include <linux/tracehook.h>
  #include <linux/string_helpers.h>
@@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
  #ifdef CONFIG_SECCOMP
         seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
  #endif
+       seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+       switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+       case -EINVAL:
+               seq_printf(m, "unknown");
+               break;
+       case PR_SPEC_NOT_AFFECTED:
+               seq_printf(m, "not vulnerable");
+               break;
+       case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+               seq_printf(m, "thread force mitigated");
+               break;
+       case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+               seq_printf(m, "thread mitigated");
+               break;
+       case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+               seq_printf(m, "thread vulnerable");
+               break;
+       case PR_SPEC_DISABLE:
+               seq_printf(m, "globally mitigated");
+               break;
+       default:
+               seq_printf(m, "vulnerable");
+               break;
+       }
         seq_putc(m, '\n');
  }
  
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 1b2ede6abcdfc97f1ab87244a18e0cb4c9035e5f..1a76d751cf3c24c2f4271b1a855ff5d7bca9ee85 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
          * Inherently racy -- command line shares address space
          * with code and data.
          */
-       rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+       rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
         if (rv <= 0)
                 goto out_free_page;
  
@@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
                         int nr_read;
  
                         _count = min3(count, len, PAGE_SIZE);
-                       nr_read = access_remote_vm(mm, p, page, _count, 0);
+                       nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
                         if (nr_read < 0)
                                 rv = nr_read;
                         if (nr_read <= 0)
@@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
                                 bool final;
  
                                 _count = min3(count, len, PAGE_SIZE);
-                               nr_read = access_remote_vm(mm, p, page, _count, 0);
+                               nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
                                 if (nr_read < 0)
                                         rv = nr_read;
                                 if (nr_read <= 0)
@@ -946,7 +946,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
                 max_len = min_t(size_t, PAGE_SIZE, count);
                 this_len = min(max_len, this_len);
  
-               retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+               retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
  
                 if (retval <= 0) {
                         ret = retval;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c

index bd39a998843da62db4b634800813c3de29d969c7..5089dac0266020d705e54dcb8f06ca1a998ccec2 100644 (file)
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
         reiserfs_update_inode_transaction(inode);
         reiserfs_update_inode_transaction(dir);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         retval = journal_end(&th);
  
  out_failed:
@@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
                 goto out_failed;
         }
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         retval = journal_end(&th);
  
  out_failed:
@@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
         /* the above add_entry did not update dir's stat data */
         reiserfs_update_sd(&th, dir);
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         retval = journal_end(&th);
  out_failed:
         reiserfs_write_unlock(dir->i_sb);
@@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
                 goto out_failed;
         }
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         retval = journal_end(&th);
  out_failed:
         reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/seq_file.c b/fs/seq_file.c

index c6c27f1f9c9850634700e4898adae6ab7755e113..4cc090b50cc528a2185a567656f835790e0f09bc 100644 (file)
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
         if (m->count + width >= m->size)
                 goto overflow;
  
-       if (num < 10) {
-               m->buf[m->count++] = num + '0';
-               return;
-       }
-
         len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
         if (!len)
                 goto overflow;
diff --git a/fs/super.c b/fs/super.c

index 122c402049a25b29c5ed1e9aa0ef6ace1f657e6c..4b5b562176d074fe924329da795661650369b831 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
         sb = container_of(shrink, struct super_block, s_shrink);
  
         /*
-        * Don't call trylock_super as it is a potential
-        * scalability bottleneck. The counts could get updated
-        * between super_cache_count and super_cache_scan anyway.
-        * Call to super_cache_count with shrinker_rwsem held
-        * ensures the safety of call to list_lru_shrink_count() and
-        * s_op->nr_cached_objects().
+        * We don't call trylock_super() here as it is a scalability bottleneck,
+        * so we're exposed to partial setup state. The shrinker rwsem does not
+        * protect filesystem operations backing list_lru_shrink_count() or
+        * s_op->nr_cached_objects(). Counts can change between
+        * super_cache_count and super_cache_scan, so we really don't need locks
+        * here.
+        *
+        * However, if we are currently mounting the superblock, the underlying
+        * filesystem might be in a state of partial construction and hence it
+        * is dangerous to access it.  trylock_super() uses a SB_BORN check to
+        * avoid this situation, so do the same here. The memory barrier is
+        * matched with the one in mount_fs() as we don't hold locks here.
          */
+       if (!(sb->s_flags & SB_BORN))
+               return 0;
+       smp_rmb();
+
         if (sb->s_op && sb->s_op->nr_cached_objects)
                 total_objects = sb->s_op->nr_cached_objects(sb, sc);
  
@@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
         sb = root->d_sb;
         BUG_ON(!sb);
         WARN_ON(!sb->s_bdi);
+
+       /*
+        * Write barrier is for super_cache_count(). We place it before setting
+        * SB_BORN as the data dependency between the two functions is the
+        * superblock structure contents that we just set up, not the SB_BORN
+        * flag.
+        */
+       smp_wmb();
         sb->s_flags |= SB_BORN;
  
         error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c

index b428d317ae92a74d40ddf20ee3e64a830a04433b..92682fcc41f6c976f1d38cf3111eab509a9028d8 100644 (file)
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
  {
         struct dentry *root;
         void *ns;
-       bool new_sb;
+       bool new_sb = false;
  
         if (!(flags & SB_KERNMOUNT)) {
                 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
@@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
         ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
         root = kernfs_mount_ns(fs_type, flags, sysfs_root,
                                 SYSFS_MAGIC, &new_sb, ns);
-       if (IS_ERR(root) || !new_sb)
+       if (!new_sb)
                 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
-       else if (new_sb)
+       else if (!IS_ERR(root))
                 root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
  
         return root;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c

index 0458dd47e105b0c2724064bd816fd4f7ef24e28a..c586026508db82d0a27a1df1b964bcbf3fcec45c 100644 (file)
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
         if (fibh.sbh != fibh.ebh)
                 brelse(fibh.ebh);
         brelse(fibh.sbh);
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
  
         return 0;
  }
@@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         inc_nlink(dir);
         dir->i_ctime = dir->i_mtime = current_time(dir);
         mark_inode_dirty(dir);
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         if (fibh.sbh != fibh.ebh)
                 brelse(fibh.ebh);
         brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c

index 32545cd00cebf0fb50a403a7275efc194ef6bb4f..d5f43ba76c598dea592339f8926327401b181483 100644 (file)
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
  {
         int err = ufs_add_link(dentry, inode);
         if (!err) {
-               unlock_new_inode(inode);
-               d_instantiate(dentry, inode);
+               d_instantiate_new(dentry, inode);
                 return 0;
         }
         inode_dec_link_count(inode);
@@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
         if (err)
                 goto out_fail;
  
-       unlock_new_inode(inode);
-       d_instantiate(dentry, inode);
+       d_instantiate_new(dentry, inode);
         return 0;
  
  out_fail:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index e70fb8cceceaa5d2333573e49460beba75629815..19b0c3e0e232203ffb2c8f7c60d640424542b2b7 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -312,7 +312,7 @@ restart:
         if (error <= 0)
                 return error;
  
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
         if (error)
                 return error;
  
@@ -718,6 +718,69 @@ buffered:
         return ret;
  }
  
+static void
+xfs_wait_dax_page(
+       struct inode            *inode,
+       bool                    *did_unlock)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       *did_unlock = true;
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+       schedule();
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+}
+
+static int
+xfs_break_dax_layouts(
+       struct inode            *inode,
+       uint                    iolock,
+       bool                    *did_unlock)
+{
+       struct page             *page;
+
+       ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
+
+       page = dax_layout_busy_page(inode->i_mapping);
+       if (!page)
+               return 0;
+
+       return ___wait_var_event(&page->_refcount,
+                       atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+                       0, 0, xfs_wait_dax_page(inode, did_unlock));
+}
+
+int
+xfs_break_layouts(
+       struct inode            *inode,
+       uint                    *iolock,
+       enum layout_break_reason reason)
+{
+       bool                    retry;
+       int                     error;
+
+       ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
+
+       do {
+               retry = false;
+               switch (reason) {
+               case BREAK_UNMAP:
+                       error = xfs_break_dax_layouts(inode, *iolock, &retry);
+                       if (error || retry)
+                               break;
+                       /* fall through */
+               case BREAK_WRITE:
+                       error = xfs_break_leased_layouts(inode, iolock, &retry);
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       error = -EINVAL;
+               }
+       } while (error == 0 && retry);
+
+       return error;
+}
+
  #define        XFS_FALLOC_FL_SUPPORTED                                         \
                 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
                  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
@@ -734,7 +797,7 @@ xfs_file_fallocate(
         struct xfs_inode        *ip = XFS_I(inode);
         long                    error;
         enum xfs_prealloc_flags flags = 0;
-       uint                    iolock = XFS_IOLOCK_EXCL;
+       uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
         loff_t                  new_size = 0;
         bool                    do_file_insert = false;
  
@@ -744,13 +807,10 @@ xfs_file_fallocate(
                 return -EOPNOTSUPP;
  
         xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
         if (error)
                 goto out_unlock;
  
-       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-       iolock |= XFS_MMAPLOCK_EXCL;
-
         if (mode & FALLOC_FL_PUNCH_HOLE) {
                 error = xfs_free_file_space(ip, offset, len);
                 if (error)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 1eebc53df7d72f91e5b465d0b2295c3dafccfd89..e5b849815ce1865597a1e3f991c61cd5a1cc8c63 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -378,6 +378,20 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
  #define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
                                         >> XFS_ILOCK_SHIFT)
  
+/*
+ * Layouts are broken in the BREAK_WRITE case to ensure that
+ * layout-holders do not collide with local writes. Additionally,
+ * layouts are broken in the BREAK_UNMAP case to make sure the
+ * layout-holder has a consistent view of the file's extent map. While
+ * BREAK_WRITE breaks can be satisfied by recalling FL_LAYOUT leases,
+ * BREAK_UNMAP breaks additionally require waiting for busy dax-pages to
+ * go idle.
+ */
+enum layout_break_reason {
+        BREAK_WRITE,
+        BREAK_UNMAP,
+};
+
  /*
   * For multiple groups support: if S_ISGID bit is set in the parent
   * directory, group of new file is set to that of the parent, and
@@ -443,6 +457,8 @@ enum xfs_prealloc_flags {
  
  int    xfs_update_prealloc_flags(struct xfs_inode *ip,
                                   enum xfs_prealloc_flags flags);
+int    xfs_break_layouts(struct inode *inode, uint *iolock,
+               enum layout_break_reason reason);
  
  /* from xfs_iops.c */
  extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index 89fb1eb80aae88b3a47e4fd63cb64857639fbb76..91e73d6630990461ed058e34ab2018e217b01b83 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -39,7 +39,6 @@
  #include "xfs_icache.h"
  #include "xfs_symlink.h"
  #include "xfs_trans.h"
-#include "xfs_pnfs.h"
  #include "xfs_acl.h"
  #include "xfs_btree.h"
  #include <linux/fsmap.h>
@@ -614,7 +613,7 @@ xfs_ioc_space(
         struct xfs_inode        *ip = XFS_I(inode);
         struct iattr            iattr;
         enum xfs_prealloc_flags flags = 0;
-       uint                    iolock = XFS_IOLOCK_EXCL;
+       uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
         int                     error;
  
         /*
@@ -644,13 +643,10 @@ xfs_ioc_space(
                 return error;
  
         xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
         if (error)
                 goto out_unlock;
  
-       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-       iolock |= XFS_MMAPLOCK_EXCL;
-
         switch (bf->l_whence) {
         case 0: /*SEEK_SET*/
                 break;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index a3ed3c811dfa4c8291a3a079ee35b37c5e08f318..ce0c1f9466a8ca88fd95877ba51beef86c897451 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -37,7 +37,6 @@
  #include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_trans_space.h"
-#include "xfs_pnfs.h"
  #include "xfs_iomap.h"
  
  #include <linux/capability.h>
@@ -1030,14 +1029,19 @@ xfs_vn_setattr(
         int                     error;
  
         if (iattr->ia_valid & ATTR_SIZE) {
-               struct xfs_inode        *ip = XFS_I(d_inode(dentry));
-               uint                    iolock = XFS_IOLOCK_EXCL;
+               struct inode            *inode = d_inode(dentry);
+               struct xfs_inode        *ip = XFS_I(inode);
+               uint                    iolock;
  
-               error = xfs_break_layouts(d_inode(dentry), &iolock);
-               if (error)
+               xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+               iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+
+               error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
+               if (error) {
+                       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
                         return error;
+               }
  
-               xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
                 error = xfs_vn_setattr_size(dentry, iattr);
                 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
         } else {
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c

index aa6c5c193f4581d1c233578b34a897b871a1bf94..f44c3599527d07441fc6eb689c9d442e29add600 100644 (file)
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -31,19 +31,20 @@
   * rules in the page fault path we don't bother.
   */
  int
-xfs_break_layouts(
+xfs_break_leased_layouts(
         struct inode            *inode,
-       uint                    *iolock)
+       uint                    *iolock,
+       bool                    *did_unlock)
  {
         struct xfs_inode        *ip = XFS_I(inode);
         int                     error;
  
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
-
         while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
                 xfs_iunlock(ip, *iolock);
+               *did_unlock = true;
                 error = break_layout(inode, true);
-               *iolock = XFS_IOLOCK_EXCL;
+               *iolock &= ~XFS_IOLOCK_SHARED;
+               *iolock |= XFS_IOLOCK_EXCL;
                 xfs_ilock(ip, *iolock);
         }
  
@@ -120,8 +121,8 @@ xfs_fs_map_blocks(
          * Lock out any other I/O before we flush and invalidate the pagecache,
          * and then hand out a layout to the remote system.  This is very
          * similar to direct I/O, except that the synchronization is much more
-        * complicated.  See the comment near xfs_break_layouts for a detailed
-        * explanation.
+        * complicated.  See the comment near xfs_break_leased_layouts
+        * for a detailed explanation.
          */
         xfs_ilock(ip, XFS_IOLOCK_EXCL);
  
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h

index bf45951e28fe5428ea484ca1d73158aa382f3bb3..940c6c2ad88c5589b7526ea5fb6e1c13fcc75410 100644 (file)
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -9,10 +9,11 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
  int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
                 struct iattr *iattr);
  
-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_leased_layouts(struct inode *inode, uint *iolock,
+               bool *did_unlock);
  #else
  static inline int
-xfs_break_layouts(struct inode *inode, uint *iolock)
+xfs_break_leased_layouts(struct inode *inode, uint *iolock, bool *did_unlock)
  {
         return 0;
  }
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h

index 7e61c395fddffea20ed6dfc8a92ad3e2f848a20e..df36b1b08af0554fd60ac05c6ee8233154db70d2 100644 (file)
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -142,10 +142,11 @@ struct bpf_verifier_state_list {
  struct bpf_insn_aux_data {
         union {
                 enum bpf_reg_type ptr_type;     /* pointer type for load/store insns */
-               struct bpf_map *map_ptr;        /* pointer for call insn into lookup_elem */
+               unsigned long map_state;        /* pointer/poison value for maps */
                 s32 call_imm;                   /* saved imm field of call insn */
         };
         int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+       int sanitize_stack_off; /* stack slot to be cleared */
         bool seen; /* this insn was processed by the verifier */
  };
  
diff --git a/include/linux/cpu.h b/include/linux/cpu.h

index 7b01bc11c6929b7aaf4906a1587ee1b4e19b2e70..a97a63eef59f629bc54e0c7bc6be4d6c4b966bcf 100644 (file)
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
                                    struct device_attribute *attr, char *buf);
  extern ssize_t cpu_show_spectre_v2(struct device *dev,
                                    struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+                                         struct device_attribute *attr, char *buf);
  
  extern __printf(4, 5)
  struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/dax.h b/include/linux/dax.h

index a43b396fb336f65081927d543e320187f6ad5c2b..b51db4264c83bd4aa7a0e886df6efd9e2d1676c9 100644 (file)
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -86,6 +86,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
  struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
  int dax_writeback_mapping_range(struct address_space *mapping,
                 struct block_device *bdev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
  #else
  static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
  {
@@ -106,6 +108,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
         return NULL;
  }
  
+static inline struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+       return NULL;
+}
+
  static inline int dax_writeback_mapping_range(struct address_space *mapping,
                 struct block_device *bdev, struct writeback_control *wbc)
  {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h

index 94acbde17bb190e3299c35e92ad96525f575812b..66c6e17e61e5af907b1c8bec24021f22aa819824 100644 (file)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -224,6 +224,7 @@ extern seqlock_t rename_lock;
   * These are the low-level FS interfaces to the dcache..
   */
  extern void d_instantiate(struct dentry *, struct inode *);
+extern void d_instantiate_new(struct dentry *, struct inode *);
  extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
  extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
  extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
diff --git a/include/linux/efi.h b/include/linux/efi.h

index f1b7d68ac4600a3df4ec3280ef2c25086dfc6670..3016d8c456bcfd4a8f2dfe139aab476e9387dc63 100644 (file)
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -395,8 +395,8 @@ typedef struct {
         u32 attributes;
         u32 get_bar_attributes;
         u32 set_bar_attributes;
-       uint64_t romsize;
-       void *romimage;
+       u64 romsize;
+       u32 romimage;
  } efi_pci_io_protocol_32;
  
  typedef struct {
@@ -415,8 +415,8 @@ typedef struct {
         u64 attributes;
         u64 get_bar_attributes;
         u64 set_bar_attributes;
-       uint64_t romsize;
-       void *romimage;
+       u64 romsize;
+       u64 romimage;
  } efi_pci_io_protocol_64;
  
  typedef struct {
diff --git a/include/linux/gfp.h b/include/linux/gfp.h

index 1a4582b44d3273d41d5e0e65ca38857cee14561e..fc5ab85278d5fb2ff1f32c1f66b1aca1befc21a0 100644 (file)
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -464,7 +464,7 @@ static inline struct page *
  __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
  {
         VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
-       VM_WARN_ON(!node_online(nid));
+       VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
  
         return __alloc_pages(gfp_mask, order, nid);
  }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 6930c63126c78a9ef665b5b5653a60a8773b4d4c..6d6e79c59e68fa7fd5387f48814341082d6b8526 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
  
  #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  
-#ifdef CONFIG_S390
-#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
-#elif defined(CONFIG_ARM64)
-#define KVM_MAX_IRQ_ROUTES 4096
-#else
-#define KVM_MAX_IRQ_ROUTES 1024
-#endif
+#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
  
  bool kvm_arch_can_set_irq_routing(struct kvm *kvm);
  int kvm_set_irq_routing(struct kvm *kvm,
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h

index e0e49b5b1ee138344185e6e1ae5796fc4ecf4fdf..2b0265265c286b9fffb53e290039bf0686046908 100644 (file)
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -216,6 +216,9 @@ void put_online_mems(void);
  void mem_hotplug_begin(void);
  void mem_hotplug_done(void);
  
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
  #else /* ! CONFIG_MEMORY_HOTPLUG */
  #define pfn_to_online_page(pfn)                        \
  ({                                             \
diff --git a/include/linux/memremap.h b/include/linux/memremap.h

index 7b4899c06f49c7c727da4ae64eb83d10e7a14e83..5ebfff65da4d4ae2f2470d49662cc0393ba17817 100644 (file)
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,7 +1,6 @@
  /* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_MEMREMAP_H_
  #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
  #include <linux/ioport.h>
  #include <linux/percpu-refcount.h>
  
@@ -30,13 +29,6 @@ struct vmem_altmap {
   * Specialize ZONE_DEVICE memory into multiple types each having differents
   * usage.
   *
- * MEMORY_DEVICE_HOST:
- * Persistent device memory (pmem): struct page might be allocated in different
- * memory and architecture might want to perform special actions. It is similar
- * to regular memory, in that the CPU can access it transparently. However,
- * it is likely to have different bandwidth and latency than regular memory.
- * See Documentation/nvdimm/nvdimm.txt for more information.
- *
   * MEMORY_DEVICE_PRIVATE:
   * Device memory that is not directly addressable by the CPU: CPU can neither
   * read nor write private memory. In this case, we do still have struct pages
@@ -53,11 +45,19 @@ struct vmem_altmap {
   * driver can hotplug the device memory using ZONE_DEVICE and with that memory
   * type. Any page of a process can be migrated to such memory. However no one
   * should be allow to pin such memory so that it can always be evicted.
+ *
+ * MEMORY_DEVICE_FS_DAX:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. In support of coordinating page
+ * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
+ * wakeup event whenever a page is unpinned and becomes idle. This
+ * wakeup is used to coordinate physical address space management (ex:
+ * fs truncate/hole punch) vs pinned pages (ex: device dma).
   */
  enum memory_type {
-       MEMORY_DEVICE_HOST = 0,
-       MEMORY_DEVICE_PRIVATE,
+       MEMORY_DEVICE_PRIVATE = 1,
         MEMORY_DEVICE_PUBLIC,
+       MEMORY_DEVICE_FS_DAX,
  };
  
  /*
@@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
  
  unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
  void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
  #else
  static inline void *devm_memremap_pages(struct device *dev,
                 struct dev_pagemap *pgmap)
@@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
  }
  #endif /* CONFIG_ZONE_DEVICE */
  
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-       return is_zone_device_page(page) &&
-               page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-       return is_zone_device_page(page) &&
-               page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
  static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
  {
         if (pgmap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h

index 2a156c5dfadd1d70a2dc881f97ee9af4131b2e18..d703774982cadb352359769e1a69c6f55dc94acf 100644 (file)
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1286,17 +1286,7 @@ enum {
  static inline const struct cpumask *
  mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
  {
-       struct irq_desc *desc;
-       unsigned int irq;
-       int eqn;
-       int err;
-
-       err = mlx5_vector2eqn(dev, vector, &eqn, &irq);
-       if (err)
-               return NULL;
-
-       desc = irq_to_desc(irq);
-       return desc->affinity_hint;
+       return dev->priv.irq_info[vector].mask;
  }
  
  #endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 1ac1f06a4be6b22faf3883c760515a042a6d347e..274d5242bd0d8443124fcee6aec5b15cf637ee99 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -821,27 +821,65 @@ static inline bool is_zone_device_page(const struct page *page)
  }
  #endif
  
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+       if (!static_branch_unlikely(&devmap_managed_key))
+               return false;
+       if (!is_zone_device_page(page))
+               return false;
+       switch (page->pgmap->type) {
+       case MEMORY_DEVICE_PRIVATE:
+       case MEMORY_DEVICE_PUBLIC:
+       case MEMORY_DEVICE_FS_DAX:
+               __put_devmap_managed_page(page);
+               return true;
+       default:
+               break;
+       }
+       return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
+{
+       return is_zone_device_page(page) &&
+               page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+       return is_zone_device_page(page) &&
+               page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
  {
  }
-#define IS_HMM_ENABLED 0
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+       return false;
+}
+
  static inline bool is_device_private_page(const struct page *page)
  {
         return false;
  }
+
  static inline bool is_device_public_page(const struct page *page)
  {
         return false;
  }
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
  
  static inline void get_page(struct page *page)
  {
@@ -859,16 +897,13 @@ static inline void put_page(struct page *page)
         page = compound_head(page);
  
         /*
-        * For private device pages we need to catch refcount transition from
-        * 2 to 1, when refcount reach one it means the private device page is
-        * free and we need to inform the device driver through callback. See
+        * For devmap managed pages we need to catch the refcount transition
+        * from 2 to 1: when the refcount reaches one the page is free and we
+        * need to inform the device driver through a callback. See
          * include/linux/memremap.h and HMM for details.
          */
-       if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-           unlikely(is_device_public_page(page)))) {
-               put_zone_device_private_or_public_page(page);
+       if (put_devmap_managed_page(page))
                 return;
-       }
  
         if (put_page_testzero(page))
                 __put_page(page);
@@ -2109,7 +2144,6 @@ extern void setup_per_cpu_pageset(void);
  
  extern void zone_pcp_update(struct zone *zone);
  extern void zone_pcp_reset(struct zone *zone);
-extern void setup_zone_pageset(struct zone *zone);
  
  /* page_alloc.c */
  extern int min_free_kbytes;
@@ -2466,6 +2500,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
         return VM_FAULT_NOPAGE;
  }
  
+static inline vm_fault_t vmf_error(int err)
+{
+       if (err == -ENOMEM)
+               return VM_FAULT_OOM;
+       return VM_FAULT_SIGBUS;
+}
+
  struct page *follow_page_mask(struct vm_area_struct *vma,
                               unsigned long address, unsigned int foll_flags,
                               unsigned int *page_mask);
@@ -2493,6 +2534,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
  #define FOLL_MLOCK     0x1000  /* lock present pages */
  #define FOLL_REMOTE    0x2000  /* we are working on non-current tsk/mm */
  #define FOLL_COW       0x4000  /* internal GUP flag */
+#define FOLL_ANON      0x8000  /* don't do file mappings */
  
  static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
  {
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h

index b5b43f94f311626ee364157515c4342aba976e6f..01b990e4b228a90ef26bc302d8b8476293a81869 100644 (file)
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd);
  ({                                                                     \
         int i, ret = 1;                                                 \
         for (i = 0; i < map_words(map); i++) {                          \
-               if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) {       \
+               if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) {       \
                         ret = 0;                                        \
                         break;                                          \
                 }                                                       \
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h

index 5dad59b312440a9c6cf44160276f162a82130738..17c919436f48726450069ca1e589ccd3fc48e712 100644 (file)
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -867,12 +867,18 @@ struct nand_op_instr {
   * tBERS (during an erase) which all of them are u64 values that cannot be
   * divided by usual kernel macros and must be handled with the special
   * DIV_ROUND_UP_ULL() macro.
+ *
+ * The cast to the type of the dividend is needed here to guarantee that the
+ * result won't be an unsigned long long when the dividend is an unsigned long
+ * (or smaller), which is what the compiler does when it sees a ternary
+ * operator whose two branches have different types (it picks the larger type
+ * to make sure there is no loss).
   */
-#define __DIVIDE(dividend, divisor) ({                                 \
-       sizeof(dividend) == sizeof(u32) ?                               \
-               DIV_ROUND_UP(dividend, divisor) :                       \
-               DIV_ROUND_UP_ULL(dividend, divisor);                    \
-               })
+#define __DIVIDE(dividend, divisor) ({                                         \
+       (__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ?      \
+                              DIV_ROUND_UP(dividend, divisor) :                \
+                              DIV_ROUND_UP_ULL(dividend, divisor));            \
+       })
  #define PSEC_TO_NSEC(x) __DIVIDE(x, 1000)
  #define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000)
  
diff --git a/include/linux/node.h b/include/linux/node.h

index 41f171861dccdc4aa6727303e2261e702bbfe6bd..6d336e38d155e46ad4deff6dbabb7bf413795b5b 100644 (file)
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -32,9 +32,11 @@ extern struct node *node_devices[];
  typedef  void (*node_registration_func_t)(struct node *);
  
  #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
-extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages);
+extern int link_mem_sections(int nid, unsigned long start_pfn,
+                            unsigned long nr_pages, bool check_nid);
  #else
-static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
+static inline int link_mem_sections(int nid, unsigned long start_pfn,
+                                   unsigned long nr_pages, bool check_nid)
  {
         return 0;
  }
@@ -57,7 +59,7 @@ static inline int register_one_node(int nid)
                 if (error)
                         return error;
                 /* link memory sections under this node */
-               error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages);
+               error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true);
         }
  
         return error;
diff --git a/include/linux/nospec.h b/include/linux/nospec.h

index e791ebc65c9c0776325cdc5e72de5d995038d7e0..0c5ef54fd4162830b55aa676c1ecae4ea6ac23f5 100644 (file)
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -7,6 +7,8 @@
  #define _LINUX_NOSPEC_H
  #include <asm/barrier.h>
  
+struct task_struct;
+
  /**
   * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
   * @index: array element index
@@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
                                                                         \
         (typeof(_i)) (_i & _mask);                                      \
  })
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+                            unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
+
  #endif /* _LINUX_NOSPEC_H */
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h

index b1f37a89e368683233499bc5942b43e353b637a3..79b99d653e030d113e4401fc26c7b47e81dcff8c 100644 (file)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -133,7 +133,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
         lock_release(&sem->rw_sem.dep_map, 1, ip);
  #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
         if (!read)
-               sem->rw_sem.owner = NULL;
+               sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
  #endif
  }
  
@@ -141,6 +141,10 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
                                         bool read, unsigned long ip)
  {
         lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+       if (!read)
+               sem->rw_sem.owner = current;
+#endif
  }
  
  #endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h

index 56707d5ff6adddce20b7ae417d91abfd9c393744..ab93b6eae6968e4eef4b29b2f1f134c4b21fe5d1 100644 (file)
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -44,6 +44,12 @@ struct rw_semaphore {
  #endif
  };
  
+/*
+ * Setting bit 0 of the owner field with other non-zero bits will indicate
+ * that the rwsem is writer-owned with an unknown owner.
+ */
+#define RWSEM_OWNER_UNKNOWN    ((struct task_struct *)-1L)
+
  extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
  extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
  extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
diff --git a/include/linux/sched.h b/include/linux/sched.h

index c2413703f45dcf005c7601b1e8c8c24c51d2a844..ca3f3eae8980c3981509e016134064ac00fea83d 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1433,7 +1433,8 @@ static inline bool is_percpu_thread(void)
  #define PFA_NO_NEW_PRIVS               0       /* May not gain new privileges. */
  #define PFA_SPREAD_PAGE                        1       /* Spread page cache over cpuset */
  #define PFA_SPREAD_SLAB                        2       /* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE           3       /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE     4       /* Speculative Store Bypass force disabled*/
  
  #define TASK_PFA_TEST(name, func)                                      \
         static inline bool task_##func(struct task_struct *p)           \
@@ -1458,6 +1459,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
  TASK_PFA_SET(SPREAD_SLAB, spread_slab)
  TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
  
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
  static inline void
  current_restore_flags(unsigned long orig_flags, unsigned long flags)
  {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h

index c723a5c4e3ffdfc151fbf2e82c20a4c7ba7eeff7..e5320f6c865439c0b8910afc17fcfd1598bcc4fb 100644 (file)
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -4,8 +4,9 @@
  
  #include <uapi/linux/seccomp.h>
  
-#define SECCOMP_FILTER_FLAG_MASK       (SECCOMP_FILTER_FLAG_TSYNC | \
-                                        SECCOMP_FILTER_FLAG_LOG)
+#define SECCOMP_FILTER_FLAG_MASK       (SECCOMP_FILTER_FLAG_TSYNC      | \
+                                        SECCOMP_FILTER_FLAG_LOG        | \
+                                        SECCOMP_FILTER_FLAG_SPEC_ALLOW)
  
  #ifdef CONFIG_SECCOMP
  
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

index cd368d1b8cb8439fffb094fa00b228a88142ea14..a1e28dd5d0bffd45f819b819f24594341fba26bd 100644 (file)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -170,6 +170,7 @@ struct nft_data_desc {
  int nft_data_init(const struct nft_ctx *ctx,
                   struct nft_data *data, unsigned int size,
                   struct nft_data_desc *desc, const struct nlattr *nla);
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
  void nft_data_release(const struct nft_data *data, enum nft_data_types type);
  int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
                   enum nft_data_types type, unsigned int len);
@@ -736,6 +737,10 @@ struct nft_expr_ops {
         int                             (*init)(const struct nft_ctx *ctx,
                                                 const struct nft_expr *expr,
                                                 const struct nlattr * const tb[]);
+       void                            (*activate)(const struct nft_ctx *ctx,
+                                                   const struct nft_expr *expr);
+       void                            (*deactivate)(const struct nft_ctx *ctx,
+                                                     const struct nft_expr *expr);
         void                            (*destroy)(const struct nft_ctx *ctx,
                                                    const struct nft_expr *expr);
         int                             (*dump)(struct sk_buff *skb,
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h

index 28b996d6349072fb031de91e10115b688d89b42d..35498e613ff5458e5433facbf409e9c90a5153b7 100644 (file)
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
  /*
   * sctp/socket.c
   */
+int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
+                     int addr_len, int flags);
  int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
  int sctp_inet_listen(struct socket *sock, int backlog);
  void sctp_write_space(struct sock *sk);
diff --git a/include/net/tls.h b/include/net/tls.h

index b400d0bb744831b10eb6271721163c1b75c5b510..f5fb16da38606ba07b392a667845847df9be95cc 100644 (file)
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -97,6 +97,9 @@ struct tls_sw_context {
         u8 control;
         bool decrypted;
  
+       char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
+       char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
+
         /* Sending context */
         char aad_space[TLS_AAD_SPACE_SIZE];
  
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h

index 23159dd5be184bc5db37f4d34a30653afe8b97fd..a1fd63871d17289ee61ac1e22e7f2aeed42d82cf 100644 (file)
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -48,7 +48,6 @@ struct ib_umem {
         int                     writable;
         int                     hugetlb;
         struct work_struct      work;
-       struct pid             *pid;
         struct mm_struct       *mm;
         unsigned long           diff;
         struct ib_umem_odp     *odp_data;
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h

index 4a4201d997a73c9c769b23b8c34d225c828ab786..095383a4bd1a6f560944b65266eacbc118df09f4 100644 (file)
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs
  static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
                                         u16 idx)
  {
-       struct ib_uobject *uobj =
-               uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject;
+       const struct uverbs_attr *attr;
  
-       if (IS_ERR(uobj))
-               return uobj;
+       attr = uverbs_attr_get(attrs_bundle, idx);
+       if (IS_ERR(attr))
+               return ERR_CAST(attr);
  
-       return uobj->object;
+       return attr->obj_attr.uobject->object;
  }
  
  static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h

index f0820554caa9d0630410dc12fbd06bae76f4eeac..d0a341bc45404b06c08f434d26030dd8be3138db 100644 (file)
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -575,6 +575,48 @@ TRACE_EVENT(afs_protocol_error,
                       __entry->call, __entry->error, __entry->where)
             );
  
+TRACE_EVENT(afs_cm_no_server,
+           TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
+
+           TP_ARGS(call, srx),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       call    )
+                   __field(unsigned int,                       op_id   )
+                   __field_struct(struct sockaddr_rxrpc,       srx     )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call->debug_id;
+                   __entry->op_id = call->operation_ID;
+                   memcpy(&__entry->srx, srx, sizeof(__entry->srx));
+                          ),
+
+           TP_printk("c=%08x op=%u %pISpc",
+                     __entry->call, __entry->op_id, &__entry->srx.transport)
+           );
+
+TRACE_EVENT(afs_cm_no_server_u,
+           TP_PROTO(struct afs_call *call, const uuid_t *uuid),
+
+           TP_ARGS(call, uuid),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       call    )
+                   __field(unsigned int,                       op_id   )
+                   __field_struct(uuid_t,                      uuid    )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call->debug_id;
+                   __entry->op_id = call->operation_ID;
+                   memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid));
+                          ),
+
+           TP_printk("c=%08x op=%u %pU",
+                     __entry->call, __entry->op_id, &__entry->uuid)
+           );
+
  #endif /* _TRACE_AFS_H */
  
  /* This part must be outside protection */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h

index bc01e06bc7167fb2557ccb94e21c58e55f9c8fdb..0be866c91f62d055f5a6a3ab25a263a11f111916 100644 (file)
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio,
                 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
                 __entry->pid            = tsk->pid;
                 __entry->oldprio        = tsk->prio;
-               __entry->newprio        = pi_task ? pi_task->prio : tsk->prio;
+               __entry->newprio        = pi_task ?
+                               min(tsk->normal_prio, pi_task->prio) :
+                               tsk->normal_prio;
                 /* XXX SCHED_DEADLINE bits missing */
         ),
  
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h

index 7dd8f34c37dfea26f8ec460d3937a46c8f109f4d..fdcf88bcf0ea3dec3df105aa7e7d40fe2aaaaf0c 100644 (file)
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd,
  DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin);
  DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin);
  
-TRACE_EVENT(xen_mmu_flush_tlb_all,
-           TP_PROTO(int x),
-           TP_ARGS(x),
-           TP_STRUCT__entry(__array(char, x, 0)),
-           TP_fast_assign((void)x),
-           TP_printk("%s", "")
-       );
-
-TRACE_EVENT(xen_mmu_flush_tlb,
-           TP_PROTO(int x),
-           TP_ARGS(x),
-           TP_STRUCT__entry(__array(char, x, 0)),
-           TP_fast_assign((void)x),
-           TP_printk("%s", "")
-       );
-
  TRACE_EVENT(xen_mmu_flush_tlb_one_user,
             TP_PROTO(unsigned long addr),
             TP_ARGS(addr),
diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h

index 74b91151d49463f8773f2e16db4710464b5550b7..bcba72def817ab704c08434b8f080c5113f9d88e 100644 (file)
--- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h
@@ -46,6 +46,9 @@ enum tcp_conntrack {
  /* Marks possibility for expected RFC5961 challenge ACK */
  #define IP_CT_EXP_CHALLENGE_ACK                0x40
  
+/* Simultaneous open initialized */
+#define IP_CT_TCP_SIMULTANEOUS_OPEN            0x80
+
  struct nf_ct_tcp_flags {
         __u8 flags;
         __u8 mask;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h

index 9c3630146cec0f739a0c3cea6ec98e95e69e44bb..271b93783d282d113bddd8a4bc0db780cd52a397 100644 (file)
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2698,7 +2698,7 @@ enum nl80211_attrs {
  #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
  #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
  
-#define NL80211_WIPHY_NAME_MAXLEN              128
+#define NL80211_WIPHY_NAME_MAXLEN              64
  
  #define NL80211_MAX_SUPP_RATES                 32
  #define NL80211_MAX_SUPP_HT_RATES              77
diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h

index b19a9c249b156f5877a0e8651a1d4d2330f6088d..784c2e3e572e013a24aeabf96ef400f453ffb72e 100644 (file)
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -106,7 +106,7 @@ struct pppol2tp_ioc_stats {
  #define PPPIOCGIDLE    _IOR('t', 63, struct ppp_idle) /* get idle time */
  #define PPPIOCNEWUNIT  _IOWR('t', 62, int)     /* create new ppp unit */
  #define PPPIOCATTACH   _IOW('t', 61, int)      /* attach to ppp unit */
-#define PPPIOCDETACH   _IOW('t', 60, int)      /* detach from ppp unit/chan */
+#define PPPIOCDETACH   _IOW('t', 60, int)      /* obsolete, do not use */
  #define PPPIOCSMRRU    _IOW('t', 59, int)      /* set multilink MRU */
  #define PPPIOCCONNECT  _IOW('t', 58, int)      /* connect channel to unit */
  #define PPPIOCDISCONN  _IO('t', 57)            /* disconnect channel */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h

index af5f8c2df87ac51b401acab46c817b649e99893d..db9f15f5db047e643780ce23a0d33f9cc599ddf4 100644 (file)
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
  # define PR_SVE_VL_LEN_MASK            0xffff
  # define PR_SVE_VL_INHERIT             (1 << 17) /* inherit across exec */
  
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL                52
+#define PR_SET_SPECULATION_CTRL                53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS          0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED          0
+# define PR_SPEC_PRCTL                 (1UL << 0)
+# define PR_SPEC_ENABLE                        (1UL << 1)
+# define PR_SPEC_DISABLE               (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE         (1UL << 3)
+
  #endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h

index 2a0bd9dd104dc625f91b7938fa4f128e9a3c4df2..9efc0e73d50bee2416dc966254c9affbcd7ba413 100644 (file)
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,8 +17,9 @@
  #define SECCOMP_GET_ACTION_AVAIL       2
  
  /* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC      1
-#define SECCOMP_FILTER_FLAG_LOG                2
+#define SECCOMP_FILTER_FLAG_TSYNC      (1UL << 0)
+#define SECCOMP_FILTER_FLAG_LOG                (1UL << 1)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
  
  /*
   * All BPF programs must return a 32-bit value.
diff --git a/init/Kconfig b/init/Kconfig

index f013afc74b1139e2fe24f1dcc7df17bb91810ab4..18b151f0ddc1fba93777e1ca9f40b9f0fd072711 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -738,7 +738,7 @@ config CFS_BANDWIDTH
           tasks running within the fair group scheduler.  Groups with no limit
           set are considered to be unconstrained and will run with no
           restriction.
-         See tip/Documentation/scheduler/sched-bwc.txt for more information.
+         See Documentation/scheduler/sched-bwc.txt for more information.
  
  config RT_GROUP_SCHED
         bool "Group scheduling for SCHED_RR/FIFO"
diff --git a/init/main.c b/init/main.c

index fd37315835b4ae18235af293cc689b6cdd055850..3b4ada11ed521a1bb25ead3ca461640ba1026923 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -91,6 +91,7 @@
  #include <linux/cache.h>
  #include <linux/rodata_test.h>
  #include <linux/jump_label.h>
+#include <linux/mem_encrypt.h>
  
  #include <asm/io.h>
  #include <asm/bugs.h>
diff --git a/ipc/shm.c b/ipc/shm.c

index 3cf48988d68cec1e3a899f932de846818d4c729a..d73269381ec7ed831eaa65e544aa8b5ab4e504dd 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1363,14 +1363,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
  
         if (addr) {
                 if (addr & (shmlba - 1)) {
-                       /*
-                        * Round down to the nearest multiple of shmlba.
-                        * For sane do_mmap_pgoff() parameters, avoid
-                        * round downs that trigger nil-page and MAP_FIXED.
-                        */
-                       if ((shmflg & SHM_RND) && addr >= shmlba)
-                               addr &= ~(shmlba - 1);
-                       else
+                       if (shmflg & SHM_RND) {
+                               addr &= ~(shmlba - 1);  /* round down */
+
+                               /*
+                                * Ensure that the round-down is non-nil
+                                * when remapping. This can happen for
+                                * cases when addr < shmlba.
+                                */
+                               if (!addr && (shmflg & SHM_REMAP))
+                                       goto out;
+                       } else
  #ifndef __ARCH_FORCE_SHMLBA
                                 if (addr & ~PAGE_MASK)
  #endif
diff --git a/kernel/Makefile b/kernel/Makefile

index f85ae5dfa474008536c4ec3e088ffb5591c41a21..9b924136131158b489e8b9b91b28a37eabcd17d1 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -112,7 +112,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
  obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
  obj-$(CONFIG_TORTURE_TEST) += torture.o
  
-obj-$(CONFIG_HAS_IOMEM) += memremap.o
+obj-$(CONFIG_HAS_IOMEM) += iomem.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o
  
  $(obj)/configs.o: $(obj)/config_data.h
  
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index ba03ec39efb399846c375c1fcf2b93dfd2fd8a1d..6ef6746a7871b32829158d6833a464cc2efa6195 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -218,47 +218,84 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
         return 0;
  }
  
-static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
+                               u32 curr, const bool probe_pass)
  {
+       const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+       s64 imm = insn->imm;
+
+       if (curr < pos && curr + imm + 1 > pos)
+               imm += delta;
+       else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
+               imm -= delta;
+       if (imm < imm_min || imm > imm_max)
+               return -ERANGE;
+       if (!probe_pass)
+               insn->imm = imm;
+       return 0;
+}
+
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
+                               u32 curr, const bool probe_pass)
+{
+       const s32 off_min = S16_MIN, off_max = S16_MAX;
+       s32 off = insn->off;
+
+       if (curr < pos && curr + off + 1 > pos)
+               off += delta;
+       else if (curr > pos + delta && curr + off + 1 <= pos + delta)
+               off -= delta;
+       if (off < off_min || off > off_max)
+               return -ERANGE;
+       if (!probe_pass)
+               insn->off = off;
+       return 0;
+}
+
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
+                           const bool probe_pass)
+{
+       u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
         struct bpf_insn *insn = prog->insnsi;
-       u32 i, insn_cnt = prog->len;
-       bool pseudo_call;
-       u8 code;
-       int off;
+       int ret = 0;
  
         for (i = 0; i < insn_cnt; i++, insn++) {
+               u8 code;
+
+               /* In the probing pass we still operate on the original,
+                * unpatched image in order to check overflows before we
+                * do any other adjustments. Therefore skip the patchlet.
+                */
+               if (probe_pass && i == pos) {
+                       i += delta + 1;
+                       insn++;
+               }
                 code = insn->code;
-               if (BPF_CLASS(code) != BPF_JMP)
-                       continue;
-               if (BPF_OP(code) == BPF_EXIT)
+               if (BPF_CLASS(code) != BPF_JMP ||
+                   BPF_OP(code) == BPF_EXIT)
                         continue;
+               /* Adjust offset of jmps if we cross patch boundaries. */
                 if (BPF_OP(code) == BPF_CALL) {
-                       if (insn->src_reg == BPF_PSEUDO_CALL)
-                               pseudo_call = true;
-                       else
+                       if (insn->src_reg != BPF_PSEUDO_CALL)
                                 continue;
+                       ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
+                                                  probe_pass);
                 } else {
-                       pseudo_call = false;
+                       ret = bpf_adj_delta_to_off(insn, pos, delta, i,
+                                                  probe_pass);
                 }
-               off = pseudo_call ? insn->imm : insn->off;
-
-               /* Adjust offset of jmps if we cross boundaries. */
-               if (i < pos && i + off + 1 > pos)
-                       off += delta;
-               else if (i > pos + delta && i + off + 1 <= pos + delta)
-                       off -= delta;
-
-               if (pseudo_call)
-                       insn->imm = off;
-               else
-                       insn->off = off;
+               if (ret)
+                       break;
         }
+
+       return ret;
  }
  
  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                        const struct bpf_insn *patch, u32 len)
  {
         u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
+       const u32 cnt_max = S16_MAX;
         struct bpf_prog *prog_adj;
  
         /* Since our patchlet doesn't expand the image, we're done. */
@@ -269,6 +306,15 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
  
         insn_adj_cnt = prog->len + insn_delta;
  
+       /* Reject anything that would potentially let the insn->off
+        * target overflow when we have excessive program expansions.
+        * We need to probe here before we do any reallocation where
+        * we afterwards may not fail anymore.
+        */
+       if (insn_adj_cnt > cnt_max &&
+           bpf_adj_branches(prog, off, insn_delta, true))
+               return NULL;
+
         /* Several new instructions need to be inserted. Make room
          * for them. Likely, there's no need for a new allocation as
          * last page could have large enough tailroom.
@@ -294,7 +340,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                 sizeof(*patch) * insn_rest);
         memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
  
-       bpf_adj_branches(prog_adj, off, insn_delta);
+       /* We are guaranteed to not fail at this point; otherwise it
+        * would be too late to revert to the original state. An
+        * overflow cannot happen at this point.
+        */
+       BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
  
         return prog_adj;
  }
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c

index 098eca568c2bb7dfef746e81bc3568a5da36256e..95a84b2f10cecf5c6bea495e3c07e3ffcefec82d 100644 (file)
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1703,11 +1703,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                  * we increment the refcnt. If this is the case abort with an
                  * error.
                  */
-               verdict = bpf_prog_inc_not_zero(stab->bpf_verdict);
+               verdict = bpf_prog_inc_not_zero(verdict);
                 if (IS_ERR(verdict))
                         return PTR_ERR(verdict);
  
-               parse = bpf_prog_inc_not_zero(stab->bpf_parse);
+               parse = bpf_prog_inc_not_zero(parse);
                 if (IS_ERR(parse)) {
                         bpf_prog_put(verdict);
                         return PTR_ERR(parse);
@@ -1715,12 +1715,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
         }
  
         if (tx_msg) {
-               tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+               tx_msg = bpf_prog_inc_not_zero(tx_msg);
                 if (IS_ERR(tx_msg)) {
-                       if (verdict)
-                               bpf_prog_put(verdict);
-                       if (parse)
+                       if (parse && verdict) {
                                 bpf_prog_put(parse);
+                               bpf_prog_put(verdict);
+                       }
                         return PTR_ERR(tx_msg);
                 }
         }
@@ -1805,10 +1805,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
  out_free:
         smap_release_sock(psock, sock);
  out_progs:
-       if (verdict)
-               bpf_prog_put(verdict);
-       if (parse)
+       if (parse && verdict) {
                 bpf_prog_put(parse);
+               bpf_prog_put(verdict);
+       }
         if (tx_msg)
                 bpf_prog_put(tx_msg);
         write_unlock_bh(&sock->sk_callback_lock);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 5dd1dcb902bf445ba50df106ee4aeb129164a4ac..1904e814f2828c01027c3c6b5002198b5233703b 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -156,7 +156,29 @@ struct bpf_verifier_stack_elem {
  #define BPF_COMPLEXITY_LIMIT_INSNS     131072
  #define BPF_COMPLEXITY_LIMIT_STACK     1024
  
-#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
+#define BPF_MAP_PTR_UNPRIV     1UL
+#define BPF_MAP_PTR_POISON     ((void *)((0xeB9FUL << 1) +     \
+                                         POISON_POINTER_DELTA))
+#define BPF_MAP_PTR(X)         ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
+
+static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
+{
+       return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+}
+
+static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
+{
+       return aux->map_state & BPF_MAP_PTR_UNPRIV;
+}
+
+static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
+                             const struct bpf_map *map, bool unpriv)
+{
+       BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
+       unpriv |= bpf_map_ptr_unpriv(aux);
+       aux->map_state = (unsigned long)map |
+                        (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+}
  
  struct bpf_call_arg_meta {
         struct bpf_map *map_ptr;
@@ -978,7 +1000,7 @@ static bool register_is_null(struct bpf_reg_state *reg)
   */
  static int check_stack_write(struct bpf_verifier_env *env,
                              struct bpf_func_state *state, /* func where register points to */
-                            int off, int size, int value_regno)
+                            int off, int size, int value_regno, int insn_idx)
  {
         struct bpf_func_state *cur; /* state of the current function */
         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
@@ -1017,8 +1039,33 @@ static int check_stack_write(struct bpf_verifier_env *env,
                 state->stack[spi].spilled_ptr = cur->regs[value_regno];
                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
  
-               for (i = 0; i < BPF_REG_SIZE; i++)
+               for (i = 0; i < BPF_REG_SIZE; i++) {
+                       if (state->stack[spi].slot_type[i] == STACK_MISC &&
+                           !env->allow_ptr_leaks) {
+                               int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+                               int soff = (-spi - 1) * BPF_REG_SIZE;
+
+                               /* Detected reuse of an integer stack slot with a pointer,
+                                * which means either llvm is reusing the stack slot or
+                                * an attacker is trying to exploit CVE-2018-3639
+                                * (speculative store bypass).
+                                * Have to sanitize that slot with a preemptive
+                                * store of zero.
+                                */
+                               if (*poff && *poff != soff) {
+                                       /* disallow programs where single insn stores
+                                        * into two different stack slots, since verifier
+                                        * cannot sanitize them
+                                        */
+                                       verbose(env,
+                                               "insn %d cannot access two stack slots fp%d and fp%d",
+                                               insn_idx, *poff, soff);
+                                       return -EINVAL;
+                               }
+                               *poff = soff;
+                       }
                         state->stack[spi].slot_type[i] = STACK_SPILL;
+               }
         } else {
                 u8 type = STACK_MISC;
  
@@ -1694,7 +1741,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
  
                 if (t == BPF_WRITE)
                         err = check_stack_write(env, state, off, size,
-                                               value_regno);
+                                               value_regno, insn_idx);
                 else
                         err = check_stack_read(env, state, off, size,
                                                value_regno);
@@ -2333,6 +2380,29 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
         return 0;
  }
  
+static int
+record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+               int func_id, int insn_idx)
+{
+       struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+
+       if (func_id != BPF_FUNC_tail_call &&
+           func_id != BPF_FUNC_map_lookup_elem)
+               return 0;
+       if (meta->map_ptr == NULL) {
+               verbose(env, "kernel subsystem misconfigured verifier\n");
+               return -EINVAL;
+       }
+
+       if (!BPF_MAP_PTR(aux->map_state))
+               bpf_map_ptr_store(aux, meta->map_ptr,
+                                 meta->map_ptr->unpriv_array);
+       else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
+               bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
+                                 meta->map_ptr->unpriv_array);
+       return 0;
+}
+
  static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
  {
         const struct bpf_func_proto *fn = NULL;
@@ -2387,13 +2457,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
         if (err)
                 return err;
-       if (func_id == BPF_FUNC_tail_call) {
-               if (meta.map_ptr == NULL) {
-                       verbose(env, "verifier bug\n");
-                       return -EINVAL;
-               }
-               env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
-       }
         err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
         if (err)
                 return err;
@@ -2404,6 +2467,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         if (err)
                 return err;
  
+       err = record_func_map(env, &meta, func_id, insn_idx);
+       if (err)
+               return err;
+
         /* Mark slots with STACK_MISC in case of raw mode, stack offset
          * is inferred from register state.
          */
@@ -2428,8 +2495,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         } else if (fn->ret_type == RET_VOID) {
                 regs[BPF_REG_0].type = NOT_INIT;
         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
-               struct bpf_insn_aux_data *insn_aux;
-
                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
                 /* There is no offset yet applied, variable or fixed */
                 mark_reg_known_zero(env, regs, BPF_REG_0);
@@ -2445,11 +2510,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                 }
                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
                 regs[BPF_REG_0].id = ++env->id_gen;
-               insn_aux = &env->insn_aux_data[insn_idx];
-               if (!insn_aux->map_ptr)
-                       insn_aux->map_ptr = meta.map_ptr;
-               else if (insn_aux->map_ptr != meta.map_ptr)
-                       insn_aux->map_ptr = BPF_MAP_PTR_POISON;
         } else {
                 verbose(env, "unknown return type %d of func %s#%d\n",
                         fn->ret_type, func_id_name(func_id), func_id);
@@ -5169,6 +5229,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                 else
                         continue;
  
+               if (type == BPF_WRITE &&
+                   env->insn_aux_data[i + delta].sanitize_stack_off) {
+                       struct bpf_insn patch[] = {
+                               /* Sanitize suspicious stack slot with zero.
+                                * There are no memory dependencies for this store,
+                                * since it's only using frame pointer and immediate
+                                * constant of zero
+                                */
+                               BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+                                          env->insn_aux_data[i + delta].sanitize_stack_off,
+                                          0),
+                               /* the original STX instruction will immediately
+                                * overwrite the same stack slot with appropriate value
+                                */
+                               *insn,
+                       };
+
+                       cnt = ARRAY_SIZE(patch);
+                       new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
+
                 if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
                         continue;
  
@@ -5417,6 +5505,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
         struct bpf_insn *insn = prog->insnsi;
         const struct bpf_func_proto *fn;
         const int insn_cnt = prog->len;
+       struct bpf_insn_aux_data *aux;
         struct bpf_insn insn_buf[16];
         struct bpf_prog *new_prog;
         struct bpf_map *map_ptr;
@@ -5491,19 +5580,22 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                         insn->imm = 0;
                         insn->code = BPF_JMP | BPF_TAIL_CALL;
  
+                       aux = &env->insn_aux_data[i + delta];
+                       if (!bpf_map_ptr_unpriv(aux))
+                               continue;
+
                         /* instead of changing every JIT dealing with tail_call
                          * emit two extra insns:
                          * if (index >= max_entries) goto out;
                          * index &= array->index_mask;
                          * to avoid out-of-bounds cpu speculation
                          */
-                       map_ptr = env->insn_aux_data[i + delta].map_ptr;
-                       if (map_ptr == BPF_MAP_PTR_POISON) {
+                       if (bpf_map_ptr_poisoned(aux)) {
                                 verbose(env, "tail_call abusing map_ptr\n");
                                 return -EINVAL;
                         }
-                       if (!map_ptr->unpriv_array)
-                               continue;
+
+                       map_ptr = BPF_MAP_PTR(aux->map_state);
                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
                                                   map_ptr->max_entries, 2);
                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
@@ -5527,9 +5619,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                  */
                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
                     insn->imm == BPF_FUNC_map_lookup_elem) {
-                       map_ptr = env->insn_aux_data[i + delta].map_ptr;
-                       if (map_ptr == BPF_MAP_PTR_POISON ||
-                           !map_ptr->ops->map_gen_lookup)
+                       aux = &env->insn_aux_data[i + delta];
+                       if (bpf_map_ptr_poisoned(aux))
+                               goto patch_call_imm;
+
+                       map_ptr = BPF_MAP_PTR(aux->map_state);
+                       if (!map_ptr->ops->map_gen_lookup)
                                 goto patch_call_imm;
  
                         cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
diff --git a/kernel/iomem.c b/kernel/iomem.c

new file mode 100644 (file)

index 0000000..f7525e1
--- /dev/null
+++ b/kernel/iomem.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#ifndef ioremap_cache
+/* temporary while we convert existing ioremap_cache users to memremap */
+__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
+{
+       return ioremap(offset, size);
+}
+#endif
+
+#ifndef arch_memremap_wb
+static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
+{
+       return (__force void *)ioremap_cache(offset, size);
+}
+#endif
+
+#ifndef arch_memremap_can_ram_remap
+static bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                       unsigned long flags)
+{
+       return true;
+}
+#endif
+
+static void *try_ram_remap(resource_size_t offset, size_t size,
+                          unsigned long flags)
+{
+       unsigned long pfn = PHYS_PFN(offset);
+
+       /* In the simple case just return the existing linear address */
+       if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
+           arch_memremap_can_ram_remap(offset, size, flags))
+               return __va(offset);
+
+       return NULL; /* fallback to arch_memremap_wb */
+}
+
+/**
+ * memremap() - remap an iomem_resource as cacheable memory
+ * @offset: iomem resource start address
+ * @size: size of remap
+ * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC,
+ *               MEMREMAP_ENC, MEMREMAP_DEC
+ *
+ * memremap() is "ioremap" for cases where it is known that the resource
+ * being mapped does not have i/o side effects and the __iomem
+ * annotation is not applicable. In the case of multiple flags, the different
+ * mapping types will be attempted in the order listed below until one of
+ * them succeeds.
+ *
+ * MEMREMAP_WB - matches the default mapping for System RAM on
+ * the architecture.  This is usually a read-allocate write-back cache.
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is RAM
+ * memremap() will bypass establishing a new mapping and instead return
+ * a pointer into the direct map.
+ *
+ * MEMREMAP_WT - establish a mapping whereby writes either bypass the
+ * cache or are written through to memory and never exist in a
+ * cache-dirty state with respect to program visibility.  Attempts to
+ * map System RAM with this mapping type will fail.
+ *
+ * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
+ * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
+ * uncached. Attempts to map System RAM with this mapping type will fail.
+ */
+void *memremap(resource_size_t offset, size_t size, unsigned long flags)
+{
+       int is_ram = region_intersects(offset, size,
+                                      IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
+       void *addr = NULL;
+
+       if (!flags)
+               return NULL;
+
+       if (is_ram == REGION_MIXED) {
+               WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
+                               &offset, (unsigned long) size);
+               return NULL;
+       }
+
+       /* Try all mapping types requested until one returns non-NULL */
+       if (flags & MEMREMAP_WB) {
+               /*
+                * MEMREMAP_WB is special in that it can be satisfied
+                * from the direct map.  Some archs depend on the
+                * capability of memremap() to autodetect cases where
+                * the requested range is potentially in System RAM.
+                */
+               if (is_ram == REGION_INTERSECTS)
+                       addr = try_ram_remap(offset, size, flags);
+               if (!addr)
+                       addr = arch_memremap_wb(offset, size);
+       }
+
+       /*
+        * If we don't have a mapping yet and other request flags are
+        * present then we will be attempting to establish a new virtual
+        * address mapping.  Enforce that this mapping is not aliasing
+        * System RAM.
+        */
+       if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
+               WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
+                               &offset, (unsigned long) size);
+               return NULL;
+       }
+
+       if (!addr && (flags & MEMREMAP_WT))
+               addr = ioremap_wt(offset, size);
+
+       if (!addr && (flags & MEMREMAP_WC))
+               addr = ioremap_wc(offset, size);
+
+       return addr;
+}
+EXPORT_SYMBOL(memremap);
+
+void memunmap(void *addr)
+{
+       if (is_vmalloc_addr(addr))
+               iounmap((void __iomem *) addr);
+}
+EXPORT_SYMBOL(memunmap);
+
+static void devm_memremap_release(struct device *dev, void *res)
+{
+       memunmap(*(void **)res);
+}
+
+static int devm_memremap_match(struct device *dev, void *res, void *match_data)
+{
+       return *(void **)res == match_data;
+}
+
+void *devm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags)
+{
+       void **ptr, *addr;
+
+       ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL,
+                       dev_to_node(dev));
+       if (!ptr)
+               return ERR_PTR(-ENOMEM);
+
+       addr = memremap(offset, size, flags);
+       if (addr) {
+               *ptr = addr;
+               devres_add(dev, ptr);
+       } else {
+               devres_free(ptr);
+               return ERR_PTR(-ENXIO);
+       }
+
+       return addr;
+}
+EXPORT_SYMBOL(devm_memremap);
+
+void devm_memunmap(struct device *dev, void *addr)
+{
+       WARN_ON(devres_release(dev, devm_memremap_release,
+                               devm_memremap_match, addr));
+}
+EXPORT_SYMBOL(devm_memunmap);
diff --git a/kernel/kthread.c b/kernel/kthread.c

index 2017a39ab4904e8e2fffd648718aa7d05ecb8932..481951bf091d49fbe4378bb21504b6482e11919f 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
  
  void kthread_park_complete(struct task_struct *k)
  {
-       complete(&to_kthread(k)->parked);
+       complete_all(&to_kthread(k)->parked);
  }
  
  static int kthread(void *_create)
@@ -459,6 +459,7 @@ void kthread_unpark(struct task_struct *k)
         if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
                 __kthread_bind(k, kthread->cpu, TASK_PARKED);
  
+       reinit_completion(&kthread->parked);
         clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
         wake_up_state(k, TASK_PARKED);
  }
@@ -483,9 +484,6 @@ int kthread_park(struct task_struct *k)
         if (WARN_ON(k->flags & PF_EXITING))
                 return -ENOSYS;
  
-       if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
-               return -EBUSY;
-
         set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
         if (k != current) {
                 wake_up_process(k);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c

index e795908f36070dd33ed94630bb63b188065f21ca..a903367793758f3e1cc52ab34c18f1bfa78f38e3 100644 (file)
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -352,16 +352,15 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
         struct task_struct *owner;
         bool ret = true;
  
+       BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
+
         if (need_resched())
                 return false;
  
         rcu_read_lock();
         owner = READ_ONCE(sem->owner);
-       if (!rwsem_owner_is_writer(owner)) {
-               /*
-                * Don't spin if the rwsem is readers owned.
-                */
-               ret = !rwsem_owner_is_reader(owner);
+       if (!owner || !is_rwsem_owner_spinnable(owner)) {
+               ret = !owner;   /* !owner is spinnable */
                 goto done;
         }
  
@@ -382,11 +381,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
  {
         struct task_struct *owner = READ_ONCE(sem->owner);
  
-       if (!rwsem_owner_is_writer(owner))
-               goto out;
+       if (!is_rwsem_owner_spinnable(owner))
+               return false;
  
         rcu_read_lock();
-       while (sem->owner == owner) {
+       while (owner && (READ_ONCE(sem->owner) == owner)) {
                 /*
                  * Ensure we emit the owner->on_cpu, dereference _after_
                  * checking sem->owner still matches owner, if that fails,
@@ -408,12 +407,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
                 cpu_relax();
         }
         rcu_read_unlock();
-out:
+
         /*
          * If there is a new owner or the owner is not set, we continue
          * spinning.
          */
-       return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
+       return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
  }
  
  static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c

index 30465a2f2b6cf8e072ee797d1b58219f0799cb66..bc1e507be9ff7aea311261e78002d53375f9a6d7 100644 (file)
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem)
  EXPORT_SYMBOL(up_read_non_owner);
  
  #endif
-
-
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h

index a17cba8d94bb10b4e3d6d038ea90259349e8a193..b9d0e72aa80f4064542a53854feb55da12a5f960 100644 (file)
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,20 +1,24 @@
  /* SPDX-License-Identifier: GPL-2.0 */
  /*
   * The owner field of the rw_semaphore structure will be set to
- * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
   * the owner field when it unlocks. A reader, on the other hand, will
   * not touch the owner field when it unlocks.
   *
- * In essence, the owner field now has the following 3 states:
+ * In essence, the owner field now has the following 4 states:
   *  1) 0
   *     - lock is free or the owner hasn't set the field yet
   *  2) RWSEM_READER_OWNED
   *     - lock is currently or previously owned by readers (lock is free
   *       or not set by owner yet)
- *  3) Other non-zero value
- *     - a writer owns the lock
+ *  3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
+ *     - lock is owned by an anonymous writer, so spinning on the lock
+ *       owner should be disabled.
+ *  4) Other non-zero value
+ *     - a writer owns the lock and other writers can spin on the lock owner.
   */
-#define RWSEM_READER_OWNED     ((struct task_struct *)1UL)
+#define RWSEM_ANONYMOUSLY_OWNED        (1UL << 0)
+#define RWSEM_READER_OWNED     ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
  
  #ifdef CONFIG_DEBUG_RWSEMS
  # define DEBUG_RWSEMS_WARN_ON(c)       DEBUG_LOCKS_WARN_ON(c)
@@ -51,14 +55,22 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
                 WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
  }
  
-static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+/*
+ * Return true if a rwsem waiter can spin on the rwsem's owner
+ * and steal the lock, i.e. the lock is not anonymously owned.
+ * N.B. !owner is considered spinnable.
+ */
+static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
  {
-       return owner && owner != RWSEM_READER_OWNED;
+       return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
  }
  
-static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+/*
+ * Return true if rwsem is owned by an anonymous writer or readers.
+ */
+static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
  {
-       return owner == RWSEM_READER_OWNED;
+       return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
  }
  #else
  static inline void rwsem_set_owner(struct rw_semaphore *sem)
diff --git a/kernel/memremap.c b/kernel/memremap.c

index 895e6b76b25e0604b980e31fa8dba388b25e6581..5857267a4af5dab0cb0eb85ed9b61cdfcbc57dcc 100644 (file)
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -1,15 +1,5 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
  #include <linux/radix-tree.h>
  #include <linux/device.h>
  #include <linux/types.h>
@@ -19,170 +9,8 @@
  #include <linux/memory_hotplug.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
+#include <linux/wait_bit.h>
  
-#ifndef ioremap_cache
-/* temporary while we convert existing ioremap_cache users to memremap */
-__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
-{
-       return ioremap(offset, size);
-}
-#endif
-
-#ifndef arch_memremap_wb
-static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
-{
-       return (__force void *)ioremap_cache(offset, size);
-}
-#endif
-
-#ifndef arch_memremap_can_ram_remap
-static bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
-                                       unsigned long flags)
-{
-       return true;
-}
-#endif
-
-static void *try_ram_remap(resource_size_t offset, size_t size,
-                          unsigned long flags)
-{
-       unsigned long pfn = PHYS_PFN(offset);
-
-       /* In the simple case just return the existing linear address */
-       if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
-           arch_memremap_can_ram_remap(offset, size, flags))
-               return __va(offset);
-
-       return NULL; /* fallback to arch_memremap_wb */
-}
-
-/**
- * memremap() - remap an iomem_resource as cacheable memory
- * @offset: iomem resource start address
- * @size: size of remap
- * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC,
- *               MEMREMAP_ENC, MEMREMAP_DEC
- *
- * memremap() is "ioremap" for cases where it is known that the resource
- * being mapped does not have i/o side effects and the __iomem
- * annotation is not applicable. In the case of multiple flags, the different
- * mapping types will be attempted in the order listed below until one of
- * them succeeds.
- *
- * MEMREMAP_WB - matches the default mapping for System RAM on
- * the architecture.  This is usually a read-allocate write-back cache.
- * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
- * memremap() will bypass establishing a new mapping and instead return
- * a pointer into the direct map.
- *
- * MEMREMAP_WT - establish a mapping whereby writes either bypass the
- * cache or are written through to memory and never exist in a
- * cache-dirty state with respect to program visibility.  Attempts to
- * map System RAM with this mapping type will fail.
- *
- * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
- * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
- * uncached. Attempts to map System RAM with this mapping type will fail.
- */
-void *memremap(resource_size_t offset, size_t size, unsigned long flags)
-{
-       int is_ram = region_intersects(offset, size,
-                                      IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
-       void *addr = NULL;
-
-       if (!flags)
-               return NULL;
-
-       if (is_ram == REGION_MIXED) {
-               WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
-                               &offset, (unsigned long) size);
-               return NULL;
-       }
-
-       /* Try all mapping types requested until one returns non-NULL */
-       if (flags & MEMREMAP_WB) {
-               /*
-                * MEMREMAP_WB is special in that it can be satisifed
-                * from the direct map.  Some archs depend on the
-                * capability of memremap() to autodetect cases where
-                * the requested range is potentially in System RAM.
-                */
-               if (is_ram == REGION_INTERSECTS)
-                       addr = try_ram_remap(offset, size, flags);
-               if (!addr)
-                       addr = arch_memremap_wb(offset, size);
-       }
-
-       /*
-        * If we don't have a mapping yet and other request flags are
-        * present then we will be attempting to establish a new virtual
-        * address mapping.  Enforce that this mapping is not aliasing
-        * System RAM.
-        */
-       if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
-               WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
-                               &offset, (unsigned long) size);
-               return NULL;
-       }
-
-       if (!addr && (flags & MEMREMAP_WT))
-               addr = ioremap_wt(offset, size);
-
-       if (!addr && (flags & MEMREMAP_WC))
-               addr = ioremap_wc(offset, size);
-
-       return addr;
-}
-EXPORT_SYMBOL(memremap);
-
-void memunmap(void *addr)
-{
-       if (is_vmalloc_addr(addr))
-               iounmap((void __iomem *) addr);
-}
-EXPORT_SYMBOL(memunmap);
-
-static void devm_memremap_release(struct device *dev, void *res)
-{
-       memunmap(*(void **)res);
-}
-
-static int devm_memremap_match(struct device *dev, void *res, void *match_data)
-{
-       return *(void **)res == match_data;
-}
-
-void *devm_memremap(struct device *dev, resource_size_t offset,
-               size_t size, unsigned long flags)
-{
-       void **ptr, *addr;
-
-       ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL,
-                       dev_to_node(dev));
-       if (!ptr)
-               return ERR_PTR(-ENOMEM);
-
-       addr = memremap(offset, size, flags);
-       if (addr) {
-               *ptr = addr;
-               devres_add(dev, ptr);
-       } else {
-               devres_free(ptr);
-               return ERR_PTR(-ENXIO);
-       }
-
-       return addr;
-}
-EXPORT_SYMBOL(devm_memremap);
-
-void devm_memunmap(struct device *dev, void *addr)
-{
-       WARN_ON(devres_release(dev, devm_memremap_release,
-                               devm_memremap_match, addr));
-}
-EXPORT_SYMBOL(devm_memunmap);
-
-#ifdef CONFIG_ZONE_DEVICE
  static DEFINE_MUTEX(pgmap_lock);
  static RADIX_TREE(pgmap_radix, GFP_KERNEL);
  #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
@@ -473,10 +301,32 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
  
         return pgmap;
  }
-#endif /* CONFIG_ZONE_DEVICE */
+EXPORT_SYMBOL_GPL(get_dev_pagemap);
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+       if (atomic_inc_return(&devmap_enable) == 1)
+               static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+       if (atomic_dec_and_test(&devmap_enable))
+               static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
  
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+void __put_devmap_managed_page(struct page *page)
  {
         int count = page_ref_dec_return(page);
  
@@ -496,5 +346,5 @@ void put_zone_device_private_or_public_page(struct page *page)
         } else if (!count)
                 __put_page(page);
  }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/kernel/resource.c b/kernel/resource.c

index 2af6c03858b94eb1e13336fb6fc90a7390bd7efe..b85f59e8a4b84f8ff71d38be18c185bdab33484c 100644 (file)
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -448,6 +448,7 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
  
         return __walk_iomem_res_desc(&res, desc, false, arg, func);
  }
+EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
  
  /*
   * This function calls the @func callback against all memory ranges of type
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index e7b3008b85bb022076efec9a14cc6bc139da424a..1356afd1eeb6d6468522c6e6dd50fb6b36dc5b7d 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1117,7 +1117,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
   * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
   * So, overflow is not an issue here.
   */
-u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
  {
         u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
         u64 u_act;
@@ -2731,8 +2731,6 @@ bool dl_cpu_busy(unsigned int cpu)
  #endif
  
  #ifdef CONFIG_SCHED_DEBUG
-extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
-
  void print_dl_stats(struct seq_file *m, int cpu)
  {
         print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c

index 7aef6b4e885a5e058ce75bdbc4f5bf756a2783b2..ef3c4e6f53457ba52151fe243c5d62c160ecc115 100644 (file)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2701,8 +2701,6 @@ int sched_rr_handler(struct ctl_table *table, int write,
  }
  
  #ifdef CONFIG_SCHED_DEBUG
-extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
-
  void print_rt_stats(struct seq_file *m, int cpu)
  {
         rt_rq_iter_t iter;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 15750c222ca2ad4c7e7e20806de1dd143a099f87..1f0a4bc6a39d2063f618f12e8532362172eb951c 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2025,8 +2025,9 @@ extern bool sched_debug_enabled;
  extern void print_cfs_stats(struct seq_file *m, int cpu);
  extern void print_rt_stats(struct seq_file *m, int cpu);
  extern void print_dl_stats(struct seq_file *m, int cpu);
-extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
  #ifdef CONFIG_NUMA_BALANCING
  extern void
  show_numa_stats(struct task_struct *p, struct seq_file *m);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c

index 64cc564f52556ba58b4bc6edc907c0e7d124ec52..61a1125c1ae4224e8f69938406f0d08ad47519b7 100644 (file)
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1708,7 +1708,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
         rcu_read_unlock();
  
         if (rq && sched_debug_enabled) {
-               pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
+               pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
                         cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
         }
  
diff --git a/kernel/seccomp.c b/kernel/seccomp.c

index dc77548167ef0993487a61bcf6cbbabb2f6f2434..e691d9a6c58d1b629a390cf9e5da6c374c877873 100644 (file)
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -19,6 +19,8 @@
  #include <linux/compat.h>
  #include <linux/coredump.h>
  #include <linux/kmemleak.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
  #include <linux/sched.h>
  #include <linux/sched/task_stack.h>
  #include <linux/seccomp.h>
@@ -227,8 +229,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
         return true;
  }
  
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
+
  static inline void seccomp_assign_mode(struct task_struct *task,
-                                      unsigned long seccomp_mode)
+                                      unsigned long seccomp_mode,
+                                      unsigned long flags)
  {
         assert_spin_locked(&task->sighand->siglock);
  
@@ -238,6 +243,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
          * filter) is set.
          */
         smp_mb__before_atomic();
+       /* Assume default seccomp processes want spec flaw mitigation. */
+       if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+               arch_seccomp_spec_mitigate(task);
         set_tsk_thread_flag(task, TIF_SECCOMP);
  }
  
@@ -305,7 +313,7 @@ static inline pid_t seccomp_can_sync_threads(void)
   * without dropping the locks.
   *
   */
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
  {
         struct task_struct *thread, *caller;
  
@@ -346,7 +354,8 @@ static inline void seccomp_sync_threads(void)
                  * allow one thread to transition the other.
                  */
                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
-                       seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+                       seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+                                           flags);
         }
  }
  
@@ -469,7 +478,7 @@ static long seccomp_attach_filter(unsigned int flags,
  
         /* Now that the new filter is in place, synchronize to all threads. */
         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-               seccomp_sync_threads();
+               seccomp_sync_threads(flags);
  
         return 0;
  }
@@ -818,7 +827,7 @@ static long seccomp_set_mode_strict(void)
  #ifdef TIF_NOTSC
         disable_TSC();
  #endif
-       seccomp_assign_mode(current, seccomp_mode);
+       seccomp_assign_mode(current, seccomp_mode, 0);
         ret = 0;
  
  out:
@@ -876,7 +885,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
         /* Do not free the successfully attached filter. */
         prepared = NULL;
  
-       seccomp_assign_mode(current, seccomp_mode);
+       seccomp_assign_mode(current, seccomp_mode, flags);
  out:
         spin_unlock_irq(&current->sighand->siglock);
         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
diff --git a/kernel/sys.c b/kernel/sys.c

index ad692183dfe9327ca3fd9a35de137240f3f28b78..d1b2b8d934bb7c660674583b5bd09d28c9f22167 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -61,6 +61,8 @@
  #include <linux/uidgid.h>
  #include <linux/cred.h>
  
+#include <linux/nospec.h>
+
  #include <linux/kmsg_dump.h>
  /* Move somewhere else to avoid recompiling? */
  #include <generated/utsrelease.h>
@@ -69,6 +71,9 @@
  #include <asm/io.h>
  #include <asm/unistd.h>
  
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
  #include "uid16.h"
  
  #ifndef SET_UNALIGN_CTL
@@ -1451,6 +1456,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
         if (resource >= RLIM_NLIMITS)
                 return -EINVAL;
  
+       resource = array_index_nospec(resource, RLIM_NLIMITS);
         task_lock(current->group_leader);
         x = current->signal->rlim[resource];
         task_unlock(current->group_leader);
@@ -1470,6 +1476,7 @@ COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
         if (resource >= RLIM_NLIMITS)
                 return -EINVAL;
  
+       resource = array_index_nospec(resource, RLIM_NLIMITS);
         task_lock(current->group_leader);
         r = current->signal->rlim[resource];
         task_unlock(current->group_leader);
@@ -2242,6 +2249,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
         return 1;
  }
  
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+       return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+                                   unsigned long ctrl)
+{
+       return -EINVAL;
+}
+
  SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                 unsigned long, arg4, unsigned long, arg5)
  {
@@ -2450,6 +2468,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
         case PR_SVE_GET_VL:
                 error = SVE_GET_VL();
                 break;
+       case PR_GET_SPECULATION_CTRL:
+               if (arg3 || arg4 || arg5)
+                       return -EINVAL;
+               error = arch_prctl_spec_ctrl_get(me, arg2);
+               break;
+       case PR_SET_SPECULATION_CTRL:
+               if (arg4 || arg5)
+                       return -EINVAL;
+               error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
+               break;
         default:
                 error = -EINVAL;
                 break;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c

index b398c2ea69b290cdaec1769b7d11cbc501646652..aa2094d5dd275372f999d051887aec22bfab9f19 100644 (file)
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
         now = ktime_get();
         /* Find all expired events */
         for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
+               /*
+                * Required for !SMP because for_each_cpu() unconditionally
+                * reports CPU0 as set on UP kernels.
+                */
+               if (!IS_ENABLED(CONFIG_SMP) &&
+                   cpumask_empty(tick_broadcast_oneshot_mask))
+                       break;
+
                 td = &per_cpu(tick_cpu_device, cpu);
                 if (td->evtdev->next_event <= now) {
                         cpumask_set_cpu(cpu, tmpmask);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c

index 70ebc8ede143f7599b482cc4e0704fde5ef99188..7e43cd54c84ca3da2d77b02e7112c69386428a2b 100644 (file)
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1073,7 +1073,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
  }
  EXPORT_SYMBOL(iov_iter_gap_alignment);
  
-static inline size_t __pipe_get_pages(struct iov_iter *i,
+static inline ssize_t __pipe_get_pages(struct iov_iter *i,
                                 size_t maxsize,
                                 struct page **pages,
                                 int idx,
@@ -1163,7 +1163,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
                    size_t *start)
  {
         struct page **p;
-       size_t n;
+       ssize_t n;
         int idx;
         int npages;
  
diff --git a/lib/radix-tree.c b/lib/radix-tree.c

index da9e10c827df554040b8c5eac71badbd2b588a95..a9e41aed6de4bfb063af9d16d41942a041a23fdd 100644 (file)
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter,
  static void __rcu **skip_siblings(struct radix_tree_node **nodep,
                         void __rcu **slot, struct radix_tree_iter *iter)
  {
-       void *sib = node_to_entry(slot - 1);
-
         while (iter->index < iter->next_index) {
                 *nodep = rcu_dereference_raw(*slot);
-               if (*nodep && *nodep != sib)
+               if (*nodep && !is_sibling_entry(iter->node, *nodep))
                         return slot;
                 slot++;
                 iter->index = __radix_tree_iter_add(iter, 1);
@@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot,
                                 struct radix_tree_iter *iter, unsigned flags)
  {
         unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
-       struct radix_tree_node *node = rcu_dereference_raw(*slot);
+       struct radix_tree_node *node;
  
         slot = skip_siblings(&node, slot, iter);
  
@@ -2036,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
                              unsigned long index, void *item)
  {
         struct radix_tree_node *node = NULL;
-       void __rcu **slot;
+       void __rcu **slot = NULL;
         void *entry;
  
         entry = __radix_tree_lookup(root, index, &node, &slot);
+       if (!slot)
+               return NULL;
         if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
                                                 get_slot_offset(node, slot))))
                 return NULL;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c

index de16f7869fb19b72b6db134fbc13f8d3b4495f90..6cd7d0740005954ed37f33617298faee60816386 100644 (file)
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -331,23 +331,32 @@ static void noinline __init test_mem_optimisations(void)
         unsigned int start, nbits;
  
         for (start = 0; start < 1024; start += 8) {
-               memset(bmap1, 0x5a, sizeof(bmap1));
-               memset(bmap2, 0x5a, sizeof(bmap2));
                 for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+                       memset(bmap1, 0x5a, sizeof(bmap1));
+                       memset(bmap2, 0x5a, sizeof(bmap2));
+
                         bitmap_set(bmap1, start, nbits);
                         __bitmap_set(bmap2, start, nbits);
-                       if (!bitmap_equal(bmap1, bmap2, 1024))
+                       if (!bitmap_equal(bmap1, bmap2, 1024)) {
                                 printk("set not equal %d %d\n", start, nbits);
-                       if (!__bitmap_equal(bmap1, bmap2, 1024))
+                               failed_tests++;
+                       }
+                       if (!__bitmap_equal(bmap1, bmap2, 1024)) {
                                 printk("set not __equal %d %d\n", start, nbits);
+                               failed_tests++;
+                       }
  
                         bitmap_clear(bmap1, start, nbits);
                         __bitmap_clear(bmap2, start, nbits);
-                       if (!bitmap_equal(bmap1, bmap2, 1024))
+                       if (!bitmap_equal(bmap1, bmap2, 1024)) {
                                 printk("clear not equal %d %d\n", start, nbits);
-                       if (!__bitmap_equal(bmap1, bmap2, 1024))
+                               failed_tests++;
+                       }
+                       if (!__bitmap_equal(bmap1, bmap2, 1024)) {
                                 printk("clear not __equal %d %d\n", start,
                                                                         nbits);
+                               failed_tests++;
+                       }
                 }
         }
  }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c

index 30c0cb8cc9bce78089cb6ad48bcb6b3d5d02e6b2..23920c5ff72859c79f4f50e8e267b56c8f230c7c 100644 (file)
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1669,19 +1669,22 @@ char *pointer_string(char *buf, char *end, const void *ptr,
         return number(buf, end, (unsigned long int)ptr, spec);
  }
  
-static bool have_filled_random_ptr_key __read_mostly;
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
  static siphash_key_t ptr_key __read_mostly;
  
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static void enable_ptr_key_workfn(struct work_struct *work)
  {
         get_random_bytes(&ptr_key, sizeof(ptr_key));
-       /*
-        * have_filled_random_ptr_key==true is dependent on get_random_bytes().
-        * ptr_to_id() needs to see have_filled_random_ptr_key==true
-        * after get_random_bytes() returns.
-        */
-       smp_mb();
-       WRITE_ONCE(have_filled_random_ptr_key, true);
+       /* Needs to run from preemptible context */
+       static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+       /* This may be in an interrupt handler. */
+       queue_work(system_unbound_wq, &enable_ptr_key_work);
  }
  
  static struct random_ready_callback random_ready = {
@@ -1695,7 +1698,8 @@ static int __init initialize_ptr_random(void)
         if (!ret) {
                 return 0;
         } else if (ret == -EALREADY) {
-               fill_random_ptr_key(&random_ready);
+               /* This is in preemptible context */
+               enable_ptr_key_workfn(&enable_ptr_key_work);
                 return 0;
         }
  
@@ -1709,7 +1713,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
         unsigned long hashval;
         const int default_width = 2 * sizeof(ptr);
  
-       if (unlikely(!have_filled_random_ptr_key)) {
+       if (static_branch_unlikely(&not_filled_random_ptr_key)) {
                 spec.field_width = default_width;
                 /* string length must be less than default_width */
                 return string(buf, end, "(ptrval)", spec);
diff --git a/mm/Kconfig b/mm/Kconfig

index d5004d82a1d6d7f9b7ec8bc994bd9320f7ea8ced..5f39bca5d82b005d9cb12ba7d5df10fce2b2881d 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -636,6 +636,7 @@ config DEFERRED_STRUCT_PAGE_INIT
         default n
         depends on NO_BOOTMEM
         depends on !FLATMEM
+       depends on !NEED_PER_CPU_KM
         help
           Ordinarily all struct pages are initialised during early boot in a
           single thread. On very large machines this can take a considerable
@@ -692,6 +693,9 @@ config ARCH_HAS_HMM
  config MIGRATE_VMA_HELPER
         bool
  
+config DEV_PAGEMAP_OPS
+       bool
+
  config HMM
         bool
         select MIGRATE_VMA_HELPER
@@ -712,6 +716,7 @@ config DEVICE_PRIVATE
         bool "Unaddressable device memory (GPU memory, ...)"
         depends on ARCH_HAS_HMM
         select HMM
+       select DEV_PAGEMAP_OPS
  
         help
           Allows creation of struct pages to represent unaddressable device
@@ -722,6 +727,7 @@ config DEVICE_PUBLIC
         bool "Addressable device memory (like GPU memory)"
         depends on ARCH_HAS_HMM
         select HMM
+       select DEV_PAGEMAP_OPS
  
         help
           Allows creation of struct pages to represent addressable device
diff --git a/mm/cma.c b/mm/cma.c

index aa40e6c7b042e95f5fb30a24cb05014c55ce9152..5809bbe360d7fb724a435309e6e693a98f9efbfb 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -39,7 +39,6 @@
  #include <trace/events/cma.h>
  
  #include "cma.h"
-#include "internal.h"
  
  struct cma cma_areas[MAX_CMA_AREAS];
  unsigned cma_area_count;
@@ -110,25 +109,23 @@ static int __init cma_activate_area(struct cma *cma)
         if (!cma->bitmap)
                 return -ENOMEM;
  
+       WARN_ON_ONCE(!pfn_valid(pfn));
+       zone = page_zone(pfn_to_page(pfn));
+
         do {
                 unsigned j;
  
                 base_pfn = pfn;
-               if (!pfn_valid(base_pfn))
-                       goto err;
-
-               zone = page_zone(pfn_to_page(base_pfn));
                 for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       if (!pfn_valid(pfn))
-                               goto err;
-
+                       WARN_ON_ONCE(!pfn_valid(pfn));
                         /*
-                        * In init_cma_reserved_pageblock(), present_pages
-                        * is adjusted with assumption that all pages in
-                        * the pageblock come from a single zone.
+                        * alloc_contig_range requires the pfn range
+                        * specified to be in the same zone. Make this
+                        * simple by forcing the entire CMA resv range
+                        * to be in the same zone.
                          */
                         if (page_zone(pfn_to_page(pfn)) != zone)
-                               goto err;
+                               goto not_in_zone;
                 }
                 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
         } while (--i);
@@ -142,7 +139,7 @@ static int __init cma_activate_area(struct cma *cma)
  
         return 0;
  
-err:
+not_in_zone:
         pr_err("CMA area %s could not be activated\n", cma->name);
         kfree(cma->bitmap);
         cma->count = 0;
@@ -152,41 +149,6 @@ err:
  static int __init cma_init_reserved_areas(void)
  {
         int i;
-       struct zone *zone;
-       pg_data_t *pgdat;
-
-       if (!cma_area_count)
-               return 0;
-
-       for_each_online_pgdat(pgdat) {
-               unsigned long start_pfn = UINT_MAX, end_pfn = 0;
-
-               zone = &pgdat->node_zones[ZONE_MOVABLE];
-
-               /*
-                * In this case, we cannot adjust the zone range
-                * since it is now maximum node span and we don't
-                * know original zone range.
-                */
-               if (populated_zone(zone))
-                       continue;
-
-               for (i = 0; i < cma_area_count; i++) {
-                       if (pfn_to_nid(cma_areas[i].base_pfn) !=
-                               pgdat->node_id)
-                               continue;
-
-                       start_pfn = min(start_pfn, cma_areas[i].base_pfn);
-                       end_pfn = max(end_pfn, cma_areas[i].base_pfn +
-                                               cma_areas[i].count);
-               }
-
-               if (!end_pfn)
-                       continue;
-
-               zone->zone_start_pfn = start_pfn;
-               zone->spanned_pages = end_pfn - start_pfn;
-       }
  
         for (i = 0; i < cma_area_count; i++) {
                 int ret = cma_activate_area(&cma_areas[i]);
@@ -195,32 +157,9 @@ static int __init cma_init_reserved_areas(void)
                         return ret;
         }
  
-       /*
-        * Reserved pages for ZONE_MOVABLE are now activated and
-        * this would change ZONE_MOVABLE's managed page counter and
-        * the other zones' present counter. We need to re-calculate
-        * various zone information that depends on this initialization.
-        */
-       build_all_zonelists(NULL);
-       for_each_populated_zone(zone) {
-               if (zone_idx(zone) == ZONE_MOVABLE) {
-                       zone_pcp_reset(zone);
-                       setup_zone_pageset(zone);
-               } else
-                       zone_pcp_update(zone);
-
-               set_zone_contiguous(zone);
-       }
-
-       /*
-        * We need to re-init per zone wmark by calling
-        * init_per_zone_wmark_min() but doesn't call here because it is
-        * registered on core_initcall and it will be called later than us.
-        */
-
         return 0;
  }
-pure_initcall(cma_init_reserved_areas);
+core_initcall(cma_init_reserved_areas);
  
  /**
   * cma_init_reserved_mem() - create custom contiguous area from reserved memory
diff --git a/mm/compaction.c b/mm/compaction.c

index 028b7210a669636bf9ce58bdd67bde71cf23bc6c..29bd1df18b98aff1d4a2a59c9253d5734c32bb20 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1450,12 +1450,14 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
          * if compaction succeeds.
          * For costly orders, we require low watermark instead of min for
          * compaction to proceed to increase its chances.
+        * ALLOC_CMA is used, as pages in CMA pageblocks are considered
+        * suitable migration targets
          */
         watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
                                 low_wmark_pages(zone) : min_wmark_pages(zone);
         watermark += compact_gap(order);
         if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
-                                               0, wmark_target))
+                                               ALLOC_CMA, wmark_target))
                 return COMPACT_SKIPPED;
  
         return COMPACT_CONTINUE;
diff --git a/mm/gup.c b/mm/gup.c

index 76af4cfeaf68149f365cf9b29bef64ebe6f1474b..3d8472d48a0b88366e6e60e3c17fee1fe0dae7d3 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -544,6 +544,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
         if (vm_flags & (VM_IO | VM_PFNMAP))
                 return -EFAULT;
  
+       if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
+               return -EFAULT;
+
         if (write) {
                 if (!(vm_flags & VM_WRITE)) {
                         if (!(gup_flags & FOLL_FORCE))
@@ -1456,32 +1459,48 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
         return 1;
  }
  
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                 unsigned long end, struct page **pages, int *nr)
  {
         unsigned long fault_pfn;
+       int nr_start = *nr;
+
+       fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+               return 0;
  
-       fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+       if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
+               undo_dev_pagemap(nr, nr_start, pages);
+               return 0;
+       }
+       return 1;
  }
  
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                 unsigned long end, struct page **pages, int *nr)
  {
         unsigned long fault_pfn;
+       int nr_start = *nr;
+
+       fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+       if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+               return 0;
  
-       fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+       if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+               undo_dev_pagemap(nr, nr_start, pages);
+               return 0;
+       }
+       return 1;
  }
  #else
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                 unsigned long end, struct page **pages, int *nr)
  {
         BUILD_BUG();
         return 0;
  }
  
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
                 unsigned long end, struct page **pages, int *nr)
  {
         BUILD_BUG();
@@ -1499,7 +1518,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                 return 0;
  
         if (pmd_devmap(orig))
-               return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+               return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
  
         refs = 0;
         page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1537,7 +1556,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                 return 0;
  
         if (pud_devmap(orig))
-               return __gup_device_huge_pud(orig, addr, end, pages, nr);
+               return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
  
         refs = 0;
         page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
diff --git a/mm/hmm.c b/mm/hmm.c

index 486dc394a5a3cd1fe226e215717631619c8a4195..de7b6bf7720104fa9312dab54618d7cc425c3f05 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -35,15 +35,6 @@
  
  #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
  
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
-
  #if IS_ENABLED(CONFIG_HMM_MIRROR)
  static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
  
@@ -1167,7 +1158,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
         resource_size_t addr;
         int ret;
  
-       static_branch_enable(&device_private_key);
+       dev_pagemap_get_ops();
  
         devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
                                    GFP_KERNEL, dev_to_node(device));
@@ -1261,7 +1252,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
         if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
                 return ERR_PTR(-EINVAL);
  
-       static_branch_enable(&device_private_key);
+       dev_pagemap_get_ops();
  
         devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
                                    GFP_KERNEL, dev_to_node(device));
diff --git a/mm/internal.h b/mm/internal.h

index 62d8c34e63d54dbf45c2d0d4c3fd215d92933e2c..502d141897945dcfc41d346faf5b44daddc49878 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -168,9 +168,6 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
                                         gfp_t gfp_flags);
  extern int user_min_free_kbytes;
  
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  
  /*
@@ -498,6 +495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
  #define ALLOC_HARDER           0x10 /* try to alloc harder */
  #define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
  #define ALLOC_CPUSET           0x40 /* check for correct cpuset */
+#define ALLOC_CMA              0x80 /* allow allocations from CMA areas */
  
  enum ttu_flags;
  struct tlbflush_unmap_batch;
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c

index bc0e68f7dc756104ff47005a4f9403af7e08fe4e..f185455b34065d27efa2b6a90c9dd2c1dfe92ae9 100644 (file)
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -792,6 +792,40 @@ DEFINE_ASAN_SET_SHADOW(f5);
  DEFINE_ASAN_SET_SHADOW(f8);
  
  #ifdef CONFIG_MEMORY_HOTPLUG
+static bool shadow_mapped(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if (pgd_none(*pgd))
+               return false;
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return false;
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud))
+               return false;
+
+       /*
+        * We can't use pud_large() or pud_huge(), the first one is
+        * arch-specific, the last one depends on HUGETLB_PAGE.  So let's abuse
+        * pud_bad(), if pud is bad then it's bad because it's huge.
+        */
+       if (pud_bad(*pud))
+               return true;
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return false;
+
+       if (pmd_bad(*pmd))
+               return true;
+       pte = pte_offset_kernel(pmd, addr);
+       return !pte_none(*pte);
+}
+
  static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                         unsigned long action, void *data)
  {
@@ -813,6 +847,14 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
         case MEM_GOING_ONLINE: {
                 void *ret;
  
+               /*
+                * If shadow is mapped already then it must have been mapped
+                * during the boot. This could happen if we are onlining
+                * previously offlined memory.
+                */
+               if (shadow_mapped(shadow_start))
+                       return NOTIFY_OK;
+
                 ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
                                         shadow_end, GFP_KERNEL,
                                         PAGE_KERNEL, VM_NO_GUARD,
@@ -824,8 +866,26 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                 kmemleak_ignore(ret);
                 return NOTIFY_OK;
         }
-       case MEM_OFFLINE:
-               vfree((void *)shadow_start);
+       case MEM_CANCEL_ONLINE:
+       case MEM_OFFLINE: {
+               struct vm_struct *vm;
+
+               /*
+                * shadow_start was either mapped during boot by kasan_init()
+                * or during memory online by __vmalloc_node_range().
+                * In the latter case we can use vfree() to free shadow.
+                * Non-NULL result of the find_vm_area() will tell us if
+                * that was the second case.
+                *
+                * Currently it's not possible to free shadow mapped
+                * during boot by kasan_init(). It's because the code
+                * to do that hasn't been written yet. So we'll just
+                * leak the memory.
+                */
+               vm = find_vm_area((void *)shadow_start);
+               if (vm)
+                       vfree((void *)shadow_start);
+       }
         }
  
         return NOTIFY_OK;
@@ -838,5 +898,5 @@ static int __init kasan_memhotplug_init(void)
         return 0;
  }
  
-module_init(kasan_memhotplug_init);
+core_initcall(kasan_memhotplug_init);
  #endif
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index f74826cdceea1112e648462393f46f034e4d46c2..25982467800b35756ad1eaf31a8782aaa52e0318 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1158,7 +1158,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
                  * nodes have to go through register_node.
                  * TODO clean up this mess.
                  */
-               ret = link_mem_sections(nid, start_pfn, nr_pages);
+               ret = link_mem_sections(nid, start_pfn, nr_pages, false);
  register_fail:
                 /*
                  * If sysfs file of new node can't create, cpu on the node
diff --git a/mm/mmap.c b/mm/mmap.c

index 78e14facdb6ed281f736450fb434905c0fc7a6c1..fc41c0543d7fab21542d2c314f40dfbea69ebbe3 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1327,7 +1327,7 @@ static inline int mlock_future_check(struct mm_struct *mm,
  static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
  {
         if (S_ISREG(inode->i_mode))
-               return inode->i_sb->s_maxbytes;
+               return MAX_LFS_FILESIZE;
  
         if (S_ISBLK(inode->i_mode))
                 return MAX_LFS_FILESIZE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 905db9d7962fcb1776c0e7ffb1618fb6e4084a75..22320ea27489acdcce17eccdec5ef7cb95bfc782 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1743,38 +1743,16 @@ void __init page_alloc_init_late(void)
  }
  
  #ifdef CONFIG_CMA
-static void __init adjust_present_page_count(struct page *page, long count)
-{
-       struct zone *zone = page_zone(page);
-
-       /* We don't need to hold a lock since it is boot-up process */
-       zone->present_pages += count;
-}
-
  /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
  void __init init_cma_reserved_pageblock(struct page *page)
  {
         unsigned i = pageblock_nr_pages;
-       unsigned long pfn = page_to_pfn(page);
         struct page *p = page;
-       int nid = page_to_nid(page);
-
-       /*
-        * ZONE_MOVABLE will steal present pages from other zones by
-        * changing page links so page_zone() is changed. Before that,
-        * we need to adjust previous zone's page count first.
-        */
-       adjust_present_page_count(page, -pageblock_nr_pages);
  
         do {
                 __ClearPageReserved(p);
                 set_page_count(p, 0);
-
-               /* Steal pages from other zones */
-               set_page_links(p, ZONE_MOVABLE, nid, pfn);
-       } while (++p, ++pfn, --i);
-
-       adjust_present_page_count(page, pageblock_nr_pages);
+       } while (++p, --i);
  
         set_pageblock_migratetype(page, MIGRATE_CMA);
  
@@ -2889,7 +2867,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                  * exists.
                  */
                 watermark = min_wmark_pages(zone) + (1UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
                         return 0;
  
                 __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3165,6 +3143,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
         }
  
  
+#ifdef CONFIG_CMA
+       /* If allocation can't use CMA areas don't use free CMA pages */
+       if (!(alloc_flags & ALLOC_CMA))
+               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
+
         /*
          * Check watermarks for an order-0 allocation request. If these
          * are not met, then a high-order request also cannot go ahead
@@ -3191,8 +3175,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                 }
  
  #ifdef CONFIG_CMA
-               if (!list_empty(&area->free_list[MIGRATE_CMA]))
+               if ((alloc_flags & ALLOC_CMA) &&
+                   !list_empty(&area->free_list[MIGRATE_CMA])) {
                         return true;
+               }
  #endif
                 if (alloc_harder &&
                         !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3212,6 +3198,13 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
                 unsigned long mark, int classzone_idx, unsigned int alloc_flags)
  {
         long free_pages = zone_page_state(z, NR_FREE_PAGES);
+       long cma_pages = 0;
+
+#ifdef CONFIG_CMA
+       /* If allocation can't use CMA areas don't use free CMA pages */
+       if (!(alloc_flags & ALLOC_CMA))
+               cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
  
         /*
          * Fast check for order-0 only. If this fails then the reserves
@@ -3220,7 +3213,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
          * the caller is !atomic then it'll uselessly search the free
          * list. That corner case is then slower but it is harmless.
          */
-       if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
+       if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
                 return true;
  
         return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3856,6 +3849,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         } else if (unlikely(rt_task(current)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
  
+#ifdef CONFIG_CMA
+       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+               alloc_flags |= ALLOC_CMA;
+#endif
         return alloc_flags;
  }
  
@@ -4322,6 +4319,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
         if (should_fail_alloc_page(gfp_mask, order))
                 return false;
  
+       if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
+               *alloc_flags |= ALLOC_CMA;
+
         return true;
  }
  
@@ -6204,7 +6204,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
  {
         enum zone_type j;
         int nid = pgdat->node_id;
-       unsigned long node_end_pfn = 0;
  
         pgdat_resize_init(pgdat);
  #ifdef CONFIG_NUMA_BALANCING
@@ -6232,13 +6231,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize, freesize, memmap_pages;
                 unsigned long zone_start_pfn = zone->zone_start_pfn;
-               unsigned long movable_size = 0;
  
                 size = zone->spanned_pages;
                 realsize = freesize = zone->present_pages;
-               if (zone_end_pfn(zone) > node_end_pfn)
-                       node_end_pfn = zone_end_pfn(zone);
-
  
                 /*
                  * Adjust freesize so that it accounts for how much memory
@@ -6287,30 +6282,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 zone_seqlock_init(zone);
                 zone_pcp_init(zone);
  
-               /*
-                * The size of the CMA area is unknown now so we need to
-                * prepare the memory for the usemap at maximum.
-                */
-               if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
-                       pgdat->node_spanned_pages) {
-                       movable_size = node_end_pfn - pgdat->node_start_pfn;
-               }
-
-               if (!size && !movable_size)
+               if (!size)
                         continue;
  
                 set_pageblock_order();
-               if (movable_size) {
-                       zone->zone_start_pfn = pgdat->node_start_pfn;
-                       zone->spanned_pages = movable_size;
-                       setup_usemap(pgdat, zone,
-                               pgdat->node_start_pfn, movable_size);
-                       init_currently_empty_zone(zone,
-                               pgdat->node_start_pfn, movable_size);
-               } else {
-                       setup_usemap(pgdat, zone, zone_start_pfn, size);
-                       init_currently_empty_zone(zone, zone_start_pfn, size);
-               }
+               setup_usemap(pgdat, zone, zone_start_pfn, size);
+               init_currently_empty_zone(zone, zone_start_pfn, size);
                 memmap_init(size, nid, j, zone_start_pfn);
         }
  }
@@ -7621,11 +7598,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
         unsigned long pfn, iter, found;
  
         /*
-        * For avoiding noise data, lru_add_drain_all() should be called
-        * If ZONE_MOVABLE, the zone never contains unmovable pages
+        * TODO we could make this much more efficient by not checking every
+        * page in the range if we know all of them are in MOVABLE_ZONE and
+        * that the movable zone guarantees that pages are migratable but
+        * the latter is not the case right now unfortunately. E.g. movablecore
+        * can still lead to having bootmem allocations in zone_movable.
          */
-       if (zone_idx(zone) == ZONE_MOVABLE)
-               return false;
  
         /*
          * CMA allocations (alloc_contig_range) really need to mark isolate
@@ -7646,7 +7624,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                 page = pfn_to_page(check);
  
                 if (PageReserved(page))
-                       return true;
+                       goto unmovable;
  
                 /*
                  * Hugepages are not in LRU lists, but they're movable.
@@ -7696,9 +7674,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                  * page at boot.
                  */
                 if (found > count)
-                       return true;
+                       goto unmovable;
         }
         return false;
+unmovable:
+       WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
+       return true;
  }
  
  bool is_pageblock_removable_nolock(struct page *page)
@@ -7951,7 +7932,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
  }
  #endif
  
-#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
+#ifdef CONFIG_MEMORY_HOTPLUG
  /*
   * The zone indicated has a new number of managed_pages; batch sizes and percpu
   * page high values need to be recalulated.
diff --git a/mm/swap.c b/mm/swap.c

index 3dd518832096ea94fe8a3ffa5d94c9ffd5752951..26fc9b5f1b6c1b4ad67226c19b1166c62616a9af 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -29,6 +29,7 @@
  #include <linux/cpu.h>
  #include <linux/notifier.h>
  #include <linux/backing-dev.h>
+#include <linux/memremap.h>
  #include <linux/memcontrol.h>
  #include <linux/gfp.h>
  #include <linux/uio.h>
@@ -743,7 +744,7 @@ void release_pages(struct page **pages, int nr)
                                                        flags);
                                 locked_pgdat = NULL;
                         }
-                       put_zone_device_private_or_public_page(page);
+                       put_devmap_managed_page(page);
                         continue;
                 }
  
diff --git a/mm/swapfile.c b/mm/swapfile.c

index cc2cf04d9018ad0c546648dfa6cb8167082bbc99..78a015fcec3b1efe29b56580eb3d4dc510243f6a 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3112,6 +3112,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
         unsigned long *frontswap_map = NULL;
         struct page *page = NULL;
         struct inode *inode = NULL;
+       bool inced_nr_rotate_swap = false;
  
         if (swap_flags & ~SWAP_FLAGS_VALID)
                 return -EINVAL;
@@ -3215,8 +3216,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                         cluster = per_cpu_ptr(p->percpu_cluster, cpu);
                         cluster_set_null(&cluster->index);
                 }
-       } else
+       } else {
                 atomic_inc(&nr_rotate_swap);
+               inced_nr_rotate_swap = true;
+       }
  
         error = swap_cgroup_swapon(p->type, maxpages);
         if (error)
@@ -3307,6 +3310,8 @@ bad_swap:
         vfree(swap_map);
         kvfree(cluster_info);
         kvfree(frontswap_map);
+       if (inced_nr_rotate_swap)
+               atomic_dec(&nr_rotate_swap);
         if (swap_file) {
                 if (inode && S_ISREG(inode->i_mode)) {
                         inode_unlock(inode);
diff --git a/net/9p/Kconfig b/net/9p/Kconfig

index e6014e0e51f7a212dd48cdefcb26dffb44b2fba4..46c39f7da444d0c5e27797041bf8cd140d0a1b26 100644 (file)
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -32,7 +32,7 @@ config NET_9P_XEN
  
  
  config NET_9P_RDMA
-       depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on INET && INFINIBAND_ADDR_TRANS
         tristate "9P RDMA Transport (Experimental)"
         help
           This builds support for an RDMA transport.
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c

index a11d3d89f0120a17c26f41a0559b6c40ebbbc6ca..a35f597e8c8bf4f15ad0f01aff29849d2bebe36f 100644 (file)
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1536,7 +1536,7 @@ out:
  
         if (!ret && primary_if)
                 *primary_if = hard_iface;
-       else
+       else if (hard_iface)
                 batadv_hardif_put(hard_iface);
  
         return ret;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c

index 0225616d5771d0986127322142fc591780fc25b0..3986551397caa5ffb6ba7338eeb4769c8b8f99fb 100644 (file)
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -862,7 +862,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
         struct batadv_orig_node_vlan *vlan;
         u8 *tt_change_ptr;
  
-       rcu_read_lock();
+       spin_lock_bh(&orig_node->vlan_list_lock);
         hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
                 num_vlan++;
                 num_entries += atomic_read(&vlan->tt.num_entries);
@@ -900,7 +900,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
         *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
  
  out:
-       rcu_read_unlock();
+       spin_unlock_bh(&orig_node->vlan_list_lock);
         return tvlv_len;
  }
  
@@ -931,15 +931,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
         struct batadv_tvlv_tt_vlan_data *tt_vlan;
         struct batadv_softif_vlan *vlan;
         u16 num_vlan = 0;
-       u16 num_entries = 0;
+       u16 vlan_entries = 0;
+       u16 total_entries = 0;
         u16 tvlv_len;
         u8 *tt_change_ptr;
         int change_offset;
  
-       rcu_read_lock();
+       spin_lock_bh(&bat_priv->softif_vlan_list_lock);
         hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+               vlan_entries = atomic_read(&vlan->tt.num_entries);
+               if (vlan_entries < 1)
+                       continue;
+
                 num_vlan++;
-               num_entries += atomic_read(&vlan->tt.num_entries);
+               total_entries += vlan_entries;
         }
  
         change_offset = sizeof(**tt_data);
@@ -947,7 +952,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
  
         /* if tt_len is negative, allocate the space needed by the full table */
         if (*tt_len < 0)
-               *tt_len = batadv_tt_len(num_entries);
+               *tt_len = batadv_tt_len(total_entries);
  
         tvlv_len = *tt_len;
         tvlv_len += change_offset;
@@ -964,6 +969,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
  
         tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
         hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+               vlan_entries = atomic_read(&vlan->tt.num_entries);
+               if (vlan_entries < 1)
+                       continue;
+
                 tt_vlan->vid = htons(vlan->vid);
                 tt_vlan->crc = htonl(vlan->tt.crc);
  
@@ -974,7 +983,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
         *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
  
  out:
-       rcu_read_unlock();
+       spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
         return tvlv_len;
  }
  
@@ -1538,6 +1547,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
   *  handled by a given originator
   * @entry: the TT global entry to check
   * @orig_node: the originator to search in the list
+ * @flags: a pointer to store TT flags for the given @entry received
+ *  from @orig_node
   *
   * find out if an orig_node is already in the list of a tt_global_entry.
   *
@@ -1545,7 +1556,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
   */
  static bool
  batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
-                               const struct batadv_orig_node *orig_node)
+                               const struct batadv_orig_node *orig_node,
+                               u8 *flags)
  {
         struct batadv_tt_orig_list_entry *orig_entry;
         bool found = false;
@@ -1553,6 +1565,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
         orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
         if (orig_entry) {
                 found = true;
+
+               if (flags)
+                       *flags = orig_entry->flags;
+
                 batadv_tt_orig_list_entry_put(orig_entry);
         }
  
@@ -1731,7 +1747,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
                         if (!(common->flags & BATADV_TT_CLIENT_TEMP))
                                 goto out;
                         if (batadv_tt_global_entry_has_orig(tt_global_entry,
-                                                           orig_node))
+                                                           orig_node, NULL))
                                 goto out_remove;
                         batadv_tt_global_del_orig_list(tt_global_entry);
                         goto add_orig_entry;
@@ -2880,23 +2896,46 @@ unlock:
  }
  
  /**
- * batadv_tt_local_valid() - verify that given tt entry is a valid one
+ * batadv_tt_local_valid() - verify local tt entry and get flags
   * @entry_ptr: to be checked local tt entry
   * @data_ptr: not used but definition required to satisfy the callback prototype
+ * @flags: a pointer to store TT flags for this client to
+ *
+ * Checks the validity of the given local TT entry. If it is, then the provided
+ * flags pointer is updated.
   *
   * Return: true if the entry is a valid, false otherwise.
   */
-static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
+static bool batadv_tt_local_valid(const void *entry_ptr,
+                                 const void *data_ptr,
+                                 u8 *flags)
  {
         const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
  
         if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW)
                 return false;
+
+       if (flags)
+               *flags = tt_common_entry->flags;
+
         return true;
  }
  
+/**
+ * batadv_tt_global_valid() - verify global tt entry and get flags
+ * @entry_ptr: to be checked global tt entry
+ * @data_ptr: an orig_node object (may be NULL)
+ * @flags: a pointer to store TT flags for this client to
+ *
+ * Checks the validity of the given global TT entry. If it is, then the provided
+ * flags pointer is updated either with the common (summed) TT flags if data_ptr
+ * is NULL or the specific, per originator TT flags otherwise.
+ *
+ * Return: true if the entry is valid, false otherwise.
+ */
  static bool batadv_tt_global_valid(const void *entry_ptr,
-                                  const void *data_ptr)
+                                  const void *data_ptr,
+                                  u8 *flags)
  {
         const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
         const struct batadv_tt_global_entry *tt_global_entry;
@@ -2910,7 +2949,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
                                        struct batadv_tt_global_entry,
                                        common);
  
-       return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node);
+       return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node,
+                                              flags);
  }
  
  /**
@@ -2920,25 +2960,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
   * @hash: hash table containing the tt entries
   * @tt_len: expected tvlv tt data buffer length in number of bytes
   * @tvlv_buff: pointer to the buffer to fill with the TT data
- * @valid_cb: function to filter tt change entries
+ * @valid_cb: function to filter tt change entries and to return TT flags
   * @cb_data: data passed to the filter function as argument
+ *
+ * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb
+ * is not provided then this becomes a no-op.
   */
  static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
                                     struct batadv_hashtable *hash,
                                     void *tvlv_buff, u16 tt_len,
                                     bool (*valid_cb)(const void *,
-                                                    const void *),
+                                                    const void *,
+                                                    u8 *flags),
                                     void *cb_data)
  {
         struct batadv_tt_common_entry *tt_common_entry;
         struct batadv_tvlv_tt_change *tt_change;
         struct hlist_head *head;
         u16 tt_tot, tt_num_entries = 0;
+       u8 flags;
+       bool ret;
         u32 i;
  
         tt_tot = batadv_tt_entries(tt_len);
         tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
  
+       if (!valid_cb)
+               return;
+
         rcu_read_lock();
         for (i = 0; i < hash->size; i++) {
                 head = &hash->table[i];
@@ -2948,11 +2997,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
                         if (tt_tot == tt_num_entries)
                                 break;
  
-                       if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
+                       ret = valid_cb(tt_common_entry, cb_data, &flags);
+                       if (!ret)
                                 continue;
  
                         ether_addr_copy(tt_change->addr, tt_common_entry->addr);
-                       tt_change->flags = tt_common_entry->flags;
+                       tt_change->flags = flags;
                         tt_change->vid = htons(tt_common_entry->vid);
                         memset(tt_change->reserved, 0,
                                sizeof(tt_change->reserved));
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c

index 47ba98db145dd4ff05017e1f779b03e42ee19e98..46c1fe7637ea8f5f2b68c83998664fd37f090d49 100644 (file)
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
         /* Make sure the match only receives stp frames */
         if (!par->nft_compat &&
             (!ether_addr_equal(e->destmac, eth_stp_addr) ||
-            !is_broadcast_ether_addr(e->destmsk) ||
-            !(e->bitmask & EBT_DESTMAC)))
+            !(e->bitmask & EBT_DESTMAC) ||
+            !is_broadcast_ether_addr(e->destmsk)))
                 return -EINVAL;
  
         return 0;
diff --git a/net/core/dev.c b/net/core/dev.c

index af0558b00c6c7d5831d0b9e5281d2d327e9773a5..2af787e8b1303a4257b738ccbfc76801e6cbc6d5 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2124,7 +2124,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
                 int i, j;
  
                 for (i = count, j = offset; i--; j++) {
-                       if (!remove_xps_queue(dev_maps, cpu, j))
+                       if (!remove_xps_queue(dev_maps, tci, j))
                                 break;
                 }
  
diff --git a/net/core/filter.c b/net/core/filter.c

index e77c30ca491df723bacf923ac2f328cc50a580b0..201ff36b17a8c89edb14fcf3295650cd8d57d942 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -481,11 +481,18 @@ do_pass:
  
  #define BPF_EMIT_JMP                                                   \
         do {                                                            \
+               const s32 off_min = S16_MIN, off_max = S16_MAX;         \
+               s32 off;                                                \
+                                                                       \
                 if (target >= len || target < 0)                        \
                         goto err;                                       \
-               insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;   \
+               off = addrs ? addrs[target] - addrs[i] - 1 : 0;         \
                 /* Adjust pc relative offset for 2nd or 3rd insn. */    \
-               insn->off -= insn - tmp_insns;                          \
+               off -= insn - tmp_insns;                                \
+               /* Reject anything not fitting into insn->off. */       \
+               if (off < off_min || off > off_max)                     \
+                       goto err;                                       \
+               insn->off = off;                                        \
         } while (0)
  
                 case BPF_JMP | BPF_JA:
diff --git a/net/core/sock.c b/net/core/sock.c

index 6444525f610cf8039516744ad26aec58485b9b8a..3b6d02854e57736254975963c45369515f369ddc 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1606,7 +1606,7 @@ static void __sk_free(struct sock *sk)
         if (likely(sk->sk_net_refcnt))
                 sock_inuse_add(sock_net(sk), -1);
  
-       if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+       if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
                 sock_diag_broadcast_destroy(sk);
         else
                 sk_destruct(sk);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c

index 84cd4e3fd01b1dec5ed4234291dde60d4f1d1d61..0d56e36a6db7b77dcdeb9697dd81bf62895e6e4c 100644 (file)
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -283,9 +283,7 @@ int dccp_disconnect(struct sock *sk, int flags)
  
         dccp_clear_xmit_timers(sk);
         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-       ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
         dp->dccps_hc_rx_ccid = NULL;
-       dp->dccps_hc_tx_ccid = NULL;
  
         __skb_queue_purge(&sk->sk_receive_queue);
         __skb_queue_purge(&sk->sk_write_queue);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c

index adf50fbc4c13e7de8baa63881f2b5fc5314dc874..47725250b4cad75d4d37ad92075cc2117ae82c6f 100644 (file)
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
  static int dsa_port_setup(struct dsa_port *dp)
  {
         struct dsa_switch *ds = dp->ds;
-       int err;
+       int err = 0;
  
         memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
  
-       err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
+       if (dp->type != DSA_PORT_TYPE_UNUSED)
+               err = devlink_port_register(ds->devlink, &dp->devlink_port,
+                                           dp->index);
         if (err)
                 return err;
  
@@ -293,7 +295,8 @@ static int dsa_port_setup(struct dsa_port *dp)
  
  static void dsa_port_teardown(struct dsa_port *dp)
  {
-       devlink_port_unregister(&dp->devlink_port);
+       if (dp->type != DSA_PORT_TYPE_UNUSED)
+               devlink_port_unregister(&dp->devlink_port);
  
         switch (dp->type) {
         case DSA_PORT_TYPE_UNUSED:
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c

index f05afaf3235c0500a9087eae6365b7001aa64663..e66172aaf241874ea5b4011c4d1ea3ed00ee8c3f 100644 (file)
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
                                  u8 tos, int oif, struct net_device *dev,
                                  int rpf, struct in_device *idev, u32 *itag)
  {
+       struct net *net = dev_net(dev);
+       struct flow_keys flkeys;
         int ret, no_addr;
         struct fib_result res;
         struct flowi4 fl4;
-       struct net *net = dev_net(dev);
         bool dev_match;
  
         fl4.flowi4_oif = 0;
@@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
         no_addr = idev->ifa_list == NULL;
  
         fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
+       if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) {
+               fl4.flowi4_proto = 0;
+               fl4.fl4_sport = 0;
+               fl4.fl4_dport = 0;
+       }
  
         trace_fib_validate_source(dev, &fl4);
  
@@ -643,6 +649,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
         [RTA_ENCAP]             = { .type = NLA_NESTED },
         [RTA_UID]               = { .type = NLA_U32 },
         [RTA_MARK]              = { .type = NLA_U32 },
+       [RTA_TABLE]             = { .type = NLA_U32 },
  };
  
  static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c

index 9c169bb2444d5990c7562692ba1c92030898bca4..f200b304f76cb37c704e4bb84c1e9acd2f76df0e 100644 (file)
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -722,10 +722,12 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
                 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
                                     tunnel->index,
                                     truncate, true);
-       else
+       else if (tunnel->erspan_ver == 2)
                 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
                                        tunnel->dir, tunnel->hwid,
                                        truncate, true);
+       else
+               goto free_skb;
  
         tunnel->parms.o_flags &= ~TUNNEL_KEY;
         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 83c73bab2c3de90254e11e4126d7cb77ed998f03..d54abc0978006169be261e6d77fa0b296e94b9c4 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1045,7 +1045,8 @@ alloc_new_skb:
                 if (copy > length)
                         copy = length;
  
-               if (!(rt->dst.dev->features&NETIF_F_SG)) {
+               if (!(rt->dst.dev->features&NETIF_F_SG) &&
+                   skb_tailroom(skb) >= copy) {
                         unsigned int off;
  
                         off = skb->len;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c

index 5ad2d8ed3a3fe2aa51d814af442df7ff5e074d3e..57bbb060faaf834bedabb8c23175a126df5910a6 100644 (file)
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -505,8 +505,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
         int err;
         int copied;
  
-       WARN_ON_ONCE(sk->sk_family == AF_INET6);
-
         err = -EAGAIN;
         skb = sock_dequeue_err_skb(sk);
         if (!skb)
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c

index 4fe97723b53f4deb5ebbb1d7633ceb1b4efc3af3..30221701614c599d380229c69b83558537c69de8 100644 (file)
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -43,7 +43,10 @@ mr_table_alloc(struct net *net, u32 id,
         write_pnet(&mrt->net, net);
  
         mrt->ops = *ops;
-       rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+       if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+               kfree(mrt);
+               return NULL;
+       }
         INIT_LIST_HEAD(&mrt->mfc_cache_list);
         INIT_LIST_HEAD(&mrt->mfc_unres_queue);
  
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c

index 44b308d93ec2496899a74bcc378d6e50fbb41055..e85f35b89c49a676e2736f1809e26a14c40743b8 100644 (file)
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -34,6 +34,7 @@
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
  MODULE_DESCRIPTION("IPv4 packet filter");
+MODULE_ALIAS("ipt_icmp");
  
  void *ipt_alloc_initial_table(const struct xt_table *info)
  {
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c

index fd01f13c896a153c6ec54b2df5503da6c311cf4f..12843c9ef1421d204fba6bea42a85615e2e69cc7 100644 (file)
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
                         return true ^ invert;
         }
  
+       memset(&flow, 0, sizeof(flow));
         flow.flowi4_iif = LOOPBACK_IFINDEX;
         flow.daddr = iph->saddr;
         flow.saddr = rpfilter_get_saddr(iph->daddr);
-       flow.flowi4_oif = 0;
         flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
         flow.flowi4_tos = RT_TOS(iph->tos);
         flow.flowi4_scope = RT_SCOPE_UNIVERSE;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index 29268efad24777b6839923e25d7bf138c9cf0643..2cfa1b518f8d6368a563c1ae14d7dff7ce43e473 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1961,8 +1961,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         fl4.saddr = saddr;
         fl4.flowi4_uid = sock_net_uid(net, NULL);
  
-       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
                 flkeys = &_flkeys;
+       } else {
+               fl4.flowi4_proto = 0;
+               fl4.fl4_sport = 0;
+               fl4.fl4_dport = 0;
+       }
  
         err = fib_lookup(net, &fl4, res, 0);
         if (err != 0) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 383cac0ff0ec059ca7dbc1a6304cc7f8183e008d..d07e34f8e3091144976358674b92458076f92bfb 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2833,8 +2833,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
                 return -EBUSY;
  
         if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
-               if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-                       BUG();
+               if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+                       WARN_ON_ONCE(1);
+                       return -EINVAL;
+               }
                 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
                         return -ENOMEM;
         }
@@ -3342,6 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
         sock_reset_flag(sk, SOCK_DONE);
         tp->snd_wnd = 0;
         tcp_init_wl(tp, 0);
+       tcp_write_queue_purge(sk);
         tp->snd_una = tp->write_seq;
         tp->snd_sml = tp->write_seq;
         tp->snd_up = tp->write_seq;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c

index 69727bc168cb027009dac95431e40b71291697da..458de353f5d9618f31d3fa28b7ae71c5a246f4a9 100644 (file)
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -71,6 +71,7 @@ struct ip6gre_net {
         struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
  
         struct ip6_tnl __rcu *collect_md_tun;
+       struct ip6_tnl __rcu *collect_md_tun_erspan;
         struct net_device *fb_tunnel_dev;
  };
  
@@ -81,6 +82,7 @@ static int ip6gre_tunnel_init(struct net_device *dev);
  static void ip6gre_tunnel_setup(struct net_device *dev);
  static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
  static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
  
  /* Tunnel hash table */
  
@@ -232,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
         if (cand)
                 return cand;
  
-       t = rcu_dereference(ign->collect_md_tun);
+       if (gre_proto == htons(ETH_P_ERSPAN) ||
+           gre_proto == htons(ETH_P_ERSPAN2))
+               t = rcu_dereference(ign->collect_md_tun_erspan);
+       else
+               t = rcu_dereference(ign->collect_md_tun);
+
         if (t && t->dev->flags & IFF_UP)
                 return t;
  
@@ -261,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
         return &ign->tunnels[prio][h];
  }
  
+static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun, t);
+}
+
+static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun_erspan, t);
+}
+
+static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun, NULL);
+}
+
+static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
+                                      struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
+}
+
  static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
                 const struct ip6_tnl *t)
  {
@@ -271,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
  {
         struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
  
-       if (t->parms.collect_md)
-               rcu_assign_pointer(ign->collect_md_tun, t);
-
         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
         rcu_assign_pointer(*tp, t);
  }
@@ -283,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
         struct ip6_tnl __rcu **tp;
         struct ip6_tnl *iter;
  
-       if (t->parms.collect_md)
-               rcu_assign_pointer(ign->collect_md_tun, NULL);
-
         for (tp = ip6gre_bucket(ign, t);
              (iter = rtnl_dereference(*tp)) != NULL;
              tp = &iter->next) {
@@ -374,11 +400,23 @@ failed_free:
         return NULL;
  }
  
+static void ip6erspan_tunnel_uninit(struct net_device *dev)
+{
+       struct ip6_tnl *t = netdev_priv(dev);
+       struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+       ip6erspan_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       dst_cache_reset(&t->dst_cache);
+       dev_put(dev);
+}
+
  static void ip6gre_tunnel_uninit(struct net_device *dev)
  {
         struct ip6_tnl *t = netdev_priv(dev);
         struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
  
+       ip6gre_tunnel_unlink_md(ign, t);
         ip6gre_tunnel_unlink(ign, t);
         dst_cache_reset(&t->dst_cache);
         dev_put(dev);
@@ -698,6 +736,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
         else
                 fl6->daddr = tunnel->parms.raddr;
  
+       if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+               return -ENOMEM;
+
         /* Push GRE header. */
         protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
  
@@ -908,7 +949,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                 truncate = true;
         }
  
-       if (skb_cow_head(skb, dev->needed_headroom))
+       if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
                 goto tx_err;
  
         t->parms.o_flags &= ~TUNNEL_KEY;
@@ -979,11 +1020,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                         erspan_build_header(skb, ntohl(t->parms.o_key),
                                             t->parms.index,
                                             truncate, false);
-               else
+               else if (t->parms.erspan_ver == 2)
                         erspan_build_header_v2(skb, ntohl(t->parms.o_key),
                                                t->parms.dir,
                                                t->parms.hwid,
                                                truncate, false);
+               else
+                       goto tx_err;
+
                 fl6.daddr = t->parms.raddr;
         }
  
@@ -1019,12 +1063,11 @@ tx_err:
         return NETDEV_TX_OK;
  }
  
-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
  {
         struct net_device *dev = t->dev;
         struct __ip6_tnl_parm *p = &t->parms;
         struct flowi6 *fl6 = &t->fl.u.ip6;
-       int t_hlen;
  
         if (dev->type != ARPHRD_ETHER) {
                 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1051,12 +1094,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
                 dev->flags |= IFF_POINTOPOINT;
         else
                 dev->flags &= ~IFF_POINTOPOINT;
+}
  
-       t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
-
-       t->hlen = t->encap_hlen + t->tun_hlen;
-
-       t_hlen = t->hlen + sizeof(struct ipv6hdr);
+static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
+                                        int t_hlen)
+{
+       const struct __ip6_tnl_parm *p = &t->parms;
+       struct net_device *dev = t->dev;
  
         if (p->flags & IP6_TNL_F_CAP_XMIT) {
                 int strict = (ipv6_addr_type(&p->raddr) &
@@ -1088,8 +1132,26 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
         }
  }
  
-static int ip6gre_tnl_change(struct ip6_tnl *t,
-       const struct __ip6_tnl_parm *p, int set_mtu)
+static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
+{
+       int t_hlen;
+
+       tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       return t_hlen;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+       ip6gre_tnl_link_config_common(t);
+       ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
+}
+
+static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
+                                    const struct __ip6_tnl_parm *p)
  {
         t->parms.laddr = p->laddr;
         t->parms.raddr = p->raddr;
@@ -1105,6 +1167,12 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
         t->parms.o_flags = p->o_flags;
         t->parms.fwmark = p->fwmark;
         dst_cache_reset(&t->dst_cache);
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
+                            int set_mtu)
+{
+       ip6gre_tnl_copy_tnl_parm(t, p);
         ip6gre_tnl_link_config(t, set_mtu);
         return 0;
  }
@@ -1381,11 +1449,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
                 return ret;
         }
  
-       tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
-       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
-       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
-
-       dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       t_hlen = ip6gre_calc_hlen(tunnel);
         dev->mtu = ETH_DATA_LEN - t_hlen;
         if (dev->type == ARPHRD_ETHER)
                 dev->mtu -= ETH_HLEN;
@@ -1728,6 +1792,19 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
         .ndo_get_iflink = ip6_tnl_get_iflink,
  };
  
+static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
+{
+       int t_hlen;
+
+       tunnel->tun_hlen = 8;
+       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+                      erspan_hdr_len(tunnel->parms.erspan_ver);
+
+       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       return t_hlen;
+}
+
  static int ip6erspan_tap_init(struct net_device *dev)
  {
         struct ip6_tnl *tunnel;
@@ -1751,12 +1828,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
                 return ret;
         }
  
-       tunnel->tun_hlen = 8;
-       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
-                      erspan_hdr_len(tunnel->parms.erspan_ver);
-       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
-
-       dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       t_hlen = ip6erspan_calc_hlen(tunnel);
         dev->mtu = ETH_DATA_LEN - t_hlen;
         if (dev->type == ARPHRD_ETHER)
                 dev->mtu -= ETH_HLEN;
@@ -1764,14 +1836,14 @@ static int ip6erspan_tap_init(struct net_device *dev)
                 dev->mtu -= 8;
  
         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-       ip6gre_tnl_link_config(tunnel, 1);
+       ip6erspan_tnl_link_config(tunnel, 1);
  
         return 0;
  }
  
  static const struct net_device_ops ip6erspan_netdev_ops = {
         .ndo_init =             ip6erspan_tap_init,
-       .ndo_uninit =           ip6gre_tunnel_uninit,
+       .ndo_uninit =           ip6erspan_tunnel_uninit,
         .ndo_start_xmit =       ip6erspan_tunnel_xmit,
         .ndo_set_mac_address =  eth_mac_addr,
         .ndo_validate_addr =    eth_validate_addr,
@@ -1835,13 +1907,11 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
         return ret;
  }
  
-static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
-                         struct nlattr *tb[], struct nlattr *data[],
-                         struct netlink_ext_ack *extack)
+static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
+                                struct nlattr *tb[], struct nlattr *data[],
+                                struct netlink_ext_ack *extack)
  {
         struct ip6_tnl *nt;
-       struct net *net = dev_net(dev);
-       struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
         struct ip_tunnel_encap ipencap;
         int err;
  
@@ -1854,16 +1924,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
                         return err;
         }
  
-       ip6gre_netlink_parms(data, &nt->parms);
-
-       if (nt->parms.collect_md) {
-               if (rtnl_dereference(ign->collect_md_tun))
-                       return -EEXIST;
-       } else {
-               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
-                       return -EEXIST;
-       }
-
         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                 eth_hw_addr_random(dev);
  
@@ -1874,51 +1934,94 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
         if (err)
                 goto out;
  
-       ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
-
         if (tb[IFLA_MTU])
                 ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
  
         dev_hold(dev);
-       ip6gre_tunnel_link(ign, nt);
  
  out:
         return err;
  }
  
-static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
-                            struct nlattr *data[],
-                            struct netlink_ext_ack *extack)
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+                         struct nlattr *tb[], struct nlattr *data[],
+                         struct netlink_ext_ack *extack)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct ip6gre_net *ign;
+       int err;
+
+       ip6gre_netlink_parms(data, &nt->parms);
+       ign = net_generic(net, ip6gre_net_id);
+
+       if (nt->parms.collect_md) {
+               if (rtnl_dereference(ign->collect_md_tun))
+                       return -EEXIST;
+       } else {
+               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+                       return -EEXIST;
+       }
+
+       err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+               ip6gre_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+}
+
+static struct ip6_tnl *
+ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
+                        struct nlattr *data[], struct __ip6_tnl_parm *p_p,
+                        struct netlink_ext_ack *extack)
  {
         struct ip6_tnl *t, *nt = netdev_priv(dev);
         struct net *net = nt->net;
         struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-       struct __ip6_tnl_parm p;
         struct ip_tunnel_encap ipencap;
  
         if (dev == ign->fb_tunnel_dev)
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
  
         if (ip6gre_netlink_encap_parms(data, &ipencap)) {
                 int err = ip6_tnl_encap_setup(nt, &ipencap);
  
                 if (err < 0)
-                       return err;
+                       return ERR_PTR(err);
         }
  
-       ip6gre_netlink_parms(data, &p);
+       ip6gre_netlink_parms(data, p_p);
  
-       t = ip6gre_tunnel_locate(net, &p, 0);
+       t = ip6gre_tunnel_locate(net, p_p, 0);
  
         if (t) {
                 if (t->dev != dev)
-                       return -EEXIST;
+                       return ERR_PTR(-EEXIST);
         } else {
                 t = nt;
         }
  
+       return t;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+                            struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
+{
+       struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+       struct __ip6_tnl_parm p;
+       struct ip6_tnl *t;
+
+       t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       ip6gre_tunnel_unlink_md(ign, t);
         ip6gre_tunnel_unlink(ign, t);
         ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+       ip6gre_tunnel_link_md(ign, t);
         ip6gre_tunnel_link(ign, t);
         return 0;
  }
@@ -2068,6 +2171,69 @@ static void ip6erspan_tap_setup(struct net_device *dev)
         netif_keep_dst(dev);
  }
  
+static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
+                            struct nlattr *tb[], struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct ip6gre_net *ign;
+       int err;
+
+       ip6gre_netlink_parms(data, &nt->parms);
+       ign = net_generic(net, ip6gre_net_id);
+
+       if (nt->parms.collect_md) {
+               if (rtnl_dereference(ign->collect_md_tun_erspan))
+                       return -EEXIST;
+       } else {
+               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+                       return -EEXIST;
+       }
+
+       err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
+               ip6erspan_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+}
+
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+       ip6gre_tnl_link_config_common(t);
+       ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t));
+}
+
+static int ip6erspan_tnl_change(struct ip6_tnl *t,
+                               const struct __ip6_tnl_parm *p, int set_mtu)
+{
+       ip6gre_tnl_copy_tnl_parm(t, p);
+       ip6erspan_tnl_link_config(t, set_mtu);
+       return 0;
+}
+
+static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+                               struct nlattr *data[],
+                               struct netlink_ext_ack *extack)
+{
+       struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+       struct __ip6_tnl_parm p;
+       struct ip6_tnl *t;
+
+       t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
+       ip6erspan_tunnel_link_md(ign, t);
+       ip6gre_tunnel_link(ign, t);
+       return 0;
+}
+
  static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
         .kind           = "ip6gre",
         .maxtype        = IFLA_GRE_MAX,
@@ -2104,8 +2270,8 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
         .priv_size      = sizeof(struct ip6_tnl),
         .setup          = ip6erspan_tap_setup,
         .validate       = ip6erspan_tap_validate,
-       .newlink        = ip6gre_newlink,
-       .changelink     = ip6gre_changelink,
+       .newlink        = ip6erspan_newlink,
+       .changelink     = ip6erspan_changelink,
         .get_size       = ip6gre_get_size,
         .fill_info      = ip6gre_fill_info,
         .get_link_net   = ip6_tnl_get_link_net,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index 2e891d2c30efd3909e42810d3debba677776dd94..7b6d1689087bd58db1e92fc8991d84ae205629f0 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1503,7 +1503,8 @@ alloc_new_skb:
                 if (copy > length)
                         copy = length;
  
-               if (!(rt->dst.dev->features&NETIF_F_SG)) {
+               if (!(rt->dst.dev->features&NETIF_F_SG) &&
+                   skb_tailroom(skb) >= copy) {
                         unsigned int off;
  
                         off = skb->len;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c

index 65c9e1a583056aa07b014ad4f701a2a234b9a26e..97f79dc943d793956040767cd649cd13c5bf4f37 100644 (file)
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -38,6 +38,7 @@
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
  MODULE_DESCRIPTION("IPv6 packet filter");
+MODULE_ALIAS("ip6t_icmp6");
  
  void *ip6t_alloc_initial_table(const struct xt_table *info)
  {
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c

index 0f6c9ca590628ccfaff872ec89c7e5bc9149424e..5b5b0f95ffd13ecef6feeab6a0f2d64e36ba6561 100644 (file)
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -401,7 +401,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
  
  static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
                                struct sta_info *sta,
-                              struct ieee802_11_elems *elems, bool insert)
+                              struct ieee802_11_elems *elems)
  {
         struct ieee80211_local *local = sdata->local;
         struct ieee80211_supported_band *sband;
@@ -447,7 +447,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
                 sta->sta.bandwidth = IEEE80211_STA_RX_BW_20;
         }
  
-       if (insert)
+       if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
                 rate_control_rate_init(sta);
         else
                 rate_control_rate_update(local, sband, sta, changed);
@@ -551,7 +551,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
         rcu_read_lock();
         sta = sta_info_get(sdata, addr);
         if (sta) {
-               mesh_sta_info_init(sdata, sta, elems, false);
+               mesh_sta_info_init(sdata, sta, elems);
         } else {
                 rcu_read_unlock();
                 /* can't run atomic */
@@ -561,7 +561,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
                         return NULL;
                 }
  
-               mesh_sta_info_init(sdata, sta, elems, true);
+               mesh_sta_info_init(sdata, sta, elems);
  
                 if (sta_info_insert_rcu(sta))
                         return NULL;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c

index 0f6b8172fb9ab1bed02439b130b306b42e22adf6..206fb2c4c319da69c6016920955f1509ee5cff27 100644 (file)
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
  EXPORT_SYMBOL(nf_nat_decode_session_hook);
  #endif
  
-static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
+static void __net_init
+__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
  {
         int h;
  
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c

index 370abbf6f4217362ffe53e766964015776cc976f..75de46576f5100d40ce9edf5ddb3ce7818447a53 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
  static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
  {
         unsigned int hash;
-       bool ret;
+       bool ret = false;
+
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               return refcount_dec_if_one(&cp->refcnt);
  
         hash = ip_vs_conn_hashkey_conn(cp);
  
@@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
         spin_lock(&cp->lock);
  
         if (cp->flags & IP_VS_CONN_F_HASHED) {
-               ret = false;
                 /* Decrease refcnt and unlink conn only if we are last user */
                 if (refcount_dec_if_one(&cp->refcnt)) {
                         hlist_del_rcu(&cp->c_list);
                         cp->flags &= ~IP_VS_CONN_F_HASHED;
                         ret = true;
                 }
-       } else
-               ret = refcount_read(&cp->refcnt) ? false : true;
+       }
  
         spin_unlock(&cp->lock);
         ct_write_unlock_bh(hash);
@@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
  }
  EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
  
-static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
-{
-       __ip_vs_conn_put(cp);
-       ip_vs_conn_expire(&cp->timer);
-}
-
  /*
   *      Put back the conn and restart its timer with its timeout
   */
@@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
             (refcount_read(&cp->refcnt) == 1) &&
             !timer_pending(&cp->timer))
                 /* expire connection immediately */
-               __ip_vs_conn_put_notimer(cp);
+               ip_vs_conn_expire(&cp->timer);
         else
                 __ip_vs_conn_put_timer(cp);
  }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index 5f6f73cf2174d1494a685d73ca94ea124da83de5..0679dd101e72af062b0a167f817ae43904eb3da8 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                 struct ip_vs_cpu_stats *s;
                 struct ip_vs_service *svc;
  
+               local_bh_disable();
+
                 s = this_cpu_ptr(dest->stats.cpustats);
                 u64_stats_update_begin(&s->syncp);
                 s->cnt.inpkts++;
@@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                 s->cnt.inpkts++;
                 s->cnt.inbytes += skb->len;
                 u64_stats_update_end(&s->syncp);
+
+               local_bh_enable();
         }
  }
  
@@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                 struct ip_vs_cpu_stats *s;
                 struct ip_vs_service *svc;
  
+               local_bh_disable();
+
                 s = this_cpu_ptr(dest->stats.cpustats);
                 u64_stats_update_begin(&s->syncp);
                 s->cnt.outpkts++;
@@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                 s->cnt.outpkts++;
                 s->cnt.outbytes += skb->len;
                 u64_stats_update_end(&s->syncp);
+
+               local_bh_enable();
         }
  }
  
@@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
         struct netns_ipvs *ipvs = svc->ipvs;
         struct ip_vs_cpu_stats *s;
  
+       local_bh_disable();
+
         s = this_cpu_ptr(cp->dest->stats.cpustats);
         u64_stats_update_begin(&s->syncp);
         s->cnt.conns++;
@@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
         u64_stats_update_begin(&s->syncp);
         s->cnt.conns++;
         u64_stats_update_end(&s->syncp);
+
+       local_bh_enable();
  }
  
  
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c

index e97cdc1cf98c2618dffe838c4c7ef395e03ed8e0..8e67910185a05717628837d34c7e4fe829ca7424 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct,
                         return NF_ACCEPT; /* Don't change state */
                 }
                 break;
+       case TCP_CONNTRACK_SYN_SENT2:
+               /* tcp_conntracks table is not smart enough to handle
+                * simultaneous open.
+                */
+               ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
+               break;
+       case TCP_CONNTRACK_SYN_RECV:
+               if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
+                   ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
+                       new_state = TCP_CONNTRACK_ESTABLISHED;
+               break;
         case TCP_CONNTRACK_CLOSE:
                 if (index == TCP_RST_SET
                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index 04d4e377258409dbd2b554a139215c6a8c4d8b51..91e80aa852d630b01ceede24f79dedbeca166ece 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx)
         return err;
  }
  
+static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+                                  struct nft_rule *rule)
+{
+       struct nft_expr *expr;
+
+       expr = nft_expr_first(rule);
+       while (expr != nft_expr_last(rule) && expr->ops) {
+               if (expr->ops->activate)
+                       expr->ops->activate(ctx, expr);
+
+               expr = nft_expr_next(expr);
+       }
+}
+
+static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
+                                    struct nft_rule *rule)
+{
+       struct nft_expr *expr;
+
+       expr = nft_expr_first(rule);
+       while (expr != nft_expr_last(rule) && expr->ops) {
+               if (expr->ops->deactivate)
+                       expr->ops->deactivate(ctx, expr);
+
+               expr = nft_expr_next(expr);
+       }
+}
+
  static int
  nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
  {
@@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
                 nft_trans_destroy(trans);
                 return err;
         }
+       nft_rule_expr_deactivate(ctx, rule);
  
         return 0;
  }
@@ -2238,6 +2267,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
         kfree(rule);
  }
  
+static void nf_tables_rule_release(const struct nft_ctx *ctx,
+                                  struct nft_rule *rule)
+{
+       nft_rule_expr_deactivate(ctx, rule);
+       nf_tables_rule_destroy(ctx, rule);
+}
+
  #define NFT_RULE_MAXEXPRS      128
  
  static struct nft_expr_info *info;
@@ -2402,7 +2438,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
         return 0;
  
  err2:
-       nf_tables_rule_destroy(&ctx, rule);
+       nf_tables_rule_release(&ctx, rule);
  err1:
         for (i = 0; i < n; i++) {
                 if (info[i].ops != NULL)
@@ -4044,8 +4080,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                         if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
                             nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
                             nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
-                           nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
-                               return -EBUSY;
+                           nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) {
+                               err = -EBUSY;
+                               goto err5;
+                       }
                         if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
                              nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
                              memcmp(nft_set_ext_data(ext),
@@ -4130,7 +4168,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
   *     NFT_GOTO verdicts. This function must be called on active data objects
   *     from the second phase of the commit protocol.
   */
-static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
  {
         if (type == NFT_DATA_VERDICT) {
                 switch (data->verdict.code) {
@@ -5761,7 +5799,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
         }
  }
  
-static void nf_tables_commit_release(struct nft_trans *trans)
+static void nft_commit_release(struct nft_trans *trans)
  {
         switch (trans->msg_type) {
         case NFT_MSG_DELTABLE:
@@ -5790,6 +5828,21 @@ static void nf_tables_commit_release(struct nft_trans *trans)
         kfree(trans);
  }
  
+static void nf_tables_commit_release(struct net *net)
+{
+       struct nft_trans *trans, *next;
+
+       if (list_empty(&net->nft.commit_list))
+               return;
+
+       synchronize_rcu();
+
+       list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+               list_del(&trans->list);
+               nft_commit_release(trans);
+       }
+}
+
  static int nf_tables_commit(struct net *net, struct sk_buff *skb)
  {
         struct nft_trans *trans, *next;
@@ -5920,13 +5973,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                 }
         }
  
-       synchronize_rcu();
-
-       list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-               list_del(&trans->list);
-               nf_tables_commit_release(trans);
-       }
-
+       nf_tables_commit_release(net);
         nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
  
         return 0;
@@ -6006,10 +6053,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
                 case NFT_MSG_NEWRULE:
                         trans->ctx.chain->use--;
                         list_del_rcu(&nft_trans_rule(trans)->list);
+                       nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
                         break;
                 case NFT_MSG_DELRULE:
                         trans->ctx.chain->use++;
                         nft_clear(trans->ctx.net, nft_trans_rule(trans));
+                       nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
                         nft_trans_destroy(trans);
                         break;
                 case NFT_MSG_NEWSET:
@@ -6585,7 +6634,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
         list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
                 list_del(&rule->list);
                 ctx->chain->use--;
-               nf_tables_rule_destroy(ctx, rule);
+               nf_tables_rule_release(ctx, rule);
         }
         list_del(&ctx->chain->list);
         ctx->table->use--;
@@ -6623,7 +6672,7 @@ static void __nft_release_tables(struct net *net)
                         list_for_each_entry_safe(rule, nr, &chain->rules, list) {
                                 list_del(&rule->list);
                                 chain->use--;
-                               nf_tables_rule_destroy(&ctx, rule);
+                               nf_tables_rule_release(&ctx, rule);
                         }
                 }
                 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c

index dfd0bf3810d2e81690a4a2d3ac5648cb8d881090..942702a2776f6a57313b07c7f88bb67f98710016 100644 (file)
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
  static noinline void nft_update_chain_stats(const struct nft_chain *chain,
                                             const struct nft_pktinfo *pkt)
  {
+       struct nft_base_chain *base_chain;
         struct nft_stats *stats;
  
-       local_bh_disable();
-       stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
-       u64_stats_update_begin(&stats->syncp);
-       stats->pkts++;
-       stats->bytes += pkt->skb->len;
-       u64_stats_update_end(&stats->syncp);
-       local_bh_enable();
+       base_chain = nft_base_chain(chain);
+       if (!base_chain->stats)
+               return;
+
+       stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
+       if (stats) {
+               local_bh_disable();
+               u64_stats_update_begin(&stats->syncp);
+               stats->pkts++;
+               stats->bytes += pkt->skb->len;
+               u64_stats_update_end(&stats->syncp);
+               local_bh_enable();
+       }
  }
  
  struct nft_jumpstack {
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c

index b9505bcd3827d9604bfcf4371e807ede4f09f93f..6ddf89183e7b47e6c029b28cf5b524c73a790498 100644 (file)
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
                 nfacct->flags = flags;
         }
  
-       strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+       nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);
  
         if (tb[NFACCT_BYTES]) {
                 atomic64_set(&nfacct->bytes,
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c

index 4a4b293fb2e54cbcf5df1ccd1c2b9950770bf4d8..fa026b269b3691d5186e28020eb2b08e93dc3679 100644 (file)
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
             !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
                 return -EINVAL;
  
-       strncpy(expect_policy->name,
-               nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
+       nla_strlcpy(expect_policy->name,
+                   tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN);
         expect_policy->max_expected =
                 ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
         if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
@@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
         if (ret < 0)
                 goto err1;
  
-       strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
+       /* nla_strlcpy() takes the attribute itself, not its nla_data() */
+       nla_strlcpy(helper->name, tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
         size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
         if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
                 ret = -ENOMEM;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c

index 8e23726b90810d3a7d49c145e5b735245d669b57..1d99a1efdafcda5e209eadb975c4e53c859d5495 100644 (file)
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -27,14 +27,31 @@ struct nft_xt {
         struct list_head        head;
         struct nft_expr_ops     ops;
         unsigned int            refcnt;
+
+       /* Unlike other expressions, ops doesn't have static storage duration.
+        * nft core assumes they do.  We use kfree_rcu so that nft core can
+        * check expr->ops->size even after nft_compat->destroy() frees
+        * the nft_xt struct that holds the ops structure.
+        */
+       struct rcu_head         rcu_head;
+};
+
+/* Used for matches where *info is larger than X bytes */
+#define NFT_MATCH_LARGE_THRESH 192
+
+struct nft_xt_match_priv {
+       void *info;
  };
  
-static void nft_xt_put(struct nft_xt *xt)
+static bool nft_xt_put(struct nft_xt *xt)
  {
         if (--xt->refcnt == 0) {
                 list_del(&xt->head);
-               kfree(xt);
+               kfree_rcu(xt, rcu_head);
+               return true;
         }
+
+       return false;
  }
  
  static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -226,6 +243,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
         struct xt_target *target = expr->ops->data;
         struct xt_tgchk_param par;
         size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+       struct nft_xt *nft_xt;
         u16 proto = 0;
         bool inv = false;
         union nft_entry e = {};
@@ -236,25 +254,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
         if (ctx->nla[NFTA_RULE_COMPAT]) {
                 ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
                 if (ret < 0)
-                       goto err;
+                       return ret;
         }
  
         nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
  
         ret = xt_check_target(&par, size, proto, inv);
         if (ret < 0)
-               goto err;
+               return ret;
  
         /* The standard target cannot be used */
-       if (target->target == NULL) {
-               ret = -EINVAL;
-               goto err;
-       }
+       if (!target->target)
+               return -EINVAL;
  
+       nft_xt = container_of(expr->ops, struct nft_xt, ops);
+       nft_xt->refcnt++;
         return 0;
-err:
-       module_put(target->me);
-       return ret;
  }
  
  static void
@@ -271,8 +286,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
         if (par.target->destroy != NULL)
                 par.target->destroy(&par);
  
-       nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-       module_put(target->me);
+       if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+               module_put(target->me);
  }
  
  static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -316,11 +331,11 @@ static int nft_target_validate(const struct nft_ctx *ctx,
         return 0;
  }
  
-static void nft_match_eval(const struct nft_expr *expr,
-                          struct nft_regs *regs,
-                          const struct nft_pktinfo *pkt)
+static void __nft_match_eval(const struct nft_expr *expr,
+                            struct nft_regs *regs,
+                            const struct nft_pktinfo *pkt,
+                            void *info)
  {
-       void *info = nft_expr_priv(expr);
         struct xt_match *match = expr->ops->data;
         struct sk_buff *skb = pkt->skb;
         bool ret;
@@ -344,6 +359,22 @@ static void nft_match_eval(const struct nft_expr *expr,
         }
  }
  
+static void nft_match_large_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+       __nft_match_eval(expr, regs, pkt, priv->info);
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+                          struct nft_regs *regs,
+                          const struct nft_pktinfo *pkt)
+{
+       __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr));
+}
+
  static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
         [NFTA_MATCH_NAME]       = { .type = NLA_NUL_STRING },
         [NFTA_MATCH_REV]        = { .type = NLA_U32 },
@@ -404,13 +435,14 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out)
  }
  
  static int
-nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-               const struct nlattr * const tb[])
+__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                const struct nlattr * const tb[],
+                void *info)
  {
-       void *info = nft_expr_priv(expr);
         struct xt_match *match = expr->ops->data;
         struct xt_mtchk_param par;
         size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+       struct nft_xt *nft_xt;
         u16 proto = 0;
         bool inv = false;
         union nft_entry e = {};
@@ -421,26 +453,50 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
         if (ctx->nla[NFTA_RULE_COMPAT]) {
                 ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
                 if (ret < 0)
-                       goto err;
+                       return ret;
         }
  
         nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
  
         ret = xt_check_match(&par, size, proto, inv);
         if (ret < 0)
-               goto err;
+               return ret;
  
+       nft_xt = container_of(expr->ops, struct nft_xt, ops);
+       nft_xt->refcnt++;
         return 0;
-err:
-       module_put(match->me);
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+              const struct nlattr * const tb[])
+{
+       return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
+}
+
+static int
+nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                    const struct nlattr * const tb[])
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+       struct xt_match *m = expr->ops->data;
+       int ret;
+
+       priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+       if (!priv->info)
+               return -ENOMEM;
+
+       ret = __nft_match_init(ctx, expr, tb, priv->info);
+       if (ret)
+               kfree(priv->info);
         return ret;
  }
  
  static void
-nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                   void *info)
  {
         struct xt_match *match = expr->ops->data;
-       void *info = nft_expr_priv(expr);
         struct xt_mtdtor_param par;
  
         par.net = ctx->net;
@@ -450,13 +506,28 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
         if (par.match->destroy != NULL)
                 par.match->destroy(&par);
  
-       nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-       module_put(match->me);
+       if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+               module_put(match->me);
  }
  
-static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+       __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
+}
+
+static void
+nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+       __nft_match_destroy(ctx, expr, priv->info);
+       kfree(priv->info);
+}
+
+static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
+                           void *info)
  {
-       void *info = nft_expr_priv(expr);
         struct xt_match *match = expr->ops->data;
  
         if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
@@ -470,6 +541,18 @@ nla_put_failure:
         return -1;
  }
  
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       return __nft_match_dump(skb, expr, nft_expr_priv(expr));
+}
+
+static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(e);
+
+       return __nft_match_dump(skb, e, priv->info);
+}
+
  static int nft_match_validate(const struct nft_ctx *ctx,
                               const struct nft_expr *expr,
                               const struct nft_data **data)
@@ -637,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
  {
         struct nft_xt *nft_match;
         struct xt_match *match;
+       unsigned int matchsize;
         char *mt_name;
         u32 rev, family;
         int err;
@@ -654,13 +738,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
         list_for_each_entry(nft_match, &nft_match_list, head) {
                 struct xt_match *match = nft_match->ops.data;
  
-               if (nft_match_cmp(match, mt_name, rev, family)) {
-                       if (!try_module_get(match->me))
-                               return ERR_PTR(-ENOENT);
-
-                       nft_match->refcnt++;
+               if (nft_match_cmp(match, mt_name, rev, family))
                         return &nft_match->ops;
-               }
         }
  
         match = xt_request_find_match(family, mt_name, rev);
@@ -679,9 +758,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
                 goto err;
         }
  
-       nft_match->refcnt = 1;
+       nft_match->refcnt = 0;
         nft_match->ops.type = &nft_match_type;
-       nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
         nft_match->ops.eval = nft_match_eval;
         nft_match->ops.init = nft_match_init;
         nft_match->ops.destroy = nft_match_destroy;
@@ -689,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx,
         nft_match->ops.validate = nft_match_validate;
         nft_match->ops.data = match;
  
+       matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
+       if (matchsize > NFT_MATCH_LARGE_THRESH) {
+               matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
+
+               nft_match->ops.eval = nft_match_large_eval;
+               nft_match->ops.init = nft_match_large_init;
+               nft_match->ops.destroy = nft_match_large_destroy;
+               nft_match->ops.dump = nft_match_large_dump;
+       }
+
+       nft_match->ops.size = matchsize;
+
         list_add(&nft_match->head, &nft_match_list);
  
         return &nft_match->ops;
@@ -739,13 +829,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
         list_for_each_entry(nft_target, &nft_target_list, head) {
                 struct xt_target *target = nft_target->ops.data;
  
-               if (nft_target_cmp(target, tg_name, rev, family)) {
-                       if (!try_module_get(target->me))
-                               return ERR_PTR(-ENOENT);
-
-                       nft_target->refcnt++;
+               if (nft_target_cmp(target, tg_name, rev, family))
                         return &nft_target->ops;
-               }
         }
  
         target = xt_request_find_target(family, tg_name, rev);
@@ -764,7 +849,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
                 goto err;
         }
  
-       nft_target->refcnt = 1;
+       nft_target->refcnt = 0;
         nft_target->ops.type = &nft_target_type;
         nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
         nft_target->ops.init = nft_target_init;
@@ -823,6 +908,32 @@ err_match:
  
  static void __exit nft_compat_module_exit(void)
  {
+       struct nft_xt *xt, *next;
+
+       /* list should be empty here, it can be non-empty only in case there
+        * was an error that caused nft_xt expr to not be initialized fully
+        * and no one else requested the same expression later.
+        *
+        * In this case, the lists contain 0-refcount entries that still
+        * hold module reference.
+        */
+       list_for_each_entry_safe(xt, next, &nft_target_list, head) {
+               struct xt_target *target = xt->ops.data;
+
+               if (WARN_ON_ONCE(xt->refcnt))
+                       continue;
+               module_put(target->me);
+               kfree(xt);
+       }
+
+       list_for_each_entry_safe(xt, next, &nft_match_list, head) {
+               struct xt_match *match = xt->ops.data;
+
+               if (WARN_ON_ONCE(xt->refcnt))
+                       continue;
+               module_put(match->me);
+               kfree(xt);
+       }
         nfnetlink_subsys_unregister(&nfnl_compat_subsys);
         nft_unregister_expr(&nft_target_type);
         nft_unregister_expr(&nft_match_type);
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c

index 4717d77969271c324087ed7677df636b414e54ad..aa87ff8beae82cf733303b1b32d8b50ba5af65b7 100644 (file)
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -69,8 +69,16 @@ err1:
         return err;
  }
  
-static void nft_immediate_destroy(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr)
+static void nft_immediate_activate(const struct nft_ctx *ctx,
+                                  const struct nft_expr *expr)
+{
+       const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+       return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+                                    const struct nft_expr *expr)
  {
         const struct nft_immediate_expr *priv = nft_expr_priv(expr);
  
@@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = {
         .size           = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
         .eval           = nft_immediate_eval,
         .init           = nft_immediate_init,
-       .destroy        = nft_immediate_destroy,
+       .activate       = nft_immediate_activate,
+       .deactivate     = nft_immediate_deactivate,
         .dump           = nft_immediate_dump,
         .validate       = nft_immediate_validate,
  };
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c

index 71325fef647da706f4dc23eaee28c3a6d1b6799d..cb7cb300c3bc9b85c0c3a6a84de304e03105bb22 100644 (file)
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
         struct xt_match *m;
         int err = -ENOENT;
  
+       if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+               return ERR_PTR(-EINVAL);
+
         mutex_lock(&xt[af].mutex);
         list_for_each_entry(m, &xt[af].match, list) {
                 if (strcmp(m->name, name) == 0) {
@@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
         struct xt_target *t;
         int err = -ENOENT;
  
+       if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+               return ERR_PTR(-EINVAL);
+
         mutex_lock(&xt[af].mutex);
         list_for_each_entry(t, &xt[af].target, list) {
                 if (strcmp(t->name, name) == 0) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index 01f3515cada0da4c2b2a7e314a5feb8406993f49..acb7b86574cd3d6f13790550c00f11616caff2e3 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
         if (skb == NULL)
                 goto out_unlock;
  
-       skb_set_network_header(skb, reserve);
+       skb_reset_network_header(skb);
  
         err = -EINVAL;
         if (sock->type == SOCK_DGRAM) {
                 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                 if (unlikely(offset < 0))
                         goto out_free;
+       } else if (reserve) {
+               skb_reserve(skb, -reserve);
         }
  
         /* Returns -EFAULT on error */
diff --git a/net/rds/Kconfig b/net/rds/Kconfig

index bffde4b46c5d2058027da6158c6d780edde78950..1a31502ee7db307bb6d9ddb0ba52f521305ddb70 100644 (file)
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -8,7 +8,7 @@ config RDS
  
  config RDS_RDMA
         tristate "RDS over Infiniband"
-       depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on RDS && INFINIBAND_ADDR_TRANS
         ---help---
           Allow RDS to use Infiniband as a transport.
           This transport supports RDMA operations.
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c

index 853604685965128dcd4af54ad05f784b4237e205..1fb39e1f9d077beb4fdb440459f18116b561f334 100644 (file)
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         case htons(ETH_P_8021AD):
                                 break;
                         default:
+                               if (exists)
+                                       tcf_idr_release(*a, bind);
                                 return -EPROTONOSUPPORT;
                         }
                 } else {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c

index 963e4bf0aab8ae23d999305208c757fbc0f49ebe..a57e112d9b3ee517f1502f466a23cf8ea3099ac3 100644 (file)
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1588,7 +1588,7 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
                 return ret;
         ok_count = ret;
  
-       if (!exts)
+       if (!exts || ok_count)
                 return ok_count;
         ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
         if (ret < 0)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c

index 16644b3d236271533b6e96a62705bc1903ab1d8a..56c181c3feeb27a428a56fb76b26d0bc3622ea33 100644 (file)
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
                                          extack);
                 if (IS_ERR(child))
                         return PTR_ERR(child);
-       }
  
-       if (child != &noop_qdisc)
+               /* child is fifo, no need to check for noop_qdisc */
                 qdisc_hash_add(child, true);
+       }
+
         sch_tree_lock(sch);
         q->flags = ctl->flags;
         q->limit = ctl->limit;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c

index 03225a8df9730cee7e020331b42a805d42b6f25c..6f74a426f159e440f33755e13656c849408bbc18 100644 (file)
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                         err = PTR_ERR(child);
                         goto done;
                 }
+
+               /* child is fifo, no need to check for noop_qdisc */
+               qdisc_hash_add(child, true);
         }
  
         sch_tree_lock(sch);
@@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                                           q->qdisc->qstats.backlog);
                 qdisc_destroy(q->qdisc);
                 q->qdisc = child;
-               if (child != &noop_qdisc)
-                       qdisc_hash_add(child, true);
         }
         q->limit = qopt->limit;
         if (tb[TCA_TBF_PBURST])
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c

index 42247110d842e75d61068ad20d577d8dc6dd4263..0cd2e764f47ff0874438301324de25e4bf33dd95 100644 (file)
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1006,7 +1006,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
         .owner             = THIS_MODULE,
         .release           = inet6_release,
         .bind              = inet6_bind,
-       .connect           = inet_dgram_connect,
+       .connect           = sctp_inet_connect,
         .socketpair        = sock_no_socketpair,
         .accept            = inet_accept,
         .getname           = sctp_getname,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c

index d685f84567624aedfb043c9ba56d3977849f16e5..6bf0a99718884a4d4952ab530c43b769bb5dafbb 100644 (file)
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1012,7 +1012,7 @@ static const struct proto_ops inet_seqpacket_ops = {
         .owner             = THIS_MODULE,
         .release           = inet_release,      /* Needs to be wrapped... */
         .bind              = inet_bind,
-       .connect           = inet_dgram_connect,
+       .connect           = sctp_inet_connect,
         .socketpair        = sock_no_socketpair,
         .accept            = inet_accept,
         .getname           = inet_getname,      /* Semantics are different.  */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c

index 80835ac26d2c3ce6559f75aaaa0b315fb77d9adf..ae7e7c606f72187aaaf3b4c25bf2f0981eec1fae 100644 (file)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1086,7 +1086,7 @@ out:
   */
  static int __sctp_connect(struct sock *sk,
                           struct sockaddr *kaddrs,
-                         int addrs_size,
+                         int addrs_size, int flags,
                           sctp_assoc_t *assoc_id)
  {
         struct net *net = sock_net(sk);
@@ -1104,7 +1104,6 @@ static int __sctp_connect(struct sock *sk,
         union sctp_addr *sa_addr = NULL;
         void *addr_buf;
         unsigned short port;
-       unsigned int f_flags = 0;
  
         sp = sctp_sk(sk);
         ep = sp->ep;
@@ -1254,13 +1253,7 @@ static int __sctp_connect(struct sock *sk,
         sp->pf->to_sk_daddr(sa_addr, sk);
         sk->sk_err = 0;
  
-       /* in-kernel sockets don't generally have a file allocated to them
-        * if all they do is call sock_create_kern().
-        */
-       if (sk->sk_socket->file)
-               f_flags = sk->sk_socket->file->f_flags;
-
-       timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
+       timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
  
         if (assoc_id)
                 *assoc_id = asoc->assoc_id;
@@ -1348,7 +1341,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
                                       sctp_assoc_t *assoc_id)
  {
         struct sockaddr *kaddrs;
-       int err = 0;
+       int err = 0, flags = 0;
  
         pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
                  __func__, sk, addrs, addrs_size);
@@ -1367,7 +1360,13 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
         if (err)
                 goto out_free;
  
-       err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+       /* in-kernel sockets don't generally have a file allocated to them
+        * if all they do is call sock_create_kern().
+        */
+       if (sk->sk_socket->file)
+               flags = sk->sk_socket->file->f_flags;
+
+       err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
  
  out_free:
         kvfree(kaddrs);
@@ -4397,16 +4396,26 @@ out_nounlock:
   * len: the size of the address.
   */
  static int sctp_connect(struct sock *sk, struct sockaddr *addr,
-                       int addr_len)
+                       int addr_len, int flags)
  {
-       int err = 0;
+       struct inet_sock *inet = inet_sk(sk);
         struct sctp_af *af;
+       int err = 0;
  
         lock_sock(sk);
  
         pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk,
                  addr, addr_len);
  
+       /* We may need to bind the socket. */
+       if (!inet->inet_num) {
+               if (sk->sk_prot->get_port(sk, 0)) {
+                       release_sock(sk);
+                       return -EAGAIN;
+               }
+               inet->inet_sport = htons(inet->inet_num);
+       }
+
         /* Validate addr_len before calling common connect/connectx routine. */
         af = sctp_get_af_specific(addr->sa_family);
         if (!af || addr_len < af->sockaddr_len) {
@@ -4415,13 +4424,25 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr,
                 /* Pass correct addr len to common routine (so it knows there
                  * is only one address being passed.
                  */
-               err = __sctp_connect(sk, addr, af->sockaddr_len, NULL);
+               err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL);
         }
  
         release_sock(sk);
         return err;
  }
  
+int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
+                     int addr_len, int flags)
+{
+       if (addr_len < sizeof(uaddr->sa_family))
+               return -EINVAL;
+
+       if (uaddr->sa_family == AF_UNSPEC)
+               return -EOPNOTSUPP;
+
+       return sctp_connect(sock->sk, uaddr, addr_len, flags);
+}
+
  /* FIXME: Write comments. */
  static int sctp_disconnect(struct sock *sk, int flags)
  {
@@ -8724,7 +8745,6 @@ struct proto sctp_prot = {
         .name        =  "SCTP",
         .owner       =  THIS_MODULE,
         .close       =  sctp_close,
-       .connect     =  sctp_connect,
         .disconnect  =  sctp_disconnect,
         .accept      =  sctp_accept,
         .ioctl       =  sctp_ioctl,
@@ -8767,7 +8787,6 @@ struct proto sctpv6_prot = {
         .name           = "SCTPv6",
         .owner          = THIS_MODULE,
         .close          = sctp_close,
-       .connect        = sctp_connect,
         .disconnect     = sctp_disconnect,
         .accept         = sctp_accept,
         .ioctl          = sctp_ioctl,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c

index 74568cdbca7087532c20b891496edeb467b13829..d7b88b2d1b224195b2d82523c047052c67f2e1eb 100644 (file)
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -245,40 +245,45 @@ out:
  static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
                                struct nlattr *tb[])
  {
-       char *string, *ibname = NULL;
-       int rc = 0;
+       char *string, *ibname;
+       int rc;
  
         memset(pnetelem, 0, sizeof(*pnetelem));
         INIT_LIST_HEAD(&pnetelem->list);
-       if (tb[SMC_PNETID_NAME]) {
-               string = (char *)nla_data(tb[SMC_PNETID_NAME]);
-               if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
-                       rc = -EINVAL;
-                       goto error;
-               }
-       }
-       if (tb[SMC_PNETID_ETHNAME]) {
-               string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
-               pnetelem->ndev = dev_get_by_name(net, string);
-               if (!pnetelem->ndev)
-                       return -ENOENT;
-       }
-       if (tb[SMC_PNETID_IBNAME]) {
-               ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
-               ibname = strim(ibname);
-               pnetelem->smcibdev = smc_pnet_find_ib(ibname);
-               if (!pnetelem->smcibdev) {
-                       rc = -ENOENT;
-                       goto error;
-               }
-       }
-       if (tb[SMC_PNETID_IBPORT]) {
-               pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
-               if (pnetelem->ib_port > SMC_MAX_PORTS) {
-                       rc = -EINVAL;
-                       goto error;
-               }
-       }
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_NAME])
+               goto error;
+       string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+       if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_ETHNAME])
+               goto error;
+       rc = -ENOENT;
+       string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+       pnetelem->ndev = dev_get_by_name(net, string);
+       if (!pnetelem->ndev)
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_IBNAME])
+               goto error;
+       rc = -ENOENT;
+       ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+       ibname = strim(ibname);
+       pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+       if (!pnetelem->smcibdev)
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_IBPORT])
+               goto error;
+       pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+       if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
+               goto error;
+
         return 0;
  
  error:
@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
         void *hdr;
         int rc;
  
+       if (!info->attrs[SMC_PNETID_NAME])
+               return -EINVAL;
         pnetelem = smc_pnet_find_pnetid(
                                 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
         if (!pnetelem)
@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
  
  static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
  {
+       if (!info->attrs[SMC_PNETID_NAME])
+               return -EINVAL;
         return smc_pnet_remove_by_pnetid(
                                 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
  }
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig

index ac09ca8032965bfd4280fb3f6d3410c08cbda243..6358e52710700d78a40910fa92d66600f8b2fc32 100644 (file)
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -50,7 +50,7 @@ config SUNRPC_DEBUG
  
  config SUNRPC_XPRT_RDMA
         tristate "RPC-over-RDMA transport"
-       depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on SUNRPC && INFINIBAND_ADDR_TRANS
         default SUNRPC && INFINIBAND
         select SG_POOL
         help
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c

index 71e79597f940a20b7eb49d35e5bcccf5d2c60963..e1c93ce74e0fe4dd7cc845d297226f229650fbd4 100644 (file)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -680,7 +680,6 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
         struct scatterlist *sgin = &sgin_arr[0];
         struct strp_msg *rxm = strp_msg(skb);
         int ret, nsg = ARRAY_SIZE(sgin_arr);
-       char aad_recv[TLS_AAD_SPACE_SIZE];
         struct sk_buff *unused;
  
         ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
@@ -698,13 +697,13 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
         }
  
         sg_init_table(sgin, nsg);
-       sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+       sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE);
  
         nsg = skb_to_sgvec(skb, &sgin[1],
                            rxm->offset + tls_ctx->rx.prepend_size,
                            rxm->full_len - tls_ctx->rx.prepend_size);
  
-       tls_make_aad(aad_recv,
+       tls_make_aad(ctx->rx_aad_ciphertext,
                      rxm->full_len - tls_ctx->rx.overhead_size,
                      tls_ctx->rx.rec_seq,
                      tls_ctx->rx.rec_seq_size,
@@ -803,12 +802,12 @@ int tls_sw_recvmsg(struct sock *sk,
                         if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
                             likely(!(flags & MSG_PEEK)))  {
                                 struct scatterlist sgin[MAX_SKB_FRAGS + 1];
-                               char unused[21];
                                 int pages = 0;
  
                                 zc = true;
                                 sg_init_table(sgin, MAX_SKB_FRAGS + 1);
-                               sg_set_buf(&sgin[0], unused, 13);
+                               sg_set_buf(&sgin[0], ctx->rx_aad_plaintext,
+                                          TLS_AAD_SPACE_SIZE);
  
                                 err = zerocopy_from_iter(sk, &msg->msg_iter,
                                                          to_copy, &pages,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c

index a052693c2e852b856f96d304cb97b6ddd0ff5319..7c5135a92d764448bac44bff8c4107cba7ea5c1c 100644 (file)
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -15555,7 +15555,8 @@ void cfg80211_ft_event(struct net_device *netdev,
         if (!ft_event->target_ap)
                 return;
  
-       msg = nlmsg_new(100 + ft_event->ric_ies_len, GFP_KERNEL);
+       msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len,
+                       GFP_KERNEL);
         if (!msg)
                 return;
  
diff --git a/net/wireless/reg.c b/net/wireless/reg.c

index ac3e12c32aa30053a110d6a46f5bb813d660c8b6..5fcec5c94eb727cafd5a5168f7634eaec59b11d1 100644 (file)
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -916,6 +916,9 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
         const struct fwdb_header *hdr = regdb;
         const struct fwdb_country *country;
  
+       if (!regdb)
+               return -ENODATA;
+
         if (IS_ERR(regdb))
                 return PTR_ERR(regdb);
  
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile

index 4d6a6edd4bf6e9200f5be30c7c78ff0e26854731..092947676143c842ad19a18cf54ca7b7e7e7df24 100644 (file)
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -255,7 +255,7 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
  $(obj)/%.o: $(src)/%.c
         $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
                 -I$(srctree)/tools/testing/selftests/bpf/ \
-               -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+               -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
                 -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
                 -Wno-gnu-variable-sized-type-not-at-end \
                 -Wno-address-of-packed-member -Wno-tautological-compare \
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl

index e16d6713f2368473d96eaec321fda1685cb3cac5..2d42eb9cd1a5639637df3f1fec35b4216d1c5fa5 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5041,7 +5041,7 @@ sub process {
                                 $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
                                 $tmp_stmt =~ s/\#+\s*$arg\b//g;
                                 $tmp_stmt =~ s/\b$arg\s*\#\#//g;
-                               my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g;
+                               my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g;
                                 if ($use_cnt > 1) {
                                         CHK("MACRO_ARG_REUSE",
                                             "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx");
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index 4cafe6a19167613cb64b29ac59c895e91285b390..179dd20bec0a26669d4e00420144903644e07c82 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1568,8 +1568,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
                         /* Called from d_instantiate or d_splice_alias. */
                         dentry = dget(opt_dentry);
                 } else {
-                       /* Called from selinux_complete_init, try to find a dentry. */
+                       /*
+                        * Called from selinux_complete_init, try to find a dentry.
+                        * Some filesystems really want a connected one, so try
+                        * that first.  We could split SECURITY_FS_USE_XATTR in
+                        * two, depending upon that...
+                        */
                         dentry = d_find_alias(inode);
+                       if (!dentry)
+                               dentry = d_find_any_alias(inode);
                 }
                 if (!dentry) {
                         /*
@@ -1674,14 +1681,19 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
                 if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) {
                         /* We must have a dentry to determine the label on
                          * procfs inodes */
-                       if (opt_dentry)
+                       if (opt_dentry) {
                                 /* Called from d_instantiate or
                                  * d_splice_alias. */
                                 dentry = dget(opt_dentry);
-                       else
+                       } else {
                                 /* Called from selinux_complete_init, try to
-                                * find a dentry. */
+                                * find a dentry.  Some filesystems really want
+                                * a connected one, so try that first.
+                                */
                                 dentry = d_find_alias(inode);
+                               if (!dentry)
+                                       dentry = d_find_any_alias(inode);
+                       }
                         /*
                          * This can be hit on boot when a file is accessed
                          * before the policy is loaded.  When we load policy we
@@ -4576,6 +4588,7 @@ static int selinux_socket_post_create(struct socket *sock, int family,
  static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
  {
         struct sock *sk = sock->sk;
+       struct sk_security_struct *sksec = sk->sk_security;
         u16 family;
         int err;
  
@@ -4587,11 +4600,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
         family = sk->sk_family;
         if (family == PF_INET || family == PF_INET6) {
                 char *addrp;
-               struct sk_security_struct *sksec = sk->sk_security;
                 struct common_audit_data ad;
                 struct lsm_network_audit net = {0,};
                 struct sockaddr_in *addr4 = NULL;
                 struct sockaddr_in6 *addr6 = NULL;
+               u16 family_sa = address->sa_family;
                 unsigned short snum;
                 u32 sid, node_perm;
  
@@ -4601,11 +4614,20 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                  * need to check address->sa_family as it is possible to have
                  * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET.
                  */
-               switch (address->sa_family) {
+               switch (family_sa) {
+               case AF_UNSPEC:
                 case AF_INET:
                         if (addrlen < sizeof(struct sockaddr_in))
                                 return -EINVAL;
                         addr4 = (struct sockaddr_in *)address;
+                       if (family_sa == AF_UNSPEC) {
+                               /* see __inet_bind(), we only want to allow
+                                * AF_UNSPEC if the address is INADDR_ANY
+                                */
+                               if (addr4->sin_addr.s_addr != htonl(INADDR_ANY))
+                                       goto err_af;
+                               family_sa = AF_INET;
+                       }
                         snum = ntohs(addr4->sin_port);
                         addrp = (char *)&addr4->sin_addr.s_addr;
                         break;
@@ -4617,15 +4639,14 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                         addrp = (char *)&addr6->sin6_addr.s6_addr;
                         break;
                 default:
-                       /* Note that SCTP services expect -EINVAL, whereas
-                        * others expect -EAFNOSUPPORT.
-                        */
-                       if (sksec->sclass == SECCLASS_SCTP_SOCKET)
-                               return -EINVAL;
-                       else
-                               return -EAFNOSUPPORT;
+                       goto err_af;
                 }
  
+               ad.type = LSM_AUDIT_DATA_NET;
+               ad.u.net = &net;
+               ad.u.net->sport = htons(snum);
+               ad.u.net->family = family_sa;
+
                 if (snum) {
                         int low, high;
  
@@ -4637,10 +4658,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                                                       snum, &sid);
                                 if (err)
                                         goto out;
-                               ad.type = LSM_AUDIT_DATA_NET;
-                               ad.u.net = &net;
-                               ad.u.net->sport = htons(snum);
-                               ad.u.net->family = family;
                                 err = avc_has_perm(&selinux_state,
                                                    sksec->sid, sid,
                                                    sksec->sclass,
@@ -4672,16 +4689,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                         break;
                 }
  
-               err = sel_netnode_sid(addrp, family, &sid);
+               err = sel_netnode_sid(addrp, family_sa, &sid);
                 if (err)
                         goto out;
  
-               ad.type = LSM_AUDIT_DATA_NET;
-               ad.u.net = &net;
-               ad.u.net->sport = htons(snum);
-               ad.u.net->family = family;
-
-               if (address->sa_family == AF_INET)
+               if (family_sa == AF_INET)
                         ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
                 else
                         ad.u.net->v6info.saddr = addr6->sin6_addr;
@@ -4694,6 +4706,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
         }
  out:
         return err;
+err_af:
+       /* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */
+       if (sksec->sclass == SECCLASS_SCTP_SOCKET)
+               return -EINVAL;
+       return -EAFNOSUPPORT;
  }
  
  /* This supports connect(2) and SCTP connect services such as sctp_connectx(3)
@@ -4771,7 +4788,7 @@ static int selinux_socket_connect_helper(struct socket *sock,
                 ad.type = LSM_AUDIT_DATA_NET;
                 ad.u.net = &net;
                 ad.u.net->dport = htons(snum);
-               ad.u.net->family = sk->sk_family;
+               ad.u.net->family = address->sa_family;
                 err = avc_has_perm(&selinux_state,
                                    sksec->sid, sid, sksec->sclass, perm, &ad);
                 if (err)
@@ -5272,6 +5289,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
         while (walk_size < addrlen) {
                 addr = addr_buf;
                 switch (addr->sa_family) {
+               case AF_UNSPEC:
                 case AF_INET:
                         len = sizeof(struct sockaddr_in);
                         break;
@@ -5279,7 +5297,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
                         len = sizeof(struct sockaddr_in6);
                         break;
                 default:
-                       return -EAFNOSUPPORT;
+                       return -EINVAL;
                 }
  
                 err = -EINVAL;
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c

index a848836a5de0468534d5eecfd24adf4bc743f9f2..507fd5210c1cd54d764e718ab00c7c657857ad6c 100644 (file)
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -396,8 +396,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
         if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) ||
             copy_from_user(&data->type, &data32->type, 3 * sizeof(u32)))
                 goto error;
-       if (get_user(data->owner, &data32->owner) ||
-           get_user(data->type, &data32->type))
+       if (get_user(data->owner, &data32->owner))
                 goto error;
         switch (data->type) {
         case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
diff --git a/sound/core/timer.c b/sound/core/timer.c

index dc87728c5b745e5a8e36e7adfc853470ae91ab7f..0ddcae4958381758ab6189ca6ad7a97375ddf184 100644 (file)
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -592,7 +592,7 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
         else
                 timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
         snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
-                         SNDRV_TIMER_EVENT_CONTINUE);
+                         SNDRV_TIMER_EVENT_PAUSE);
   unlock:
         spin_unlock_irqrestore(&timer->lock, flags);
         return result;
@@ -614,7 +614,7 @@ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop)
                 list_del_init(&timeri->ack_list);
                 list_del_init(&timeri->active_list);
                 snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
-                                 SNDRV_TIMER_EVENT_CONTINUE);
+                                 SNDRV_TIMER_EVENT_PAUSE);
                 spin_unlock(&timeri->timer->lock);
         }
         spin_unlock_irqrestore(&slave_active_lock, flags);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c

index b0c8c79848a94fd9aae962207ed1c673e0d815ed..a0c93b9c9a283684ce1894c6d963c7917e4651d0 100644 (file)
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2210,6 +2210,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
         SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
         /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
         SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */
+       SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
         /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
         SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
         {}
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h

index 321e78baa63ca830332f7be3221cf4b999c2a4fe..9bd935216c1866677841c0aaa6fe943a7d7e313b 100644 (file)
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -622,8 +622,10 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid,
  {
         return snd_hdac_check_power_state(&codec->core, nid, target_state);
  }
-static inline bool snd_hda_sync_power_state(struct hda_codec *codec,
-                          hda_nid_t nid, unsigned int target_state)
+
+static inline unsigned int snd_hda_sync_power_state(struct hda_codec *codec,
+                                                   hda_nid_t nid,
+                                                   unsigned int target_state)
  {
         return snd_hdac_sync_power_state(&codec->core, nid, target_state);
  }
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c

index 2dd34dd7744788d507bfc4b3f7c3b1168a7fa959..01a6643fc7d4727818f5335a04b91f2ab45e4f29 100644 (file)
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2363,6 +2363,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
         SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
         SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
         SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
         SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
         SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
         SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c

index 344d7b069d5994ff242a9f0a3f9bf19f4201ffaa..bb5ab7a7dfa58b0d21cb08acfeff2cca29ba7865 100644 (file)
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -967,6 +967,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval,
                 }
                 break;
  
+       case USB_ID(0x0d8c, 0x0103):
+               if (!strcmp(kctl->id.name, "PCM Playback Volume")) {
+                       usb_audio_info(chip,
+                                "set volume quirk for CM102-A+/102S+\n");
+                       cval->min = -256;
+               }
+               break;
+
         case USB_ID(0x0471, 0x0101):
         case USB_ID(0x0471, 0x0104):
         case USB_ID(0x0471, 0x0105):
diff --git a/sound/usb/stream.c b/sound/usb/stream.c

index 956be9f7c72a44e9326bd15c3df328540747124d..5ed334575fc73cd7f184a9be450f555f190a4d61 100644 (file)
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -576,7 +576,7 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
  
         if (protocol == UAC_VERSION_1) {
                 attributes = csep->bmAttributes;
-       } else {
+       } else if (protocol == UAC_VERSION_2) {
                 struct uac2_iso_endpoint_descriptor *csep2 =
                         (struct uac2_iso_endpoint_descriptor *) csep;
  
@@ -585,6 +585,13 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
                 /* emulate the endpoint attributes of a v1 device */
                 if (csep2->bmControls & UAC2_CONTROL_PITCH)
                         attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
+       } else { /* UAC_VERSION_3 */
+               struct uac3_iso_endpoint_descriptor *csep3 =
+                       (struct uac3_iso_endpoint_descriptor *) csep;
+
+               /* emulate the endpoint attributes of a v1 device */
+               if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH)
+                       attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
         }
  
         return attributes;
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h

index b21b586b985424a03338023f96a3e9e3d996b2af..1738c0391da4af73793716edb2d1109726c690f9 100644 (file)
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -6,8 +6,9 @@
  #include <stdbool.h>
  
  #define spinlock_t             pthread_mutex_t
-#define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
  #define __SPIN_LOCK_UNLOCKED(x)        (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
+#define spin_lock_init(x)      pthread_mutex_init(x, NULL)
  
  #define spin_lock_irqsave(x, f)                (void)f, pthread_mutex_lock(x)
  #define spin_unlock_irqrestore(x, f)   (void)f, pthread_mutex_unlock(x)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c

index 5922443063f09468c801447f61efc6f35cc41905..0f9f06df49bce5bf16c6138ce403b21b1e4b2579 100644 (file)
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2035,7 +2035,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                 return -EINVAL;
  
         obj = bpf_object__open(attr->file);
-       if (IS_ERR(obj))
+       if (IS_ERR_OR_NULL(obj))
                 return -ENOENT;
  
         bpf_object__for_each_program(prog, obj) {
diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h

index b3e32b010ab194ed613034234c403c4067502776..c2c01f84df75f1f9b35a3c898686a82973026d88 100644 (file)
--- a/tools/objtool/arch/x86/include/asm/insn.h
+++ b/tools/objtool/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
         return insn_offset_displacement(insn) + insn->displacement.nbytes;
  }
  
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states;
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+       return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+               (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+                X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
  #endif /* _ASM_X86_INSN_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c

index 5409f6f6c48d63128642454572f215717aac22f2..3a31b238f88564a94943ebd4622b887dc0815193 100644 (file)
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
         return next;
  }
  
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+                                              struct instruction *insn)
+{
+       struct instruction *next = list_next_entry(insn, list);
+       struct symbol *func = insn->func;
+
+       if (!func)
+               return NULL;
+
+       if (&next->list != &file->insn_list && next->func == func)
+               return next;
+
+       /* Check if we're already in the subfunction: */
+       if (func == func->cfunc)
+               return NULL;
+
+       /* Move to the subfunction: */
+       return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn)                       \
+       for (insn = find_insn(file, func->sec, func->offset);           \
+            insn;                                                      \
+            insn = next_insn_same_func(file, insn))
+
  #define func_for_each_insn(file, func, insn)                           \
         for (insn = find_insn(file, func->sec, func->offset);           \
              insn && &insn->list != &file->insn_list &&                 \
@@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
                         if (!strcmp(func->name, global_noreturns[i]))
                                 return 1;
  
-       if (!func->sec)
+       if (!func->len)
                 return 0;
  
-       func_for_each_insn(file, func, insn) {
+       insn = find_insn(file, func->sec, func->offset);
+       if (!insn->func)
+               return 0;
+
+       func_for_each_insn_all(file, func, insn) {
                 empty = false;
  
                 if (insn->type == INSN_RETURN)
@@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
          * case, the function's dead-end status depends on whether the target
          * of the sibling call returns.
          */
-       func_for_each_insn(file, func, insn) {
-               if (insn->sec != func->sec ||
-                   insn->offset >= func->offset + func->len)
-                       break;
-
+       func_for_each_insn_all(file, func, insn) {
                 if (insn->type == INSN_JUMP_UNCONDITIONAL) {
                         struct instruction *dest = insn->jump_dest;
-                       struct symbol *dest_func;
  
                         if (!dest)
                                 /* sibling call to another file */
                                 return 0;
  
-                       if (dest->sec != func->sec ||
-                           dest->offset < func->offset ||
-                           dest->offset >= func->offset + func->len) {
-                               /* local sibling call */
-                               dest_func = find_symbol_by_offset(dest->sec,
-                                                                 dest->offset);
-                               if (!dest_func)
-                                       continue;
+                       if (dest->func && dest->func->pfunc != insn->func->pfunc) {
  
+                               /* local sibling call */
                                 if (recursion == 5) {
-                                       WARN_FUNC("infinite recursion (objtool bug!)",
-                                                 dest->sec, dest->offset);
-                                       return -1;
+                                       /*
+                                        * Infinite recursion: two functions
+                                        * have sibling calls to each other.
+                                        * This is a very rare case.  It means
+                                        * they aren't dead ends.
+                                        */
+                                       return 0;
                                 }
  
-                               return __dead_end_function(file, dest_func,
+                               return __dead_end_function(file, dest->func,
                                                            recursion + 1);
                         }
                 }
@@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file)
                         if (!ignore_func(file, func))
                                 continue;
  
-                       func_for_each_insn(file, func, insn)
+                       func_for_each_insn_all(file, func, insn)
                                 insn->ignore = true;
                 }
         }
@@ -782,30 +804,35 @@ out:
         return ret;
  }
  
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-                           struct instruction *insn, struct rela *table,
-                           struct rela *next_table)
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+                           struct rela *table, struct rela *next_table)
  {
         struct rela *rela = table;
         struct instruction *alt_insn;
         struct alternative *alt;
+       struct symbol *pfunc = insn->func->pfunc;
+       unsigned int prev_offset = 0;
  
         list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
                 if (rela == next_table)
                         break;
  
-               if (rela->sym->sec != insn->sec ||
-                   rela->addend <= func->offset ||
-                   rela->addend >= func->offset + func->len)
+               /* Make sure the switch table entries are consecutive: */
+               if (prev_offset && rela->offset != prev_offset + 8)
                         break;
  
-               alt_insn = find_insn(file, insn->sec, rela->addend);
-               if (!alt_insn) {
-                       WARN("%s: can't find instruction at %s+0x%x",
-                            file->rodata->rela->name, insn->sec->name,
-                            rela->addend);
-                       return -1;
-               }
+               /* Detect function pointers from contiguous objects: */
+               if (rela->sym->sec == pfunc->sec &&
+                   rela->addend == pfunc->offset)
+                       break;
+
+               alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!alt_insn)
+                       break;
+
+               /* Make sure the jmp dest is in the function or subfunction: */
+               if (alt_insn->func->pfunc != pfunc)
+                       break;
  
                 alt = malloc(sizeof(*alt));
                 if (!alt) {
@@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  
                 alt->insn = alt_insn;
                 list_add_tail(&alt->list, &insn->alts);
+               prev_offset = rela->offset;
+       }
+
+       if (!prev_offset) {
+               WARN_FUNC("can't find switch jump table",
+                         insn->sec, insn->offset);
+               return -1;
         }
  
         return 0;
@@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
  {
         struct rela *text_rela, *rodata_rela;
         struct instruction *orig_insn = insn;
+       unsigned long table_offset;
  
-       text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-       if (text_rela && text_rela->sym == file->rodata->sym) {
-               /* case 1 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend);
-               if (rodata_rela)
-                       return rodata_rela;
-
-               /* case 2 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend + 4);
-               if (!rodata_rela)
-                       return NULL;
-
-               file->ignore_unreachables = true;
-               return rodata_rela;
-       }
-
-       /* case 3 */
         /*
          * Backward search using the @first_jump_src links, these help avoid
          * much of the 'in between' code. Which avoids us getting confused by
          * it.
          */
-       for (insn = list_prev_entry(insn, list);
-
+       for (;
              &insn->list != &file->insn_list &&
              insn->sec == func->sec &&
              insn->offset >= func->offset;
  
              insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
  
-               if (insn->type == INSN_JUMP_DYNAMIC)
+               if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
                         break;
  
                 /* allow small jumps within the range */
@@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file,
                 if (!text_rela || text_rela->sym != file->rodata->sym)
                         continue;
  
+               table_offset = text_rela->addend;
+               if (text_rela->type == R_X86_64_PC32)
+                       table_offset += 4;
+
                 /*
                  * Make sure the .rodata address isn't associated with a
                  * symbol.  gcc jump tables are anonymous data.
                  */
-               if (find_symbol_containing(file->rodata, text_rela->addend))
+               if (find_symbol_containing(file->rodata, table_offset))
                         continue;
  
-               rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
-               if (!rodata_rela)
-                       continue;
+               rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+               if (rodata_rela) {
+                       /*
+                        * Use of RIP-relative switch jumps is quite rare, and
+                        * indicates a rare GCC quirk/bug which can leave dead
+                        * code behind.
+                        */
+                       if (text_rela->type == R_X86_64_PC32)
+                               file->ignore_unreachables = true;
  
-               return rodata_rela;
+                       return rodata_rela;
+               }
         }
  
         return NULL;
@@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file,
         struct rela *rela, *prev_rela = NULL;
         int ret;
  
-       func_for_each_insn(file, func, insn) {
+       func_for_each_insn_all(file, func, insn) {
                 if (!last)
                         last = insn;
  
@@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file,
                  * the beginning of another switch table in the same function.
                  */
                 if (prev_jump) {
-                       ret = add_switch_table(file, func, prev_jump, prev_rela,
-                                              rela);
+                       ret = add_switch_table(file, prev_jump, prev_rela, rela);
                         if (ret)
                                 return ret;
                 }
@@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file,
         }
  
         if (prev_jump) {
-               ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+               ret = add_switch_table(file, prev_jump, prev_rela, NULL);
                 if (ret)
                         return ret;
         }
@@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
         while (1) {
                 next_insn = next_insn_same_sec(file, insn);
  
-
-               if (file->c_file && func && insn->func && func != insn->func) {
+               if (file->c_file && func && insn->func && func != insn->func->pfunc) {
                         WARN("%s() falls through to next function %s()",
                              func->name, insn->func->name);
                         return 1;
                 }
  
-               if (insn->func)
-                       func = insn->func;
+               func = insn->func ? insn->func->pfunc : NULL;
  
                 if (func && insn->ignore) {
                         WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
  
                                 i = insn;
                                 save_insn = NULL;
-                               func_for_each_insn_continue_reverse(file, func, i) {
+                               func_for_each_insn_continue_reverse(file, insn->func, i) {
                                         if (i->save) {
                                                 save_insn = i;
                                                 break;
@@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
                 case INSN_JUMP_UNCONDITIONAL:
                         if (insn->jump_dest &&
                             (!func || !insn->jump_dest->func ||
-                            func == insn->jump_dest->func)) {
+                            insn->jump_dest->func->pfunc == func)) {
                                 ret = validate_branch(file, insn->jump_dest,
                                                       state);
                                 if (ret)
@@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file)
  
         for_each_sec(file, sec) {
                 list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
+                       if (func->type != STT_FUNC || func->pfunc != func)
                                 continue;
  
                         insn = find_insn(file, sec, func->offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c

index c1c338661699788c8189becaab8465ed1bdcd775..4e60e105583ee803916589ca56df0e81e12b8fb3 100644 (file)
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
         return NULL;
  }
  
+/*
+ * Look up a symbol by exact name across every section of @elf.
+ *
+ * Sections are walked in elf->sections list order and, within each section,
+ * symbols in symbol_list order; the first symbol whose name compares equal
+ * to @name is returned.  Returns NULL when no symbol matches.
+ */
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+       struct section *cur_sec;
+       struct symbol *cur_sym;
+
+       list_for_each_entry(cur_sec, &elf->sections, list) {
+               list_for_each_entry(cur_sym, &cur_sec->symbol_list, list) {
+                       if (strcmp(cur_sym->name, name) == 0)
+                               return cur_sym;
+               }
+       }
+
+       return NULL;
+}
+
  struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
  {
         struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
  
  static int read_symbols(struct elf *elf)
  {
-       struct section *symtab;
-       struct symbol *sym;
+       struct section *symtab, *sec;
+       struct symbol *sym, *pfunc;
         struct list_head *entry, *tmp;
         int symbols_nr, i;
+       char *coldstr;
  
         symtab = find_section_by_name(elf, ".symtab");
         if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
                 hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
         }
  
+       /* Create parent/child links for any cold subfunctions */
+       list_for_each_entry(sec, &elf->sections, list) {
+               list_for_each_entry(sym, &sec->symbol_list, list) {
+                       if (sym->type != STT_FUNC)
+                               continue;
+                       sym->pfunc = sym->cfunc = sym;
+                       coldstr = strstr(sym->name, ".cold.");
+                       if (coldstr) {
+                               coldstr[0] = '\0';
+                               pfunc = find_symbol_by_name(elf, sym->name);
+                               coldstr[0] = '.';
+
+                               if (!pfunc) {
+                                       WARN("%s(): can't find parent function",
+                                            sym->name);
+                                       goto err;
+                               }
+
+                               sym->pfunc = pfunc;
+                               pfunc->cfunc = sym;
+                       }
+               }
+       }
+
         return 0;
  
  err:
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h

index d86e2ff14466148d3b8ae46065274956a5b3c4f8..de5cd2ddded987bf524be46e446bd1e814422761 100644 (file)
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,6 +61,7 @@ struct symbol {
         unsigned char bind, type;
         unsigned long offset;
         unsigned int len;
+       struct symbol *pfunc, *cfunc;
  };
  
  struct rela {
@@ -86,6 +87,7 @@ struct elf {
  struct elf *elf_open(const char *name, int flags);
  struct section *find_section_by_name(struct elf *elf, const char *name);
  struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
  struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
  struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
  struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh

index 016882dbbc16bddda77e180f421d0bcff2ad1760..ee86473643be59d117184e645f6233df858a5cea 100755 (executable)
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -16,7 +16,7 @@ nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
  trace_libc_inet_pton_backtrace() {
         idx=0
         expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-       expected[1]=".*inet_pton[[:space:]]\($libc\)$"
+       expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$"
         case "$(uname -m)" in
         s390x)
                 eventattr='call-graph=dwarf,max-stack=4'
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c

index 536ee148bff82576d7ab21949b1c20d749929539..5d74a30fe00f1f96873c5123e65631c1f8c51d5a 100644 (file)
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1263,6 +1263,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
                                 max_percent = sample->percent;
                 }
  
+               if (al->samples_nr > nr_percent)
+                       nr_percent = al->samples_nr;
+
                 if (max_percent < min_pcnt)
                         return -1;
  
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

index 40020b1ca54f4ecbd05bc3524538634f435dc6be..bf16dc9ee507d98bbb8a762f4f4221bbe7733548 100644 (file)
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -239,6 +239,7 @@ static void cs_etm__free(struct perf_session *session)
         for (i = 0; i < aux->num_cpu; i++)
                 zfree(&aux->metadata[i]);
  
+       thread__zput(aux->unknown_thread);
         zfree(&aux->metadata);
         zfree(&aux);
  }
@@ -612,8 +613,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
         return buff->len;
  }
  
-static void  cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
-                                    struct auxtrace_queue *queue)
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+                                   struct auxtrace_queue *queue)
  {
         struct cs_etm_queue *etmq = queue->priv;
  
@@ -1357,6 +1358,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
         etm->auxtrace.free = cs_etm__free;
         session->auxtrace = &etm->auxtrace;
  
+       etm->unknown_thread = thread__new(999999999, 999999999);
+       if (!etm->unknown_thread)
+               goto err_free_queues;
+
+       /*
+        * Initialize list node so that at thread__zput() we can avoid
+        * segmentation fault at list_del_init().
+        */
+       INIT_LIST_HEAD(&etm->unknown_thread->node);
+
+       err = thread__set_comm(etm->unknown_thread, "unknown", 0);
+       if (err)
+               goto err_delete_thread;
+
+       if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+               goto err_delete_thread;
+
         if (dump_trace) {
                 cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
                 return 0;
@@ -1371,16 +1389,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
  
         err = cs_etm__synth_events(etm, session);
         if (err)
-               goto err_free_queues;
+               goto err_delete_thread;
  
         err = auxtrace_queues__process_index(&etm->queues, session);
         if (err)
-               goto err_free_queues;
+               goto err_delete_thread;
  
         etm->data_queued = etm->queues.populated;
  
         return 0;
  
+err_delete_thread:
+       thread__zput(etm->unknown_thread);
  err_free_queues:
         auxtrace_queues__free(&etm->queues);
         session->auxtrace = NULL;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c

index 2fb0272146d8db3c4d2397f0497d85620df28310..b8b8a9558d325c1b904d312e2c96f82699fd82a8 100644 (file)
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1715,7 +1715,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
                 struct perf_evsel *last;
  
                 if (list_empty(&parse_state.list)) {
-                       WARN_ONCE(true, "WARNING: event parser found nothing");
+                       WARN_ONCE(true, "WARNING: event parser found nothing\n");
                         return -1;
                 }
  
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile

index fa7ee369b3c919c534e610a621c1bdd54bf1e986..db66f8a0d4bed71aa581cdbf82c9acf422daef05 100644 (file)
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -17,7 +17,7 @@ ifeq ($(BUILD), 32)
         LDFLAGS += -m32
  endif
  
-targets: mapshift $(TARGETS)
+targets: generated/map-shift.h $(TARGETS)
  
  main:  $(OFILES)
  
@@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c
  idr.c: ../../../lib/idr.c
         sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
  
-.PHONY: mapshift
-
-mapshift:
+generated/map-shift.h:
         @if ! grep -qws $(SHIFT) generated/map-shift.h; then            \
                 echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >          \
                                 generated/map-shift.h;                  \
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c

index 6c645eb77d4218fd102968fb2a24b19350e3edd9..ee820fcc29b0d3c201cbd63e56daa2392e89bcbf 100644 (file)
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -252,6 +252,13 @@ void idr_checks(void)
         idr_remove(&idr, 3);
         idr_remove(&idr, 0);
  
+       assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
+       idr_remove(&idr, 1);
+       for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
+               assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
+       idr_remove(&idr, 1 << 30);
+       idr_destroy(&idr);
+
         for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
                 struct item *item = item_create(i, 0);
                 assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c

index 59245b3d587c35c039c77e673bdd706319368167..7bf405638b0beef5d7ca578645e4918c02c06560 100644 (file)
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -16,6 +16,7 @@
  #include <linux/radix-tree.h>
  #include <linux/slab.h>
  #include <linux/errno.h>
+#include <pthread.h>
  
  #include "test.h"
  
@@ -624,6 +625,67 @@ static void multiorder_account(void)
         item_kill_tree(&tree);
  }
  
+bool stop_iteration = false;
+
+/*
+ * Writer thread body: inserts a multi-order item at index 0 and deletes it
+ * again, 10000 times, then sets stop_iteration to true.
+ * @ptr is the struct radix_tree_root to operate on.  Always returns NULL.
+ */
+static void *creator_func(void *ptr)
+{
+       struct radix_tree_root *root = ptr;
+       /* One less than the map shift, chosen so that sibling entries exist */
+       const unsigned int sibling_order = RADIX_TREE_MAP_SHIFT - 1;
+       int remaining = 10000;
+
+       while (remaining-- > 0) {
+               item_insert_order(root, 0, sibling_order);
+               item_delete_rcu(root, 0);
+       }
+
+       stop_iteration = true;
+
+       return NULL;
+}
+
+/*
+ * Reader thread body: until stop_iteration becomes true, repeatedly walks
+ * every slot of the tree (passed via @ptr) starting at index 0, between
+ * rcu_read_lock() and rcu_read_unlock(), and runs item_sanity() on each
+ * entry found.  Always returns NULL.
+ */
+static void *iterator_func(void *ptr)
+{
+       struct radix_tree_root *root = ptr;
+       struct radix_tree_iter iter;
+       void **slot;
+
+       while (!stop_iteration) {
+               rcu_read_lock();
+               radix_tree_for_each_slot(slot, root, &iter, 0) {
+                       struct item *entry = radix_tree_deref_slot(slot);
+
+                       /* Empty slot: nothing to check */
+                       if (!entry)
+                               continue;
+
+                       if (!radix_tree_deref_retry(entry)) {
+                               item_sanity(entry, iter.index);
+                               continue;
+                       }
+
+                       /* radix_tree_deref_retry() reported a retry is needed */
+                       slot = radix_tree_iter_retry(&iter);
+               }
+               rcu_read_unlock();
+       }
+
+       return NULL;
+}
+
+/*
+ * Race one creator thread against iterator threads on a shared tree.
+ *
+ * One thread runs creator_func() and every remaining thread (one thread per
+ * online CPU in total) runs iterator_func() on the same tree.  All threads
+ * are joined before the tree is torn down with item_kill_tree().
+ */
+static void multiorder_iteration_race(void)
+{
+       const long online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+       /* sysconf() returns -1 on error; the array below needs a size >= 1 */
+       const int num_threads = online_cpus > 0 ? online_cpus : 1;
+       pthread_t worker_thread[num_threads];
+       RADIX_TREE(tree, GFP_KERNEL);
+       int i;
+
+       /*
+        * creator_func() leaves the flag set when it finishes; clear it so
+        * the iterators do not exit immediately if this test runs again.
+        */
+       stop_iteration = false;
+
+       pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+       for (i = 1; i < num_threads; i++)
+               pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+       for (i = 0; i < num_threads; i++)
+               pthread_join(worker_thread[i], NULL);
+
+       item_kill_tree(&tree);
+}
+
  void multiorder_checks(void)
  {
         int i;
@@ -644,6 +706,7 @@ void multiorder_checks(void)
         multiorder_join();
         multiorder_split();
         multiorder_account();
+       multiorder_iteration_race();
  
         radix_tree_cpu_dead(0);
  }
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c

index 5978ab1f403d97f135ae7f026a07caab71113212..def6015570b22a46274cd51b6f2135c6b6157090 100644 (file)
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
         return 0;
  }
  
+/* RCU callback: free the struct item that embeds @head as its rcu_head. */
+static void item_free_rcu(struct rcu_head *head)
+{
+       free(container_of(head, struct item, rcu_head));
+}
+
+/*
+ * Remove the entry at @index from @root and hand it to call_rcu() with
+ * item_free_rcu() as the callback.
+ *
+ * Returns 1 if radix_tree_delete() returned an item (it is checked with
+ * item_sanity() before being queued), or 0 if it returned NULL.
+ */
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+       struct item *victim = radix_tree_delete(root, index);
+
+       if (!victim)
+               return 0;
+
+       item_sanity(victim, index);
+       call_rcu(&victim->rcu_head, item_free_rcu);
+       return 1;
+}
+
  void item_check_present(struct radix_tree_root *root, unsigned long index)
  {
         struct item *item;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h

index d9c031dbeb1a9634451608a29592a74002958dc3..31f1d9b6f506ef884c978b568aa9dc5d89fd8312 100644 (file)
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -5,6 +5,7 @@
  #include <linux/rcupdate.h>
  
  struct item {
+       struct rcu_head rcu_head;
         unsigned long index;
         unsigned int order;
  };
@@ -12,9 +13,11 @@ struct item {
  struct item *item_create(unsigned long index, unsigned int order);
  int __item_insert(struct radix_tree_root *root, struct item *item);
  int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
  int item_insert_order(struct radix_tree_root *root, unsigned long index,
                         unsigned order);
  int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
  struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
  
  void item_check_present(struct radix_tree_root *root, unsigned long index);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config

index 983dd25d49f452d91191191188634beb8b47dc26..1eefe211a4a88a3dfbac5be585932384061b9edd 100644 (file)
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y
  CONFIG_TEST_BPF=m
  CONFIG_CGROUP_BPF=y
  CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c

index 3e7718b1a9ae49c176a407d9f14ad31704255701..fd7de7eb329eacc82af76c652d5e3731dc325fe4 100644 (file)
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11713,6 +11713,11 @@ static void get_unpriv_disabled()
         FILE *fd;
  
         fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+       if (!fd) {
+               perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+               unpriv_disabled = true;
+               return;
+       }
         if (fgets(buf, 2, fd) == buf && atoi(buf))
                 unpriv_disabled = true;
         fclose(fd);
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile

index 2ddcc96ae4561acd3d773db814dffbb4c0acdb51..d9d00319b07cd691848a2f468c48e3bf10127284 100644 (file)
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -15,7 +15,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
  
  INSTALL_HDR_PATH = $(top_srcdir)/usr
  LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
  
  # After inclusion, $(OUTPUT) is defined and
  # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h

index 7ab98e41324ffd012720338e3a59074795f23ef7..ac53730b30aa48c9f9ecf54e3e6362f8cdf0e1c4 100644 (file)
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -19,6 +19,7 @@
  #include <errno.h>
  #include <unistd.h>
  #include <fcntl.h>
+#include "kselftest.h"
  
  ssize_t test_write(int fd, const void *buf, size_t count);
  ssize_t test_read(int fd, void *buf, size_t count);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c

index 2cedfda181d4560dbc02ca0d2c5e44ab54326c61..37e2a787d2fcc6cc6cedcb30b9620456e99e43c0 100644 (file)
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -50,8 +50,8 @@ int kvm_check_cap(long cap)
         int kvm_fd;
  
         kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
  
         ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
         TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
@@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
  
         vm->mode = mode;
         kvm_fd = open(KVM_DEV_PATH, perm);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
  
         /* Create VM. */
         vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
@@ -418,8 +418,8 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
  
         cpuid = allocate_kvm_cpuid2();
         kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
  
         ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
         TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -675,8 +675,8 @@ static int vcpu_mmap_sz(void)
         int dev_fd, ret;
  
         dev_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
-               __func__, KVM_DEV_PATH, dev_fd, errno);
+       if (dev_fd < 0)
+               exit(KSFT_SKIP);
  
         ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
         TEST_ASSERT(ret >= sizeof(struct kvm_run),
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c

index 428e9473f5e20cec7a3fe2df19d615c259491072..eae1ece3c31b8505e99877bf11d8fa8fb6907cdc 100644 (file)
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
  {
  }
  
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
  int main(int argc, char *argv[])
  {
         struct kvm_vm *vm;
@@ -98,9 +101,14 @@ int main(int argc, char *argv[])
         setbuf(stdout, NULL);
  
         cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS,
-                   "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n",
-                   cap, KVM_SYNC_X86_VALID_FIELDS);
+       if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+               fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+       if ((cap & INVALID_SYNC_FIELD) != 0) {
+               fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+               exit(KSFT_SKIP);
+       }
  
         /* Create VM */
         vm = vm_create_default(VCPU_ID, guest_code);
@@ -108,7 +116,14 @@ int main(int argc, char *argv[])
         run = vcpu_state(vm, VCPU_ID);
  
         /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   rv);
+       vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vm, VCPU_ID);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
                     "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
@@ -116,7 +131,14 @@ int main(int argc, char *argv[])
         vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
  
         /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   rv);
+       vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vm, VCPU_ID);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
                     "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
@@ -125,7 +147,7 @@ int main(int argc, char *argv[])
  
         /* Request and verify all valid register sets. */
         /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
         rv = _vcpu_run(vm, VCPU_ID);
         TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
                     "Unexpected exit reason: %u (%s),\n",
@@ -146,7 +168,7 @@ int main(int argc, char *argv[])
         run->s.regs.sregs.apic_base = 1 << 11;
         /* TODO run->s.regs.events.XYZ = ABC; */
  
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
         run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
         rv = _vcpu_run(vm, VCPU_ID);
         TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -172,7 +194,7 @@ int main(int argc, char *argv[])
         /* Clear kvm_dirty_regs bits, verify new s.regs values are
          * overwritten with existing guest values.
          */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
         run->kvm_dirty_regs = 0;
         run->s.regs.regs.r11 = 0xDEADBEEF;
         rv = _vcpu_run(vm, VCPU_ID);
@@ -211,7 +233,7 @@ int main(int argc, char *argv[])
          * with kvm_sync_regs values.
          */
         run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
         run->s.regs.regs.r11 = 0xBBBB;
         rv = _vcpu_run(vm, VCPU_ID);
         TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c

index 8f7f62093add0fca51d5923196189cc3110e9597..aaa633263b2c42b38445e3b73815a9f2db977670 100644 (file)
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -189,8 +189,8 @@ int main(int argc, char *argv[])
         struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
  
         if (!(entry->ecx & CPUID_VMX)) {
-               printf("nested VMX not enabled, skipping test");
-               return 0;
+               fprintf(stderr, "nested VMX not enabled, skipping test\n");
+               exit(KSFT_SKIP);
         }
  
         vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config

index 6a75a3ea44adb5bde3cc1c00000a96307f88471e..7ba089b33e8b8248ec08d8421a582be66c9f7e87 100644 (file)
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_IPV6=y
  CONFIG_IPV6_MULTIPLE_TABLES=y
  CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c

index 365c32e841897e0c43e8372a21086d1ccabe35c5..c9f478b40996d3bc0037c8e3b08dc477725a85d0 100644 (file)
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,6 +23,8 @@
  #include <unistd.h>
  #include <numa.h>
  
+#include "../kselftest.h"
+
  static const int PORT = 8888;
  
  static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
@@ -229,7 +231,7 @@ int main(void)
         int *rcv_fd, nodes;
  
         if (numa_available() < 0)
-               error(1, errno, "no numa api support");
+               ksft_exit_skip("no numa api support\n");
  
         nodes = numa_max_node() + 1;
  
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c

index 168c66d74fc5ca67f300cd7235182e4fd3d572aa..e1473234968de7d0166d7f4446b5f6c796d5204e 100644 (file)
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -134,11 +134,15 @@ struct seccomp_data {
  #endif
  
  #ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
  #endif
  
  #ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG 2
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
  #endif
  
  #ifndef PTRACE_SECCOMP_GET_METADATA
@@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock)
  TEST(detect_seccomp_filter_flags)
  {
         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
-                                SECCOMP_FILTER_FLAG_LOG };
+                                SECCOMP_FILTER_FLAG_LOG,
+                                SECCOMP_FILTER_FLAG_SPEC_ALLOW };
         unsigned int flag, all_flags;
         int i;
         long ret;
  
         /* Test detection of known-good filter flags */
         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+               int bits = 0;
+
                 flag = flags[i];
+               /* Make sure the flag is a single bit! */
+               while (flag) {
+                       if (flag & 0x1)
+                               bits ++;
+                       flag >>= 1;
+               }
+               ASSERT_EQ(1, bits);
+               flag = flags[i];
+
                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
                 ASSERT_NE(ENOSYS, errno) {
                         TH_LOG("Kernel does not support seccomp syscall!");
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile

index d744991c0f4f44d56bda208ad3039ad81500f303..39f66bc29b8249f059886fb57f2a3e75dd81975a 100644 (file)
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
  
  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
                         check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-                       protection_keys test_vdso test_vsyscall
+                       protection_keys test_vdso test_vsyscall mov_ss_trap
  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                         test_FCMOV test_FCOMI test_FISTTP \
                         vdso_restorer
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c

new file mode 100644 (file)

index 0000000..3c3a022
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries.  A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap.  The trap
+ * will be delivered after the next instruction instead.  The CPU's logic
+ * seems to be:
+ *
+ *  - Any fault: drop the pending #DB trap.
+ *  - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ *    deliver #DB.
+ *  - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ *  - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly.  This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+/*
+ * Program this process's debug registers from a helper child.
+ *
+ * A forked child attaches to the parent with ptrace, pokes DR0, DR1 and
+ * DR7 into the parent's user area, detaches and exits; the parent simply
+ * waits for the child.  DR0 is a 2-byte read/write watchpoint on the
+ * global 'ss', DR1 an instruction breakpoint on 'breakpoint_insn'.
+ * Any failing step terminates the failing process through err().
+ */
+static void enable_watchpoint(void)
+{
+       pid_t parent = getpid();
+       pid_t child;
+       int status;
+
+       /*
+        * Flush stdio before forking.  The child receives a copy of any
+        * unflushed buffer and would write it out a second time from
+        * exit(), duplicating output when stdout is fully buffered
+        * (e.g. redirected to a file or a pipe).
+        */
+       fflush(NULL);
+
+       child = fork();
+       if (child < 0)
+               err(1, "fork");
+
+       if (child) {
+               /* Parent: block until the helper child is done with us. */
+               if (waitpid(child, &status, 0) != child)
+                       err(1, "waitpid for child");
+       } else {
+               unsigned long dr0, dr1, dr7;
+
+               dr0 = (unsigned long)&ss;
+               dr1 = (unsigned long)breakpoint_insn;
+               dr7 = ((1UL << 1) |     /* G0 */
+                      (3UL << 16) |    /* RW0 = read or write */
+                      (1UL << 18) |    /* LEN0 = 2 bytes */
+                      (1UL << 3));     /* G1, RW1 = insn */
+
+               if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+                       err(1, "PTRACE_ATTACH");
+
+               /* Here the child is waiting on the *parent* (the tracee). */
+               if (waitpid(parent, &status, 0) != parent)
+                       err(1, "waitpid for parent");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+                       err(1, "PTRACE_POKEUSER DR0");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+                       err(1, "PTRACE_POKEUSER DR1");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+                       err(1, "PTRACE_POKEUSER DR7");
+
+               printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+               if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+                       err(1, "PTRACE_DETACH");
+
+               exit(0);
+       }
+}
+
+/*
+ * Install 'handler' as the siginfo-style handler for signal 'sig'.
+ * 'flags' is OR'ed with SA_SIGINFO and the handler runs with an empty
+ * signal mask.  Exits through err() if sigaction() fails.
+ */
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction act;
+
+       memset(&act, 0, sizeof(act));
+       sigemptyset(&act.sa_mask);
+       act.sa_flags = flags | SA_SIGINFO;
+       act.sa_sigaction = handler;
+
+       if (sigaction(sig, &act, NULL) != 0)
+               err(1, "sigaction");
+}
+
+/* Printable signal names, indexed by signal number, for handler messages. */
+static char const * const signames[] = {
+       [SIGSEGV] = "SIGSEGV",
+       [SIGBUS] = "SIGBUS",
+       [SIGTRAP] = "SIGTRAP",
+       [SIGILL] = "SIGILL",
+};
+
+/*
+ * SIGTRAP handler: print the instruction pointer saved in the signal
+ * context and whether EFLAGS.RF is set there, then return normally.
+ */
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *uc = ctx_void;
+       unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];
+       int rf = !!(uc->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF);
+
+       printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", ip, rf);
+}
+
+/*
+ * Handler that only logs: print the signal's name (from signames[]) and
+ * the saved instruction pointer, then return to the interrupted code.
+ */
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *uc = ctx_void;
+       unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];
+       const char *name = signames[sig];
+
+       printf("\tGot %s with RIP=%lx\n", name, ip);
+}
+
+/*
+ * Handler for sequences that cannot be resumed: log the signal's name
+ * and the saved instruction pointer, then unwind to the most recent
+ * sigsetjmp(jmpbuf, ...) with return value 1.
+ */
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *uc = ctx_void;
+       unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];
+       const char *name = signames[sig];
+
+       printf("\tGot %s with RIP=%lx\n", name, ip);
+
+       siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+       unsigned long nr;
+
+       asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+       printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+       if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+               printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+       printf("\tSet up a watchpoint\n");
+       sethandler(SIGTRAP, sigtrap, 0);
+       enable_watchpoint();
+
+       printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+       asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT3\n");
+       asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT 3\n");
+       asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; CS CS INT3\n");
+       asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; CSx14 INT3\n");
+       asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT 4\n");
+       sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+       asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+       printf("[RUN]\tMOV SS; INTO\n");
+       sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+       nr = -1;
+       asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+                     : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; ICEBP\n");
+
+               /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+               sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+               asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; CLI\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; #PF\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+                             : [tmp] "=r" (nr) : [ss] "m" (ss));
+       }
+
+       /*
+        * INT $1: if #DB has DPL=3 and there isn't special handling,
+        * then the kernel will die.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; INT 1\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+       }
+
+#ifdef __x86_64__
+       /*
+        * In principle, we should test 32-bit SYSCALL as well, but
+        * the calling convention is so unpredictable that it's
+        * not obviously worth the effort.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; SYSCALL\n");
+               sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+               nr = SYS_getpid;
+               /*
+                * Toggle the high bit of RSP to make it noncanonical to
+                * strengthen this test on non-SMAP systems.
+                */
+               asm volatile ("btc $63, %%rsp\n\t"
+                             "mov %[ss], %%ss; syscall\n\t"
+                             "btc $63, %%rsp"
+                             : "+a" (nr) : [ss] "m" (ss)
+                             : "rcx"
+#ifdef __x86_64__
+                               , "r11"
+#endif
+                       );
+       }
+#endif
+
+       printf("[RUN]\tMOV SS; breakpointed NOP\n");
+       asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+       /*
+        * Invoking SYSENTER directly breaks all the rules.  Just handle
+        * the SIGSEGV.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; SYSENTER\n");
+               stack_t stack = {
+                       .ss_sp = altstack_data,
+                       .ss_size = SIGSTKSZ,
+               };
+               if (sigaltstack(&stack, NULL) != 0)
+                       err(1, "sigaltstack");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+               nr = SYS_getpid;
+               asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+                             : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+                               , "r11"
+#endif
+                       );
+
+               /* We're unreachable here.  SYSENTER forgets RIP. */
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; INT $0x80\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               nr = 20;        /* compat getpid */
+               asm volatile ("mov %[ss], %%ss; int $0x80"
+                             : "+a" (nr) : [ss] "m" (ss)
+                             : "flags"
+#ifdef __x86_64__
+                               , "r8", "r9", "r10", "r11"
+#endif
+                       );
+       }
+
+       printf("[OK]\tI aten't dead\n");
+       return 0;
+}
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c

index 9c0325e1ea6844f666bfdcc8204763a8614b9875..50f7e92724813a3525154ede4f2b282af7e5a839 100644 (file)
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
  uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
  unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
  
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR   3
+#endif
+
  /*
   * The kernel is supposed to provide some information about the bounds
   * exception in the siginfo.  It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
                 br_count++;
                 dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
  
-#define SEGV_BNDERR     3  /* failed address bound checks */
-
                 dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
                                 status, ip, br_reason);
                 dprintf2("si_signo: %d\n", si->si_signo);
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h

index b3cb7670e02661cd2ab66fd3da98b3940dd44c70..254e5436bdd9926091dc7c53bdd37be8bdb19121 100644 (file)
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
  {
         va_list ap;
  
-       va_start(ap, format);
         if (!dprint_in_signal) {
+               va_start(ap, format);
                 vprintf(format, ap);
+               va_end(ap);
         } else {
                 int ret;
-               int len = vsnprintf(dprint_in_signal_buffer,
-                                   DPRINT_IN_SIGNAL_BUF_SIZE,
-                                   format, ap);
                 /*
-                * len is amount that would have been printed,
-                * but actual write is truncated at BUF_SIZE.
+                * No printf() functions are signal-safe.
+                * They deadlock easily. Write the format
+                * string to get some output, even if
+                * incomplete.
                  */
-               if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
-                       len = DPRINT_IN_SIGNAL_BUF_SIZE;
-               ret = write(1, dprint_in_signal_buffer, len);
+               ret = write(1, format, strlen(format));
                 if (ret < 0)
-                       abort();
+                       exit(1);
         }
-       va_end(ap);
  }
  #define dprintf_level(level, args...) do {     \
         if (level <= DEBUG_LEVEL)               \
                 sigsafe_printf(args);           \
-       fflush(NULL);                           \
  } while (0)
  #define dprintf0(args...) dprintf_level(0, args)
  #define dprintf1(args...) dprintf_level(1, args)
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c

index f15aa5a76fe3457e96e438c15e7ad40d3c7fbce0..460b4bdf4c1edff9d5dfa0d451dbaa393d53b80c 100644 (file)
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -72,10 +72,9 @@ extern void abort_hooks(void);
                                 test_nr, iteration_nr); \
                 dprintf0("errno at assert: %d", errno); \
                 abort_hooks();                  \
-               assert(condition);              \
+               exit(__LINE__);                 \
         }                                       \
  } while (0)
-#define raw_assert(cond) assert(cond)
  
  void cat_into_file(char *str, char *file)
  {
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
          * these need to be raw because they are called under
          * pkey_assert()
          */
-       raw_assert(fd >= 0);
+       if (fd < 0) {
+               fprintf(stderr, "error opening '%s'\n", str);
+               perror("error: ");
+               exit(__LINE__);
+       }
+
         ret = write(fd, str, strlen(str));
         if (ret != strlen(str)) {
                 perror("write to file failed");
                 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
-               raw_assert(0);
+               exit(__LINE__);
         }
         close(fd);
  }
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
  #ifdef __i386__
  
  #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
+# define SYS_mprotect_key      380
  #endif
+
  #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc         381
-# define SYS_pkey_free  382
+# define SYS_pkey_alloc                381
+# define SYS_pkey_free         382
  #endif
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+
+#define REG_IP_IDX             REG_EIP
+#define si_pkey_offset         0x14
  
  #else
  
  #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
+# define SYS_mprotect_key      329
  #endif
+
  #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc         330
-# define SYS_pkey_free  331
+# define SYS_pkey_alloc                330
+# define SYS_pkey_free         331
  #endif
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+
+#define REG_IP_IDX             REG_RIP
+#define si_pkey_offset         0x20
  
  #endif
  
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
         }
  }
  
-#define SEGV_BNDERR     3  /* failed address bound checks */
-#define SEGV_PKUERR     4
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR           3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR           4
+#endif
  
  static char *si_code_str(int si_code)
  {
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
                 dump_mem(pkru_ptr - 128, 256);
         pkey_assert(*pkru_ptr);
  
-       si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
-       dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
-       dump_mem(si_pkey_ptr - 8, 24);
-       siginfo_pkey = *si_pkey_ptr;
-       pkey_assert(siginfo_pkey < NR_PKEYS);
-       last_si_pkey = siginfo_pkey;
-
         if ((si->si_code == SEGV_MAPERR) ||
             (si->si_code == SEGV_ACCERR) ||
             (si->si_code == SEGV_BNDERR)) {
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
                 exit(4);
         }
  
+       si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+       dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+       dump_mem((u8 *)si_pkey_ptr - 8, 24);
+       siginfo_pkey = *si_pkey_ptr;
+       pkey_assert(siginfo_pkey < NR_PKEYS);
+       last_si_pkey = siginfo_pkey;
+
         dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
         /* need __rdpkru() version so we do not do shadow_pkru checking */
         dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
         dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
         pkru_faults++;
         dprintf1("<<<<==================================================\n");
-       return;
-       if (trapno == 14) {
-               fprintf(stderr,
-                       "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
-                       trapno, ip);
-               fprintf(stderr, "si_addr %p\n", si->si_addr);
-               fprintf(stderr, "REG_ERR: %lx\n",
-                               (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-               exit(1);
-       } else {
-               fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
-               fprintf(stderr, "si_addr %p\n", si->si_addr);
-               fprintf(stderr, "REG_ERR: %lx\n",
-                               (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-               exit(2);
-       }
         dprint_in_signal = 0;
  }
  
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
         return forkret;
  }
  
-#define PKEY_DISABLE_ACCESS    0x1
-#define PKEY_DISABLE_WRITE     0x2
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS   0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE    0x2
+#endif
  
-u32 pkey_get(int pkey, unsigned long flags)
+static u32 hw_pkey_get(int pkey, unsigned long flags)
  {
         u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
         u32 pkru = __rdpkru();
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
         return masked_pkru;
  }
  
-int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
  {
         u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
         u32 old_pkru = __rdpkru();
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
                 pkey, flags);
         pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  
-       pkey_rights = pkey_get(pkey, syscall_flags);
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
  
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                         pkey, pkey, pkey_rights);
         pkey_assert(pkey_rights >= 0);
  
         pkey_rights |= flags;
  
-       ret = pkey_set(pkey, pkey_rights, syscall_flags);
+       ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
         assert(!ret);
         /*pkru and flags have the same format */
         shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
  
         pkey_assert(ret >= 0);
  
-       pkey_rights = pkey_get(pkey, syscall_flags);
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                         pkey, pkey, pkey_rights);
  
         dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
  {
         unsigned long syscall_flags = 0;
         int ret;
-       int pkey_rights = pkey_get(pkey, syscall_flags);
+       int pkey_rights = hw_pkey_get(pkey, syscall_flags);
         u32 orig_pkru = rdpkru();
  
         pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                         pkey, pkey, pkey_rights);
         pkey_assert(pkey_rights >= 0);
  
         pkey_rights |= flags;
  
-       ret = pkey_set(pkey, pkey_rights, 0);
+       ret = hw_pkey_set(pkey, pkey_rights, 0);
         /* pkru and flags have the same format */
         shadow_pkru &= ~(flags << (pkey * 2));
         pkey_assert(ret >= 0);
  
-       pkey_rights = pkey_get(pkey, syscall_flags);
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                         pkey, pkey, pkey_rights);
  
         dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  struct pkey_malloc_record {
         void *ptr;
         long size;
+       int prot;
  };
  struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
  long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size)
+void record_pkey_malloc(void *ptr, long size, int prot)
  {
         long i;
         struct pkey_malloc_record *rec = NULL;
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
                 (int)(rec - pkey_malloc_records), rec, ptr, size);
         rec->ptr = ptr;
         rec->size = size;
+       rec->prot = prot;
+       pkey_last_malloc_record = rec;
         nr_pkey_malloc_records++;
  }
  
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
         pkey_assert(ptr != (void *)-1);
         ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
         pkey_assert(!ret);
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
         rdpkru();
  
         dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
         size = ALIGN_UP(size, HPAGE_SIZE * 2);
         ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
         pkey_assert(ptr != (void *)-1);
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
         mprotect_pkey(ptr, size, prot, pkey);
  
         dprintf1("unaligned ptr: %p\n", ptr);
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
         pkey_assert(ptr != (void *)-1);
         mprotect_pkey(ptr, size, prot, pkey);
  
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
  
         dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
         return ptr;
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
  
         mprotect_pkey(ptr, size, prot, pkey);
  
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
  
         dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
         close(fd);
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
  }
  
  int last_pkru_faults;
+#define UNKNOWN_PKEY -2
  void expected_pk_fault(int pkey)
  {
         dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
                         __func__, last_pkru_faults, pkru_faults);
         dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
         pkey_assert(last_pkru_faults + 1 == pkru_faults);
-       pkey_assert(last_si_pkey == pkey);
+
+       /*
+       * For exec-only memory, we do not know the pkey in
+       * advance, so skip this check.
+       */
+       if (pkey != UNKNOWN_PKEY)
+               pkey_assert(last_si_pkey == pkey);
+
         /*
          * The signal handler shold have cleared out PKRU to let the
          * test program continue.  We now have to restore it.
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
         last_si_pkey = -1;
  }
  
-void do_not_expect_pk_fault(void)
-{
-       pkey_assert(last_pkru_faults == pkru_faults);
-}
+/*
+ * Assert that pkru_faults still equals last_pkru_faults, i.e. that no
+ * new fault has been counted.  On a mismatch 'msg' is logged through
+ * dprintf0() before pkey_assert() fires, so the failure can be tied to
+ * the call site.
+ */
+#define do_not_expect_pk_fault(msg)    do {                    \
+       if (last_pkru_faults != pkru_faults)                    \
+               dprintf0("unexpected PK fault: %s\n", msg);     \
+       pkey_assert(last_pkru_faults == pkru_faults);           \
+} while (0)
  
  int test_fds[10] = { -1 };
  int nr_test_fds;
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
         pkey_assert(i < NR_PKEYS*2);
  
         /*
-        * There are 16 pkeys supported in hardware.  One is taken
-        * up for the default (0) and another can be taken up by
-        * an execute-only mapping.  Ensure that we can allocate
-        * at least 14 (16-2).
+        * There are 16 pkeys supported in hardware.  Three are
+        * allocated by the time we get here:
+        *   1. The default key (0)
+        *   2. One possibly consumed by an execute-only mapping.
+        *   3. One allocated by the test code and passed in via
+        *      'pkey' to this function.
+        * Ensure that we can allocate at least another 13 (16-3).
          */
-       pkey_assert(i >= NR_PKEYS-2);
+       pkey_assert(i >= NR_PKEYS-3);
  
         for (i = 0; i < nr_allocated_pkeys; i++) {
                 err = sys_pkey_free(allocated_pkeys[i]);
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
         }
  }
  
+/*
+ * pkey 0 is special.  It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first.  Make sure that it
+ * is usable.
+ *
+ * The size and protection come from the most recent
+ * record_pkey_malloc() entry.  Both mprotect_pkey() calls must
+ * succeed; their results are checked with pkey_assert() so that a
+ * failure actually fails the test.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+       long size;
+       int prot;
+       int ret;
+
+       assert(pkey_last_malloc_record);
+       size = pkey_last_malloc_record->size;
+       /*
+        * This is a bit of a hack.  But mprotect() requires
+        * huge-page-aligned sizes when operating on hugetlbfs.
+        * So, make sure that we use something that's a multiple
+        * of a huge page when we can.
+        */
+       if (size >= HPAGE_SIZE)
+               size = HPAGE_SIZE;
+       prot = pkey_last_malloc_record->prot;
+
+       /* Use pkey 0 */
+       ret = mprotect_pkey(ptr, size, prot, 0);
+       pkey_assert(!ret);
+
+       /* Make sure that we can set it back to the original pkey. */
+       ret = mprotect_pkey(ptr, size, prot, pkey);
+       pkey_assert(!ret);
+}
+
  void test_ptrace_of_child(int *ptr, u16 pkey)
  {
         __attribute__((__unused__)) int peek_result;
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
         pkey_assert(ret != -1);
         /* Now access from the current task, and expect NO exception: */
         peek_result = read_ptr(plain_ptr);
-       do_not_expect_pk_fault();
+       do_not_expect_pk_fault("read plain pointer after ptrace");
  
         ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
         pkey_assert(ret != -1);
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
         free(plain_ptr_unaligned);
  }
  
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+void *get_pointer_to_instructions(void)
  {
         void *p1;
-       int scratch;
-       int ptr_contents;
-       int ret;
  
         p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
         dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
         /* Point 'p1' at the *second* page of the function: */
         p1 += PAGE_SIZE;
  
+       /*
+        * Try to ensure we fault this in on next touch to ensure
+        * we get an instruction fault as opposed to a data one
+        */
         madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+       return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+       void *p1;
+       int scratch;
+       int ptr_contents;
+       int ret;
+
+       p1 = get_pointer_to_instructions();
         lots_o_noops_around_write(&scratch);
         ptr_contents = read_ptr(p1);
         dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
          */
         madvise(p1, PAGE_SIZE, MADV_DONTNEED);
         lots_o_noops_around_write(&scratch);
-       do_not_expect_pk_fault();
+       do_not_expect_pk_fault("executing on PROT_EXEC memory");
         ptr_contents = read_ptr(p1);
         dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
         expected_pk_fault(pkey);
  }
  
+/*
+ * Exercise a plain mprotect(PROT_EXEC) -- no explicit pkey -- on a page
+ * of this program's own instructions: executing from the page must not
+ * raise a pkey fault, reading it must, and after going through
+ * PROT_NONE to PROT_READ|PROT_EXEC it must be readable again.
+ * The 'ptr' and 'pkey' arguments are not used by this test.
+ */
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+       void *text_page;
+       int sink, word, ret;
+
+       dprintf1("%s() start\n", __func__);
+
+       text_page = get_pointer_to_instructions();
+       lots_o_noops_around_write(&sink);
+       word = read_ptr(text_page);
+       dprintf2("ptr (%p) contents@%d: %x\n", text_page, __LINE__, word);
+
+       /* Deliberately the ordinary mprotect(), not mprotect_pkey(). */
+       ret = mprotect(text_page, PAGE_SIZE, PROT_EXEC);
+       pkey_assert(!ret);
+
+       dprintf2("pkru: %x\n", rdpkru());
+
+       /* Drop the page so the next touch is an *instruction* fault. */
+       madvise(text_page, PAGE_SIZE, MADV_DONTNEED);
+       lots_o_noops_around_write(&sink);
+       do_not_expect_pk_fault("executing on PROT_EXEC memory");
+       word = read_ptr(text_page);
+       dprintf2("ptr (%p) contents@%d: %x\n", text_page, __LINE__, word);
+       expected_pk_fault(UNKNOWN_PKEY);
+
+       /*
+        * Return the page to a readable protection.  That should take
+        * the exec-only pkey off the VMA again.  PROT_NONE comes first
+        * to catch a kernel bug where the pkey was not cleared on the
+        * way to PROT_NONE.
+        */
+       ret = mprotect(text_page, PAGE_SIZE, PROT_NONE);
+       pkey_assert(!ret);
+
+       ret = mprotect(text_page, PAGE_SIZE, PROT_READ|PROT_EXEC);
+       pkey_assert(!ret);
+       word = read_ptr(text_page);
+       do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
  void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
  {
         int size = PAGE_SIZE;
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
         test_kernel_gup_of_access_disabled_region,
         test_kernel_gup_write_to_write_disabled_region,
         test_executing_on_unreadable_memory,
+       test_implicit_mprotect_exec_only_memory,
+       test_mprotect_with_pkey_0,
         test_ptrace_of_child,
         test_pkey_syscalls_on_non_allocated_pkey,
         test_pkey_syscalls_bad_args,
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c

index 10b38178cff207a9ae1e8d59970e3f19fc7be7f6..4ffc0b5e610560c752c99de33d6f5857e7344773 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -211,6 +211,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
         struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
         struct vgic_irq *irq;
         struct kvm_vcpu *vcpu = NULL;
+       unsigned long flags;
  
         if (iter->dist_id == 0) {
                 print_dist_state(s, &kvm->arch.vgic);
@@ -227,9 +228,9 @@ static int vgic_debug_show(struct seq_file *s, void *v)
                 irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
         }
  
-       spin_lock(&irq->irq_lock);
+       spin_lock_irqsave(&irq->irq_lock, flags);
         print_irq_state(s, irq, vcpu);
-       spin_unlock(&irq->irq_lock);
+       spin_unlock_irqrestore(&irq->irq_lock, flags);
  
         return 0;
  }
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c

index a8f07243aa9f0533dd2a8fe57c7589c849142e53..4ed79c939fb447188d0a2bc7c740e85e1d82135c 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -52,6 +52,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
         struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
+       unsigned long flags;
         int ret;
  
         /* In this case there is no put, since we keep the reference. */
@@ -71,7 +72,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
         irq->intid = intid;
         irq->target_vcpu = vcpu;
  
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
  
         /*
          * There could be a race with another vgic_add_lpi(), so we need to
@@ -99,7 +100,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
         dist->lpi_list_count++;
  
  out_unlock:
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
         /*
          * We "cache" the configuration table entries in our struct vgic_irq's.
@@ -280,8 +281,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
         int ret;
         unsigned long flags;
  
-       ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
-                            &prop, 1);
+       ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
+                                 &prop, 1);
  
         if (ret)
                 return ret;
@@ -315,6 +316,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
  {
         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
         struct vgic_irq *irq;
+       unsigned long flags;
         u32 *intids;
         int irq_count, i = 0;
  
@@ -330,7 +332,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
         if (!intids)
                 return -ENOMEM;
  
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                 if (i == irq_count)
                         break;
@@ -339,7 +341,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
                         continue;
                 intids[i++] = irq->intid;
         }
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
         *intid_ptr = intids;
         return i;
@@ -348,10 +350,11 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
  static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
  {
         int ret = 0;
+       unsigned long flags;
  
-       spin_lock(&irq->irq_lock);
+       spin_lock_irqsave(&irq->irq_lock, flags);
         irq->target_vcpu = vcpu;
-       spin_unlock(&irq->irq_lock);
+       spin_unlock_irqrestore(&irq->irq_lock, flags);
  
         if (irq->hw) {
                 struct its_vlpi_map map;
@@ -441,8 +444,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
                  * this very same byte in the last iteration. Reuse that.
                  */
                 if (byte_offset != last_byte_offset) {
-                       ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset,
-                                            &pendmask, 1);
+                       ret = kvm_read_guest_lock(vcpu->kvm,
+                                                 pendbase + byte_offset,
+                                                 &pendmask, 1);
                         if (ret) {
                                 kfree(intids);
                                 return ret;
@@ -786,7 +790,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
                 return false;
  
         /* Each 1st level entry is represented by a 64-bit value. */
-       if (kvm_read_guest(its->dev->kvm,
+       if (kvm_read_guest_lock(its->dev->kvm,
                            BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
                            &indirect_ptr, sizeof(indirect_ptr)))
                 return false;
@@ -1367,8 +1371,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
         cbaser = CBASER_ADDRESS(its->cbaser);
  
         while (its->cwriter != its->creadr) {
-               int ret = kvm_read_guest(kvm, cbaser + its->creadr,
-                                        cmd_buf, ITS_CMD_SIZE);
+               int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
+                                             cmd_buf, ITS_CMD_SIZE);
                 /*
                  * If kvm_read_guest() fails, this could be due to the guest
                  * programming a bogus value in CBASER or something else going
@@ -1893,7 +1897,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
                 int next_offset;
                 size_t byte_offset;
  
-               ret = kvm_read_guest(kvm, gpa, entry, esz);
+               ret = kvm_read_guest_lock(kvm, gpa, entry, esz);
                 if (ret)
                         return ret;
  
@@ -2263,7 +2267,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
         int ret;
  
         BUG_ON(esz > sizeof(val));
-       ret = kvm_read_guest(kvm, gpa, &val, esz);
+       ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
         if (ret)
                 return ret;
         val = le64_to_cpu(val);
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c

index c7423f3768e5f1ecdc710332c125ea0b4ad60df2..bdcf8e7a6161298d373a7605dc5fe6be43fa872e 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -344,7 +344,7 @@ retry:
         bit_nr = irq->intid % BITS_PER_BYTE;
         ptr = pendbase + byte_offset;
  
-       ret = kvm_read_guest(kvm, ptr, &val, 1);
+       ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
         if (ret)
                 return ret;
  
@@ -397,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
                 ptr = pendbase + byte_offset;
  
                 if (byte_offset != last_byte_offset) {
-                       ret = kvm_read_guest(kvm, ptr, &val, 1);
+                       ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
                         if (ret)
                                 return ret;
                         last_byte_offset = byte_offset;
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c

index 97bfba8d9a590870b46f30cb796d3b510181a84d..33c8325c8f35662c03c37319605d28e2f2f9d7a4 100644 (file)
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -43,9 +43,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
   * kvm->lock (mutex)
   *   its->cmd_lock (mutex)
   *     its->its_lock (mutex)
- *       vgic_cpu->ap_list_lock
- *         kvm->lpi_list_lock
- *           vgic_irq->irq_lock
+ *       vgic_cpu->ap_list_lock                must be taken with IRQs disabled
+ *         kvm->lpi_list_lock          must be taken with IRQs disabled
+ *           vgic_irq->irq_lock                must be taken with IRQs disabled
+ *
+ * As the ap_list_lock might be taken from the timer interrupt handler,
+ * we have to disable IRQs before taking this lock and everything lower
+ * than it.
   *
   * If you need to take multiple locks, always take the upper lock first,
   * then the lower ones, e.g. first take the its_lock, then the irq_lock.
@@ -72,8 +76,9 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
         struct vgic_irq *irq = NULL;
+       unsigned long flags;
  
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
  
         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                 if (irq->intid != intid)
@@ -89,7 +94,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
         irq = NULL;
  
  out_unlock:
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
         return irq;
  }
@@ -134,19 +139,20 @@ static void vgic_irq_release(struct kref *ref)
  void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
+       unsigned long flags;
  
         if (irq->intid < VGIC_MIN_LPI)
                 return;
  
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
         if (!kref_put(&irq->refcount, vgic_irq_release)) {
-               spin_unlock(&dist->lpi_list_lock);
+               spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
                 return;
         };
  
         list_del(&irq->lpi_list);
         dist->lpi_list_count--;
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
         kfree(irq);
  }
author	Dan Williams <dan.j.williams@intel.com>
	Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)
committer	Dan Williams <dan.j.williams@intel.com>
	Fri, 8 Jun 2018 22:16:44 +0000 (15:16 -0700)