Merge branches 'arm/tegra', 'arm/mediatek', 'arm/smmu', 'x86/vt-d', 'x86/amd' and...

author Joerg Roedel <jroedel@suse.de>
Tue, 7 May 2019 07:40:12 +0000 (09:40 +0200)
committer Joerg Roedel <jroedel@suse.de>
Tue, 7 May 2019 07:40:12 +0000 (09:40 +0200)
diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt

index 24c5cdaba8d279a4b132fbd2f964ae1460b3fd0f..ca83dcc84fb8ee5cfd876cf0bb3d8af5fd85ba6b 100644 (file)
--- a/Documentation/devicetree/bindings/net/davinci_emac.txt
+++ b/Documentation/devicetree/bindings/net/davinci_emac.txt
@@ -20,6 +20,8 @@ Required properties:
  Optional properties:
  - phy-handle: See ethernet.txt file in the same directory.
                If absent, davinci_emac driver defaults to 100/FULL.
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
  - ti,davinci-rmii-en: 1 byte, 1 means use RMII
  - ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
  
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt

index cfc376bc977aa0a25e64d4e1ef617a1a326fe634..a6862158058461f5af428498ea14c98aed1f7775 100644 (file)
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -10,15 +10,14 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
    the boot program; should be used in cases where the MAC address assigned to
    the device by the boot program is different from the "local-mac-address"
    property;
-- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
-- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
  - max-speed: number, specifies maximum speed in Mbit/s supported by the device;
  - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
    the maximum frame size (there's contradiction in the Devicetree
    Specification).
  - phy-mode: string, operation mode of the PHY interface. This is now a de-facto
    standard property; supported values are:
-  * "internal"
+  * "internal" (Internal means there is not a standard bus between the MAC and
+     the PHY, something proprietary is being used to embed the PHY in the MAC.)
    * "mii"
    * "gmii"
    * "sgmii"
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt

index 174f292d8a3e8c14cf7d5d1105380b5f3d358544..8b80515729d7145cc05c9293857212ba914e0607 100644 (file)
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -26,6 +26,10 @@ Required properties:
         Optional elements: 'tsu_clk'
  - clocks: Phandles to input clocks.
  
+Optional properties:
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
+
  Optional properties for PHY child node:
  - reset-gpios : Should specify the gpio for phy reset
  - magic-packet : If present, indicates that the hardware supports waking
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt

index e12a4900cf72cb00b1ade4c0257a23c93d2d8f21..d192f8b9948b5483c16b83f41a5b25c1e5cda846 100644 (file)
--- a/Documentation/networking/decnet.txt
+++ b/Documentation/networking/decnet.txt
@@ -22,8 +22,6 @@ you'll need the following options as well...
      CONFIG_DECNET_ROUTER (to be able to add/delete routes)
      CONFIG_NETFILTER (will be required for the DECnet routing daemon)
  
-    CONFIG_DECNET_ROUTE_FWMARK is optional
-
  Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
  that you need it, in general you won't and it can cause ifconfig to
  malfunction.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt

index acdfb5d2bcaa44a8a0ecdcfcae14202d1ed75bc3..e2142fe40cdad09e80f43110a2f93dc59b5900f8 100644 (file)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER
         minimum RTT when it is moved to a longer path (e.g., due to traffic
         engineering). A longer window makes the filter more resistant to RTT
         inflations such as transient congestion. The unit is seconds.
+       Possible values: 0 - 86400 (1 day)
         Default: 300
  
  tcp_moderate_rcvbuf - BOOLEAN
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt

index 6af24cdb25ccb51a947d0bf50f3442b53a963d81..3f13d8599337ea8a010d3a33ae605201691a427e 100644 (file)
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to
  increase the success rate of future high-order allocations such as SLUB
  allocations, THP and hugetlbfs pages.
  
-To make it sensible with respect to the watermark_scale_factor parameter,
-the unit is in fractions of 10,000. The default value of 15,000 means
-that up to 150% of the high watermark will be reclaimed in the event of
-a pageblock being mixed due to fragmentation. The level of reclaim is
-determined by the number of fragmentation events that occurred in the
-recent past. If this value is smaller than a pageblock then a pageblocks
-worth of pages will be reclaimed (e.g.  2MB on 64-bit x86). A boost factor
-of 0 will disable the feature.
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
+watermark will be reclaimed in the event of a pageblock being mixed due
+to fragmentation. The level of reclaim is determined by the number of
+fragmentation events that occurred in the recent past. If this value is
+smaller than a pageblock then a pageblocks worth of pages will be reclaimed
+(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
  
  =============================================================
  
diff --git a/MAINTAINERS b/MAINTAINERS

index 09f43f1bdd15eb92acb67e22891c3b4575c71b1b..5c38f21aee787f9fb9f6c6ffa09e9de5ce1f82aa 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3121,6 +3121,7 @@ F:        drivers/cpufreq/bmips-cpufreq.c
  BROADCOM BMIPS MIPS ARCHITECTURE
  M:     Kevin Cernekee <cernekee@gmail.com>
  M:     Florian Fainelli <f.fainelli@gmail.com>
+L:     bcm-kernel-feedback-list@broadcom.com
  L:     linux-mips@vger.kernel.org
  T:     git git://github.com/broadcom/stblinux.git
  S:     Maintained
@@ -8707,6 +8708,7 @@ F:        scripts/leaking_addresses.pl
  LED SUBSYSTEM
  M:     Jacek Anaszewski <jacek.anaszewski@gmail.com>
  M:     Pavel Machek <pavel@ucw.cz>
+R:     Dan Murphy <dmurphy@ti.com>
  L:     linux-leds@vger.kernel.org
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
  S:     Maintained
diff --git a/Makefile b/Makefile

index abe13538a8c04af16e30424282ecebe449a0d88f..2b99679148dc74fa73f05149adb2c144c0988a5d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
  VERSION = 5
  PATCHLEVEL = 1
  SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
  NAME = Shy Crocodile
  
  # *DOCUMENTATION*
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl

index 63ed39cbd3bd13a40e98ec3a9aff3b3266b3e235..165f268beafc471e14eac4c8d6d08e52c5c89864 100644 (file)
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
  532    common  getppid                         sys_getppid
  # all other architectures have common numbers for new syscall, alpha
  # is the exception.
+534    common  pidfd_send_signal               sys_pidfd_send_signal
+535    common  io_uring_setup                  sys_io_uring_setup
+536    common  io_uring_enter                  sys_io_uring_enter
+537    common  io_uring_register               sys_io_uring_register
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 850b4805e2d171436e539b326867d6ce08a6f9d6..9aed25a6019bc991166294b0923121ba513509dc 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -73,7 +73,7 @@ config ARM
         select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
         select HAVE_EXIT_THREAD
         select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
-       select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL
+       select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
         select HAVE_FUNCTION_TRACER if !XIP_KERNEL
         select HAVE_GCC_PLUGINS
         select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug

index 6d6e0330930b52f7369a46536473fa7174fad2d9..e388af4594a6e5e42a860469e10a53b89522e7bf 100644 (file)
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -47,8 +47,8 @@ config DEBUG_WX
  
  choice
         prompt "Choose kernel unwinder"
-       default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER
-       default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER
+       default UNWINDER_ARM if AEABI
+       default UNWINDER_FRAME_POINTER if !AEABI
         help
           This determines which method will be used for unwinding kernel stack
           traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
@@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER
  
  config UNWINDER_ARM
         bool "ARM EABI stack unwinder"
-       depends on AEABI
+       depends on AEABI && !FUNCTION_GRAPH_TRACER
         select ARM_UNWIND
         help
           This option enables stack unwinding support in the kernel
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S

index 6c7ccb428c079c3e43ef9cce2c344ec4b6809369..7135820f76d4f8b8d24374738332c0c4c0644bf7 100644 (file)
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry)
  
                 @ Preserve return value of efi_entry() in r4
                 mov     r4, r0
-               bl      cache_clean_flush
+
+               @ our cache maintenance code relies on CP15 barrier instructions
+               @ but since we arrived here with the MMU and caches configured
+               @ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
+               @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
+               @ the enable path will be executed on v7+ only.
+               mrc     p15, 0, r1, c1, c0, 0   @ read SCTLR
+               tst     r1, #(1 << 5)           @ CP15BEN bit set?
+               bne     0f
+               orr     r1, r1, #(1 << 5)       @ CP15 barrier instructions
+               mcr     p15, 0, r1, c1, c0, 0   @ write SCTLR
+ ARM(          .inst   0xf57ff06f              @ v7+ isb       )
+ THUMB(                isb                                             )
+
+0:             bl      cache_clean_flush
                 bl      cache_off
  
                 @ Set parameters for booting zImage according to boot protocol
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S

index c08d2d890f7b918981c472c155c6df368a1b30b3..b38bbd011b358f433e3c9201fdb2015611286252 100644 (file)
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -133,9 +133,9 @@ __secondary_data:
   */
         .text
  __after_proc_init:
-#ifdef CONFIG_ARM_MPU
  M_CLASS(movw   r12, #:lower16:BASEADDR_V7M_SCB)
  M_CLASS(movt   r12, #:upper16:BASEADDR_V7M_SCB)
+#ifdef CONFIG_ARM_MPU
  M_CLASS(ldr    r3, [r12, 0x50])
  AR_CLASS(mrc   p15, 0, r3, c0, c1, 4)          @ Read ID_MMFR0
         and     r3, r3, #(MMFR0_PMSA)           @ PMSA field
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl

index 9016f4081bb9cff33886860e9a1d48f0ee58e47c..0393917eaa57aaf8cda4548b9a34a705be6c73a0 100644 (file)
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

index d1dd93436e1eedad0ea3cf83ba1cdc6b3fd50c22..f2a83ff6b73c2414110c02dc14aa24686d6ada9c 100644 (file)
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
  #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
  #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
  
-#define __NR_compat_syscalls           424
+#define __NR_compat_syscalls           428
  #endif
  
  #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h

index 5590f262369079bca3b66561a51e9b3f4705cdd7..23f1a44acada413fb4e2ad5411624d2925c71835 100644 (file)
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
  __SYSCALL(__NR_futex_time64, sys_futex)
  #define __NR_sched_rr_get_interval_time64 423
  __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
  
  /*
   * Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c

index 07b298120182042d2a1dea18160ef63e5a678b9d..65a51331088eb0afd0db70e52fb03335cc8151dc 100644 (file)
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
                  * to be revisited if support for multiple ftrace entry points
                  * is added in the future, but for now, the pr_err() below
                  * deals with a theoretical issue only.
+                *
+                * Note that PLTs are place relative, and plt_entries_equal()
+                * checks whether they point to the same target. Here, we need
+                * to check if the actual opcodes are in fact identical,
+                * regardless of the offset in memory so use memcmp() instead.
                  */
                 trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
-               if (!plt_entries_equal(mod->arch.ftrace_trampoline,
-                                      &trampoline)) {
+               if (memcmp(mod->arch.ftrace_trampoline, &trampoline,
+                          sizeof(trampoline))) {
                         if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) {
                                 pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
                                 return -EINVAL;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c

index 6bc135042f5e4dc244dbf14e8ea953121931ad2b..7cae155e81a5fb71aa8148865e44d9482bfb5b9a 100644 (file)
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -363,7 +363,7 @@ void __init arm64_memblock_init(void)
                  * Otherwise, this is a no-op
                  */
                 u64 base = phys_initrd_start & PAGE_MASK;
-               u64 size = PAGE_ALIGN(phys_initrd_size);
+               u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
  
                 /*
                  * We can only add back the initrd memory if we don't end up
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl

index ab9cda5f6136ad60753de5e725f6a6271ad88e9c..56e3d0b685e19119afc0a3e244ca64c3752aca4e 100644 (file)
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
  332    common  pkey_free                       sys_pkey_free
  333    common  rseq                            sys_rseq
  # 334 through 423 are reserved to sync up with other architectures
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl

index 125c14178979c010648895bd7925f85e5e54b10d..df4ec3ec71d1518bfac752044f7a1eae9291535a 100644 (file)
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl

index 8ee3a8c18498eb591ab9d1fc2b2044d43afa1cd4..4964947732af3e37bd5d651aaad9a3f3ccd39056 100644 (file)
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -429,3 +429,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c

index 4a70c5de8c929bad778788db2f6c46b3cc2633a2..25a57895a3a359f6f7ded09785bf6e4cc15ea1db 100644 (file)
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -210,12 +210,6 @@ const char *get_system_type(void)
         return ath79_sys_type;
  }
  
-int get_c0_perfcount_int(void)
-{
-       return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
  unsigned int get_c0_compare_int(void)
  {
         return CP0_LEGACY_COMPARE_IRQ;
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S

index f158c5894a9a8760d3c1ec3430617bad976fac97..feb2653490dfe7a744b0eaa41d913297d9392ed5 100644 (file)
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -125,7 +125,7 @@ trace_a_syscall:
         subu    t1, v0,  __NR_O32_Linux
         move    a1, v0
         bnez    t1, 1f /* __NR_syscall at offset 0 */
-       lw      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+       ld      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
         .set    pop
  
  1:     jal     syscall_trace_enter
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl

index 15f4117900ee8d8c9285b61dd332b3c1832558ea..9392dfe33f97ec48a74014d3bc49940dafdc944d 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -362,3 +362,7 @@
  421    n32     rt_sigtimedwait_time64          compat_sys_rt_sigtimedwait_time64
  422    n32     futex_time64                    sys_futex
  423    n32     sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    n32     pidfd_send_signal               sys_pidfd_send_signal
+425    n32     io_uring_setup                  sys_io_uring_setup
+426    n32     io_uring_enter                  sys_io_uring_enter
+427    n32     io_uring_register               sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl

index c85502e67b44145420d6638489300aae4388cefe..cd0c8aa21fbacfb7563c39123f0880d2b753a7c2 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -338,3 +338,7 @@
  327    n64     rseq                            sys_rseq
  328    n64     io_pgetevents                   sys_io_pgetevents
  # 329 through 423 are reserved to sync up with other architectures
+424    n64     pidfd_send_signal               sys_pidfd_send_signal
+425    n64     io_uring_setup                  sys_io_uring_setup
+426    n64     io_uring_enter                  sys_io_uring_enter
+427    n64     io_uring_register               sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl

index 2e063d0f837e78c3cb566d68374c33e7bcdd9a8e..e849e8ffe4a25b4516cdc748abfa96bb2c918ebe 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -411,3 +411,7 @@
  421    o32     rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    o32     futex_time64                    sys_futex                       sys_futex
  423    o32     sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    o32     pidfd_send_signal               sys_pidfd_send_signal
+425    o32     io_uring_setup                  sys_io_uring_setup
+426    o32     io_uring_enter                  sys_io_uring_enter
+427    o32     io_uring_register               sys_io_uring_register
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl

index b26766c6647dc7a40fd3235460902112c20cd3d4..fe8ca623add89a627710b697f7886fc879589ac2 100644 (file)
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -420,3 +420,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig

index 5ba131c30f6bcded4e65ccc40bb8aa2595e44ff1..1bcd468ab422dc100b120607b03d5d587850b453 100644 (file)
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -266,6 +266,7 @@ CONFIG_UDF_FS=m
  CONFIG_MSDOS_FS=m
  CONFIG_VFAT_FS=m
  CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
  # CONFIG_MISC_FILESYSTEMS is not set
  # CONFIG_NETWORK_FILESYSTEMS is not set
  CONFIG_NLS=y
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl

index b18abb0c3dae6248cfd697b1b9ee2343c39e1ba3..00f5a63c8d9a65aefd60df95b75d9cfae1fe8493 100644 (file)
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -505,3 +505,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c

index e7a9c4f6bfca49585beffcb6fc3dc755eb054e8f..8330f135294f48ecfff9bb5d3555f6fa3e3514c3 100644 (file)
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -95,28 +95,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                               unsigned long entries, unsigned long dev_hpa,
                               struct mm_iommu_table_group_mem_t **pmem)
  {
-       struct mm_iommu_table_group_mem_t *mem;
-       long i, ret, locked_entries = 0;
+       struct mm_iommu_table_group_mem_t *mem, *mem2;
+       long i, ret, locked_entries = 0, pinned = 0;
         unsigned int pageshift;
-
-       mutex_lock(&mem_list_mutex);
-
-       list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
-                       next) {
-               /* Overlap? */
-               if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
-                               (ua < (mem->ua +
-                                      (mem->entries << PAGE_SHIFT)))) {
-                       ret = -EINVAL;
-                       goto unlock_exit;
-               }
-
-       }
+       unsigned long entry, chunk;
  
         if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
                 ret = mm_iommu_adjust_locked_vm(mm, entries, true);
                 if (ret)
-                       goto unlock_exit;
+                       return ret;
  
                 locked_entries = entries;
         }
@@ -148,17 +135,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
         }
  
         down_read(&mm->mmap_sem);
-       ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+       chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
+                       sizeof(struct vm_area_struct *);
+       chunk = min(chunk, entries);
+       for (entry = 0; entry < entries; entry += chunk) {
+               unsigned long n = min(entries - entry, chunk);
+
+               ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
+                               FOLL_WRITE, mem->hpages + entry, NULL);
+               if (ret == n) {
+                       pinned += n;
+                       continue;
+               }
+               if (ret > 0)
+                       pinned += ret;
+               break;
+       }
         up_read(&mm->mmap_sem);
-       if (ret != entries) {
-               /* free the reference taken */
-               for (i = 0; i < ret; i++)
-                       put_page(mem->hpages[i]);
-
-               vfree(mem->hpas);
-               kfree(mem);
-               ret = -EFAULT;
-               goto unlock_exit;
+       if (pinned != entries) {
+               if (!ret)
+                       ret = -EFAULT;
+               goto free_exit;
         }
  
         pageshift = PAGE_SHIFT;
@@ -183,21 +180,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
         }
  
  good_exit:
-       ret = 0;
         atomic64_set(&mem->mapped, 1);
         mem->used = 1;
         mem->ua = ua;
         mem->entries = entries;
-       *pmem = mem;
  
-       list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+       mutex_lock(&mem_list_mutex);
  
-unlock_exit:
-       if (locked_entries && ret)
-               mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+       list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+               /* Overlap? */
+               if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+                               (ua < (mem2->ua +
+                                      (mem2->entries << PAGE_SHIFT)))) {
+                       ret = -EINVAL;
+                       mutex_unlock(&mem_list_mutex);
+                       goto free_exit;
+               }
+       }
+
+       list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
  
         mutex_unlock(&mem_list_mutex);
  
+       *pmem = mem;
+
+       return 0;
+
+free_exit:
+       /* free the reference taken */
+       for (i = 0; i < pinned; i++)
+               put_page(mem->hpages[i]);
+
+       vfree(mem->hpas);
+       kfree(mem);
+
+unlock_exit:
+       mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+
         return ret;
  }
  
@@ -266,7 +285,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
  long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
  {
         long ret = 0;
-       unsigned long entries, dev_hpa;
+       unsigned long unlock_entries = 0;
  
         mutex_lock(&mem_list_mutex);
  
@@ -287,17 +306,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
                 goto unlock_exit;
         }
  
+       if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+               unlock_entries = mem->entries;
+
         /* @mapped became 0 so now mappings are disabled, release the region */
-       entries = mem->entries;
-       dev_hpa = mem->dev_hpa;
         mm_iommu_release(mem);
  
-       if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
-               mm_iommu_adjust_locked_vm(mm, entries, false);
-
  unlock_exit:
         mutex_unlock(&mem_list_mutex);
  
+       mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+
         return ret;
  }
  EXPORT_SYMBOL_GPL(mm_iommu_put);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype

index 842b2c7e156aba4cb2a04d8897fb7aa6128c3b4d..50cd09b4e05d51a9d9c46722065f9ebf5e55295c 100644 (file)
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
  
  config PPC_RADIX_MMU
         bool "Radix MMU Support"
-       depends on PPC_BOOK3S_64
+       depends on PPC_BOOK3S_64 && HUGETLB_PAGE
         select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
         default y
         help
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl

index 02579f95f391b6524ddd28004cc6d6a511be974b..061418f787c3712f4091cfeb94b8dfb5d2b1eb03 100644 (file)
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
  421    32      rt_sigtimedwait_time64  -                               compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64            -                               sys_futex
  423    32      sched_rr_get_interval_time64    -                       sys_sched_rr_get_interval
+424  common    pidfd_send_signal       sys_pidfd_send_signal           sys_pidfd_send_signal
+425  common    io_uring_setup          sys_io_uring_setup              sys_io_uring_setup
+426  common    io_uring_enter          sys_io_uring_enter              sys_io_uring_enter
+427  common    io_uring_register       sys_io_uring_register           sys_io_uring_register
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl

index bfda678576e4335788f844db6ec7632fda5faedf..480b057556ee45a3871485ce7301d2436cca8255 100644 (file)
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl

index b9a5a04b2d2c543791088b69aae612ed56a97e5e..a1dd24307b001aa95801d3e24003ffd719711728 100644 (file)
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -469,3 +469,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c

index c0d6c560df69e0e63941a34539660770304ff612..5a237e8dbf8d563504a6cfcb4a67a2b7350bed0a 100644 (file)
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
         boot_params->hdr.loadflags &= ~KASLR_FLAG;
  
         /* Save RSDP address for later use. */
-       boot_params->acpi_rsdp_addr = get_rsdp_addr();
+       /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
  
         sanitize_boot_params(boot_params);
  
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c

index 94a4b7fc75d0ecf344bade95be1cf563576250d2..d41de9af7a39b52bcd2913b2f1ef5edf4f3552b3 100644 (file)
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -76,15 +76,15 @@
   *                            Scope: Package (physical package)
   *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
   *                            perf code: 0x04
- *                            Available model: HSW ULT,CNL
+ *                            Available model: HSW ULT,KBL,CNL
   *                            Scope: Package (physical package)
   *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
   *                            perf code: 0x05
- *                            Available model: HSW ULT,CNL
+ *                            Available model: HSW ULT,KBL,CNL
   *                            Scope: Package (physical package)
   *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
   *                            perf code: 0x06
- *                            Available model: HSW ULT,GLM,CNL
+ *                            Available model: HSW ULT,KBL,GLM,CNL
   *                            Scope: Package (physical package)
   *
   */
@@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
         X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
         X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
  
-       X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  snb_cstates),
-       X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  hswult_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates),
  
         X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),
  
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c

index f905a2371080beee339dac3f5dad13d0ab1e7ef8..8dacdb96899ec5a76749751d2675b5b827855141 100644 (file)
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,7 @@
  #include <linux/memblock.h>
  #include <linux/swapfile.h>
  #include <linux/swapops.h>
+#include <linux/kmemleak.h>
  
  #include <asm/set_memory.h>
  #include <asm/e820/api.h>
@@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
         if (debug_pagealloc_enabled()) {
                 pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
                         begin, end - 1);
+               /*
+                * Inform kmemleak about the hole in the memory since the
+                * corresponding pages will be unmapped.
+                */
+               kmemleak_free_part((void *)begin, end - begin);
                 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
         } else {
                 /*
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl

index 6af49929de857b24f9ddb134ef79efd0f7c08e72..30084eaf84227ac89eb6e5baa0aed77d6fad5f50 100644 (file)
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -394,3 +394,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/crypto/lrw.c b/crypto/lrw.c

index 0430ccd08728655ecf4e44c65ef16e956bd928c1..08a0e458bc3e62dcb17e0dbec9151290b895aa33 100644 (file)
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
  {
         struct skcipher_request *req = areq->data;
  
-       if (!err)
+       if (!err) {
+               struct rctx *rctx = skcipher_request_ctx(req);
+
+               rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
                 err = xor_tweak_post(req);
+       }
  
         skcipher_request_complete(req, err);
  }
diff --git a/crypto/xts.c b/crypto/xts.c

index 847f54f7678972dcc50c06b867ac2842a7535b4a..2f948328cabbd97f8504941b4fd50cce1122b086 100644 (file)
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
  {
         struct skcipher_request *req = areq->data;
  
-       if (!err)
+       if (!err) {
+               struct rctx *rctx = skcipher_request_ctx(req);
+
+               rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
                 err = xor_tweak_post(req);
+       }
  
         skcipher_request_complete(req, err);
  }
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c

index e48894e002ba8c4e2895ef72afae72f025dfee8e..4000902e57f075c62446886bfabfe433592090b8 100644 (file)
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1028,6 +1028,14 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
         dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset);
  }
  
+static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
+{
+       struct acpi_iort_root_complex *pci_rc;
+
+       pci_rc = (struct acpi_iort_root_complex *)node->node_data;
+       return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED;
+}
+
  /**
   * iort_iommu_configure - Set-up IOMMU configuration for a device.
   *
@@ -1063,6 +1071,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
                 info.node = node;
                 err = pci_for_each_dma_alias(to_pci_dev(dev),
                                              iort_pci_iommu_init, &info);
+
+               if (!err && iort_pci_rc_supports_ats(node))
+                       dev->iommu_fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS;
         } else {
                 int i = 0;
  
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c

index 11e1663bdc4dee0e2cfd7cd9ba61783d00277bbf..b2c06da4f62e336ce262f1445e3329803da4d5cb 100644 (file)
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1646,7 +1646,7 @@ static irqreturn_t fs_irq (int irq, void *dev_id)
         }
  
         if (status & ISR_TBRQ_W) {
-               fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n");
+               fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n");
                 process_txdone_queue (dev, &dev->tx_relq);
         }
  
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c

index 399cad7daae77b37508033ec1cac61bebefbc550..d58a359a66225f39682c067739eb9843bae36b80 100644 (file)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -774,18 +774,18 @@ struct zram_work {
         struct zram *zram;
         unsigned long entry;
         struct bio *bio;
+       struct bio_vec bvec;
  };
  
  #if PAGE_SIZE != 4096
  static void zram_sync_read(struct work_struct *work)
  {
-       struct bio_vec bvec;
         struct zram_work *zw = container_of(work, struct zram_work, work);
         struct zram *zram = zw->zram;
         unsigned long entry = zw->entry;
         struct bio *bio = zw->bio;
  
-       read_from_bdev_async(zram, &bvec, entry, bio);
+       read_from_bdev_async(zram, &zw->bvec, entry, bio);
  }
  
  /*
@@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
  {
         struct zram_work work;
  
+       work.bvec = *bvec;
         work.zram = zram;
         work.entry = entry;
         work.bio = bio;
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c

index ec8a291d62bab3c58f584a61699d3cdc0118a689..54093ffd0aefa8a7c3a40f887958c61b2222d03c 100644 (file)
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -671,7 +671,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
         d = bcm2835_dma_create_cb_chain(chan, direction, false,
                                         info, extra,
                                         frames, src, dst, 0, 0,
-                                       GFP_KERNEL);
+                                       GFP_NOWAIT);
         if (!d)
                 return NULL;
  
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c

index 131f3974740d5d75a67141f15b019b06d7011c8a..814853842e29f9e103beab75468de580ac34bca7 100644 (file)
--- a/drivers/dma/mediatek/mtk-cqdma.c
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc,
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
         mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT);
  #else
-       mtk_dma_set(pc, MTK_CQDMA_SRC2, 0);
+       mtk_dma_set(pc, MTK_CQDMA_DST2, 0);
  #endif
  
         /* setup the length */
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c

index 2b4f2569816956621e1d6201851ae16d21db27f3..e2a5398f89b51129345fbb076710083ef24f9188 100644 (file)
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1282,6 +1282,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
         enum dma_status status;
         unsigned int residue = 0;
         unsigned int dptr = 0;
+       unsigned int chcrb;
+       unsigned int tcrb;
+       unsigned int i;
  
         if (!desc)
                 return 0;
@@ -1329,6 +1332,24 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
                 return 0;
         }
  
+       /*
+        * We need to read two registers.
+        * Make sure the control register does not skip to next chunk
+        * while reading the counter.
+        * Trying it 3 times should be enough: Initial read, retry, retry
+        * for the paranoid.
+        */
+       for (i = 0; i < 3; i++) {
+               chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+                                           RCAR_DMACHCRB_DPTR_MASK;
+               tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB);
+               /* Still the same? */
+               if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+                             RCAR_DMACHCRB_DPTR_MASK))
+                       break;
+       }
+       WARN_ONCE(i >= 3, "residue might be not continuous!");
+
         /*
          * In descriptor mode the descriptor running pointer is not maintained
          * by the interrupt handler, find the running descriptor from the
@@ -1336,8 +1357,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
          * mode just use the running descriptor pointer.
          */
         if (desc->hwdescs.use) {
-               dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
-                       RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+               dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT;
                 if (dptr == 0)
                         dptr = desc->nchunks;
                 dptr--;
@@ -1355,7 +1375,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
         }
  
         /* Add the residue for the current chunk. */
-       residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift;
+       residue += tcrb << desc->xfer_shift;
  
         return residue;
  }
@@ -1368,6 +1388,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
         enum dma_status status;
         unsigned long flags;
         unsigned int residue;
+       bool cyclic;
  
         status = dma_cookie_status(chan, cookie, txstate);
         if (status == DMA_COMPLETE || !txstate)
@@ -1375,10 +1396,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
  
         spin_lock_irqsave(&rchan->lock, flags);
         residue = rcar_dmac_chan_get_residue(rchan, cookie);
+       cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false;
         spin_unlock_irqrestore(&rchan->lock, flags);
  
         /* if there's no residue, the cookie is complete */
-       if (!residue)
+       if (!residue && !cyclic)
                 return DMA_COMPLETE;
  
         dma_set_residue(txstate, residue);
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c

index f0223cee97744825ee508e5c7ffe6057829359bf..77092268ee955fe280926f8426bd4613a949595f 100644 (file)
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
                         irq_set_handler_locked(data, handle_edge_irq);
                         break;
                 case IRQ_TYPE_EDGE_BOTH:
+                       sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
                         sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1);
                         irq_set_handler_locked(data, handle_edge_irq);
                         break;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c

index 0495bf1d480a4cfe464e8ff330922264d03deff7..bca3e7740ef66c8fac2b8f89866935e3655fc9c3 100644 (file)
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1379,7 +1379,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
  
         status = gpiochip_add_irqchip(chip, lock_key, request_key);
         if (status)
-               goto err_remove_chip;
+               goto err_free_gpiochip_mask;
  
         status = of_gpiochip_add(chip);
         if (status)
@@ -1387,7 +1387,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
  
         status = gpiochip_init_valid_mask(chip);
         if (status)
-               goto err_remove_chip;
+               goto err_remove_of_chip;
  
         for (i = 0; i < chip->ngpio; i++) {
                 struct gpio_desc *desc = &gdev->descs[i];
@@ -1415,14 +1415,18 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
         if (gpiolib_initialized) {
                 status = gpiochip_setup_dev(gdev);
                 if (status)
-                       goto err_remove_chip;
+                       goto err_remove_acpi_chip;
         }
         return 0;
  
-err_remove_chip:
+err_remove_acpi_chip:
         acpi_gpiochip_remove(chip);
+err_remove_of_chip:
         gpiochip_free_hogs(chip);
         of_gpiochip_remove(chip);
+err_remove_chip:
+       gpiochip_irqchip_remove(chip);
+err_free_gpiochip_mask:
         gpiochip_free_valid_mask(chip);
  err_remove_irqchip_mask:
         gpiochip_irqchip_free_valid_mask(chip);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c

index db761329a1e3ef19d2fa05f86fdaf5b3b06c6b53..ab7968c8f6a29937177c0a464f6da4db9e297631 100644 (file)
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1046,6 +1046,10 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi)
         if (hdmi->version < 0x200a)
                 return false;
  
+       /* Disable if no DDC bus */
+       if (!hdmi->ddc)
+               return false;
+
         /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */
         if (!display->hdmi.scdc.supported ||
             !display->hdmi.scdc.scrambling.supported)
@@ -1684,13 +1688,13 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
                          * Source Devices compliant shall set the
                          * Source Version = 1.
                          */
-                       drm_scdc_readb(&hdmi->i2c->adap, SCDC_SINK_VERSION,
+                       drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION,
                                        &bytes);
-                       drm_scdc_writeb(&hdmi->i2c->adap, SCDC_SOURCE_VERSION,
+                       drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION,
                                 min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
  
                         /* Enabled Scrambling in the Sink */
-                       drm_scdc_set_scrambling(&hdmi->i2c->adap, 1);
+                       drm_scdc_set_scrambling(hdmi->ddc, 1);
  
                         /*
                          * To activate the scrambler feature, you must ensure
@@ -1706,7 +1710,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
                         hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
                         hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
                                     HDMI_MC_SWRSTZ);
-                       drm_scdc_set_scrambling(&hdmi->i2c->adap, 0);
+                       drm_scdc_set_scrambling(hdmi->ddc, 0);
                 }
         }
  
@@ -1800,6 +1804,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi)
          * iteration for others.
          * The Amlogic Meson GX SoCs (v2.01a) have been identified as needing
          * the workaround with a single iteration.
+        * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have
+        * been identified as needing the workaround with a single iteration.
          */
  
         switch (hdmi->version) {
@@ -1808,7 +1814,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi)
                 break;
         case 0x131a:
         case 0x132a:
+       case 0x200a:
         case 0x201a:
+       case 0x211a:
         case 0x212a:
                 count = 1;
                 break;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c

index ab4e60dfd6a3460001cbcae4691f1ede8ebb230e..98cea1f4b3bf05500dcd7fe24b2f367fa6c9e3eb 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3862,14 +3862,16 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
                 ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state);
         else
                 ret = intel_dp_compute_config(encoder, pipe_config, conn_state);
+       if (ret)
+               return ret;
  
-       if (IS_GEN9_LP(dev_priv) && ret)
+       if (IS_GEN9_LP(dev_priv))
                 pipe_config->lane_lat_optim_mask =
                         bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
  
         intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
  
-       return ret;
+       return 0;
  
  }
  
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c

index 8891f29a8c7fffacad25f29e718376aa164261f7..48da4a969a0a9afabf6be3db6aff252fd02d3c83 100644 (file)
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1886,6 +1886,9 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
         int pipe_bpp;
         int ret;
  
+       pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) &&
+               intel_dp_supports_fec(intel_dp, pipe_config);
+
         if (!intel_dp_supports_dsc(intel_dp, pipe_config))
                 return -EINVAL;
  
@@ -2116,9 +2119,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
         if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
                 return -EINVAL;
  
-       pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) &&
-                                 intel_dp_supports_fec(intel_dp, pipe_config);
-
         ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state);
         if (ret < 0)
                 return ret;
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c

index e8f694b57b8ac857528824051ddcc42016e86239..376ffe842e2678d1f31ee68acd38908cff8a85d9 100644 (file)
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -338,8 +338,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
                                     bool *enabled, int width, int height)
  {
         struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
+       unsigned long conn_configured, conn_seq, mask;
         unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
-       unsigned long conn_configured, conn_seq;
         int i, j;
         bool *save_enabled;
         bool fallback = true, ret = true;
@@ -357,9 +357,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
                 drm_modeset_backoff(&ctx);
  
         memcpy(save_enabled, enabled, count);
-       conn_seq = GENMASK(count - 1, 0);
+       mask = GENMASK(count - 1, 0);
         conn_configured = 0;
  retry:
+       conn_seq = conn_configured;
         for (i = 0; i < count; i++) {
                 struct drm_fb_helper_connector *fb_conn;
                 struct drm_connector *connector;
@@ -372,8 +373,7 @@ retry:
                 if (conn_configured & BIT(i))
                         continue;
  
-               /* First pass, only consider tiled connectors */
-               if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+               if (conn_seq == 0 && !connector->has_tile)
                         continue;
  
                 if (connector->status == connector_status_connected)
@@ -477,10 +477,8 @@ retry:
                 conn_configured |= BIT(i);
         }
  
-       if (conn_configured != conn_seq) { /* repeat until no more are found */
-               conn_seq = conn_configured;
+       if ((conn_configured & mask) != mask && conn_configured != conn_seq)
                 goto retry;
-       }
  
         /*
          * If the BIOS didn't enable everything it could, fall back to have the
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c

index ec3602ebbc1cd1e87da13c9c909078927ccf2287..54011df8c2e807d7984dc7985764525899ff49d7 100644 (file)
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -71,7 +71,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc,
         if (disable_partial)
                 ipu_plane_disable(ipu_crtc->plane[1], true);
         if (disable_full)
-               ipu_plane_disable(ipu_crtc->plane[0], false);
+               ipu_plane_disable(ipu_crtc->plane[0], true);
  }
  
  static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c

index 19fc601c9eeb52fc9704bbfc7c164dbfa71b7717..a1bec2779e76220c8568f5c78a7345ef2f7c3d36 100644 (file)
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -366,10 +366,9 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
  EXPORT_SYMBOL(drm_sched_increase_karma);
  
  /**
- * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job
+ * drm_sched_stop - stop the scheduler
   *
   * @sched: scheduler instance
- * @bad: bad scheduler job
   *
   */
  void drm_sched_stop(struct drm_gpu_scheduler *sched)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c

index 3ebd9f5e2719d7f028c2c87b1e2cedd6c60a5365..29258b404e549fbd31d67ec164c3bc852a10eb14 100644 (file)
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -16,6 +16,7 @@
  #include <linux/of_reserved_mem.h>
  
  #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
  #include <drm/drm_fb_cma_helper.h>
  #include <drm/drm_fb_helper.h>
  #include <drm/drm_gem_cma_helper.h>
@@ -85,6 +86,8 @@ static int sun4i_drv_bind(struct device *dev)
                 ret = -ENOMEM;
                 goto free_drm;
         }
+
+       dev_set_drvdata(dev, drm);
         drm->dev_private = drv;
         INIT_LIST_HEAD(&drv->frontend_list);
         INIT_LIST_HEAD(&drv->engine_list);
@@ -144,8 +147,12 @@ static void sun4i_drv_unbind(struct device *dev)
  
         drm_dev_unregister(drm);
         drm_kms_helper_poll_fini(drm);
+       drm_atomic_helper_shutdown(drm);
         drm_mode_config_cleanup(drm);
+
+       component_unbind_all(dev, NULL);
         of_reserved_mem_device_release(dev);
+
         drm_dev_put(drm);
  }
  
@@ -395,6 +402,8 @@ static int sun4i_drv_probe(struct platform_device *pdev)
  
  static int sun4i_drv_remove(struct platform_device *pdev)
  {
+       component_master_del(&pdev->dev, &sun4i_drv_master_ops);
+
         return 0;
  }
  
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c

index 0fa5034b9f9e05bc0f8511d6f3c6545030699fe6..1a01669b159ab78c0c9b849616bcc7d852738b77 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj);
   * ttm_global_mutex - protecting the global BO state
   */
  DEFINE_MUTEX(ttm_global_mutex);
-struct ttm_bo_global ttm_bo_glob = {
-       .use_count = 0
-};
+unsigned ttm_bo_glob_use_count;
+struct ttm_bo_global ttm_bo_glob;
  
  static struct attribute ttm_bo_count = {
         .name = "bo_count",
@@ -1531,12 +1530,13 @@ static void ttm_bo_global_release(void)
         struct ttm_bo_global *glob = &ttm_bo_glob;
  
         mutex_lock(&ttm_global_mutex);
-       if (--glob->use_count > 0)
+       if (--ttm_bo_glob_use_count > 0)
                 goto out;
  
         kobject_del(&glob->kobj);
         kobject_put(&glob->kobj);
         ttm_mem_global_release(&ttm_mem_glob);
+       memset(glob, 0, sizeof(*glob));
  out:
         mutex_unlock(&ttm_global_mutex);
  }
@@ -1548,7 +1548,7 @@ static int ttm_bo_global_init(void)
         unsigned i;
  
         mutex_lock(&ttm_global_mutex);
-       if (++glob->use_count > 1)
+       if (++ttm_bo_glob_use_count > 1)
                 goto out;
  
         ret = ttm_mem_global_init(&ttm_mem_glob);
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c

index f1567c353b543a3376c6b64ab5fb6c4550f91b23..9a0909decb3668ee1e56a729c7664f3f01a33a72 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -461,8 +461,8 @@ out_no_zone:
  
  void ttm_mem_global_release(struct ttm_mem_global *glob)
  {
-       unsigned int i;
         struct ttm_mem_zone *zone;
+       unsigned int i;
  
         /* let the page allocator first stop the shrink work. */
         ttm_page_alloc_fini();
@@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
                 zone = glob->zones[i];
                 kobject_del(&zone->kobj);
                 kobject_put(&zone->kobj);
-                       }
+       }
         kobject_del(&glob->kobj);
         kobject_put(&glob->kobj);
+       memset(glob, 0, sizeof(*glob));
  }
  
  static void ttm_check_swapping(struct ttm_mem_global *glob)
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c

index 730008d3da761e2eb37d9821c518672567a6e1c6..1baa10e9448472510006b7390e3e574841c0163a 100644 (file)
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -1042,7 +1042,7 @@ static void
  vc4_crtc_reset(struct drm_crtc *crtc)
  {
         if (crtc->state)
-               __drm_atomic_helper_crtc_destroy_state(crtc->state);
+               vc4_crtc_destroy_state(crtc, crtc->state);
  
         crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
         if (crtc->state)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c

index b996ac1d4fcc9cb3ca1ff86cad8be0c364c80c30..af92964b6889dd0dbfaadac5558cb27ee78b3d56 100644 (file)
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -205,10 +205,14 @@ static struct drm_driver driver = {
  #if defined(CONFIG_DEBUG_FS)
         .debugfs_init = virtio_gpu_debugfs_init,
  #endif
+       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
         .gem_prime_export = drm_gem_prime_export,
         .gem_prime_import = drm_gem_prime_import,
         .gem_prime_pin = virtgpu_gem_prime_pin,
         .gem_prime_unpin = virtgpu_gem_prime_unpin,
+       .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table,
+       .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
         .gem_prime_vmap = virtgpu_gem_prime_vmap,
         .gem_prime_vunmap = virtgpu_gem_prime_vunmap,
         .gem_prime_mmap = virtgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h

index 3238fdf58eb480ed9447d0639aaded9a88d28dcc..d577cb76f5ad6b66d26124284159c82706f44699 100644 (file)
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -354,6 +354,10 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait);
  /* virtgpu_prime.c */
  int virtgpu_gem_prime_pin(struct drm_gem_object *obj);
  void virtgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+       struct drm_device *dev, struct dma_buf_attachment *attach,
+       struct sg_table *sgt);
  void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
  void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
  int virtgpu_gem_prime_mmap(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c

index c59ec34c80a5df2c6b3a91f7ec73cd05f85445ad..eb51a78e11991c01cce73d34cf74907cf9202764 100644 (file)
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -39,6 +39,18 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj)
         WARN_ONCE(1, "not implemented");
  }
  
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+       return ERR_PTR(-ENODEV);
+}
+
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+       struct drm_device *dev, struct dma_buf_attachment *attach,
+       struct sg_table *table)
+{
+       return ERR_PTR(-ENODEV);
+}
+
  void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj)
  {
         struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c

index 6165fe2c4504de07d0626c92892293db75d7354c..1bfa353d995cf5bb7ca4c4d1ce8a46ba686e55d6 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -545,30 +545,14 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv)
         dev_priv->initial_height = height;
  }
  
-/**
- * vmw_assume_iommu - Figure out whether coherent dma-remapping might be
- * taking place.
- * @dev: Pointer to the struct drm_device.
- *
- * Return: true if iommu present, false otherwise.
- */
-static bool vmw_assume_iommu(struct drm_device *dev)
-{
-       const struct dma_map_ops *ops = get_dma_ops(dev->dev);
-
-       return !dma_is_direct(ops) && ops &&
-               ops->map_page != dma_direct_map_page;
-}
-
  /**
   * vmw_dma_select_mode - Determine how DMA mappings should be set up for this
   * system.
   *
   * @dev_priv: Pointer to a struct vmw_private
   *
- * This functions tries to determine the IOMMU setup and what actions
- * need to be taken by the driver to make system pages visible to the
- * device.
+ * This function tries to determine what actions need to be taken by the
+ * driver to make system pages visible to the device.
   * If this function decides that DMA is not possible, it returns -EINVAL.
   * The driver may then try to disable features of the device that require
   * DMA.
@@ -578,23 +562,16 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
         static const char *names[vmw_dma_map_max] = {
                 [vmw_dma_phys] = "Using physical TTM page addresses.",
                 [vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
-               [vmw_dma_map_populate] = "Keeping DMA mappings.",
+               [vmw_dma_map_populate] = "Caching DMA mappings.",
                 [vmw_dma_map_bind] = "Giving up DMA mappings early."};
  
         if (vmw_force_coherent)
                 dev_priv->map_mode = vmw_dma_alloc_coherent;
-       else if (vmw_assume_iommu(dev_priv->dev))
-               dev_priv->map_mode = vmw_dma_map_populate;
-       else if (!vmw_force_iommu)
-               dev_priv->map_mode = vmw_dma_phys;
-       else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl())
-               dev_priv->map_mode = vmw_dma_alloc_coherent;
+       else if (vmw_restrict_iommu)
+               dev_priv->map_mode = vmw_dma_map_bind;
         else
                 dev_priv->map_mode = vmw_dma_map_populate;
  
-       if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu)
-               dev_priv->map_mode = vmw_dma_map_bind;
-
         /* No TTM coherent page pool? FIXME: Ask TTM instead! */
          if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) &&
             (dev_priv->map_mode == vmw_dma_alloc_coherent))
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c

index 9b2b3fa479c462d1c4d7b8b02180ad22eb20a715..5e44ff1f20851a16afdb42bfdaf73caab97ebff5 100644 (file)
--- a/drivers/gpu/ipu-v3/ipu-dp.c
+++ b/drivers/gpu/ipu-v3/ipu-dp.c
@@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp,
                 ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
                                 DP_COM_CONF_CSC_DEF_BOTH);
         } else {
-               if (flow->foreground.in_cs == flow->out_cs)
+               if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN ||
+                   flow->foreground.in_cs == flow->out_cs)
                         /*
                          * foreground identical to output, apply color
                          * conversion on background
@@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
         struct ipu_dp_priv *priv = flow->priv;
         u32 reg, csc;
  
+       dp->in_cs = IPUV3_COLORSPACE_UNKNOWN;
+
         if (!dp->foreground)
                 return;
  
@@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
  
         reg = readl(flow->base + DP_COM_CONF);
         csc = reg & DP_COM_CONF_CSC_DEF_MASK;
-       if (csc == DP_COM_CONF_CSC_DEF_FG)
-               reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+       reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+       if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG)
+               reg |= DP_COM_CONF_CSC_DEF_BG;
  
         reg &= ~DP_COM_CONF_FG_EN;
         writel(reg, flow->base + DP_COM_CONF);
@@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base)
         mutex_init(&priv->mutex);
  
         for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
+               priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN;
+               priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN;
                 priv->flow[i].foreground.foreground = true;
                 priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
                 priv->flow[i].priv = priv;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h

index ea0bc6885517b30da5c7a20d6e3805c582f3824b..32cc8fe7902f13dd5f6289ca36f5a8aca84a172b 100644 (file)
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -160,6 +160,7 @@ struct ib_uverbs_file {
  
         struct mutex umap_lock;
         struct list_head umaps;
+       struct page *disassociate_page;
  
         struct idr              idr;
         /* spinlock protects write access to idr */
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c

index f2e7ffe6fc546612f62da9cde853b9c1bf37d8bb..7843e89235c34b4e6831dedaebb7d2170986099d 100644 (file)
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
                 kref_put(&file->async_file->ref,
                          ib_uverbs_release_async_event_file);
         put_device(&file->device->dev);
+
+       if (file->disassociate_page)
+               __free_pages(file->disassociate_page, 0);
         kfree(file);
  }
  
@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma)
         kfree(priv);
  }
  
+/*
+ * Once zap_vma_ptes() has been called, touches to the VMA come here: read-only
+ * VMAs get the system zero page, others a zeroed per-ufile dummy page.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+       struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+       struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+       vm_fault_t ret = 0;
+
+       if (!priv)
+               return VM_FAULT_SIGBUS;
+
+       /* Read only pages can just use the system zero page. */
+       if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+               vmf->page = ZERO_PAGE(vmf->address);
+               get_page(vmf->page);
+               return 0;
+       }
+
+       mutex_lock(&ufile->umap_lock);
+       if (!ufile->disassociate_page)
+               ufile->disassociate_page =
+                       alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+       if (ufile->disassociate_page) {
+               /*
+                * This VMA is forced to always be shared so this doesn't have
+                * to worry about COW.
+                */
+               vmf->page = ufile->disassociate_page;
+               get_page(vmf->page);
+       } else {
+               ret = VM_FAULT_SIGBUS;
+       }
+       mutex_unlock(&ufile->umap_lock);
+
+       return ret;
+}
+
  static const struct vm_operations_struct rdma_umap_ops = {
         .open = rdma_umap_open,
         .close = rdma_umap_close,
+       .fault = rdma_umap_fault,
  };
  
  static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
         struct ib_uverbs_file *ufile = ucontext->ufile;
         struct rdma_umap_priv *priv;
  
+       if (!(vma->vm_flags & VM_SHARED))
+               return ERR_PTR(-EINVAL);
+
         if (vma->vm_end - vma->vm_start != size)
                 return ERR_PTR(-EINVAL);
  
@@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
                  * at a time to get the lock ordering right. Typically there
                  * will only be one mm, so no big deal.
                  */
-               down_write(&mm->mmap_sem);
+               down_read(&mm->mmap_sem);
                 if (!mmget_still_valid(mm))
                         goto skip_mm;
                 mutex_lock(&ufile->umap_lock);
@@ -1006,11 +1053,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
  
                         zap_vma_ptes(vma, vma->vm_start,
                                      vma->vm_end - vma->vm_start);
-                       vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
                 }
                 mutex_unlock(&ufile->umap_lock);
         skip_mm:
-               up_write(&mm->mmap_sem);
+               up_read(&mm->mmap_sem);
                 mmput(mm);
         }
  }
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c

index 66cdf625534ff8901a6efdf90295eaee3cc0145f..60cf9f03e9414e98e97f325cc7f0b937af36bcc0 100644 (file)
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
  
  static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
  {
-       if (attr->qp_type == IB_QPT_XRC_TGT)
+       if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
                 return 0;
  
         return 1;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c

index 531ff20b32ade6ccb4d0b3533bc1f8ceceed1b26..d3dd290ae1b176d609c14937c4ffedc2fe992a20 100644 (file)
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                 if (MLX5_CAP_GEN(mdev, qp_packet_based))
                         resp.flags |=
                                 MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+               resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
         }
  
         if (field_avail(typeof(resp), sw_parsing_caps,
@@ -2066,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
  
         if (vma->vm_flags & VM_WRITE)
                 return -EPERM;
+       vma->vm_flags &= ~VM_MAYWRITE;
  
         if (!dev->mdev->clock_info_page)
                 return -EOPNOTSUPP;
@@ -2231,19 +2234,18 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
  
                 if (vma->vm_flags & VM_WRITE)
                         return -EPERM;
+               vma->vm_flags &= ~VM_MAYWRITE;
  
                 /* Don't expose to user-space information it shouldn't have */
                 if (PAGE_SIZE > 4096)
                         return -EOPNOTSUPP;
  
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
                 pfn = (dev->mdev->iseg_base +
                        offsetof(struct mlx5_init_seg, internal_timer_h)) >>
                         PAGE_SHIFT;
-               if (io_remap_pfn_range(vma, vma->vm_start, pfn,
-                                      PAGE_SIZE, vma->vm_page_prot))
-                       return -EAGAIN;
-               break;
+               return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+                                        PAGE_SIZE,
+                                        pgprot_noncached(vma->vm_page_prot));
         case MLX5_IB_MMAP_CLOCK_INFO:
                 return mlx5_ib_mmap_clock_info_page(dev, vma, context);
  
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c

index 7cd006da1daef05cd335dc77cda8281e179630c4..8870c350fda0b109cc4cb98c9787fd0452821865 100644 (file)
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
  
         rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
  
-       if (rcqe_sz == 128) {
-               MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+       if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
+               if (rcqe_sz == 128)
+                       MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+
                 return;
         }
  
-       if (init_attr->qp_type != MLX5_IB_QPT_DCT)
-               MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+       MLX5_SET(qpc, qpc, cs_res,
+                rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+                                 MLX5_RES_SCAT_DATA32_CQE);
  }
  
  static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c

index 7287950434969243335e904aa8045d7588ca2d2f..0bb6e39dd03a730783249586d409be6477a6d317 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
         if (unlikely(mapped_segs == mr->mr.max_segs))
                 return -ENOMEM;
  
-       if (mr->mr.length == 0) {
-               mr->mr.user_base = addr;
-               mr->mr.iova = addr;
-       }
-
         m = mapped_segs / RVT_SEGSZ;
         n = mapped_segs % RVT_SEGSZ;
         mr->mr.map[m]->segs[n].vaddr = (void *)addr;
@@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
   * @sg_nents: number of entries in sg
   * @sg_offset: offset in bytes into sg
   *
+ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
+ *
   * Return: number of sg elements mapped to the memory region
   */
  int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                   int sg_nents, unsigned int *sg_offset)
  {
         struct rvt_mr *mr = to_imr(ibmr);
+       int ret;
  
         mr->mr.length = 0;
         mr->mr.page_shift = PAGE_SHIFT;
-       return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
-                             rvt_set_page);
+       ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
+       mr->mr.user_base = ibmr->iova;
+       mr->mr.iova = ibmr->iova;
+       mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
+       mr->mr.length = (size_t)ibmr->length;
+       return ret;
  }
  
  /**
@@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
         ibmr->rkey = key;
         mr->mr.lkey = key;
         mr->mr.access_flags = access;
+       mr->mr.iova = ibmr->iova;
         atomic_set(&mr->mr.lkey_invalid, 0);
  
         return 0;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig

index a878351f16439859e3931ec71506c705eb9a6f6e..52d7f55fca329c09c9788cb8bcd91509a5f07426 100644 (file)
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -420,7 +420,7 @@ config KEYBOARD_MPR121
  
  config KEYBOARD_SNVS_PWRKEY
         tristate "IMX SNVS Power Key Driver"
-       depends on SOC_IMX6SX || SOC_IMX7D
+       depends on ARCH_MXC || COMPILE_TEST
         depends on OF
         help
           This is the snvs powerkey driver for the Freescale i.MX application
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c

index fc3ab93b7aea454475ee324eecee91470c4a9dc3..7fb358f961957507969db706c780459b937d2ba0 100644 (file)
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
  
         error = rmi_register_function(fn);
         if (error)
-               goto err_put_fn;
+               return error;
  
         if (pdt->function_number == 0x01)
                 data->f01_container = fn;
@@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
         list_add_tail(&fn->node, &data->function_list);
  
         return RMI_SCAN_CONTINUE;
-
-err_put_fn:
-       put_device(&fn->dev);
-       return error;
  }
  
  void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c

index df64d6aed4f7e10b8eb78eb78619a15d7bcaaf56..93901ebd122a504e7e96c35a17c100ae1ea607e3 100644 (file)
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn)
         }
  
         rc = f11_write_control_regs(fn, &f11->sens_query,
-                          &f11->dev_controls, fn->fd.query_base_addr);
+                          &f11->dev_controls, fn->fd.control_base_addr);
         if (rc)
                 dev_warn(&fn->dev, "Failed to write control registers\n");
  
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig

index 6f07f3b21816c64f40e98c84637fdad6bf53ce52..15b831113dedb0ae538bc5bf9dbdd3d174450901 100644 (file)
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -359,6 +359,31 @@ config ARM_SMMU
           Say Y here if your SoC includes an IOMMU device implementing
           the ARM SMMU architecture.
  
+config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
+       bool "Default to disabling bypass on ARM SMMU v1 and v2"
+       depends on ARM_SMMU
+       default y
+       help
+         Say Y here to (by default) disable bypass streams such that
+         incoming transactions from devices that are not attached to
+         an iommu domain will report an abort back to the device and
+         will not be allowed to pass through the SMMU.
+
+         Any old kernels that existed before this KConfig was
+         introduced would default to _allowing_ bypass (AKA the
+         equivalent of NO for this config).  However the default for
+         this option is YES because the old behavior is insecure.
+
+         There are few reasons to allow unmatched stream bypass, and
+         even fewer good ones.  If saying YES here breaks your board
+         you should work on fixing your board.  This KConfig option
+         is expected to be removed in the future and we'll simply
+         hardcode the bypass disable in the code.
+
+         NOTE: the kernel command line parameter
+         'arm-smmu.disable_bypass' will continue to override this
+         config.
+
  config ARM_SMMU_V3
         bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
         depends on ARM64
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c

index f7cdd2ab7f11f6cba22003d4cf71576b4bc77b72..fde16c5b0a70162f3c2067d16a717e45d34d051c 100644 (file)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1723,31 +1723,6 @@ static void dma_ops_free_iova(struct dma_ops_domain *dma_dom,
   *
   ****************************************************************************/
  
-/*
- * This function adds a protection domain to the global protection domain list
- */
-static void add_domain_to_list(struct protection_domain *domain)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-       list_add(&domain->list, &amd_iommu_pd_list);
-       spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-/*
- * This function removes a protection domain to the global
- * protection domain list
- */
-static void del_domain_from_list(struct protection_domain *domain)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-       list_del(&domain->list);
-       spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
  static u16 domain_id_alloc(void)
  {
         int id;
@@ -1838,8 +1813,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
         if (!dom)
                 return;
  
-       del_domain_from_list(&dom->domain);
-
         put_iova_domain(&dom->iovad);
  
         free_pagetable(&dom->domain);
@@ -1880,8 +1853,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
         /* Initialize reserved ranges */
         copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
  
-       add_domain_to_list(&dma_dom->domain);
-
         return dma_dom;
  
  free_dma_dom:
@@ -2122,23 +2093,6 @@ out_err:
         return ret;
  }
  
-/* FIXME: Move this to PCI code */
-#define PCI_PRI_TLP_OFF                (1 << 15)
-
-static bool pci_pri_tlp_required(struct pci_dev *pdev)
-{
-       u16 status;
-       int pos;
-
-       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
-       if (!pos)
-               return false;
-
-       pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
-
-       return (status & PCI_PRI_TLP_OFF) ? true : false;
-}
-
  /*
   * If a device is not yet associated with a domain, this function makes the
   * device visible in the domain
@@ -2167,7 +2121,7 @@ static int attach_device(struct device *dev,
  
                         dev_data->ats.enabled = true;
                         dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
-                       dev_data->pri_tlp     = pci_pri_tlp_required(pdev);
+                       dev_data->pri_tlp     = pci_prg_resp_pasid_required(pdev);
                 }
         } else if (amd_iommu_iotlb_sup &&
                    pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
@@ -2897,8 +2851,6 @@ static void protection_domain_free(struct protection_domain *domain)
         if (!domain)
                 return;
  
-       del_domain_from_list(domain);
-
         if (domain->id)
                 domain_id_free(domain->id);
  
@@ -2928,8 +2880,6 @@ static struct protection_domain *protection_domain_alloc(void)
         if (protection_domain_init(domain))
                 goto out_err;
  
-       add_domain_to_list(domain);
-
         return domain;
  
  out_err:
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c

index ff40ba758cf365e89ddeb2270971e1536554b817..f977df90d2a491274da7c631d5ef7c4300d56a5f 100644 (file)
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -188,12 +188,6 @@ static bool amd_iommu_pc_present __read_mostly;
  
  bool amd_iommu_force_isolation __read_mostly;
  
-/*
- * List of protection domains - used during resume
- */
-LIST_HEAD(amd_iommu_pd_list);
-spinlock_t amd_iommu_pd_lock;
-
  /*
   * Pointer to the device table which is shared by all AMD IOMMUs
   * it is indexed by the PCI device id or the HT unit id and contains
@@ -2526,8 +2520,6 @@ static int __init early_amd_iommu_init(void)
          */
         __set_bit(0, amd_iommu_pd_alloc_bitmap);
  
-       spin_lock_init(&amd_iommu_pd_lock);
-
         /*
          * now the data structures are allocated and basically initialized
          * start the real acpi table scan
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h

index 87965e4d964771bd2352d6254bba299f43734107..85c488b8daea542207f83f182b817f4b4c338e71 100644 (file)
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -674,12 +674,6 @@ extern struct list_head amd_iommu_list;
   */
  extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
  
-/*
- * Declarations for the global list of all protection domains
- */
-extern spinlock_t amd_iommu_pd_lock;
-extern struct list_head amd_iommu_pd_list;
-
  /*
   * Structure defining one entry in the device table
   */
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h

index a1226e4ab5f89716ec50d8b8f64594ed3c36453f..e9132a926761dfb6a8a699eca1fafa6715f36824 100644 (file)
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
  #define CBAR_IRPTNDX_SHIFT             24
  #define CBAR_IRPTNDX_MASK              0xff
  
+#define ARM_SMMU_GR1_CBFRSYNRA(n)      (0x400 + ((n) << 2))
+
  #define ARM_SMMU_GR1_CBA2R(n)          (0x800 + ((n) << 2))
  #define CBA2R_RW64_32BIT               (0 << 0)
  #define CBA2R_RW64_64BIT               (1 << 0)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

index d3880010c6cfc8c073789807d54cad1fc64f8ad1..4d5a694f02c2bb3159d2e7119a474e35fc384a96 100644 (file)
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -29,6 +29,7 @@
  #include <linux/of_iommu.h>
  #include <linux/of_platform.h>
  #include <linux/pci.h>
+#include <linux/pci-ats.h>
  #include <linux/platform_device.h>
  
  #include <linux/amba/bus.h>
@@ -86,6 +87,7 @@
  #define IDR5_VAX_52_BIT                        1
  
  #define ARM_SMMU_CR0                   0x20
+#define CR0_ATSCHK                     (1 << 4)
  #define CR0_CMDQEN                     (1 << 3)
  #define CR0_EVTQEN                     (1 << 2)
  #define CR0_PRIQEN                     (1 << 1)
@@ -294,6 +296,7 @@
  #define CMDQ_ERR_CERROR_NONE_IDX       0
  #define CMDQ_ERR_CERROR_ILL_IDX                1
  #define CMDQ_ERR_CERROR_ABT_IDX                2
+#define CMDQ_ERR_CERROR_ATC_INV_IDX    3
  
  #define CMDQ_0_OP                      GENMASK_ULL(7, 0)
  #define CMDQ_0_SSV                     (1UL << 11)
@@ -312,6 +315,12 @@
  #define CMDQ_TLBI_1_VA_MASK            GENMASK_ULL(63, 12)
  #define CMDQ_TLBI_1_IPA_MASK           GENMASK_ULL(51, 12)
  
+#define CMDQ_ATC_0_SSID                        GENMASK_ULL(31, 12)
+#define CMDQ_ATC_0_SID                 GENMASK_ULL(63, 32)
+#define CMDQ_ATC_0_GLOBAL              (1UL << 9)
+#define CMDQ_ATC_1_SIZE                        GENMASK_ULL(5, 0)
+#define CMDQ_ATC_1_ADDR_MASK           GENMASK_ULL(63, 12)
+
  #define CMDQ_PRI_0_SSID                        GENMASK_ULL(31, 12)
  #define CMDQ_PRI_0_SID                 GENMASK_ULL(63, 32)
  #define CMDQ_PRI_1_GRPID               GENMASK_ULL(8, 0)
@@ -433,6 +442,16 @@ struct arm_smmu_cmdq_ent {
                         u64                     addr;
                 } tlbi;
  
+               #define CMDQ_OP_ATC_INV         0x40
+               #define ATC_INV_SIZE_ALL        52
+               struct {
+                       u32                     sid;
+                       u32                     ssid;
+                       u64                     addr;
+                       u8                      size;
+                       bool                    global;
+               } atc;
+
                 #define CMDQ_OP_PRI_RESP        0x41
                 struct {
                         u32                     sid;
@@ -505,19 +524,6 @@ struct arm_smmu_s2_cfg {
         u64                             vtcr;
  };
  
-struct arm_smmu_strtab_ent {
-       /*
-        * An STE is "assigned" if the master emitting the corresponding SID
-        * is attached to a domain. The behaviour of an unassigned STE is
-        * determined by the disable_bypass parameter, whereas an assigned
-        * STE behaves according to s1_cfg/s2_cfg, which themselves are
-        * configured according to the domain type.
-        */
-       bool                            assigned;
-       struct arm_smmu_s1_cfg          *s1_cfg;
-       struct arm_smmu_s2_cfg          *s2_cfg;
-};
-
  struct arm_smmu_strtab_cfg {
         __le64                          *strtab;
         dma_addr_t                      strtab_dma;
@@ -591,9 +597,14 @@ struct arm_smmu_device {
  };
  
  /* SMMU private data for each master */
-struct arm_smmu_master_data {
+struct arm_smmu_master {
         struct arm_smmu_device          *smmu;
-       struct arm_smmu_strtab_ent      ste;
+       struct device                   *dev;
+       struct arm_smmu_domain          *domain;
+       struct list_head                domain_head;
+       u32                             *sids;
+       unsigned int                    num_sids;
+       bool                            ats_enabled             :1;
  };
  
  /* SMMU private data for an IOMMU domain */
@@ -618,6 +629,9 @@ struct arm_smmu_domain {
         };
  
         struct iommu_domain             domain;
+
+       struct list_head                devices;
+       spinlock_t                      devices_lock;
  };
  
  struct arm_smmu_option_prop {
@@ -820,6 +834,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
         case CMDQ_OP_TLBI_S12_VMALL:
                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
                 break;
+       case CMDQ_OP_ATC_INV:
+               cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+               cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
+               cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
+               cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
+               cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
+               cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
+               break;
         case CMDQ_OP_PRI_RESP:
                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
@@ -864,6 +886,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
+               [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
         };
  
         int i;
@@ -883,6 +906,14 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                 dev_err(smmu->dev, "retrying command fetch\n");
         case CMDQ_ERR_CERROR_NONE_IDX:
                 return;
+       case CMDQ_ERR_CERROR_ATC_INV_IDX:
+               /*
+                * ATC Invalidation Completion timeout. CONS is still pointing
+                * at the CMD_SYNC. Attempt to complete other pending commands
+                * by repeating the CMD_SYNC, though we might well end up back
+                * here since the ATC invalidation may still be pending.
+                */
+               return;
         case CMDQ_ERR_CERROR_ILL_IDX:
                 /* Fallthrough */
         default:
@@ -999,7 +1030,7 @@ static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
         return ret;
  }
  
-static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
  {
         int ret;
         bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
@@ -1009,6 +1040,7 @@ static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
                   : __arm_smmu_cmdq_issue_sync(smmu);
         if (ret)
                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+       return ret;
  }
  
  /* Context descriptor manipulation functions */
@@ -1025,7 +1057,6 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
         val |= ARM_SMMU_TCR2CD(tcr, IPS);
-       val |= ARM_SMMU_TCR2CD(tcr, TBI0);
  
         return val;
  }
@@ -1085,8 +1116,8 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
         arm_smmu_cmdq_issue_sync(smmu);
  }
  
-static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
-                                     __le64 *dst, struct arm_smmu_strtab_ent *ste)
+static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
+                                     __le64 *dst)
  {
         /*
          * This is hideously complicated, but we only really care about
@@ -1106,6 +1137,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
          */
         u64 val = le64_to_cpu(dst[0]);
         bool ste_live = false;
+       struct arm_smmu_device *smmu = NULL;
+       struct arm_smmu_s1_cfg *s1_cfg = NULL;
+       struct arm_smmu_s2_cfg *s2_cfg = NULL;
+       struct arm_smmu_domain *smmu_domain = NULL;
         struct arm_smmu_cmdq_ent prefetch_cmd = {
                 .opcode         = CMDQ_OP_PREFETCH_CFG,
                 .prefetch       = {
@@ -1113,6 +1148,25 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                 },
         };
  
+       if (master) {
+               smmu_domain = master->domain;
+               smmu = master->smmu;
+       }
+
+       if (smmu_domain) {
+               switch (smmu_domain->stage) {
+               case ARM_SMMU_DOMAIN_S1:
+                       s1_cfg = &smmu_domain->s1_cfg;
+                       break;
+               case ARM_SMMU_DOMAIN_S2:
+               case ARM_SMMU_DOMAIN_NESTED:
+                       s2_cfg = &smmu_domain->s2_cfg;
+                       break;
+               default:
+                       break;
+               }
+       }
+
         if (val & STRTAB_STE_0_V) {
                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
                 case STRTAB_STE_0_CFG_BYPASS:
@@ -1133,8 +1187,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
         val = STRTAB_STE_0_V;
  
         /* Bypass/fault */
-       if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
-               if (!ste->assigned && disable_bypass)
+       if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+               if (!smmu_domain && disable_bypass)
                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
                 else
                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
@@ -1152,41 +1206,42 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                 return;
         }
  
-       if (ste->s1_cfg) {
+       if (s1_cfg) {
                 BUG_ON(ste_live);
                 dst[1] = cpu_to_le64(
                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
-#ifdef CONFIG_PCI_ATS
-                        FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
-#endif
                          FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
  
                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
                    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
  
-               val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+               val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
         }
  
-       if (ste->s2_cfg) {
+       if (s2_cfg) {
                 BUG_ON(ste_live);
                 dst[2] = cpu_to_le64(
-                        FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
-                        FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
+                        FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
+                        FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
  #ifdef __BIG_ENDIAN
                          STRTAB_STE_2_S2ENDI |
  #endif
                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
                          STRTAB_STE_2_S2R);
  
-               dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
+               dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
  
                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
         }
  
+       if (master->ats_enabled)
+               dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
+                                                STRTAB_STE_1_EATS_TRANS));
+
         arm_smmu_sync_ste_for_sid(smmu, sid);
         dst[0] = cpu_to_le64(val);
         arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1199,10 +1254,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
  static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
  {
         unsigned int i;
-       struct arm_smmu_strtab_ent ste = { .assigned = false };
  
         for (i = 0; i < nent; ++i) {
-               arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
+               arm_smmu_write_strtab_ent(NULL, -1, strtab);
                 strtab += STRTAB_STE_DWORDS;
         }
  }
@@ -1390,6 +1444,96 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
         return IRQ_WAKE_THREAD;
  }
  
+static void
+arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
+                       struct arm_smmu_cmdq_ent *cmd)
+{
+       size_t log2_span;
+       size_t span_mask;
+       /* ATC invalidates are always on 4096-byte pages */
+       size_t inval_grain_shift = 12;
+       unsigned long page_start, page_end;
+
+       *cmd = (struct arm_smmu_cmdq_ent) {
+               .opcode                 = CMDQ_OP_ATC_INV,
+               .substream_valid        = !!ssid,
+               .atc.ssid               = ssid,
+       };
+
+       if (!size) {
+               cmd->atc.size = ATC_INV_SIZE_ALL;
+               return;
+       }
+
+       page_start      = iova >> inval_grain_shift;
+       page_end        = (iova + size - 1) >> inval_grain_shift;
+
+       /*
+        * In an ATS Invalidate Request, the address must be aligned on the
+        * range size, which must be a power of two number of page sizes. We
+        * thus have to choose between grossly over-invalidating the region, or
+        * splitting the invalidation into multiple commands. For simplicity
+        * we'll go with the first solution, but should refine it in the future
+        * if multiple commands are shown to be more efficient.
+        *
+        * Find the smallest power of two that covers the range. The most
+        * significant differing bit between the start and end addresses,
+        * fls(start ^ end), indicates the required span. For example:
+        *
+        * We want to invalidate pages [8; 11]. This is already the ideal range:
+        *              x = 0b1000 ^ 0b1011 = 0b11
+        *              span = 1 << fls(x) = 4
+        *
+        * To invalidate pages [7; 10], we need to invalidate [0; 15]:
+        *              x = 0b0111 ^ 0b1010 = 0b1101
+        *              span = 1 << fls(x) = 16
+        */
+       log2_span       = fls_long(page_start ^ page_end);
+       span_mask       = (1ULL << log2_span) - 1;
+
+       page_start      &= ~span_mask;
+
+       cmd->atc.addr   = page_start << inval_grain_shift;
+       cmd->atc.size   = log2_span;
+}
+
+static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
+                                  struct arm_smmu_cmdq_ent *cmd)
+{
+       int i;
+
+       if (!master->ats_enabled)
+               return 0;
+
+       for (i = 0; i < master->num_sids; i++) {
+               cmd->atc.sid = master->sids[i];
+               arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
+       }
+
+       return arm_smmu_cmdq_issue_sync(master->smmu);
+}
+
+static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
+                                  int ssid, unsigned long iova, size_t size)
+{
+       int ret = 0;
+       unsigned long flags;
+       struct arm_smmu_cmdq_ent cmd;
+       struct arm_smmu_master *master;
+
+       if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
+               return 0;
+
+       arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
+
+       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+       list_for_each_entry(master, &smmu_domain->devices, domain_head)
+               ret |= arm_smmu_atc_inv_master(master, &cmd);
+       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+       return ret ? -ETIMEDOUT : 0;
+}
+
  /* IO_PGTABLE API */
  static void arm_smmu_tlb_sync(void *cookie)
  {
@@ -1493,6 +1637,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
         }
  
         mutex_init(&smmu_domain->init_mutex);
+       INIT_LIST_HEAD(&smmu_domain->devices);
+       spin_lock_init(&smmu_domain->devices_lock);
+
         return &smmu_domain->domain;
  }
  
@@ -1688,55 +1835,97 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
         return step;
  }
  
-static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
+static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
  {
         int i, j;
-       struct arm_smmu_master_data *master = fwspec->iommu_priv;
         struct arm_smmu_device *smmu = master->smmu;
  
-       for (i = 0; i < fwspec->num_ids; ++i) {
-               u32 sid = fwspec->ids[i];
+       for (i = 0; i < master->num_sids; ++i) {
+               u32 sid = master->sids[i];
                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
  
                 /* Bridged PCI devices may end up with duplicated IDs */
                 for (j = 0; j < i; j++)
-                       if (fwspec->ids[j] == sid)
+                       if (master->sids[j] == sid)
                                 break;
                 if (j < i)
                         continue;
  
-               arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
+               arm_smmu_write_strtab_ent(master, sid, step);
         }
  }
  
-static void arm_smmu_detach_dev(struct device *dev)
+static int arm_smmu_enable_ats(struct arm_smmu_master *master)
  {
-       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-       struct arm_smmu_master_data *master = fwspec->iommu_priv;
+       int ret;
+       size_t stu;
+       struct pci_dev *pdev;
+       struct arm_smmu_device *smmu = master->smmu;
+       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
  
-       master->ste.assigned = false;
-       arm_smmu_install_ste_for_dev(fwspec);
+       if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
+           !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
+               return -ENXIO;
+
+       pdev = to_pci_dev(master->dev);
+       if (pdev->untrusted)
+               return -EPERM;
+
+       /* Smallest Translation Unit: log2 of the smallest supported granule */
+       stu = __ffs(smmu->pgsize_bitmap);
+
+       ret = pci_enable_ats(pdev, stu);
+       if (ret)
+               return ret;
+
+       master->ats_enabled = true;
+       return 0;
+}
+
+static void arm_smmu_disable_ats(struct arm_smmu_master *master)
+{
+       if (!master->ats_enabled || !dev_is_pci(master->dev))
+               return;
+
+       pci_disable_ats(to_pci_dev(master->dev));
+       master->ats_enabled = false;
+}
+
+static void arm_smmu_detach_dev(struct arm_smmu_master *master)
+{
+       unsigned long flags;
+       struct arm_smmu_domain *smmu_domain = master->domain;
+
+       if (!smmu_domain)
+               return;
+
+       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+       list_del(&master->domain_head);
+       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+       master->domain = NULL;
+       arm_smmu_install_ste_for_dev(master);
+
+       /* Disabling ATS invalidates all ATC entries */
+       arm_smmu_disable_ats(master);
  }
  
  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
  {
         int ret = 0;
+       unsigned long flags;
         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
         struct arm_smmu_device *smmu;
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct arm_smmu_master_data *master;
-       struct arm_smmu_strtab_ent *ste;
+       struct arm_smmu_master *master;
  
         if (!fwspec)
                 return -ENOENT;
  
         master = fwspec->iommu_priv;
         smmu = master->smmu;
-       ste = &master->ste;
  
-       /* Already attached to a different domain? */
-       if (ste->assigned)
-               arm_smmu_detach_dev(dev);
+       arm_smmu_detach_dev(master);
  
         mutex_lock(&smmu_domain->init_mutex);
  
@@ -1756,21 +1945,19 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
                 goto out_unlock;
         }
  
-       ste->assigned = true;
+       master->domain = smmu_domain;
  
-       if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
-               ste->s1_cfg = NULL;
-               ste->s2_cfg = NULL;
-       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               ste->s1_cfg = &smmu_domain->s1_cfg;
-               ste->s2_cfg = NULL;
-               arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
-       } else {
-               ste->s1_cfg = NULL;
-               ste->s2_cfg = &smmu_domain->s2_cfg;
-       }
+       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+       list_add(&master->domain_head, &smmu_domain->devices);
+       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
  
-       arm_smmu_install_ste_for_dev(fwspec);
+       if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
+               arm_smmu_enable_ats(master);
+
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+               arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
+
+       arm_smmu_install_ste_for_dev(master);
  out_unlock:
         mutex_unlock(&smmu_domain->init_mutex);
         return ret;
@@ -1790,12 +1977,18 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
  static size_t
  arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
  {
-       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+       int ret;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
  
         if (!ops)
                 return 0;
  
-       return ops->unmap(ops, iova, size);
+       ret = ops->unmap(ops, iova, size);
+       if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size))
+               return 0;
+
+       return ret;
  }
  
  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
@@ -1860,7 +2053,7 @@ static int arm_smmu_add_device(struct device *dev)
  {
         int i, ret;
         struct arm_smmu_device *smmu;
-       struct arm_smmu_master_data *master;
+       struct arm_smmu_master *master;
         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
         struct iommu_group *group;
  
@@ -1882,13 +2075,16 @@ static int arm_smmu_add_device(struct device *dev)
                 if (!master)
                         return -ENOMEM;
  
+               master->dev = dev;
                 master->smmu = smmu;
+               master->sids = fwspec->ids;
+               master->num_sids = fwspec->num_ids;
                 fwspec->iommu_priv = master;
         }
  
         /* Check the SIDs are in range of the SMMU and our stream table */
-       for (i = 0; i < fwspec->num_ids; i++) {
-               u32 sid = fwspec->ids[i];
+       for (i = 0; i < master->num_sids; i++) {
+               u32 sid = master->sids[i];
  
                 if (!arm_smmu_sid_in_range(smmu, sid))
                         return -ERANGE;
@@ -1913,7 +2109,7 @@ static int arm_smmu_add_device(struct device *dev)
  static void arm_smmu_remove_device(struct device *dev)
  {
         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-       struct arm_smmu_master_data *master;
+       struct arm_smmu_master *master;
         struct arm_smmu_device *smmu;
  
         if (!fwspec || fwspec->ops != &arm_smmu_ops)
@@ -1921,8 +2117,7 @@ static void arm_smmu_remove_device(struct device *dev)
  
         master = fwspec->iommu_priv;
         smmu = master->smmu;
-       if (master && master->ste.assigned)
-               arm_smmu_detach_dev(dev);
+       arm_smmu_detach_dev(master);
         iommu_group_remove_device(dev);
         iommu_device_unlink(&smmu->iommu, dev);
         kfree(master);
@@ -2454,13 +2649,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
         /* Clear CR0 and sync (disables SMMU and queue processing) */
         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
         if (reg & CR0_SMMUEN) {
-               if (is_kdump_kernel()) {
-                       arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
-                       arm_smmu_device_disable(smmu);
-                       return -EBUSY;
-               }
-
                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
+               WARN_ON(is_kdump_kernel() && !disable_bypass);
+               arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
         }
  
         ret = arm_smmu_device_disable(smmu);
@@ -2547,12 +2738,24 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
                 }
         }
  
+       if (smmu->features & ARM_SMMU_FEAT_ATS) {
+               enables |= CR0_ATSCHK;
+               ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+                                             ARM_SMMU_CR0ACK);
+               if (ret) {
+                       dev_err(smmu->dev, "failed to enable ATS check\n");
+                       return ret;
+               }
+       }
+
         ret = arm_smmu_setup_irqs(smmu);
         if (ret) {
                 dev_err(smmu->dev, "failed to setup irqs\n");
                 return ret;
         }
  
+       if (is_kdump_kernel())
+               enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
  
         /* Enable the SMMU interface, or ensure bypass */
         if (!bypass || disable_bypass) {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c

index 045d938841640c375f13dbbd3997d5df919a782a..5e54cc0a28b30516b65c2516c666ee3d9c398141 100644 (file)
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -110,7 +110,8 @@ static int force_stage;
  module_param(force_stage, int, S_IRUGO);
  MODULE_PARM_DESC(force_stage,
         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
-static bool disable_bypass;
+static bool disable_bypass =
+       IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
  module_param(disable_bypass, bool, S_IRUGO);
  MODULE_PARM_DESC(disable_bypass,
         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
@@ -569,12 +570,13 @@ static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
  
  static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
  {
-       u32 fsr, fsynr;
+       u32 fsr, fsynr, cbfrsynra;
         unsigned long iova;
         struct iommu_domain *domain = dev;
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
         struct arm_smmu_device *smmu = smmu_domain->smmu;
+       void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
         void __iomem *cb_base;
  
         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -585,10 +587,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
  
         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+       cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
  
         dev_err_ratelimited(smmu->dev,
-       "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-                           fsr, iova, fsynr, cfg->cbndx);
+       "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
+                           fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
  
         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
         return IRQ_HANDLED;
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c

index 9c49300e9fb76ace67cee9930356794234de81d0..6d969a172fbb3491df6b1d38cc6f3b33cd99a4d0 100644 (file)
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -145,7 +145,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
                 for (tmp = dev; tmp; tmp = tmp->bus->self)
                         level++;
  
-       size = sizeof(*info) + level * sizeof(info->path[0]);
+       size = struct_size(info, path, level);
         if (size <= sizeof(dmar_pci_notify_info_buf)) {
                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
         } else {
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c

index 28cb713d728ceef9eb7f37caa746a546617e1dbb..a320bda2c3059978a2b701e111450d28e7698650 100644 (file)
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2341,32 +2341,33 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
  }
  
  static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-                         struct scatterlist *sg, unsigned long phys_pfn,
-                         unsigned long nr_pages, int prot)
-{
-       int ret;
-       struct intel_iommu *iommu;
-
-       /* Do the real mapping first */
-       ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
-       if (ret)
-               return ret;
-
-       /* Notify about the new mapping */
-       if (domain_type_is_vm(domain)) {
-              /* VM typed domains can have more than one IOMMUs */
-              int iommu_id;
-              for_each_domain_iommu(iommu_id, domain) {
-                      iommu = g_iommus[iommu_id];
-                      __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
-              }
-       } else {
-              /* General domains only have one IOMMU */
-              iommu = domain_get_iommu(domain);
-              __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
-       }
+                         struct scatterlist *sg, unsigned long phys_pfn,
+                         unsigned long nr_pages, int prot)
+{
+       int ret;
+       struct intel_iommu *iommu;
+
+       /* Do the real mapping first */
+       ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
+       if (ret)
+               return ret;
+
+       /* Notify about the new mapping */
+       if (domain_type_is_vm(domain)) {
+               /* VM typed domains can have more than one IOMMU */
+               int iommu_id;
+
+               for_each_domain_iommu(iommu_id, domain) {
+                       iommu = g_iommus[iommu_id];
+                       __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
+               }
+       } else {
+               /* General domains only have one IOMMU */
+               iommu = domain_get_iommu(domain);
+               __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
+       }
  
-       return 0;
+       return 0;
  }
  
  static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
@@ -2485,6 +2486,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
         info->domain = domain;
         info->iommu = iommu;
         info->pasid_table = NULL;
+       info->auxd_enabled = 0;
+       INIT_LIST_HEAD(&info->auxiliary_domains);
  
         if (dev && dev_is_pci(dev)) {
                 struct pci_dev *pdev = to_pci_dev(info->dev);
@@ -3412,9 +3415,12 @@ static int __init init_dmars(void)
                 iommu_identity_mapping |= IDENTMAP_ALL;
  
  #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
-       iommu_identity_mapping |= IDENTMAP_GFX;
+       dmar_map_gfx = 0;
  #endif
  
+       if (!dmar_map_gfx)
+               iommu_identity_mapping |= IDENTMAP_GFX;
+
         check_tylersburg_isoch();
  
         if (iommu_identity_mapping) {
@@ -3496,7 +3502,13 @@ domains_done:
  
  #ifdef CONFIG_INTEL_IOMMU_SVM
                 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
+                       /*
+                        * Calling dmar_alloc_hwirq() with dmar_global_lock held
+                        * could cause a lock race condition.
+                        */
+                       up_write(&dmar_global_lock);
                         ret = intel_svm_enable_prq(iommu);
+                       down_write(&dmar_global_lock);
                         if (ret)
                                 goto free_iommu;
                 }
@@ -3606,45 +3618,40 @@ out:
  }
  
  /* Check if the dev needs to go through non-identity map and unmap process.*/
-static int iommu_no_mapping(struct device *dev)
+static bool iommu_need_mapping(struct device *dev)
  {
         int found;
  
         if (iommu_dummy(dev))
-               return 1;
+               return false;
  
         if (!iommu_identity_mapping)
-               return 0;
+               return true;
  
         found = identity_mapping(dev);
         if (found) {
                 if (iommu_should_identity_map(dev, 0))
-                       return 1;
-               else {
-                       /*
-                        * 32 bit DMA is removed from si_domain and fall back
-                        * to non-identity mapping.
-                        */
-                       dmar_remove_one_dev_info(dev);
-                       dev_info(dev, "32bit DMA uses non-identity mapping\n");
-                       return 0;
-               }
+                       return false;
+
+               /*
+                * 32 bit DMA is removed from si_domain and falls back to
+                * non-identity mapping.
+                */
+               dmar_remove_one_dev_info(dev);
+               dev_info(dev, "32bit DMA uses non-identity mapping\n");
         } else {
                 /*
                  * In case of a detached 64 bit DMA device from vm, the device
                  * is put into si_domain for identity mapping.
                  */
-               if (iommu_should_identity_map(dev, 0)) {
-                       int ret;
-                       ret = domain_add_dev_info(si_domain, dev);
-                       if (!ret) {
-                               dev_info(dev, "64bit DMA uses identity mapping\n");
-                               return 1;
-                       }
+               if (iommu_should_identity_map(dev, 0) &&
+                   !domain_add_dev_info(si_domain, dev)) {
+                       dev_info(dev, "64bit DMA uses identity mapping\n");
+                       return false;
                 }
         }
  
-       return 0;
+       return true;
  }
  
  static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
@@ -3660,9 +3667,6 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
  
         BUG_ON(dir == DMA_NONE);
  
-       if (iommu_no_mapping(dev))
-               return paddr;
-
         domain = get_valid_domain_for_dev(dev);
         if (!domain)
                 return DMA_MAPPING_ERROR;
@@ -3711,15 +3715,20 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
                                  enum dma_data_direction dir,
                                  unsigned long attrs)
  {
-       return __intel_map_single(dev, page_to_phys(page) + offset, size,
-                                 dir, *dev->dma_mask);
+       if (iommu_need_mapping(dev))
+               return __intel_map_single(dev, page_to_phys(page) + offset,
+                               size, dir, *dev->dma_mask);
+       return dma_direct_map_page(dev, page, offset, size, dir, attrs);
  }
  
  static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
                                      size_t size, enum dma_data_direction dir,
                                      unsigned long attrs)
  {
-       return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
+       if (iommu_need_mapping(dev))
+               return __intel_map_single(dev, phys_addr, size, dir,
+                               *dev->dma_mask);
+       return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
  }
  
  static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3730,9 +3739,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
         unsigned long iova_pfn;
         struct intel_iommu *iommu;
         struct page *freelist;
-
-       if (iommu_no_mapping(dev))
-               return;
+       struct pci_dev *pdev = NULL;
  
         domain = find_domain(dev);
         BUG_ON(!domain);
@@ -3745,11 +3752,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
         start_pfn = mm_to_dma_pfn(iova_pfn);
         last_pfn = start_pfn + nrpages - 1;
  
+       if (dev_is_pci(dev))
+               pdev = to_pci_dev(dev);
+
         dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
  
         freelist = domain_unmap(domain, start_pfn, last_pfn);
  
-       if (intel_iommu_strict) {
+       if (intel_iommu_strict || (pdev && pdev->untrusted)) {
                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
                                       nrpages, !freelist, 0);
                 /* free iova */
@@ -3769,7 +3779,17 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
                              size_t size, enum dma_data_direction dir,
                              unsigned long attrs)
  {
-       intel_unmap(dev, dev_addr, size);
+       if (iommu_need_mapping(dev))
+               intel_unmap(dev, dev_addr, size);
+       else
+               dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
+               size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+       if (iommu_need_mapping(dev))
+               intel_unmap(dev, dev_addr, size);
  }
  
  static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3779,28 +3799,17 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
         struct page *page = NULL;
         int order;
  
+       if (!iommu_need_mapping(dev))
+               return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+
         size = PAGE_ALIGN(size);
         order = get_order(size);
  
-       if (!iommu_no_mapping(dev))
-               flags &= ~(GFP_DMA | GFP_DMA32);
-       else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
-               if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
-                       flags |= GFP_DMA;
-               else
-                       flags |= GFP_DMA32;
-       }
-
         if (gfpflags_allow_blocking(flags)) {
                 unsigned int count = size >> PAGE_SHIFT;
  
                 page = dma_alloc_from_contiguous(dev, count, order,
                                                  flags & __GFP_NOWARN);
-               if (page && iommu_no_mapping(dev) &&
-                   page_to_phys(page) + size > dev->coherent_dma_mask) {
-                       dma_release_from_contiguous(dev, page, count);
-                       page = NULL;
-               }
         }
  
         if (!page)
@@ -3826,6 +3835,9 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
         int order;
         struct page *page = virt_to_page(vaddr);
  
+       if (!iommu_need_mapping(dev))
+               return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+
         size = PAGE_ALIGN(size);
         order = get_order(size);
  
@@ -3843,6 +3855,9 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
         struct scatterlist *sg;
         int i;
  
+       if (!iommu_need_mapping(dev))
+               return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
+
         for_each_sg(sglist, sg, nelems, i) {
                 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
         }
@@ -3850,20 +3865,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
         intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
  }
  
-static int intel_nontranslate_map_sg(struct device *hddev,
-       struct scatterlist *sglist, int nelems, int dir)
-{
-       int i;
-       struct scatterlist *sg;
-
-       for_each_sg(sglist, sg, nelems, i) {
-               BUG_ON(!sg_page(sg));
-               sg->dma_address = sg_phys(sg);
-               sg->dma_length = sg->length;
-       }
-       return nelems;
-}
-
  static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
                         enum dma_data_direction dir, unsigned long attrs)
  {
@@ -3878,8 +3879,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
         struct intel_iommu *iommu;
  
         BUG_ON(dir == DMA_NONE);
-       if (iommu_no_mapping(dev))
-               return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
+       if (!iommu_need_mapping(dev))
+               return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
  
         domain = get_valid_domain_for_dev(dev);
         if (!domain)
@@ -3929,7 +3930,7 @@ static const struct dma_map_ops intel_dma_ops = {
         .map_page = intel_map_page,
         .unmap_page = intel_unmap_page,
         .map_resource = intel_map_resource,
-       .unmap_resource = intel_unmap_page,
+       .unmap_resource = intel_unmap_resource,
         .dma_supported = dma_direct_supported,
  };
  
@@ -4055,9 +4056,7 @@ static void __init init_no_remapping_devices(void)
  
                 /* This IOMMU has *only* gfx devices. Either bypass it or
                    set the gfx_mapped flag, as appropriate */
-               if (dmar_map_gfx) {
-                       intel_iommu_gfx_mapped = 1;
-               } else {
+               if (!dmar_map_gfx) {
                         drhd->ignored = 1;
                         for_each_active_dev_scope(drhd->devices,
                                                   drhd->devices_cnt, i, dev)
@@ -4086,7 +4085,7 @@ static int init_iommu_hw(void)
                                 iommu_disable_protect_mem_regions(iommu);
                         continue;
                 }
-       
+
                 iommu_flush_write_buffer(iommu);
  
                 iommu_set_root_entry(iommu);
@@ -4896,6 +4895,9 @@ int __init intel_iommu_init(void)
                 goto out_free_reserved_range;
         }
  
+       if (dmar_map_gfx)
+               intel_iommu_gfx_mapped = 1;
+
         init_no_remapping_devices();
  
         ret = init_dmars();
@@ -5065,35 +5067,139 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
         domain_exit(to_dmar_domain(domain));
  }
  
-static int intel_iommu_attach_device(struct iommu_domain *domain,
-                                    struct device *dev)
+/*
+ * Check whether a @domain could be attached to the @dev through the
+ * aux-domain attach/detach APIs.
+ */
+static inline bool
+is_aux_domain(struct device *dev, struct iommu_domain *domain)
  {
-       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
-       struct intel_iommu *iommu;
-       int addr_width;
-       u8 bus, devfn;
+       struct device_domain_info *info = dev->archdata.iommu;
  
-       if (device_is_rmrr_locked(dev)) {
-               dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
-               return -EPERM;
-       }
+       return info && info->auxd_enabled &&
+                       domain->type == IOMMU_DOMAIN_UNMANAGED;
+}
  
-       /* normally dev is not mapped */
-       if (unlikely(domain_context_mapped(dev))) {
-               struct dmar_domain *old_domain;
+static void auxiliary_link_device(struct dmar_domain *domain,
+                                 struct device *dev)
+{
+       struct device_domain_info *info = dev->archdata.iommu;
  
-               old_domain = find_domain(dev);
-               if (old_domain) {
-                       rcu_read_lock();
-                       dmar_remove_one_dev_info(dev);
-                       rcu_read_unlock();
+       assert_spin_locked(&device_domain_lock);
+       if (WARN_ON(!info))
+               return;
  
-                       if (!domain_type_is_vm_or_si(old_domain) &&
-                            list_empty(&old_domain->devices))
-                               domain_exit(old_domain);
+       domain->auxd_refcnt++;
+       list_add(&domain->auxd, &info->auxiliary_domains);
+}
+
+static void auxiliary_unlink_device(struct dmar_domain *domain,
+                                   struct device *dev)
+{
+       struct device_domain_info *info = dev->archdata.iommu;
+
+       assert_spin_locked(&device_domain_lock);
+       if (WARN_ON(!info))
+               return;
+
+       list_del(&domain->auxd);
+       domain->auxd_refcnt--;
+
+       if (!domain->auxd_refcnt && domain->default_pasid > 0)
+               intel_pasid_free_id(domain->default_pasid);
+}
+
+static int aux_domain_add_dev(struct dmar_domain *domain,
+                             struct device *dev)
+{
+       int ret;
+       u8 bus, devfn;
+       unsigned long flags;
+       struct intel_iommu *iommu;
+
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu)
+               return -ENODEV;
+
+       if (domain->default_pasid <= 0) {
+               int pasid;
+
+               pasid = intel_pasid_alloc_id(domain, PASID_MIN,
+                                            pci_max_pasids(to_pci_dev(dev)),
+                                            GFP_KERNEL);
+               if (pasid <= 0) {
+                       pr_err("Can't allocate default pasid\n");
+                       return -ENODEV;
                 }
+               domain->default_pasid = pasid;
         }
  
+       spin_lock_irqsave(&device_domain_lock, flags);
+       /*
+        * iommu->lock must be held to attach domain to iommu and setup the
+        * pasid entry for second level translation.
+        */
+       spin_lock(&iommu->lock);
+       ret = domain_attach_iommu(domain, iommu);
+       if (ret)
+               goto attach_failed;
+
+       /* Setup the PASID entry for mediated devices: */
+       ret = intel_pasid_setup_second_level(iommu, domain, dev,
+                                            domain->default_pasid);
+       if (ret)
+               goto table_failed;
+       spin_unlock(&iommu->lock);
+
+       auxiliary_link_device(domain, dev);
+
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return 0;
+
+table_failed:
+       domain_detach_iommu(domain, iommu);
+attach_failed:
+       spin_unlock(&iommu->lock);
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+       if (!domain->auxd_refcnt && domain->default_pasid > 0)
+               intel_pasid_free_id(domain->default_pasid);
+
+       return ret;
+}
+
+static void aux_domain_remove_dev(struct dmar_domain *domain,
+                                 struct device *dev)
+{
+       struct device_domain_info *info;
+       struct intel_iommu *iommu;
+       unsigned long flags;
+
+       if (!is_aux_domain(dev, &domain->domain))
+               return;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       info = dev->archdata.iommu;
+       iommu = info->iommu;
+
+       auxiliary_unlink_device(domain, dev);
+
+       spin_lock(&iommu->lock);
+       intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+       domain_detach_iommu(domain, iommu);
+       spin_unlock(&iommu->lock);
+
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static int prepare_domain_attach_device(struct iommu_domain *domain,
+                                       struct device *dev)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       struct intel_iommu *iommu;
+       int addr_width;
+       u8 bus, devfn;
+
         iommu = device_to_iommu(dev, &bus, &devfn);
         if (!iommu)
                 return -ENODEV;
@@ -5126,7 +5232,58 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
                 dmar_domain->agaw--;
         }
  
-       return domain_add_dev_info(dmar_domain, dev);
+       return 0;
+}
+
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+                                    struct device *dev)
+{
+       int ret;
+
+       if (device_is_rmrr_locked(dev)) {
+               dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
+               return -EPERM;
+       }
+
+       if (is_aux_domain(dev, domain))
+               return -EPERM;
+
+       /* normally dev is not mapped */
+       if (unlikely(domain_context_mapped(dev))) {
+               struct dmar_domain *old_domain;
+
+               old_domain = find_domain(dev);
+               if (old_domain) {
+                       rcu_read_lock();
+                       dmar_remove_one_dev_info(dev);
+                       rcu_read_unlock();
+
+                       if (!domain_type_is_vm_or_si(old_domain) &&
+                           list_empty(&old_domain->devices))
+                               domain_exit(old_domain);
+               }
+       }
+
+       ret = prepare_domain_attach_device(domain, dev);
+       if (ret)
+               return ret;
+
+       return domain_add_dev_info(to_dmar_domain(domain), dev);
+}
+
+static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
+                                        struct device *dev)
+{
+       int ret;
+
+       if (!is_aux_domain(dev, domain))
+               return -EPERM;
+
+       ret = prepare_domain_attach_device(domain, dev);
+       if (ret)
+               return ret;
+
+       return aux_domain_add_dev(to_dmar_domain(domain), dev);
  }
  
  static void intel_iommu_detach_device(struct iommu_domain *domain,
@@ -5135,6 +5292,12 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
         dmar_remove_one_dev_info(dev);
  }
  
+static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
+                                         struct device *dev)
+{
+       aux_domain_remove_dev(to_dmar_domain(domain), dev);
+}
+
  static int intel_iommu_map(struct iommu_domain *domain,
                            unsigned long iova, phys_addr_t hpa,
                            size_t size, int iommu_prot)
@@ -5223,6 +5386,42 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
         return phys;
  }
  
+static inline bool scalable_mode_support(void)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       bool ret = true;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!sm_supported(iommu)) {
+                       ret = false;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static inline bool iommu_pasid_support(void)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       bool ret = true;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!pasid_supported(iommu)) {
+                       ret = false;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
  static bool intel_iommu_capable(enum iommu_cap cap)
  {
         if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5307,8 +5506,7 @@ static void intel_iommu_put_resv_regions(struct device *dev,
         }
  }
  
-#ifdef CONFIG_INTEL_IOMMU_SVM
-int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
  {
         struct device_domain_info *info;
         struct context_entry *context;
@@ -5317,7 +5515,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
         u64 ctx_lo;
         int ret;
  
-       domain = get_valid_domain_for_dev(sdev->dev);
+       domain = get_valid_domain_for_dev(dev);
         if (!domain)
                 return -EINVAL;
  
@@ -5325,7 +5523,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
         spin_lock(&iommu->lock);
  
         ret = -EINVAL;
-       info = sdev->dev->archdata.iommu;
+       info = dev->archdata.iommu;
         if (!info || !info->pasid_supported)
                 goto out;
  
@@ -5335,14 +5533,13 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
  
         ctx_lo = context[0].lo;
  
-       sdev->did = FLPT_DEFAULT_DID;
-       sdev->sid = PCI_DEVID(info->bus, info->devfn);
-
         if (!(ctx_lo & CONTEXT_PASIDE)) {
                 ctx_lo |= CONTEXT_PASIDE;
                 context[0].lo = ctx_lo;
                 wmb();
-               iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
+               iommu->flush.flush_context(iommu,
+                                          domain->iommu_did[iommu->seq_id],
+                                          PCI_DEVID(info->bus, info->devfn),
                                            DMA_CCMD_MASK_NOBIT,
                                            DMA_CCMD_DEVICE_INVL);
         }
@@ -5351,12 +5548,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
         if (!info->pasid_enabled)
                 iommu_enable_dev_iotlb(info);
  
-       if (info->ats_enabled) {
-               sdev->dev_iotlb = 1;
-               sdev->qdep = info->ats_qdep;
-               if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
-                       sdev->qdep = 0;
-       }
         ret = 0;
  
   out:
@@ -5366,6 +5557,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
         return ret;
  }
  
+#ifdef CONFIG_INTEL_IOMMU_SVM
  struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
  {
         struct intel_iommu *iommu;
@@ -5387,12 +5579,142 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
  }
  #endif /* CONFIG_INTEL_IOMMU_SVM */
  
+static int intel_iommu_enable_auxd(struct device *dev)
+{
+       struct device_domain_info *info;
+       struct intel_iommu *iommu;
+       unsigned long flags;
+       u8 bus, devfn;
+       int ret;
+
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu || dmar_disabled)
+               return -EINVAL;
+
+       if (!sm_supported(iommu) || !pasid_supported(iommu))
+               return -EINVAL;
+
+       ret = intel_iommu_enable_pasid(iommu, dev);
+       if (ret)
+               return -ENODEV;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       info = dev->archdata.iommu;
+       info->auxd_enabled = 1;
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return 0;
+}
+
+static int intel_iommu_disable_auxd(struct device *dev)
+{
+       struct device_domain_info *info;
+       unsigned long flags;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       info = dev->archdata.iommu;
+       if (!WARN_ON(!info))
+               info->auxd_enabled = 0;
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return 0;
+}
+
+/*
+ * A PCI express designated vendor specific extended capability is defined
+ * in the section 3.7 of Intel scalable I/O virtualization technical spec
+ * for system software and tools to detect endpoint devices supporting the
+ * Intel scalable IO virtualization without host driver dependency.
+ *
+ * Returns the address of the matching extended capability structure within
+ * the device's PCI configuration space or 0 if the device does not support
+ * it.
+ */
+static int siov_find_pci_dvsec(struct pci_dev *pdev)
+{
+       int pos;
+       u16 vendor, id;
+
+       pos = pci_find_next_ext_capability(pdev, 0, 0x23);
+       while (pos) {
+               pci_read_config_word(pdev, pos + 4, &vendor);
+               pci_read_config_word(pdev, pos + 8, &id);
+               if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
+                       return pos;
+
+               pos = pci_find_next_ext_capability(pdev, pos, 0x23);
+       }
+
+       return 0;
+}
+
+static bool
+intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
+{
+       if (feat == IOMMU_DEV_FEAT_AUX) {
+               int ret;
+
+               if (!dev_is_pci(dev) || dmar_disabled ||
+                   !scalable_mode_support() || !iommu_pasid_support())
+                       return false;
+
+               ret = pci_pasid_features(to_pci_dev(dev));
+               if (ret < 0)
+                       return false;
+
+               return !!siov_find_pci_dvsec(to_pci_dev(dev));
+       }
+
+       return false;
+}
+
+static int
+intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
+{
+       if (feat == IOMMU_DEV_FEAT_AUX)
+               return intel_iommu_enable_auxd(dev);
+
+       return -ENODEV;
+}
+
+static int
+intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
+{
+       if (feat == IOMMU_DEV_FEAT_AUX)
+               return intel_iommu_disable_auxd(dev);
+
+       return -ENODEV;
+}
+
+static bool
+intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+       struct device_domain_info *info = dev->archdata.iommu;
+
+       if (feat == IOMMU_DEV_FEAT_AUX)
+               return scalable_mode_support() && info && info->auxd_enabled;
+
+       return false;
+}
+
+static int
+intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+       return dmar_domain->default_pasid > 0 ?
+                       dmar_domain->default_pasid : -EINVAL;
+}
+
  const struct iommu_ops intel_iommu_ops = {
         .capable                = intel_iommu_capable,
         .domain_alloc           = intel_iommu_domain_alloc,
         .domain_free            = intel_iommu_domain_free,
         .attach_dev             = intel_iommu_attach_device,
         .detach_dev             = intel_iommu_detach_device,
+       .aux_attach_dev         = intel_iommu_aux_attach_device,
+       .aux_detach_dev         = intel_iommu_aux_detach_device,
+       .aux_get_pasid          = intel_iommu_aux_get_pasid,
         .map                    = intel_iommu_map,
         .unmap                  = intel_iommu_unmap,
         .iova_to_phys           = intel_iommu_iova_to_phys,
@@ -5401,6 +5723,10 @@ const struct iommu_ops intel_iommu_ops = {
         .get_resv_regions       = intel_iommu_get_resv_regions,
         .put_resv_regions       = intel_iommu_put_resv_regions,
         .device_group           = pci_device_group,
+       .dev_has_feat           = intel_iommu_dev_has_feat,
+       .dev_feat_enabled       = intel_iommu_dev_feat_enabled,
+       .dev_enable_feat        = intel_iommu_dev_enable_feat,
+       .dev_disable_feat       = intel_iommu_dev_disable_feat,
         .pgsize_bitmap          = INTEL_IOMMU_PGSIZES,
  };
  
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c

index 03b12d2ee2132fe624eeed719dc486d89279f2e4..2fefeafda437b91541f09a3c5618754516fc96b6 100644 (file)
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -154,8 +154,10 @@ int intel_pasid_alloc_table(struct device *dev)
         order = size ? get_order(size) : 0;
         pages = alloc_pages_node(info->iommu->node,
                                  GFP_KERNEL | __GFP_ZERO, order);
-       if (!pages)
+       if (!pages) {
+               kfree(pasid_table);
                 return -ENOMEM;
+       }
  
         pasid_table->table = page_address(pages);
         pasid_table->order = order;
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c

index 3a4b09ae8561d965a6ae709b766bd4fe5862c852..8f87304f915c9b7ce264a8b6af279b5a7215952d 100644 (file)
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -228,6 +228,7 @@ static LIST_HEAD(global_svm_list);
  int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
  {
         struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+       struct device_domain_info *info;
         struct intel_svm_dev *sdev;
         struct intel_svm *svm = NULL;
         struct mm_struct *mm = NULL;
@@ -291,13 +292,29 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
         }
         sdev->dev = dev;
  
-       ret = intel_iommu_enable_pasid(iommu, sdev);
+       ret = intel_iommu_enable_pasid(iommu, dev);
         if (ret || !pasid) {
                 /* If they don't actually want to assign a PASID, this is
                  * just an enabling check/preparation. */
                 kfree(sdev);
                 goto out;
         }
+
+       info = dev->archdata.iommu;
+       if (!info || !info->pasid_supported) {
+               kfree(sdev);
+               goto out;
+       }
+
+       sdev->did = FLPT_DEFAULT_DID;
+       sdev->sid = PCI_DEVID(info->bus, info->devfn);
+       if (info->ats_enabled) {
+               sdev->dev_iotlb = 1;
+               sdev->qdep = info->ats_qdep;
+               if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
+                       sdev->qdep = 0;
+       }
+
         /* Finish the setup now we know we're keeping it */
         sdev->users = 1;
         sdev->ops = ops;
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c

index 2d74641b7f7bc5aa51a524e9a018e84cdb8fac9a..634d8f05901945251018d01a00848c45b25aac83 100644 (file)
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -548,8 +548,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
                 goto out_free_table;
         }
  
-       bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
-                        sizeof(long), GFP_ATOMIC);
+       bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC);
         if (bitmap == NULL) {
                 pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
                 goto out_free_pages;
@@ -616,7 +615,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
         return 0;
  
  out_free_bitmap:
-       kfree(bitmap);
+       bitmap_free(bitmap);
  out_free_pages:
         __free_pages(pages, INTR_REMAP_PAGE_ORDER);
  out_free_table:
@@ -640,7 +639,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
                 }
                 free_pages((unsigned long)iommu->ir_table->base,
                            INTR_REMAP_PAGE_ORDER);
-               kfree(iommu->ir_table->bitmap);
+               bitmap_free(iommu->ir_table->bitmap);
                 kfree(iommu->ir_table);
                 iommu->ir_table = NULL;
         }
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c

index 109de67d5d727c227d3970b2879edd60d6478357..67ee6623f9b2a4d07dcae56dfb99fcf6e4edbb0d 100644 (file)
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -45,10 +45,6 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
  #endif
  static bool iommu_dma_strict __read_mostly = true;
  
-struct iommu_callback_data {
-       const struct iommu_ops *ops;
-};
-
  struct iommu_group {
         struct kobject kobj;
         struct kobject *devices_kobj;
@@ -1217,9 +1213,6 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
  {
         int err;
         struct notifier_block *nb;
-       struct iommu_callback_data cb = {
-               .ops = ops,
-       };
  
         nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
         if (!nb)
@@ -1231,7 +1224,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
         if (err)
                 goto out_free;
  
-       err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+       err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
         if (err)
                 goto out_err;
  
@@ -1240,7 +1233,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
  
  out_err:
         /* Clean up */
-       bus_for_each_dev(bus, NULL, &cb, remove_iommu_group);
+       bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
         bus_unregister_notifier(bus, nb);
  
  out_free:
@@ -2039,3 +2032,203 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
         return 0;
  }
  EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
+
+/*
+ * Per device IOMMU features.
+ */
+bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (ops && ops->dev_has_feat)
+               return ops->dev_has_feat(dev, feat);
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_has_feature);
+
+int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (ops && ops->dev_enable_feat)
+               return ops->dev_enable_feat(dev, feat);
+
+       return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
+
+/*
+ * The device drivers should do the necessary cleanups before calling this.
+ * For example, before disabling the aux-domain feature, the device driver
+ * should detach all aux-domains. Otherwise, this will return -EBUSY.
+ */
+int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (ops && ops->dev_disable_feat)
+               return ops->dev_disable_feat(dev, feat);
+
+       return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
+
+bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (ops && ops->dev_feat_enabled)
+               return ops->dev_feat_enabled(dev, feat);
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
+
+/*
+ * Aux-domain specific attach/detach.
+ *
+ * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns
+ * true. Also, as long as domains are attached to a device through this
+ * interface, any attempt to call iommu_attach_device() should fail
+ * (iommu_detach_device() can't fail, so we fail when trying to re-attach).
+ * This should make us safe against a device being attached to a guest as a
+ * whole while there are still pasid users on it (aux and sva).
+ */
+int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+       int ret = -ENODEV;
+
+       if (domain->ops->aux_attach_dev)
+               ret = domain->ops->aux_attach_dev(domain, dev);
+
+       if (!ret)
+               trace_attach_device_to_domain(dev);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
+
+void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+       if (domain->ops->aux_detach_dev) {
+               domain->ops->aux_detach_dev(domain, dev);
+               trace_detach_device_from_domain(dev);
+       }
+}
+EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
+
+int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+       int ret = -ENODEV;
+
+       if (domain->ops->aux_get_pasid)
+               ret = domain->ops->aux_get_pasid(domain, dev);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
+
+/**
+ * iommu_sva_bind_device() - Bind a process address space to a device
+ * @dev: the device
+ * @mm: the mm to bind, caller must hold a reference to it
+ *
+ * Create a bond between device and address space, allowing the device to access
+ * the mm using the returned PASID. If a bond already exists between @dev and
+ * @mm, it is returned and an additional reference is taken. Caller must call
+ * iommu_sva_unbind_device() to release each reference.
+ *
+ * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
+ * initialize the required SVA features.
+ *
+ * On error, returns an ERR_PTR value.
+ */
+struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+       struct iommu_group *group;
+       struct iommu_sva *handle = ERR_PTR(-EINVAL);
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (!ops || !ops->sva_bind)
+               return ERR_PTR(-ENODEV);
+
+       group = iommu_group_get(dev);
+       if (!group)
+               return ERR_PTR(-ENODEV);
+
+       /* Ensure device count and domain don't change while we're binding */
+       mutex_lock(&group->mutex);
+
+       /*
+        * To keep things simple, SVA currently doesn't support IOMMU groups
+        * with more than one device. Existing SVA-capable systems are not
+        * affected by the problems that required IOMMU groups (lack of ACS
+        * isolation, device ID aliasing and other hardware issues).
+        */
+       if (iommu_group_device_count(group) != 1)
+               goto out_unlock;
+
+       handle = ops->sva_bind(dev, mm, drvdata);
+
+out_unlock:
+       mutex_unlock(&group->mutex);
+       iommu_group_put(group);
+
+       return handle;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
+
+/**
+ * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
+ * @handle: the handle returned by iommu_sva_bind_device()
+ *
+ * Put reference to a bond between device and address space. The device should
+ * not be issuing any more transaction for this PASID. All outstanding page
+ * requests for this PASID must have been flushed to the IOMMU.
+ *
+ * This function does not return a value.
+ */
+void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+       struct iommu_group *group;
+       struct device *dev = handle->dev;
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+       if (!ops || !ops->sva_unbind)
+               return;
+
+       group = iommu_group_get(dev);
+       if (!group)
+               return;
+
+       mutex_lock(&group->mutex);
+       ops->sva_unbind(handle);
+       mutex_unlock(&group->mutex);
+
+       iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
+
+int iommu_sva_set_ops(struct iommu_sva *handle,
+                     const struct iommu_sva_ops *sva_ops)
+{
+       if (handle->ops && handle->ops != sva_ops)
+               return -EEXIST;
+
+       handle->ops = sva_ops;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
+
+int iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+       const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
+
+       if (!ops || !ops->sva_get_pasid)
+               return IOMMU_PASID_INVALID;
+
+       return ops->sva_get_pasid(handle);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c

index 5182c7d6171e1a3f569bd765365f4c49326211d1..463ee08f7d3a5956f91d07aed7cf82a01b3ee7fd 100644 (file)
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -102,7 +102,6 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
  #define  SMMU_TLB_FLUSH_VA_MATCH_ALL     (0 << 0)
  #define  SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0)
  #define  SMMU_TLB_FLUSH_VA_MATCH_GROUP   (3 << 0)
-#define  SMMU_TLB_FLUSH_ASID(x)          (((x) & 0x7f) << 24)
  #define  SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \
                                           SMMU_TLB_FLUSH_VA_MATCH_SECTION)
  #define  SMMU_TLB_FLUSH_VA_GROUP(addr)   ((((addr) & 0xffffc000) >> 12) | \
@@ -146,8 +145,6 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
  
  #define SMMU_PDE_ATTR          (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
                                  SMMU_PDE_NONSECURE)
-#define SMMU_PTE_ATTR          (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
-                                SMMU_PTE_NONSECURE)
  
  static unsigned int iova_pd_index(unsigned long iova)
  {
@@ -205,8 +202,12 @@ static inline void smmu_flush_tlb_asid(struct tegra_smmu *smmu,
  {
         u32 value;
  
-       value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
-               SMMU_TLB_FLUSH_VA_MATCH_ALL;
+       if (smmu->soc->num_asids == 4)
+               value = (asid & 0x3) << 29;
+       else
+               value = (asid & 0x7f) << 24;
+
+       value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_MATCH_ALL;
         smmu_writel(smmu, value, SMMU_TLB_FLUSH);
  }
  
@@ -216,8 +217,12 @@ static inline void smmu_flush_tlb_section(struct tegra_smmu *smmu,
  {
         u32 value;
  
-       value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
-               SMMU_TLB_FLUSH_VA_SECTION(iova);
+       if (smmu->soc->num_asids == 4)
+               value = (asid & 0x3) << 29;
+       else
+               value = (asid & 0x7f) << 24;
+
+       value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_SECTION(iova);
         smmu_writel(smmu, value, SMMU_TLB_FLUSH);
  }
  
@@ -227,8 +232,12 @@ static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu,
  {
         u32 value;
  
-       value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
-               SMMU_TLB_FLUSH_VA_GROUP(iova);
+       if (smmu->soc->num_asids == 4)
+               value = (asid & 0x3) << 29;
+       else
+               value = (asid & 0x7f) << 24;
+
+       value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_GROUP(iova);
         smmu_writel(smmu, value, SMMU_TLB_FLUSH);
  }
  
@@ -316,6 +325,9 @@ static void tegra_smmu_domain_free(struct iommu_domain *domain)
  
         /* TODO: free page directory and page tables */
  
+       WARN_ON_ONCE(as->use_count);
+       kfree(as->count);
+       kfree(as->pts);
         kfree(as);
  }
  
@@ -645,6 +657,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
  {
         struct tegra_smmu_as *as = to_smmu_as(domain);
         dma_addr_t pte_dma;
+       u32 pte_attrs;
         u32 *pte;
  
         pte = as_get_pte(as, iova, &pte_dma);
@@ -655,8 +668,16 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
         if (*pte == 0)
                 tegra_smmu_pte_get_use(as, iova);
  
+       pte_attrs = SMMU_PTE_NONSECURE;
+
+       if (prot & IOMMU_READ)
+               pte_attrs |= SMMU_PTE_READABLE;
+
+       if (prot & IOMMU_WRITE)
+               pte_attrs |= SMMU_PTE_WRITABLE;
+
         tegra_smmu_set_pte(as, iova, pte, pte_dma,
-                          __phys_to_pfn(paddr) | SMMU_PTE_ATTR);
+                          __phys_to_pfn(paddr) | pte_attrs);
  
         return 0;
  }
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c

index aa729078463601464e0c0c90d0c319c83f82214a..0390603170b405862ac63b35310f64e3d5743b7a 100644 (file)
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -22,6 +22,15 @@
  #define AR71XX_RESET_REG_MISC_INT_ENABLE       4
  
  #define ATH79_MISC_IRQ_COUNT                   32
+#define ATH79_MISC_PERF_IRQ                    5
+
+static int ath79_perfcount_irq;
+
+int get_c0_perfcount_int(void)
+{
+       return ath79_perfcount_irq;
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
  
  static void ath79_misc_irq_handler(struct irq_desc *desc)
  {
@@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init(
  {
         void __iomem *base = domain->host_data;
  
+       ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ);
+
         /* Disable and clear all interrupts */
         __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
         __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c

index 9e07b469066a4bbc41119e0801d716a25dce9a55..156fbc5601ca3ece9b07c7ab14314ff4883127f7 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1721,7 +1721,7 @@ static void atl1_inc_smb(struct atl1_adapter *adapter)
         adapter->soft_stats.scc += smb->tx_1_col;
         adapter->soft_stats.mcc += smb->tx_2_col;
         adapter->soft_stats.latecol += smb->tx_late_col;
-       adapter->soft_stats.tx_underun += smb->tx_underrun;
+       adapter->soft_stats.tx_underrun += smb->tx_underrun;
         adapter->soft_stats.tx_trunc += smb->tx_trunc;
         adapter->soft_stats.tx_pause += smb->tx_pause;
  
@@ -3179,7 +3179,7 @@ static struct atl1_stats atl1_gstrings_stats[] = {
         {"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)},
         {"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)},
         {"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)},
-       {"tx_underun", ATL1_STAT(soft_stats.tx_underun)},
+       {"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)},
         {"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)},
         {"tx_pause", ATL1_STAT(soft_stats.tx_pause)},
         {"rx_pause", ATL1_STAT(soft_stats.rx_pause)},
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.h b/drivers/net/ethernet/atheros/atlx/atl1.h

index 34a58cd846a05929f5c22dfa46619f6240e85e5e..eacff19ea05b820bec71fdba222b7a7e263141db 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl1.h
+++ b/drivers/net/ethernet/atheros/atlx/atl1.h
@@ -681,7 +681,7 @@ struct atl1_sft_stats {
         u64 scc;                /* packets TX after a single collision */
         u64 mcc;                /* packets TX after multiple collisions */
         u64 latecol;            /* TX packets w/ late collisions */
-       u64 tx_underun;         /* TX packets aborted due to TX FIFO underrun
+       u64 tx_underrun;        /* TX packets aborted due to TX FIFO underrun
                                  * or TRD FIFO underrun */
         u64 tx_trunc;           /* TX packets truncated due to size > MTU */
         u64 rx_pause;           /* num Pause packets received. */
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c

index d99317b3d891b0a608aafef51352eea6ba58fbb2..98da0fa27192ddbab7c04651854b0fd94baa6b2a 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -553,7 +553,7 @@ static void atl2_intr_tx(struct atl2_adapter *adapter)
                         netdev->stats.tx_aborted_errors++;
                 if (txs->late_col)
                         netdev->stats.tx_window_errors++;
-               if (txs->underun)
+               if (txs->underrun)
                         netdev->stats.tx_fifo_errors++;
         } while (1);
  
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h

index c64a6bdfa7ae4927da9dc3c38ac4ca5b8956354e..25ec84cb48535b1a7dc0180e176af77aabf0bf03 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl2.h
+++ b/drivers/net/ethernet/atheros/atlx/atl2.h
@@ -260,7 +260,7 @@ struct tx_pkt_status {
         unsigned multi_col:1;
         unsigned late_col:1;
         unsigned abort_col:1;
-       unsigned underun:1;     /* current packet is aborted
+       unsigned underrun:1;    /* current packet is aborted
                                  * due to txram underrun */
         unsigned:3;             /* reserved */
         unsigned update:1;      /* always 1'b1 in tx_status_buf */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

index 03b2a9f9c5895af92bcefad0b3525757aa0191c1..cad34d6f5f451b1bfdf4b363ef25cbe50cab9fcd 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -33,6 +33,26 @@
  #include <linux/bpf_trace.h>
  #include "en/xdp.h"
  
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+       int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+       /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+        * The condition checked in mlx5e_rx_is_linear_skb is:
+        *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
+        *   (Note that hw_mtu == sw_mtu + hard_mtu.)
+        * What is returned from this function is:
+        *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
+        * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+        * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+        * because both PAGE_SIZE and S are already aligned. Any number greater
+        * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+        * so max_mtu is the maximum MTU allowed.
+        */
+
+       return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
  static inline bool
  mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
                     struct xdp_buff *xdp)
@@ -304,9 +324,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
                                         mlx5e_xdpi_fifo_pop(xdpi_fifo);
  
                                 if (is_redirect) {
-                                       xdp_return_frame(xdpi.xdpf);
                                         dma_unmap_single(sq->pdev, xdpi.dma_addr,
                                                          xdpi.xdpf->len, DMA_TO_DEVICE);
+                                       xdp_return_frame(xdpi.xdpf);
                                 } else {
                                         /* Recycle RX page */
                                         mlx5e_page_release(rq, &xdpi.di, true);
@@ -345,9 +365,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
                                 mlx5e_xdpi_fifo_pop(xdpi_fifo);
  
                         if (is_redirect) {
-                               xdp_return_frame(xdpi.xdpf);
                                 dma_unmap_single(sq->pdev, xdpi.dma_addr,
                                                  xdpi.xdpf->len, DMA_TO_DEVICE);
+                               xdp_return_frame(xdpi.xdpf);
                         } else {
                                 /* Recycle RX page */
                                 mlx5e_page_release(rq, &xdpi.di, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h

index ee27a7c8cd87d5121361f22344b53a21a7fb408b..553956cadc8a00d6bed384aa6cdad86759d5b2dd 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -34,13 +34,12 @@
  
  #include "en.h"
  
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
-                                MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
  #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
  #define MLX5E_XDP_TX_EMPTY_DS_COUNT \
         (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
  #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
  
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
  bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                       void *va, u16 *rx_headroom, u32 *len);
  bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

index 76a3d01a489e00832ee5ff45e2442dbef60b6d6d..78dc8fe2a83c3499d290ceffcb273ebcd7ef1932 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1586,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev,
                 break;
         case MLX5_MODULE_ID_SFP:
                 modinfo->type       = ETH_MODULE_SFF_8472;
-               modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
                 break;
         default:
                 netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index f7eb521db580001f48ee021878f5e8d477ad4dbc..46157e2a1e5ac36121f8ec96f9f5a09417b5fa67 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3777,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
         if (params->xdp_prog &&
             !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
                 netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
-                          new_mtu, MLX5E_XDP_MAX_MTU);
+                          new_mtu, mlx5e_xdp_max_mtu(params));
                 err = -EINVAL;
                 goto out;
         }
@@ -4212,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
  
         if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
                 netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
-                           new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+                           new_channels.params.sw_mtu,
+                           mlx5e_xdp_max_mtu(&new_channels.params));
                 return -EINVAL;
         }
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c

index 21b7f05b16a5f6053a88c1cdb9067c0f9e26ea10..361468e0435dcc9fbb667716e483c4104ebf7fea 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -317,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
                 size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
  
         i2c_addr = MLX5_I2C_ADDR_LOW;
-       if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
-               i2c_addr = MLX5_I2C_ADDR_HIGH;
-               offset -= MLX5_EEPROM_PAGE_LENGTH;
-       }
  
         MLX5_SET(mcia_reg, in, l, 0);
         MLX5_SET(mcia_reg, in, module, module_num);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h

index ffee38e36ce8995348f776bbdbb8e4601b36b223..8648ca1712543abf8e6ef7bcabfae895c4009465 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
  
  #define MLXSW_PCI_SW_RESET                     0xF0010
  #define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       20000
  #define MLXSW_PCI_SW_RESET_WAIT_MSECS          100
  #define MLXSW_PCI_FW_READY                     0xA1844
  #define MLXSW_PCI_FW_READY_MASK                        0xFFFF
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c

index 9eb63300c1d3a712d6377ea2835c5d64f2b32678..6b8aa3761899b03e7c9211b7d3272a1027a57371 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3126,11 +3126,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
         if (err)
                 return err;
  
+       mlxsw_sp_port->link.autoneg = autoneg;
+
         if (!netif_running(dev))
                 return 0;
  
-       mlxsw_sp_port->link.autoneg = autoneg;
-
         mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
         mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
  
@@ -3316,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
                 err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
                                             MLXSW_REG_QEEC_HIERARCY_TC,
                                             i + 8, i,
-                                           false, 0);
+                                           true, 100);
                 if (err)
                         return err;
         }
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c

index 9852080cf45483c49db22663c7f8caa1f7fe5e3b..ff391308566525cd613acc3a733130b41e7246a9 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -39,7 +39,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
         }
         if (knode->sel->off || knode->sel->offshift || knode->sel->offmask ||
             knode->sel->offoff || knode->fshift) {
-               NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported");
+               NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported");
                 return false;
         }
         if (knode->sel->hoff || knode->sel->hmask) {
@@ -78,7 +78,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
  
         k = &knode->sel->keys[0];
         if (k->offmask) {
-               NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported");
+               NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported");
                 return false;
         }
         if (k->off) {
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c

index a18149720aa2eadcd5ba9690bad3d568f5aeb812..cba5881b2746a36da1b710c5be14ed8ba5a7c080 100644 (file)
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -673,7 +673,8 @@ static void netsec_process_tx(struct netsec_priv *priv)
  }
  
  static void *netsec_alloc_rx_data(struct netsec_priv *priv,
-                                 dma_addr_t *dma_handle, u16 *desc_len)
+                                 dma_addr_t *dma_handle, u16 *desc_len,
+                                 bool napi)
  {
         size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
         size_t payload_len = NETSEC_RX_BUF_SZ;
@@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
  
         total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
  
-       buf = napi_alloc_frag(total_len);
+       buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
         if (!buf)
                 return NULL;
  
@@ -765,7 +766,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
                 /* allocate a fresh buffer and map it to the hardware.
                  * This will eventually replace the old buffer in the hardware
                  */
-               buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+               buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
+                                               true);
                 if (unlikely(!buf_addr))
                         break;
  
@@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
                 void *buf;
                 u16 len;
  
-               buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+               buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
+                                          false);
                 if (!buf) {
                         netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
                         goto err_out;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c

index b7dd4e3c760d82da1439fb0d8dd9d418c34256a2..6d690678c20e11bf8594729524fafa238bb1c4bb 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
         p->des0 |= cpu_to_le32(RDES0_OWN);
  
         bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
-       p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK);
+       p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);
  
         if (mode == STMMAC_CHAIN_MODE)
                 ndesc_rx_set_on_chain(p, end);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

index a26e36dbb5df0deff58ef23a18df287c0ceef330..48712437d0da8039e74c8cc34d8063ac666c03a8 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2616,8 +2616,6 @@ static int stmmac_open(struct net_device *dev)
         u32 chan;
         int ret;
  
-       stmmac_check_ether_addr(priv);
-
         if (priv->hw->pcs != STMMAC_PCS_RGMII &&
             priv->hw->pcs != STMMAC_PCS_TBI &&
             priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -4303,6 +4301,8 @@ int stmmac_dvr_probe(struct device *device,
         if (ret)
                 goto error_hw_init;
  
+       stmmac_check_ether_addr(priv);
+
         /* Configure real RX and TX queues */
         netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
         netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c

index d819e8eaba1225dc5e9b188e42636721cc66a4c0..cc1e887e47b50f31bba7a53e8f146d9ac7fb4fa7 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = {
                 },
                 .driver_data = (void *)&galileo_stmmac_dmi_data,
         },
+       /*
+        * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+        * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+        * has only one pci network device while other asset tags are
+        * for IOT2040 which has two.
+        */
         {
                 .matches = {
                         DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = {
         {
                 .matches = {
                         DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
-                       DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
-                                       "6ES7647-0AA00-1YA2"),
                 },
                 .driver_data = (void *)&iot2040_stmmac_dmi_data,
         },
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c

index 92b64e254b44ed764d7db9039c949cbdb6d66597..7475cef17cf76ca09e59b6987b82010d6ce1076f 100644 (file)
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -159,6 +159,14 @@ static const struct spi_device_id ks8995_id[] = {
  };
  MODULE_DEVICE_TABLE(spi, ks8995_id);
  
+static const struct of_device_id ks8895_spi_of_match[] = {
+        { .compatible = "micrel,ks8995" },
+        { .compatible = "micrel,ksz8864" },
+        { .compatible = "micrel,ksz8795" },
+        { },
+ };
+MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
+
  static inline u8 get_chip_id(u8 val)
  {
         return (val >> ID1_CHIPID_S) & ID1_CHIPID_M;
@@ -526,6 +534,7 @@ static int ks8995_remove(struct spi_device *spi)
  static struct spi_driver ks8995_driver = {
         .driver = {
                 .name       = "spi-ks8995",
+               .of_match_table = of_match_ptr(ks8895_spi_of_match),
         },
         .probe    = ks8995_probe,
         .remove   = ks8995_remove,
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c

index f4e93f5fc2043ebb29c5b36e94afe49ec0c7d7ba..ea90db3c77058b6a799245bd5a3ff9f672b5da5e 100644 (file)
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -153,7 +153,7 @@ out_fail:
  void
  slhc_free(struct slcompress *comp)
  {
-       if ( comp == NULLSLCOMPR )
+       if ( IS_ERR_OR_NULL(comp) )
                 return;
  
         if ( comp->tstate != NULLSLSTATE )
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c

index 9ce61b019aadb55df9177301e0752295d61ba096..16963f7a88f748fd0946fafef5c5a477e138cf63 100644 (file)
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1156,6 +1156,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
                 return -EINVAL;
         }
  
+       if (netdev_has_upper_dev(dev, port_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface");
+               netdev_err(dev, "Device %s is already an upper device of the team interface\n",
+                          portname);
+               return -EBUSY;
+       }
+
         if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
             vlan_uses_dev(dev)) {
                 NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c

index cd15c32b2e43686925161ad48b080842f00ec19c..9ee4d7402ca23296091939a59a5a6fec459a8472 100644 (file)
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -875,6 +875,7 @@ static const struct net_device_ops vrf_netdev_ops = {
         .ndo_init               = vrf_dev_init,
         .ndo_uninit             = vrf_dev_uninit,
         .ndo_start_xmit         = vrf_xmit,
+       .ndo_set_mac_address    = eth_mac_addr,
         .ndo_get_stats64        = vrf_get_stats64,
         .ndo_add_slave          = vrf_add_slave,
         .ndo_del_slave          = vrf_del_slave,
@@ -1274,6 +1275,7 @@ static void vrf_setup(struct net_device *dev)
         /* default to no qdisc; user can add if desired */
         dev->priv_flags |= IFF_NO_QUEUE;
         dev->priv_flags |= IFF_NO_RX_HANDLER;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
  
         /* VRF devices do not care about MTU, but if the MTU is set
          * too low then the ipv4 and ipv6 protocols are disabled
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c

index 2b26f762fbc3b3f5f837e27267d0de9fc1b9e5c8..01acb6e533655d6b6041cbbde43af8c1364aec60 100644 (file)
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = {
  };
  MODULE_DEVICE_TABLE(spi, st95hf_id);
  
+static const struct of_device_id st95hf_spi_of_match[] = {
+        { .compatible = "st,st95hf" },
+        { },
+};
+MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
+
  static int st95hf_probe(struct spi_device *nfc_spi_dev)
  {
         int ret;
@@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = {
         .driver = {
                 .name = "st95hf",
                 .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(st95hf_spi_of_match),
         },
         .id_table = st95hf_id,
         .probe = st95hf_probe,
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c

index 810ab0fbcccbf844d6af9fe6eb40e155355d9731..d820f3edd4311821696e6045d843024166b83dfc 100644 (file)
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -7,7 +7,6 @@
   */
  #include <linux/etherdevice.h>
  #include <linux/kernel.h>
-#include <linux/nvmem-consumer.h>
  #include <linux/of_net.h>
  #include <linux/phy.h>
  #include <linux/export.h>
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c

index 7617d21cb2960618cbc097bbf85cb8515234aa14..f63c5c871d3ddf48f4a88fe3c2b2db684394c7b3 100644 (file)
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev)
                 if (priv->channel[direction] == NULL) {
                         if (direction == CTCM_WRITE)
                                 channel_free(priv->channel[CTCM_READ]);
+                       result = -ENODEV;
                         goto out_dev;
                 }
                 priv->channel[direction]->netdev = dev;
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c

index b96fedc77ee51219a9c4f71800e43dc529114156..1b6435529166f3627ac0780cc364270ae10460c6 100644 (file)
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -390,6 +390,24 @@ int mdev_device_remove(struct device *dev, bool force_remove)
         return 0;
  }
  
+int mdev_set_iommu_device(struct device *dev, struct device *iommu_device)
+{
+       struct mdev_device *mdev = to_mdev_device(dev);
+
+       mdev->iommu_device = iommu_device;
+
+       return 0;
+}
+EXPORT_SYMBOL(mdev_set_iommu_device);
+
+struct device *mdev_get_iommu_device(struct device *dev)
+{
+       struct mdev_device *mdev = to_mdev_device(dev);
+
+       return mdev->iommu_device;
+}
+EXPORT_SYMBOL(mdev_get_iommu_device);
+
  static int __init mdev_init(void)
  {
         return mdev_bus_register();
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h

index 379758c52b1b4cc40a566ef9e530a33a77f59a08..f4eba723fea5b5dd129d62d12435f57b040597ae 100644 (file)
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -33,6 +33,7 @@ struct mdev_device {
         struct kref ref;
         struct list_head next;
         struct kobject *type_kobj;
+       struct device *iommu_device;
         bool active;
  };
  
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c

index d0f731c9920a65a44d614181ecf3a4e4c2d90755..3be1db3501cc2a8981ca66293cb19a62d8f74bc7 100644 (file)
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -97,6 +97,7 @@ struct vfio_dma {
  struct vfio_group {
         struct iommu_group      *iommu_group;
         struct list_head        next;
+       bool                    mdev_group;     /* An mdev group */
  };
  
  /*
@@ -564,7 +565,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
         mutex_lock(&iommu->lock);
  
         /* Fail if notifier list is empty */
-       if ((!iommu->external_domain) || (!iommu->notifier.head)) {
+       if (!iommu->notifier.head) {
                 ret = -EINVAL;
                 goto pin_done;
         }
@@ -646,11 +647,6 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
  
         mutex_lock(&iommu->lock);
  
-       if (!iommu->external_domain) {
-               mutex_unlock(&iommu->lock);
-               return -EINVAL;
-       }
-
         do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
         for (i = 0; i < npage; i++) {
                 struct vfio_dma *dma;
@@ -1311,13 +1307,109 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
         return ret;
  }
  
+static struct device *vfio_mdev_get_iommu_device(struct device *dev)
+{
+       struct device *(*fn)(struct device *dev);
+       struct device *iommu_device;
+
+       fn = symbol_get(mdev_get_iommu_device);
+       if (fn) {
+               iommu_device = fn(dev);
+               symbol_put(mdev_get_iommu_device);
+
+               return iommu_device;
+       }
+
+       return NULL;
+}
+
+static int vfio_mdev_attach_domain(struct device *dev, void *data)
+{
+       struct iommu_domain *domain = data;
+       struct device *iommu_device;
+
+       iommu_device = vfio_mdev_get_iommu_device(dev);
+       if (iommu_device) {
+               if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
+                       return iommu_aux_attach_device(domain, iommu_device);
+               else
+                       return iommu_attach_device(domain, iommu_device);
+       }
+
+       return -EINVAL;
+}
+
+static int vfio_mdev_detach_domain(struct device *dev, void *data)
+{
+       struct iommu_domain *domain = data;
+       struct device *iommu_device;
+
+       iommu_device = vfio_mdev_get_iommu_device(dev);
+       if (iommu_device) {
+               if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
+                       iommu_aux_detach_device(domain, iommu_device);
+               else
+                       iommu_detach_device(domain, iommu_device);
+       }
+
+       return 0;
+}
+
+static int vfio_iommu_attach_group(struct vfio_domain *domain,
+                                  struct vfio_group *group)
+{
+       if (group->mdev_group)
+               return iommu_group_for_each_dev(group->iommu_group,
+                                               domain->domain,
+                                               vfio_mdev_attach_domain);
+       else
+               return iommu_attach_group(domain->domain, group->iommu_group);
+}
+
+static void vfio_iommu_detach_group(struct vfio_domain *domain,
+                                   struct vfio_group *group)
+{
+       if (group->mdev_group)
+               iommu_group_for_each_dev(group->iommu_group, domain->domain,
+                                        vfio_mdev_detach_domain);
+       else
+               iommu_detach_group(domain->domain, group->iommu_group);
+}
+
+static bool vfio_bus_is_mdev(struct bus_type *bus)
+{
+       struct bus_type *mdev_bus;
+       bool ret = false;
+
+       mdev_bus = symbol_get(mdev_bus_type);
+       if (mdev_bus) {
+               ret = (bus == mdev_bus);
+               symbol_put(mdev_bus_type);
+       }
+
+       return ret;
+}
+
+static int vfio_mdev_iommu_device(struct device *dev, void *data)
+{
+       struct device **old = data, *new;
+
+       new = vfio_mdev_get_iommu_device(dev);
+       if (!new || (*old && *old != new))
+               return -EINVAL;
+
+       *old = new;
+
+       return 0;
+}
+
  static int vfio_iommu_type1_attach_group(void *iommu_data,
                                          struct iommu_group *iommu_group)
  {
         struct vfio_iommu *iommu = iommu_data;
         struct vfio_group *group;
         struct vfio_domain *domain, *d;
-       struct bus_type *bus = NULL, *mdev_bus;
+       struct bus_type *bus = NULL;
         int ret;
         bool resv_msi, msi_remap;
         phys_addr_t resv_msi_base;
@@ -1352,23 +1444,30 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         if (ret)
                 goto out_free;
  
-       mdev_bus = symbol_get(mdev_bus_type);
+       if (vfio_bus_is_mdev(bus)) {
+               struct device *iommu_device = NULL;
  
-       if (mdev_bus) {
-               if ((bus == mdev_bus) && !iommu_present(bus)) {
-                       symbol_put(mdev_bus_type);
+               group->mdev_group = true;
+
+               /* Determine the isolation type */
+               ret = iommu_group_for_each_dev(iommu_group, &iommu_device,
+                                              vfio_mdev_iommu_device);
+               if (ret || !iommu_device) {
                         if (!iommu->external_domain) {
                                 INIT_LIST_HEAD(&domain->group_list);
                                 iommu->external_domain = domain;
-                       } else
+                       } else {
                                 kfree(domain);
+                       }
  
                         list_add(&group->next,
                                  &iommu->external_domain->group_list);
                         mutex_unlock(&iommu->lock);
+
                         return 0;
                 }
-               symbol_put(mdev_bus_type);
+
+               bus = iommu_device->bus;
         }
  
         domain->domain = iommu_domain_alloc(bus);
@@ -1386,7 +1485,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                         goto out_domain;
         }
  
-       ret = iommu_attach_group(domain->domain, iommu_group);
+       ret = vfio_iommu_attach_group(domain, group);
         if (ret)
                 goto out_domain;
  
@@ -1418,8 +1517,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         list_for_each_entry(d, &iommu->domain_list, next) {
                 if (d->domain->ops == domain->domain->ops &&
                     d->prot == domain->prot) {
-                       iommu_detach_group(domain->domain, iommu_group);
-                       if (!iommu_attach_group(d->domain, iommu_group)) {
+                       vfio_iommu_detach_group(domain, group);
+                       if (!vfio_iommu_attach_group(d, group)) {
                                 list_add(&group->next, &d->group_list);
                                 iommu_domain_free(domain->domain);
                                 kfree(domain);
@@ -1427,7 +1526,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                                 return 0;
                         }
  
-                       ret = iommu_attach_group(domain->domain, iommu_group);
+                       ret = vfio_iommu_attach_group(domain, group);
                         if (ret)
                                 goto out_domain;
                 }
@@ -1453,7 +1552,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         return 0;
  
  out_detach:
-       iommu_detach_group(domain->domain, iommu_group);
+       vfio_iommu_detach_group(domain, group);
  out_domain:
         iommu_domain_free(domain->domain);
  out_free:
@@ -1544,7 +1643,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                 if (!group)
                         continue;
  
-               iommu_detach_group(domain->domain, iommu_group);
+               vfio_iommu_detach_group(domain, group);
                 list_del(&group->next);
                 kfree(group);
                 /*
@@ -1610,7 +1709,7 @@ static void vfio_release_domain(struct vfio_domain *domain, bool external)
         list_for_each_entry_safe(group, group_tmp,
                                  &domain->group_list, next) {
                 if (!external)
-                       iommu_detach_group(domain->domain, group->iommu_group);
+                       vfio_iommu_detach_group(domain, group);
                 list_del(&group->next);
                 kfree(group);
         }
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c

index 920bf3b4b0ef5e5296d3cec2c2e82a8ab78dc0ac..cccc75d15970cbc61e8e1bcd1735fb28dc549123 100644 (file)
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -7,6 +7,7 @@
  #include <linux/slab.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
+#include <linux/sched/mm.h>
  #include "ctree.h"
  #include "disk-io.h"
  #include "transaction.h"
@@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
         unsigned long this_sum_bytes = 0;
         int i;
         u64 offset;
+       unsigned nofs_flag;
+
+       nofs_flag = memalloc_nofs_save();
+       sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
+                      GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
  
-       sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
-                      GFP_NOFS);
         if (!sums)
                 return BLK_STS_RESOURCE;
  
@@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
  
                                 bytes_left = bio->bi_iter.bi_size - total_bytes;
  
-                               sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
-                                              GFP_NOFS);
+                               nofs_flag = memalloc_nofs_save();
+                               sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+                                                     bytes_left), GFP_KERNEL);
+                               memalloc_nofs_restore(nofs_flag);
                                 BUG_ON(!sums); /* -ENOMEM */
                                 sums->len = bytes_left;
                                 ordered = btrfs_lookup_ordered_extent(inode,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 6fde2b2741ef13b2bdabfac4ad29937d7e784afa..45e3cfd1198bc29265d28a360f3d261a70144953 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,6 +6,7 @@
  #include <linux/slab.h>
  #include <linux/blkdev.h>
  #include <linux/writeback.h>
+#include <linux/sched/mm.h>
  #include "ctree.h"
  #include "transaction.h"
  #include "btrfs_inode.h"
@@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
                         cur = entry->list.next;
                         sum = list_entry(cur, struct btrfs_ordered_sum, list);
                         list_del(&sum->list);
-                       kfree(sum);
+                       kvfree(sum);
                 }
                 kmem_cache_free(btrfs_ordered_extent_cache, entry);
         }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c

index a8f429882249476303868e4b68f272653ad72ebb..0637149fb9f9a7d26b383a2abedc08bb026d301e 100644 (file)
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
  unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
  {
         struct ceph_inode_info *dci = ceph_inode(dir);
+       unsigned hash;
  
         switch (dci->i_dir_layout.dl_dir_hash) {
         case 0: /* for backward compat */
@@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
                 return dn->d_name.hash;
  
         default:
-               return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+               spin_lock(&dn->d_lock);
+               hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
                                      dn->d_name.name, dn->d_name.len);
+               spin_unlock(&dn->d_lock);
+               return hash;
         }
  }
  
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

index 2d61ddda9bf5653fb559fb320422fd84ec470419..c2feb310ac1e0d7bd0c263f4d5944cc2c22852ac 100644 (file)
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
         return 0;
  }
  
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+       int ret;
+
+       /* take d_lock to ensure dentry->d_name stability */
+       spin_lock(&dentry->d_lock);
+       ret = dentry->d_name.len - len;
+       if (!ret)
+               ret = memcmp(dentry->d_name.name, name, len);
+       spin_unlock(&dentry->d_lock);
+       return ret;
+}
+
  /*
   * Incorporate results into the local cache.  This is either just
   * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1412,7 +1425,8 @@ retry_lookup:
                 err = splice_dentry(&req->r_dentry, in);
                 if (err < 0)
                         goto done;
-       } else if (rinfo->head->is_dentry) {
+       } else if (rinfo->head->is_dentry &&
+                  !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
                 struct ceph_vino *ptvino = NULL;
  
                 if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index 21c33ed048ed7095aa2347159ab472d8550b9721..9049c2a3e972f499ea1371e8c4b8112ead98a6e3 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                         list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                         ci->i_prealloc_cap_flush = NULL;
                 }
+
+               if (drop &&
+                  ci->i_wrbuffer_ref_head == 0 &&
+                  ci->i_wr_ref == 0 &&
+                  ci->i_dirty_caps == 0 &&
+                  ci->i_flushing_caps == 0) {
+                      ceph_put_snap_context(ci->i_head_snapc);
+                      ci->i_head_snapc = NULL;
+               }
         }
         spin_unlock(&ci->i_ceph_lock);
         while (!list_empty(&to_remove)) {
@@ -2161,10 +2170,39 @@ retry:
         return path;
  }
  
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+                            int *ppathlen)
+{
+       u32 len;
+       char *name;
+
+retry:
+       len = READ_ONCE(dentry->d_name.len);
+       name = kmalloc(len + 1, GFP_NOFS);
+       if (!name)
+               return -ENOMEM;
+
+       spin_lock(&dentry->d_lock);
+       if (dentry->d_name.len != len) {
+               spin_unlock(&dentry->d_lock);
+               kfree(name);
+               goto retry;
+       }
+       memcpy(name, dentry->d_name.name, len);
+       spin_unlock(&dentry->d_lock);
+
+       name[len] = '\0';
+       *ppath = name;
+       *ppathlen = len;
+       return 0;
+}
+
  static int build_dentry_path(struct dentry *dentry, struct inode *dir,
                              const char **ppath, int *ppathlen, u64 *pino,
-                            int *pfreepath)
+                            bool *pfreepath, bool parent_locked)
  {
+       int ret;
         char *path;
  
         rcu_read_lock();
@@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
         if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
                 *pino = ceph_ino(dir);
                 rcu_read_unlock();
-               *ppath = dentry->d_name.name;
-               *ppathlen = dentry->d_name.len;
+               if (parent_locked) {
+                       *ppath = dentry->d_name.name;
+                       *ppathlen = dentry->d_name.len;
+               } else {
+                       ret = clone_dentry_name(dentry, ppath, ppathlen);
+                       if (ret)
+                               return ret;
+                       *pfreepath = true;
+               }
                 return 0;
         }
         rcu_read_unlock();
@@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
         if (IS_ERR(path))
                 return PTR_ERR(path);
         *ppath = path;
-       *pfreepath = 1;
+       *pfreepath = true;
         return 0;
  }
  
  static int build_inode_path(struct inode *inode,
                             const char **ppath, int *ppathlen, u64 *pino,
-                           int *pfreepath)
+                           bool *pfreepath)
  {
         struct dentry *dentry;
         char *path;
@@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode,
         if (IS_ERR(path))
                 return PTR_ERR(path);
         *ppath = path;
-       *pfreepath = 1;
+       *pfreepath = true;
         return 0;
  }
  
@@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode,
  static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                                   struct inode *rdiri, const char *rpath,
                                   u64 rino, const char **ppath, int *pathlen,
-                                 u64 *ino, int *freepath)
+                                 u64 *ino, bool *freepath, bool parent_locked)
  {
         int r = 0;
  
@@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                      ceph_snap(rinode));
         } else if (rdentry) {
                 r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
-                                       freepath);
+                                       freepath, parent_locked);
                 dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
                      *ppath);
         } else if (rpath || rino) {
@@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
         const char *path2 = NULL;
         u64 ino1 = 0, ino2 = 0;
         int pathlen1 = 0, pathlen2 = 0;
-       int freepath1 = 0, freepath2 = 0;
+       bool freepath1 = false, freepath2 = false;
         int len;
         u16 releases;
         void *p, *end;
@@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
  
         ret = set_request_path_attr(req->r_inode, req->r_dentry,
                               req->r_parent, req->r_path1, req->r_ino1.ino,
-                             &path1, &pathlen1, &ino1, &freepath1);
+                             &path1, &pathlen1, &ino1, &freepath1,
+                             test_bit(CEPH_MDS_R_PARENT_LOCKED,
+                                       &req->r_req_flags));
         if (ret < 0) {
                 msg = ERR_PTR(ret);
                 goto out;
         }
  
+       /* If r_old_dentry is set, then assume that its parent is locked */
         ret = set_request_path_attr(NULL, req->r_old_dentry,
                               req->r_old_dentry_dir,
                               req->r_path2, req->r_ino2.ino,
-                             &path2, &pathlen2, &ino2, &freepath2);
+                             &path2, &pathlen2, &ino2, &freepath2, true);
         if (ret < 0) {
                 msg = ERR_PTR(ret);
                 goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c

index 89aa37fa0f84c55fe3324e50b554f6fcef5b5be5..b26e12cd8ec3317f44e04baaca375826a974e632 100644 (file)
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
         old_snapc = NULL;
  
  update_snapc:
-       if (ci->i_head_snapc) {
+       if (ci->i_wrbuffer_ref_head == 0 &&
+           ci->i_wr_ref == 0 &&
+           ci->i_dirty_caps == 0 &&
+           ci->i_flushing_caps == 0) {
+               ci->i_head_snapc = NULL;
+       } else {
                 ci->i_head_snapc = ceph_get_snap_context(new_snapc);
                 dout(" new snapc is %p\n", new_snapc);
         }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index 9c0ccc06d172e9371513105a9a19cb9cef68ddc0..7037a137fa5330c807da19a91acd054d76ad031a 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2877,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
         struct cifs_tcon *tcon;
         struct cifs_sb_info *cifs_sb;
         struct dentry *dentry = ctx->cfile->dentry;
-       unsigned int i;
         int rc;
  
         tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2941,10 +2940,6 @@ restart_loop:
                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
         }
  
-       if (!ctx->direct_io)
-               for (i = 0; i < ctx->npages; i++)
-                       put_page(ctx->bv[i].bv_page);
-
         cifs_stats_bytes_written(tcon, ctx->total_len);
         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
  
@@ -3582,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
         struct iov_iter *to = &ctx->iter;
         struct cifs_sb_info *cifs_sb;
         struct cifs_tcon *tcon;
-       unsigned int i;
         int rc;
  
         tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3666,15 +3660,8 @@ again:
                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
         }
  
-       if (!ctx->direct_io) {
-               for (i = 0; i < ctx->npages; i++) {
-                       if (ctx->should_dirty)
-                               set_page_dirty(ctx->bv[i].bv_page);
-                       put_page(ctx->bv[i].bv_page);
-               }
-
+       if (!ctx->direct_io)
                 ctx->total_len = ctx->len - iov_iter_count(to);
-       }
  
         /* mask nodata case */
         if (rc == -ENODATA)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c

index 53fdb5df0d2ebd67b2687b76d98441ee7faa41be..538fd7d807e476f9998820b2abc93ae7f6a7c127 100644 (file)
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
         if (rc == 0 || rc != -EBUSY)
                 goto do_rename_exit;
  
+       /* Don't fall back to using SMB on SMB 2+ mount */
+       if (server->vals->protocol_id != 0)
+               goto do_rename_exit;
+
         /* open-file renames don't work across directories */
         if (to_dentry->d_parent != from_dentry->d_parent)
                 goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index 1e1626a2cfc393afb5ea6470261e6bd9703a2e40..0dc6f08020acbc81dbc99966cb229842c688acb2 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void)
  {
         struct cifs_aio_ctx *ctx;
  
+       /*
+        * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+        * to false so that we know when we have to unreference pages within
+        * cifs_aio_ctx_release()
+        */
         ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
         if (!ctx)
                 return NULL;
@@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
                                         struct cifs_aio_ctx, refcount);
  
         cifsFileInfo_put(ctx->cfile);
-       kvfree(ctx->bv);
+
+       /*
+        * ctx->bv is only set if setup_aio_ctx_iter() was called successfully,
+        * which means that iov_iter_get_pages() was a success and thus that
+        * we have taken references on the pages.
+        */
+       if (ctx->bv) {
+               unsigned i;
+
+               for (i = 0; i < ctx->npages; i++) {
+                       if (ctx->should_dirty)
+                               set_page_dirty(ctx->bv[i].bv_page);
+                       put_page(ctx->bv[i].bv_page);
+               }
+               kvfree(ctx->bv);
+       }
+
         kfree(ctx);
  }
  
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c

index b8f7262ac35412f3d1f13743ba4555f8756018b5..a37774a55f3aa1b8598ebd30b063fff67d4cb32f 100644 (file)
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3466,6 +3466,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
                                     io_parms->tcon->tid, ses->Suid,
                                     io_parms->offset, 0);
                 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+               cifs_small_buf_release(req);
                 return rc == -ENODATA ? 0 : rc;
         } else
                 trace_smb3_read_done(xid, req->PersistentFileId,
diff --git a/fs/io_uring.c b/fs/io_uring.c

index f65f85d892174f252cdd2ae48c4d9eb3902ef479..0e9fb2cb1984b5c1d04623847e5fb523565dc856 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -740,7 +740,7 @@ static bool io_file_supports_async(struct file *file)
  }
  
  static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
-                     bool force_nonblock, struct io_submit_state *state)
+                     bool force_nonblock)
  {
         const struct io_uring_sqe *sqe = s->sqe;
         struct io_ring_ctx *ctx = req->ctx;
@@ -938,7 +938,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
  }
  
  static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
-                  bool force_nonblock, struct io_submit_state *state)
+                  bool force_nonblock)
  {
         struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
         struct kiocb *kiocb = &req->rw;
@@ -947,7 +947,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
         size_t iov_count;
         int ret;
  
-       ret = io_prep_rw(req, s, force_nonblock, state);
+       ret = io_prep_rw(req, s, force_nonblock);
         if (ret)
                 return ret;
         file = kiocb->ki_filp;
@@ -985,7 +985,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
  }
  
  static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
-                   bool force_nonblock, struct io_submit_state *state)
+                   bool force_nonblock)
  {
         struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
         struct kiocb *kiocb = &req->rw;
@@ -994,7 +994,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
         size_t iov_count;
         int ret;
  
-       ret = io_prep_rw(req, s, force_nonblock, state);
+       ret = io_prep_rw(req, s, force_nonblock);
         if (ret)
                 return ret;
  
@@ -1336,8 +1336,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  }
  
  static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                          const struct sqe_submit *s, bool force_nonblock,
-                          struct io_submit_state *state)
+                          const struct sqe_submit *s, bool force_nonblock)
  {
         int ret, opcode;
  
@@ -1353,18 +1352,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
         case IORING_OP_READV:
                 if (unlikely(s->sqe->buf_index))
                         return -EINVAL;
-               ret = io_read(req, s, force_nonblock, state);
+               ret = io_read(req, s, force_nonblock);
                 break;
         case IORING_OP_WRITEV:
                 if (unlikely(s->sqe->buf_index))
                         return -EINVAL;
-               ret = io_write(req, s, force_nonblock, state);
+               ret = io_write(req, s, force_nonblock);
                 break;
         case IORING_OP_READ_FIXED:
-               ret = io_read(req, s, force_nonblock, state);
+               ret = io_read(req, s, force_nonblock);
                 break;
         case IORING_OP_WRITE_FIXED:
-               ret = io_write(req, s, force_nonblock, state);
+               ret = io_write(req, s, force_nonblock);
                 break;
         case IORING_OP_FSYNC:
                 ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1457,7 +1456,7 @@ restart:
                         s->has_user = cur_mm != NULL;
                         s->needs_lock = true;
                         do {
-                               ret = __io_submit_sqe(ctx, req, s, false, NULL);
+                               ret = __io_submit_sqe(ctx, req, s, false);
                                 /*
                                  * We can get EAGAIN for polled IO even though
                                  * we're forcing a sync submission from here,
@@ -1623,7 +1622,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
         if (unlikely(ret))
                 goto out;
  
-       ret = __io_submit_sqe(ctx, req, s, true, state);
+       ret = __io_submit_sqe(ctx, req, s, true);
         if (ret == -EAGAIN) {
                 struct io_uring_sqe *sqe_copy;
  
@@ -1739,7 +1738,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
         head = ctx->cached_sq_head;
         /* See comment at the top of this file */
         smp_rmb();
-       if (head == READ_ONCE(ring->r.tail))
+       /* make sure SQ entry isn't read before tail */
+       if (head == smp_load_acquire(&ring->r.tail))
                 return false;
  
         head = READ_ONCE(ring->array[head & ctx->sq_mask]);
@@ -1864,7 +1864,8 @@ static int io_sq_thread(void *data)
  
                         /* Tell userspace we may need a wakeup call */
                         ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
-                       smp_wmb();
+                       /* make sure to read SQ tail after writing flags */
+                       smp_mb();
  
                         if (!io_get_sqring(ctx, &sqes[0])) {
                                 if (kthread_should_stop()) {
@@ -2574,7 +2575,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
         poll_wait(file, &ctx->cq_wait, wait);
         /* See comment at the top of this file */
         smp_rmb();
-       if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+       if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+           ctx->sq_ring->ring_entries)
                 mask |= EPOLLOUT | EPOLLWRNORM;
         if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
                 mask |= EPOLLIN | EPOLLRDNORM;
@@ -2934,6 +2936,14 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
  {
         int ret;
  
+       /*
+        * We're inside the ring mutex, if the ref is already dying, then
+        * someone else killed the ctx or is already going through
+        * io_uring_register().
+        */
+       if (percpu_ref_is_dying(&ctx->refs))
+               return -ENXIO;
+
         percpu_ref_kill(&ctx->refs);
  
         /*
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c

index 8f933e84cec18221f4645b769ea9d1914bc3e627..9bc32af4e2daff14817c4306833009c1d9ab92aa 100644 (file)
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
         struct nfsd3_readdirargs *argp = rqstp->rq_argp;
         struct nfsd3_readdirres  *resp = rqstp->rq_resp;
         __be32          nfserr;
-       int             count;
+       int             count = 0;
+       struct page     **p;
+       caddr_t         page_addr = NULL;
  
         dprintk("nfsd: READDIR(3)  %s %d bytes at %d\n",
                                 SVCFH_fmt(&argp->fh),
@@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
         nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, 
                                         &resp->common, nfs3svc_encode_entry);
         memcpy(resp->verf, argp->verf, 8);
-       resp->count = resp->buffer - argp->buffer;
+       count = 0;
+       for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+               page_addr = page_address(*p);
+
+               if (((caddr_t)resp->buffer >= page_addr) &&
+                   ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+                       count += (caddr_t)resp->buffer - page_addr;
+                       break;
+               }
+               count += PAGE_SIZE;
+       }
+       resp->count = count >> 2;
         if (resp->offset) {
                 loff_t offset = argp->cookie;
  
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c

index 93fea246f676ebec32213bbf3a023ea395fc01a3..8d789124ed3c18d187eea569e350e6d40a43ad7a 100644 (file)
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -573,6 +573,7 @@ int
  nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  {
         struct nfsd3_readdirargs *args = rqstp->rq_argp;
+       int len;
         u32 max_blocksize = svc_max_payload(rqstp);
  
         p = decode_fh(p, &args->fh);
@@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
         args->verf   = p; p += 2;
         args->dircount = ~0;
         args->count  = ntohl(*p++);
-       args->count  = min_t(u32, args->count, max_blocksize);
-       args->buffer = page_address(*(rqstp->rq_next_page++));
+       len = args->count  = min_t(u32, args->count, max_blocksize);
+
+       while (len > 0) {
+               struct page *p = *(rqstp->rq_next_page++);
+               if (!args->buffer)
+                       args->buffer = page_address(p);
+               len -= PAGE_SIZE;
+       }
  
         return xdr_argsize_check(rqstp, p);
  }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c

index d219159b98afc54bda6d2efee824b41487db17c0..7caa3801ce72b70de75802f0a5c1b78b1087ebb5 100644 (file)
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
         cb->cb_seq_status = 1;
         cb->cb_status = 0;
         if (minorversion) {
-               if (!nfsd41_cb_get_slot(clp, task))
+               if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
                         return;
+               cb->cb_holds_slot = true;
         }
         rpc_call_start(task);
  }
@@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
                 return true;
         }
  
+       if (!cb->cb_holds_slot)
+               goto need_restart;
+
         switch (cb->cb_seq_status) {
         case 0:
                 /*
@@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
                         cb->cb_seq_status);
         }
  
+       cb->cb_holds_slot = false;
         clear_bit(0, &clp->cl_cb_slot_busy);
         rpc_wake_up_next(&clp->cl_cb_waitq);
         dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
         cb->cb_seq_status = 1;
         cb->cb_status = 0;
         cb->cb_need_restart = false;
+       cb->cb_holds_slot = false;
  }
  
  void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 6a45fb00c5fcdccabdb142096270aa6035fe2f32..f056b1d3fecd6e1d0db44b56978c23cb93300ce8 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
  static void
  free_blocked_lock(struct nfsd4_blocked_lock *nbl)
  {
+       locks_delete_block(&nbl->nbl_lock);
         locks_release_private(&nbl->nbl_lock);
         kfree(nbl);
  }
@@ -293,11 +294,18 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
                 nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
                                         nbl_lru);
                 list_del_init(&nbl->nbl_lru);
-               locks_delete_block(&nbl->nbl_lock);
                 free_blocked_lock(nbl);
         }
  }
  
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+       struct nfsd4_blocked_lock       *nbl = container_of(cb,
+                                               struct nfsd4_blocked_lock, nbl_cb);
+       locks_delete_block(&nbl->nbl_lock);
+}
+
  static int
  nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
  {
@@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
  }
  
  static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+       .prepare        = nfsd4_cb_notify_lock_prepare,
         .done           = nfsd4_cb_notify_lock_done,
         .release        = nfsd4_cb_notify_lock_release,
  };
@@ -4863,7 +4872,6 @@ nfs4_laundromat(struct nfsd_net *nn)
                 nbl = list_first_entry(&reaplist,
                                         struct nfsd4_blocked_lock, nbl_lru);
                 list_del_init(&nbl->nbl_lru);
-               locks_delete_block(&nbl->nbl_lock);
                 free_blocked_lock(nbl);
         }
  out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h

index 396c76755b03b7cf08237b0bcd1b8a3f5de2b17d..9d6cb246c6c55737967011a919023fc2dad9c861 100644 (file)
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
         int cb_seq_status;
         int cb_status;
         bool cb_need_restart;
+       bool cb_holds_slot;
  };
  
  struct nfsd4_callback_ops {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c

index d653907275419435e4bad20de2f1a704b5c9d6c3..7325baa8f9d474f166c1bbef54b584a028b287fb 100644 (file)
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
         if (--header->nreg)
                 return;
  
-       if (parent)
+       if (parent) {
                 put_links(header);
-       start_unregistering(header);
+               start_unregistering(header);
+       }
+
         if (!--header->count)
                 kfree_rcu(header, rcu);
  
diff --git a/fs/splice.c b/fs/splice.c

index 98943d9b219c0cea1037770cbc6578970fbe69b6..25212dcca2dfd6b43dc51cd8887f93038c753515 100644 (file)
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
         .get = generic_pipe_buf_get,
  };
  
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
-                                   struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+                            struct pipe_buffer *buf)
  {
         return 1;
  }
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h

index cbf3180cb612ed7e54b9aecf79b2690c5a48aed6..668ad971cd7b26828e2d95a4813ec3888d0abdac 100644 (file)
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -420,7 +420,6 @@ extern struct ttm_bo_global {
         /**
          * Protected by ttm_global_mutex.
          */
-       unsigned int use_count;
         struct list_head device_list;
  
         /**
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h

index e2f3b21cd72a28d16cf2324d308e13dc64c86f59..aa8bfd6f738c7fac838b31de87ec390cf0a81d3c 100644 (file)
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -448,6 +448,18 @@ static inline void eth_addr_dec(u8 *addr)
         u64_to_ether_addr(u, addr);
  }
  
+/**
+ * eth_addr_inc() - Increment the given MAC address.
+ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
+ */
+static inline void eth_addr_inc(u8 *addr)
+{
+       u64 u = ether_addr_to_u64(addr);
+
+       u++;
+       u64_to_ether_addr(u, addr);
+}
+
  /**
   * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
   * @dev: Pointer to a device structure
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h

index fa364de9db18ee8ff6845c908f7ddcb93707be2c..6925a18a5ca343b45649c505ad543c4a9b193014 100644 (file)
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -489,9 +489,11 @@ struct dmar_domain {
                                         /* Domain ids per IOMMU. Use u16 since
                                          * domain ids are 16 bit wide according
                                          * to VT-d spec, section 9.3 */
+       unsigned int    auxd_refcnt;    /* Refcount of auxiliary attaching */
  
         bool has_iotlb_device;
         struct list_head devices;       /* all devices' list */
+       struct list_head auxd;          /* link to device's auxiliary list */
         struct iova_domain iovad;       /* iova's that belong to this domain */
  
         struct dma_pte  *pgd;           /* virtual address */
@@ -510,6 +512,11 @@ struct dmar_domain {
                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
         u64             max_addr;       /* maximum mapped address */
  
+       int             default_pasid;  /*
+                                        * The default pasid used for non-SVM
+                                        * traffic on mediated devices.
+                                        */
+
         struct iommu_domain domain;     /* generic domain data structure for
                                            iommu core */
  };
@@ -559,6 +566,9 @@ struct device_domain_info {
         struct list_head link;  /* link to domain siblings */
         struct list_head global; /* link to global list */
         struct list_head table; /* link to pasid table */
+       struct list_head auxiliary_domains; /* auxiliary domains
+                                            * attached to this device
+                                            */
         u8 bus;                 /* PCI bus number */
         u8 devfn;               /* PCI devfn number */
         u16 pfsid;              /* SRIOV physical function source ID */
@@ -568,6 +578,7 @@ struct device_domain_info {
         u8 pri_enabled:1;
         u8 ats_supported:1;
         u8 ats_enabled:1;
+       u8 auxd_enabled:1;      /* Multiple domains per device */
         u8 ats_qdep;
         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
         struct intel_iommu *iommu; /* IOMMU used by this device */
@@ -650,6 +661,7 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
  int for_each_device_domain(int (*fn)(struct device_domain_info *info,
                                      void *data), void *data);
  void iommu_flush_write_buffer(struct intel_iommu *iommu);
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
  
  #ifdef CONFIG_INTEL_IOMMU_SVM
  int intel_svm_init(struct intel_iommu *iommu);
@@ -679,7 +691,6 @@ struct intel_svm {
         struct list_head list;
  };
  
-extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
  extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
  #endif
  
diff --git a/include/linux/iommu.h b/include/linux/iommu.h

index ffbbc7e39ceeba3ed827cc475b78683de27dc31a..a815cf6f6f47a8611b70b886d15da0f003ac5a01 100644 (file)
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,6 +48,7 @@ struct bus_type;
  struct device;
  struct iommu_domain;
  struct notifier_block;
+struct iommu_sva;
  
  /* iommu fault flags */
  #define IOMMU_FAULT_READ       0x0
@@ -55,6 +56,8 @@ struct notifier_block;
  
  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
                         struct device *, unsigned long, int, void *);
+typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
+                                      void *);
  
  struct iommu_domain_geometry {
         dma_addr_t aperture_start; /* First address that can be mapped    */
@@ -156,6 +159,33 @@ struct iommu_resv_region {
         enum iommu_resv_type    type;
  };
  
+/* Per device IOMMU features */
+enum iommu_dev_features {
+       IOMMU_DEV_FEAT_AUX,     /* Aux-domain feature */
+       IOMMU_DEV_FEAT_SVA,     /* Shared Virtual Addresses */
+};
+
+#define IOMMU_PASID_INVALID    (-1U)
+
+/**
+ * struct iommu_sva_ops - device driver callbacks for an SVA context
+ *
+ * @mm_exit: called when the mm is about to be torn down by exit_mmap. After
+ *           @mm_exit returns, the device must not issue any more transactions
+ *           with the PASID given as argument.
+ *
+ *           The @mm_exit handler is allowed to sleep. Be careful about the
+ *           locks taken in @mm_exit, because they might lead to deadlocks if
+ *           they are also held when dropping references to the mm. Consider the
+ *           following call chain:
+ *           mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A)
+ *           Using mmput_async() prevents this scenario.
+ *
+ */
+struct iommu_sva_ops {
+       iommu_mm_exit_handler_t mm_exit;
+};
+
  #ifdef CONFIG_IOMMU_API
  
  /**
@@ -186,6 +216,14 @@ struct iommu_resv_region {
   * @of_xlate: add OF master IDs to iommu grouping
   * @is_attach_deferred: Check if domain attach should be deferred from iommu
   *                      driver init to device driver init (default no)
+ * @dev_has/enable/disable_feat: per device entries to check/enable/disable
+ *                               iommu specific features.
+ * @dev_feat_enabled: check enabled feature
+ * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
+ * @aux_get_pasid: get the pasid given an aux-domain
+ * @sva_bind: Bind process address space to device
+ * @sva_unbind: Unbind process address space from device
+ * @sva_get_pasid: Get the PASID associated with an SVA handle
   * @pgsize_bitmap: bitmap of all possible supported page sizes
   */
  struct iommu_ops {
@@ -230,6 +268,22 @@ struct iommu_ops {
         int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
         bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
  
+       /* Per device IOMMU features */
+       bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f);
+       bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
+       int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
+       int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
+
+       /* Aux-domain specific attach/detach entries */
+       int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev);
+       void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
+       int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
+
+       struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
+                                     void *drvdata);
+       void (*sva_unbind)(struct iommu_sva *handle);
+       int (*sva_get_pasid)(struct iommu_sva *handle);
+
         unsigned long pgsize_bitmap;
  };
  
@@ -392,10 +446,22 @@ struct iommu_fwspec {
         const struct iommu_ops  *ops;
         struct fwnode_handle    *iommu_fwnode;
         void                    *iommu_priv;
+       u32                     flags;
         unsigned int            num_ids;
         u32                     ids[1];
  };
  
+/* ATS is supported */
+#define IOMMU_FWSPEC_PCI_RC_ATS                        (1 << 0)
+
+/**
+ * struct iommu_sva - handle to a device-mm bond
+ */
+struct iommu_sva {
+       struct device                   *dev;
+       const struct iommu_sva_ops      *ops;
+};
+
  int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
                       const struct iommu_ops *ops);
  void iommu_fwspec_free(struct device *dev);
@@ -416,6 +482,22 @@ static inline void dev_iommu_fwspec_set(struct device *dev,
  int iommu_probe_device(struct device *dev);
  void iommu_release_device(struct device *dev);
  
+bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f);
+int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
+int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
+bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
+int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev);
+void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
+int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
+
+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+                                       struct mm_struct *mm,
+                                       void *drvdata);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+int iommu_sva_set_ops(struct iommu_sva *handle,
+                     const struct iommu_sva_ops *ops);
+int iommu_sva_get_pasid(struct iommu_sva *handle);
+
  #else /* CONFIG_IOMMU_API */
  
  struct iommu_ops {};
@@ -700,6 +782,68 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
         return NULL;
  }
  
+static inline bool
+iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       return false;
+}
+
+static inline bool
+iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+       return false;
+}
+
+static inline int
+iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       return -ENODEV;
+}
+
+static inline int
+iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+       return -ENODEV;
+}
+
+static inline int
+iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+       return -ENODEV;
+}
+
+static inline void
+iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+}
+
+static inline int
+iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+       return -ENODEV;
+}
+
+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+       return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline int iommu_sva_set_ops(struct iommu_sva *handle,
+                                   const struct iommu_sva_ops *ops)
+{
+       return -EINVAL;
+}
+
+static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+       return IOMMU_PASID_INVALID;
+}
+
  #endif /* CONFIG_IOMMU_API */
  
  #ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/include/linux/iova.h b/include/linux/iova.h

index 0b93bf96693ef6f3ea8b6a30315a24fe548443c6..28a5128405f82dd04e03caff3188d90f31eed71c 100644 (file)
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -76,6 +76,14 @@ struct iova_domain {
         unsigned long   start_pfn;      /* Lower limit for this domain */
         unsigned long   dma_32bit_pfn;
         unsigned long   max32_alloc_size; /* Size of last failed allocation */
+       struct iova_fq __percpu *fq;    /* Flush Queue */
+
+       atomic64_t      fq_flush_start_cnt;     /* Number of TLB flushes that
+                                                  have been started */
+
+       atomic64_t      fq_flush_finish_cnt;    /* Number of TLB flushes that
+                                                  have been finished */
+
         struct iova     anchor;         /* rbtree lookup anchor */
         struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];  /* IOVA range caches */
  
@@ -85,14 +93,6 @@ struct iova_domain {
         iova_entry_dtor entry_dtor;     /* IOMMU driver specific destructor for
                                            iova entry */
  
-       struct iova_fq __percpu *fq;    /* Flush Queue */
-
-       atomic64_t      fq_flush_start_cnt;     /* Number of TLB flushes that
-                                                  have been started */
-
-       atomic64_t      fq_flush_finish_cnt;    /* Number of TLB flushes that
-                                                  have been finished */
-
         struct timer_list fq_timer;             /* Timer to regularily empty the
                                                    flush-queues */
         atomic_t fq_timer_on;                   /* 1 when timer is active, 0
diff --git a/include/linux/mdev.h b/include/linux/mdev.h

index d7aee90e5da5adf16bdcd26fea9f4f130aced6dc..df2ea39f47ee98dcfc4e3559dfb5206af689a18e 100644 (file)
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -15,6 +15,20 @@
  
  struct mdev_device;
  
+/*
+ * Called by the parent device driver to set the device which represents
+ * this mdev in iommu protection scope. By default, the iommu device is
+ * NULL, which indicates that vendor-defined isolation is used.
+ *
+ * @dev: the mediated device that iommu will isolate.
+ * @iommu_device: a pci device which represents the iommu for @dev.
+ *
+ * Return 0 for success, otherwise negative error value.
+ */
+int mdev_set_iommu_device(struct device *dev, struct device *iommu_device);
+
+struct device *mdev_get_iommu_device(struct device *dev);
+
  /**
   * struct mdev_parent_ops - Structure to be registered for each parent device to
   * register the device to mdev module.
diff --git a/include/linux/pci.h b/include/linux/pci.h

index 77448215ef5b7373ef6de3f7ff075e422f9302f2..61d7cd888bad345d921a48127f2b0175d10e2be9 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1521,21 +1521,6 @@ static inline void pcie_ecrc_get_policy(char *str) { }
  
  bool pci_ats_disabled(void);
  
-#ifdef CONFIG_PCI_ATS
-/* Address Translation Service */
-void pci_ats_init(struct pci_dev *dev);
-int pci_enable_ats(struct pci_dev *dev, int ps);
-void pci_disable_ats(struct pci_dev *dev);
-int pci_ats_queue_depth(struct pci_dev *dev);
-int pci_ats_page_aligned(struct pci_dev *dev);
-#else
-static inline void pci_ats_init(struct pci_dev *d) { }
-static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
-static inline void pci_disable_ats(struct pci_dev *d) { }
-static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
-static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
-#endif
-
  #ifdef CONFIG_PCIE_PTM
  int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
  #else
@@ -1728,8 +1713,24 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d,
  static inline const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
                                                          struct pci_dev *dev)
  { return NULL; }
+static inline bool pci_ats_disabled(void) { return true; }
  #endif /* CONFIG_PCI */
  
+#ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
+int pci_enable_ats(struct pci_dev *dev, int ps);
+void pci_disable_ats(struct pci_dev *dev);
+int pci_ats_queue_depth(struct pci_dev *dev);
+int pci_ats_page_aligned(struct pci_dev *dev);
+#else
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
+static inline void pci_disable_ats(struct pci_dev *d) { }
+static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
+static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
+#endif
+
  /* Include architecture-dependent settings and functions */
  
  #include <asm/pci.h>
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h

index abb2dac3da9b9cf69b477c2d3726e019a0352b7a..5c626fdc10dbd27d6f87f290cf5dbd50d0244528 100644 (file)
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -176,6 +176,7 @@ void free_pipe_info(struct pipe_inode_info *);
  bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
  int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
  int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
  void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
  void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
  
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h

index 5ee7b30b4917244a0f74d28e93c05e1c3fc59a04..d2bc733a2ef1edf2ee7159457b1f33a676e74a98 100644 (file)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                  gfp_t flags);
  void nf_ct_tmpl_free(struct nf_conn *tmpl);
  
+u32 nf_ct_get_id(const struct nf_conn *ct);
+
  static inline void
  nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
  {
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h

index 778087591983dd2a274ef9aa75d9b09caf78e8af..a49edfdf47e83ece9945d8978dc3e260a5d9d7e0 100644 (file)
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
  bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
                                       const struct nf_conntrack_tuple *orig);
  
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+                           unsigned int dataoff,
+                           const struct nf_hook_state *state,
+                           u8 l4proto,
+                           union nf_inet_addr *outer_daddr);
+
  int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
                               struct sk_buff *skb,
                               unsigned int dataoff,
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h

index 87b3198f4b5d7aa02c330f59178d8cc74dba5f6c..f4d4010b7e3e54f2bfc1d64a708c8454b5899b68 100644 (file)
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags {
         MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
         MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD  = 1 << 1,
         MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
+       MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3,
  };
  
  enum mlx5_ib_tunnel_offloads {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index a4d9e14bf13891482bb717d03946699fa04958b1..35f3ea3750844c2a8789e7b6312bf8432caa1e5a 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
         if (p->last_task_numa_placement) {
                 delta = runtime - p->last_sum_exec_runtime;
                 *period = now - p->last_task_numa_placement;
+
+               /* Avoid time going backwards, prevent potential divide error: */
+               if (unlikely((s64)*period < 0))
+                       *period = 0;
         } else {
                 delta = p->se.avg.load_sum;
                 *period = LOAD_AVG_MAX;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c

index 41b6f96e5366231d72454e6c33015188066751ae..4ee8d8aa3d0fdcfe6dac6ea91c4ee96cc9330835 100644 (file)
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
  
         preempt_disable_notrace();
         time = rb_time_stamp(buffer);
-       preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
  
         return time;
  }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 6c24755655c752a3bf9f4bb914ddb251d9ab0d2e..ca1ee656d6d852952670b0e63fd19f7499c679a6 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
          * not modified.
          */
         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
-       if (!pid_list)
+       if (!pid_list) {
+               trace_parser_put(&parser);
                 return -ENOMEM;
+       }
  
         pid_list->pid_max = READ_ONCE(pid_max);
  
@@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
  
         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
         if (!pid_list->pids) {
+               trace_parser_put(&parser);
                 kfree(pid_list);
                 return -ENOMEM;
         }
@@ -7025,19 +7028,23 @@ struct buffer_ref {
         struct ring_buffer      *buffer;
         void                    *page;
         int                     cpu;
-       int                     ref;
+       refcount_t              refcount;
  };
  
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+       if (!refcount_dec_and_test(&ref->refcount))
+               return;
+       ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+       kfree(ref);
+}
+
  static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
                                     struct pipe_buffer *buf)
  {
         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
  
-       if (--ref->ref)
-               return;
-
-       ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
-       kfree(ref);
+       buffer_ref_release(ref);
         buf->private = 0;
  }
  
@@ -7046,10 +7053,10 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
  {
         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
  
-       if (ref->ref > INT_MAX/2)
+       if (refcount_read(&ref->refcount) > INT_MAX/2)
                 return false;
  
-       ref->ref++;
+       refcount_inc(&ref->refcount);
         return true;
  }
  
@@ -7057,7 +7064,7 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
  static const struct pipe_buf_operations buffer_pipe_buf_ops = {
         .confirm                = generic_pipe_buf_confirm,
         .release                = buffer_pipe_buf_release,
-       .steal                  = generic_pipe_buf_steal,
+       .steal                  = generic_pipe_buf_nosteal,
         .get                    = buffer_pipe_buf_get,
  };
  
@@ -7070,11 +7077,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
         struct buffer_ref *ref =
                 (struct buffer_ref *)spd->partial[i].private;
  
-       if (--ref->ref)
-               return;
-
-       ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
-       kfree(ref);
+       buffer_ref_release(ref);
         spd->partial[i].private = 0;
  }
  
@@ -7129,7 +7132,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                         break;
                 }
  
-               ref->ref = 1;
+               refcount_set(&ref->refcount, 1);
                 ref->buffer = iter->trace_buffer->buffer;
                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
                 if (IS_ERR(ref->page)) {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 00dbcdbc9a0d3fcac8b5c400b5f1a2eafd49c404..d5a4a4036d2f83db9df1c5e2183380f900e62ca3 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1929,6 +1929,7 @@ config TEST_KMOD
         depends on m
         depends on BLOCK && (64BIT || LBDAF)      # for XFS, BTRFS
         depends on NETDEVICES && NET_CORE && INET # for TUN
+       depends on BLOCK
         select TEST_LKM
         select XFS_FS
         select TUN
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c

index 83cdcaa82bf6cbc9a78640795bc2bbd869ba92bf..f832b095afba011293b6a3da18a2170b55501325 100644 (file)
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -383,14 +383,14 @@ static void shuffle_array(int *arr, int n)
  static int test_func(void *private)
  {
         struct test_driver *t = private;
-       cpumask_t newmask = CPU_MASK_NONE;
         int random_array[ARRAY_SIZE(test_case_array)];
         int index, i, j, ret;
         ktime_t kt;
         u64 delta;
  
-       cpumask_set_cpu(t->cpu, &newmask);
-       set_cpus_allowed_ptr(current, &newmask);
+       ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu));
+       if (ret < 0)
+               pr_err("Failed to set affinity to %d CPU\n", t->cpu);
  
         for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
                 random_array[i] = i;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 0082d699be94b4c28e1820351916568e68a684bb..b236069ff0d823ce92a84222494d42bdfa97c20c 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
          */
         mem = find_memory_block(__pfn_to_section(pfn));
         nid = mem->nid;
+       put_device(&mem->dev);
  
         /* associate pfn range with the zone */
         zone = move_pfn_range(online_type, nid, pfn, nr_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index c6ce20aaf80bbb8e1b306eed27e53c916296eda2..c02cff1ed56eb231fef3a5ffacfd9aa64d8dda54 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = {
  
  int min_free_kbytes = 1024;
  int user_min_free_kbytes = -1;
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
+ * are not on separate NUMA nodes. Functionally this works but with
+ * watermark_boost_factor, it can reclaim prematurely as the ranges can be
+ * quite small. By default, do not boost watermarks on discontigmem as in
+ * many cases very high-order allocations like THP are likely to be
+ * unsupported and the premature reclaim offsets the advantage of long-term
+ * fragmentation avoidance.
+ */
+int watermark_boost_factor __read_mostly;
+#else
  int watermark_boost_factor __read_mostly = 15000;
+#endif
  int watermark_scale_factor = 10;
  
  static unsigned long nr_kernel_pages __initdata;
@@ -3419,8 +3432,11 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
                 alloc_flags |= ALLOC_KSWAPD;
  
  #ifdef CONFIG_ZONE_DMA32
+       if (!zone)
+               return alloc_flags;
+
         if (zone_idx(zone) != ZONE_NORMAL)
-               goto out;
+               return alloc_flags;
  
         /*
          * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
@@ -3429,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
          */
         BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
         if (nr_online_nodes > 1 && !populated_zone(--zone))
-               goto out;
+               return alloc_flags;
  
-out:
+       alloc_flags |= ALLOC_NOFRAGMENT;
  #endif /* CONFIG_ZONE_DMA32 */
         return alloc_flags;
  }
@@ -3773,11 +3789,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         memalloc_noreclaim_restore(noreclaim_flag);
         psi_memstall_leave(&pflags);
  
-       if (*compact_result <= COMPACT_INACTIVE) {
-               WARN_ON_ONCE(page);
-               return NULL;
-       }
-
         /*
          * At least in one zone compaction wasn't deferred or skipped, so let's
          * count a compaction stall
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c

index eb15891f8b9ff18842b7d43e96c75733ef7aaa99..3cad01ac64e4a2a5ebcafa394d04cc4415c7ddab 100644 (file)
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
                 if (match_kern)
                         match_kern->match_size = ret;
  
-               if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+               /* rule should have no remaining data after target */
+               if (type == EBT_COMPAT_TARGET && size_left)
                         return -EINVAL;
  
                 match32 = (struct compat_ebt_entry_mwt *) buf;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index 88ce038dd495dec1d34867eb40091c61141e9acb..6fdf1c195d8e3a0e32af0359794f798457a21cb3 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1183,25 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
         return dst;
  }
  
-static void ipv4_link_failure(struct sk_buff *skb)
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
  {
         struct ip_options opt;
-       struct rtable *rt;
         int res;
  
         /* Recompile ip options since IPCB may not be valid anymore.
+        * Also check we have a reasonable ipv4 header.
          */
-       memset(&opt, 0, sizeof(opt));
-       opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+       if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+           ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+               return;
  
-       rcu_read_lock();
-       res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
-       rcu_read_unlock();
+       memset(&opt, 0, sizeof(opt));
+       if (ip_hdr(skb)->ihl > 5) {
+               if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+                       return;
+               opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
  
-       if (res)
-               return;
+               rcu_read_lock();
+               res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+               rcu_read_unlock();
  
+               if (res)
+                       return;
+       }
         __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
+static void ipv4_link_failure(struct sk_buff *skb)
+{
+       struct rtable *rt;
+
+       ipv4_send_dest_unreach(skb);
  
         rt = skb_rtable(skb);
         if (rt)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c

index ba0fc4b1846561559ac995a444992f98a3187894..eeb4041fa5f905fb0f7c91ea6d74851ae97259f8 100644 (file)
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 };
  static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
  static int comp_sack_nr_max = 255;
  static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
  
  /* obsolete */
  static int sysctl_tcp_low_latency __read_mostly;
@@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = {
                 .data           = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one_day_secs
         },
         {
                 .procname       = "tcp_autocorking",
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c

index d43d076c98f5d30b058a49b5e477e76e9d97aec8..1766325423b5dad4d8c95c85605dc571248ba6d1 100644 (file)
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
         }
  
         if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
-               NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+               NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
                 return -EINVAL;
         }
  
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c

index dc07fcc7938ec4da2b95e43530fffc0f5aefe82b..802db01e30754cfa66861acc555bf5b02d158df1 100644 (file)
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -11,6 +11,7 @@
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
  #include <linux/skbuff.h>
  
  #include <net/ncsi.h>
@@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
         ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
         memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
         /* Increase mac address by 1 for BMC's address */
-       saddr.sa_data[ETH_ALEN - 1]++;
+       eth_addr_inc((u8 *)saddr.sa_data);
+       if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+               return -ENXIO;
+
         ret = ops->ndo_set_mac_address(ndev, &saddr);
         if (ret < 0)
                 netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index 43bbaa32b1d65cbbec89d439d2ca9bd6bfe77cf3..14457551bcb4edca3047320028be0a331d185e14 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
         if (!cp) {
                 int v;
  
-               if (!sysctl_schedule_icmp(ipvs))
+               if (ipip || !sysctl_schedule_icmp(ipvs))
                         return NF_ACCEPT;
  
                 if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 82bfbeef46afa53fde8d428533999b382713f053..2a714527cde17aee1152f33bc7f9b041cd5eb087 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -25,6 +25,7 @@
  #include <linux/slab.h>
  #include <linux/random.h>
  #include <linux/jhash.h>
+#include <linux/siphash.h>
  #include <linux/err.h>
  #include <linux/percpu.h>
  #include <linux/moduleparam.h>
@@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
  }
  EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
  
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * This intentionally doesn't re-use any of the seeds used for hash
+ * table location, because we assume the id gets exposed to userspace.
+ *
+ * Following nf_conn items do not change throughout lifetime
+ * of the nf_conn after it has been committed to main hash table:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->ext address
+ * 3. nf_conn->master address (normally NULL)
+ * 4. tuple
+ * 5. the associated net namespace
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+       static __read_mostly siphash_key_t ct_id_seed;
+       unsigned long a, b, c, d;
+
+       net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+       a = (unsigned long)ct;
+       b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+       c = (unsigned long)ct->ext;
+       d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
+                                  &ct_id_seed);
+#ifdef CONFIG_64BIT
+       return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+       return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
  static void
  clean_from_lists(struct nf_conn *ct)
  {
@@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
  
         /* set conntrack timestamp, if enabled. */
         tstamp = nf_conn_tstamp_find(ct);
-       if (tstamp) {
-               if (skb->tstamp == 0)
-                       __net_timestamp(skb);
+       if (tstamp)
+               tstamp->start = ktime_get_real_ns();
  
-               tstamp->start = ktime_to_ns(skb->tstamp);
-       }
         /* Since the lookup is lockless, hash insertion must be done after
          * starting the timer and setting the CONFIRMED bit. The RCU barriers
          * guarantee that no other CPU can find the conntrack before the above
@@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net,
         /* save hash for reusing when confirming */
         *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
         ct->status = 0;
+       ct->timeout = 0;
         write_pnet(&ct->ct_net, net);
         memset(&ct->__nfct_init_offset[0], 0,
                offsetof(struct nf_conn, proto) -
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 66c596d287a5dc44cea26680023e8c12798a5261..d7f61b0547c65c5e85a2080481906d2918a1eddf 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
  #include <linux/slab.h>
+#include <linux/siphash.h>
  
  #include <linux/netfilter.h>
  #include <net/netlink.h>
@@ -485,7 +486,9 @@ nla_put_failure:
  
  static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
  {
-       if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+       __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+       if (nla_put_be32(skb, CTA_ID, id))
                 goto nla_put_failure;
         return 0;
  
@@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
         }
  
         if (cda[CTA_ID]) {
-               u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
-               if (id != (u32)(unsigned long)ct) {
+               __be32 id = nla_get_be32(cda[CTA_ID]);
+
+               if (id != (__force __be32)nf_ct_get_id(ct)) {
                         nf_ct_put(ct);
                         return -ENOENT;
                 }
@@ -2692,6 +2696,25 @@ nla_put_failure:
  
  static const union nf_inet_addr any_addr;
  
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+       static __read_mostly siphash_key_t exp_id_seed;
+       unsigned long a, b, c, d;
+
+       net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+       a = (unsigned long)exp;
+       b = (unsigned long)exp->helper;
+       c = (unsigned long)exp->master;
+       d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+       return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+       return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
+
  static int
  ctnetlink_exp_dump_expect(struct sk_buff *skb,
                           const struct nf_conntrack_expect *exp)
@@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
         }
  #endif
         if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
-           nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+           nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
             nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
             nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
                 goto nla_put_failure;
@@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
  
         if (cda[CTA_EXPECT_ID]) {
                 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
-               if (ntohl(id) != (u32)(unsigned long)exp) {
+
+               if (id != nf_expect_get_id(exp)) {
                         nf_ct_expect_put(exp);
                         return -ENOENT;
                 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c

index b9403a266a2e20c1651585a5c76beb8a65365609..37bb530d848fa2fa1d9f95e4f8174150260dbd38 100644 (file)
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
         struct va_format vaf;
         va_list args;
  
-       if (net->ct.sysctl_log_invalid != protonum ||
+       if (net->ct.sysctl_log_invalid != protonum &&
             net->ct.sysctl_log_invalid != IPPROTO_RAW)
                 return;
  
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c

index 7df477996b1642412faf22d4f08aa518d75f2649..9becac9535873cf7459579a70c3e3d60c055601b 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct,
         return NF_ACCEPT;
  }
  
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
-                  const struct nf_hook_state *state)
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+                           unsigned int dataoff,
+                           const struct nf_hook_state *state,
+                           u8 l4proto, union nf_inet_addr *outer_daddr)
  {
         struct nf_conntrack_tuple innertuple, origtuple;
         const struct nf_conntrack_tuple_hash *h;
         const struct nf_conntrack_zone *zone;
         enum ip_conntrack_info ctinfo;
         struct nf_conntrack_zone tmp;
+       union nf_inet_addr *ct_daddr;
+       enum ip_conntrack_dir dir;
+       struct nf_conn *ct;
  
         WARN_ON(skb_nfct(skb));
         zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
  
         /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb) + ip_hdrlen(skb)
-                                                      + sizeof(struct icmphdr),
-                              PF_INET, state->net, &origtuple)) {
-               pr_debug("icmp_error_message: failed to get tuple\n");
+       if (!nf_ct_get_tuplepr(skb, dataoff,
+                              state->pf, state->net, &origtuple))
                 return -NF_ACCEPT;
-       }
  
         /* Ordinarily, we'd expect the inverted tupleproto, but it's
            been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
-               pr_debug("icmp_error_message: no match\n");
+       if (!nf_ct_invert_tuple(&innertuple, &origtuple))
                 return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
  
         h = nf_conntrack_find_get(state->net, zone, &innertuple);
-       if (!h) {
-               pr_debug("icmp_error_message: no match\n");
+       if (!h)
+               return -NF_ACCEPT;
+
+       /* Consider: A -> T (=This machine) -> B
+        *   Conntrack entry will look like this:
+        *      Original:  A->B
+        *      Reply:     B->T (SNAT case) OR A
+        *
+        * When this function runs, we got a packet that looks like this:
+        * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+        *
+        * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+        * so we should expect that destination of the found connection
+        * matches outer header destination address.
+        *
+        * In above example, we can consider these two cases:
+        *  1. Error coming in reply direction from B or M (middle box) to
+        *     T (SNAT case) or A.
+        *     Inner saddr will be B, dst will be T or A.
+        *     The found conntrack will be reply tuple (B->T/A).
+        *  2. Error coming in original direction from A or M to B.
+        *     Inner saddr will be A, inner daddr will be B.
+        *     The found conntrack will be original tuple (A->B).
+        *
+        * In both cases, conntrack[dir].dst == inner.dst.
+        *
+        * A bogus packet could look like this:
+        *   Inner: B->T
+        *   Outer: B->X (other machine reachable by T).
+        *
+        * In this case, lookup yields connection A->B and will
+        * set packet from B->X as *RELATED*, even though no connection
+        * from X was ever seen.
+        */
+       ct = nf_ct_tuplehash_to_ctrack(h);
+       dir = NF_CT_DIRECTION(h);
+       ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+       if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+               if (state->pf == AF_INET) {
+                       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                                              l4proto,
+                                              "outer daddr %pI4 != inner %pI4",
+                                              &outer_daddr->ip, &ct_daddr->ip);
+               } else if (state->pf == AF_INET6) {
+                       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                                              l4proto,
+                                              "outer daddr %pI6 != inner %pI6",
+                                              &outer_daddr->ip6, &ct_daddr->ip6);
+               }
+               nf_ct_put(ct);
                 return -NF_ACCEPT;
         }
  
-       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+       ctinfo = IP_CT_RELATED;
+       if (dir == IP_CT_DIR_REPLY)
                 ctinfo += IP_CT_IS_REPLY;
  
         /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+       nf_ct_set(skb, ct, ctinfo);
         return NF_ACCEPT;
  }
  
@@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
                               struct sk_buff *skb, unsigned int dataoff,
                               const struct nf_hook_state *state)
  {
+       union nf_inet_addr outer_daddr;
         const struct icmphdr *icmph;
         struct icmphdr _ih;
  
         /* Not enough header? */
-       icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+       icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
         if (icmph == NULL) {
                 icmp_error_log(skb, state, "short packet");
                 return -NF_ACCEPT;
@@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
             icmph->type != ICMP_REDIRECT)
                 return NF_ACCEPT;
  
-       return icmp_error_message(tmpl, skb, state);
+       memset(&outer_daddr, 0, sizeof(outer_daddr));
+       outer_daddr.ip = ip_hdr(skb)->daddr;
+
+       dataoff += sizeof(*icmph);
+       return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+                                      IPPROTO_ICMP, &outer_daddr);
  }
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c

index bec4a321165858b828abd0e4449c85afcdf6aeb0..c63ee361285551c2c154ae8fa7dbbc0c56ea7ee2 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
         return NF_ACCEPT;
  }
  
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
-                    struct sk_buff *skb,
-                    unsigned int icmp6off)
-{
-       struct nf_conntrack_tuple intuple, origtuple;
-       const struct nf_conntrack_tuple_hash *h;
-       enum ip_conntrack_info ctinfo;
-       struct nf_conntrack_zone tmp;
-
-       WARN_ON(skb_nfct(skb));
-
-       /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb)
-                               + sizeof(struct ipv6hdr)
-                               + sizeof(struct icmp6hdr),
-                              PF_INET6, net, &origtuple)) {
-               pr_debug("icmpv6_error: Can't get tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       /* Ordinarily, we'd expect the inverted tupleproto, but it's
-          been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
-               pr_debug("icmpv6_error: Can't invert tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
-
-       h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
-                                 &intuple);
-       if (!h) {
-               pr_debug("icmpv6_error: no match\n");
-               return -NF_ACCEPT;
-       } else {
-               if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-                       ctinfo += IP_CT_IS_REPLY;
-       }
-
-       /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-       return NF_ACCEPT;
-}
  
  static void icmpv6_error_log(const struct sk_buff *skb,
                              const struct nf_hook_state *state,
@@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
                               unsigned int dataoff,
                               const struct nf_hook_state *state)
  {
+       union nf_inet_addr outer_daddr;
         const struct icmp6hdr *icmp6h;
         struct icmp6hdr _ih;
         int type;
@@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
         if (icmp6h->icmp6_type >= 128)
                 return NF_ACCEPT;
  
-       return icmpv6_error_message(state->net, tmpl, skb, dataoff);
+       memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+              sizeof(outer_daddr.ip6));
+       dataoff += sizeof(*icmp6h);
+       return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+                                      IPPROTO_ICMPV6, &outer_daddr);
  }
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c

index af7dc65377584d26f4b5d98ef55dd06f93d8107d..000952719adfdf49bf35a53dd800c6cecf45c14f 100644 (file)
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
         case IPPROTO_ICMPV6:
                 /* id is same for either direction... */
                 keyptr = &tuple->src.u.icmp.id;
-               min = range->min_proto.icmp.id;
-               range_size = ntohs(range->max_proto.icmp.id) -
-                            ntohs(range->min_proto.icmp.id) + 1;
+               if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+                       min = 0;
+                       range_size = 65536;
+               } else {
+                       min = ntohs(range->min_proto.icmp.id);
+                       range_size = ntohs(range->max_proto.icmp.id) -
+                                    ntohs(range->min_proto.icmp.id) + 1;
+               }
                 goto find_free_id;
  #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
         case IPPROTO_GRE:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index ef7772e976cc802afc64ea25d28f1fbecde773be..1606eaa5ae0da368f4a692264456e18dfe27ec8d 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net,
                 if (IS_ERR(type))
                         return PTR_ERR(type);
         }
-       if (!(type->hook_mask & (1 << hook->num)))
+       if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
                 return -EOPNOTSUPP;
  
         if (type->type == NFT_CHAIN_T_NAT &&
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c

index b1f9c5303f026a14c799d03b579b9e7b577b6dc8..0b3347570265c4edc1b176f450bf920a0b81e5d4 100644 (file)
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log,
                         goto nla_put_failure;
         }
  
-       if (skb->tstamp) {
+       if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
                 struct nfulnl_msg_packet_timestamp ts;
                 struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
                 ts.sec = cpu_to_be64(kts.tv_sec);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 0dcc3592d053ff41f7d8e25119d1a2fd7a90c74a..e057b2961d313cd426f2f2d37ed7e1a40c101174 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
         if (nfqnl_put_bridge(entry, skb) < 0)
                 goto nla_put_failure;
  
-       if (entskb->tstamp) {
+       if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
                 struct nfqnl_msg_packet_timestamp ts;
                 struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
  
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c

index c13bcd0ab491304da6ddcaa3f59aefa17ab5eacc..8dbb4d48f2ed5995dedaa8eb4f4b18a0ba91acb2 100644 (file)
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
         s64 stamp;
  
         /*
-        * We cannot use get_seconds() instead of __net_timestamp() here.
+        * We need real time here, but we can neither use skb->tstamp
+        * nor __net_timestamp().
+        *
+        * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+        * use different clock types (real vs monotonic).
+        *
          * Suppose you have two rules:
-        *      1. match before 13:00
-        *      2. match after 13:00
+        *      1. match before 13:00
+        *      2. match after 13:00
+        *
          * If you match against processing time (get_seconds) it
          * may happen that the same packet matches both rules if
-        * it arrived at the right moment before 13:00.
+        * it arrived at the right moment before 13:00, so it would be
+        * better to check skb->tstamp and set it via __net_timestamp()
+        * if needed.  This however breaks outgoing packets tx timestamp,
+        * and causes them to get delayed forever by fq packet scheduler.
          */
-       if (skb->tstamp == 0)
-               __net_timestamp((struct sk_buff *)skb);
-
-       stamp = ktime_to_ns(skb->tstamp);
-       stamp = div_s64(stamp, NSEC_PER_SEC);
+       stamp = get_seconds();
  
         if (info->flags & XT_TIME_LOCAL_TZ)
                 /* Adjust for local timezone */
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c

index 31cf37da4510c3b53377ea40d4880638a89775e5..93c0437e6a5fd284b3e6dd1283e31839a305be7b 100644 (file)
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
         else
                 pool = rds_ibdev->mr_1m_pool;
  
+       if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+               queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+       /* Switch pools if one of the pools is reaching its upper limit */
+       if (atomic_read(&pool->dirty_count) >=  pool->max_items * 9 / 10) {
+               if (pool->pool_type == RDS_IB_MR_8K_POOL)
+                       pool = rds_ibdev->mr_1m_pool;
+               else
+                       pool = rds_ibdev->mr_8k_pool;
+       }
+
         ibmr = rds_ib_try_reuse_ibmr(pool);
         if (ibmr)
                 return ibmr;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c

index 63c8d107adcfbec096b3dbcead8de98ec6327bc1..d664e9ade74dea264c06e0ac03997ebdc0254235 100644 (file)
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
         struct rds_ib_mr *ibmr = NULL;
         int iter = 0;
  
-       if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
-               queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
         while (1) {
                 ibmr = rds_ib_reuse_mr(pool);
                 if (ibmr)
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c

index 7af4f99c4a9321bb3eef8d77f7e2dedf981e19f3..094a6621f8e803ae41101899ef02f080d91ac0f3 100644 (file)
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -16,6 +16,7 @@
  #include <linux/init.h>
  
  static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
  static struct timer_list loopback_timer;
  
  static void rose_set_loopback_timer(void);
@@ -35,29 +36,27 @@ static int rose_loopback_running(void)
  
  int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
  {
-       struct sk_buff *skbn;
+       struct sk_buff *skbn = NULL;
  
-       skbn = skb_clone(skb, GFP_ATOMIC);
+       if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+               skbn = skb_clone(skb, GFP_ATOMIC);
  
-       kfree_skb(skb);
-
-       if (skbn != NULL) {
+       if (skbn) {
+               consume_skb(skb);
                 skb_queue_tail(&loopback_queue, skbn);
  
                 if (!rose_loopback_running())
                         rose_set_loopback_timer();
+       } else {
+               kfree_skb(skb);
         }
  
         return 1;
  }
  
-
  static void rose_set_loopback_timer(void)
  {
-       del_timer(&loopback_timer);
-
-       loopback_timer.expires  = jiffies + 10;
-       add_timer(&loopback_timer);
+       mod_timer(&loopback_timer, jiffies + 10);
  }
  
  static void rose_loopback_timer(struct timer_list *unused)
@@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused)
         struct sock *sk;
         unsigned short frametype;
         unsigned int lci_i, lci_o;
+       int count;
  
-       while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+       for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+               skb = skb_dequeue(&loopback_queue);
+               if (!skb)
+                       return;
                 if (skb->len < ROSE_MIN_LEN) {
                         kfree_skb(skb);
                         continue;
@@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused)
                         kfree_skb(skb);
                 }
         }
+       if (!skb_queue_empty(&loopback_queue))
+               mod_timer(&loopback_timer, jiffies + 1);
  }
  
  void __exit rose_loopback_clear(void)
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c

index 4c6f9d0a00e79e1874f6ff6ceb6632a42c5072ff..c2c35cf4e3089038bcc73663f0a0d3ccf24b9743 100644 (file)
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1161,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
   * handle data received on the local endpoint
   * - may be called in interrupt context
   *
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP
+ * socket from being cleared in the middle of processing this function.
   *
   * Called with the RCU read lock held from the IP layer via UDP.
   */
  int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
  {
+       struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
         struct rxrpc_connection *conn;
         struct rxrpc_channel *chan;
         struct rxrpc_call *call = NULL;
         struct rxrpc_skb_priv *sp;
-       struct rxrpc_local *local = udp_sk->sk_user_data;
         struct rxrpc_peer *peer = NULL;
         struct rxrpc_sock *rx = NULL;
         unsigned int channel;
@@ -1181,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
  
         _enter("%p", udp_sk);
  
+       if (unlikely(!local)) {
+               kfree_skb(skb);
+               return 0;
+       }
         if (skb->tstamp == 0)
                 skb->tstamp = ktime_get_real();
  
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c

index 15cf42d5b53a56d8d19cabdc8c2b55156d73d28a..01959db51445ca00e6044d8a849e698c4ab17a33 100644 (file)
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -304,7 +304,8 @@ nomem:
         ret = -ENOMEM;
  sock_error:
         mutex_unlock(&rxnet->local_mutex);
-       kfree(local);
+       if (local)
+               call_rcu(&local->rcu, rxrpc_local_rcu);
         _leave(" = %d", ret);
         return ERR_PTR(ret);
  
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c

index 12bb23b8e0c50c80abd51c69a5c5a2ea6433a723..261131dfa1f1ba3900d85088a6cfde659bbe231a 100644 (file)
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
         h->last_refresh = now;
  }
  
+static inline int cache_is_valid(struct cache_head *h);
  static void cache_fresh_locked(struct cache_head *head, time_t expiry,
                                 struct cache_detail *detail);
  static void cache_fresh_unlocked(struct cache_head *head,
@@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
                         if (cache_is_expired(detail, tmp)) {
                                 hlist_del_init_rcu(&tmp->cache_list);
                                 detail->entries --;
+                               if (cache_is_valid(tmp) == -EAGAIN)
+                                       set_bit(CACHE_NEGATIVE, &tmp->flags);
                                 cache_fresh_locked(tmp, 0, detail);
                                 freeme = tmp;
                                 break;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c

index 9f3bdbc1e59348cf049c0cca9959ee5f413e41cb..cc0256939eb63e2afef7dddb33f57583d1387cc3 100644 (file)
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -904,7 +904,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
         goto release_netdev;
  
  free_sw_resources:
+       up_read(&device_offload_lock);
         tls_sw_free_resources_rx(sk);
+       down_read(&device_offload_lock);
  release_ctx:
         ctx->priv_ctx_rx = NULL;
  release_netdev:
@@ -939,8 +941,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
         }
  out:
         up_read(&device_offload_lock);
-       kfree(tls_ctx->rx.rec_seq);
-       kfree(tls_ctx->rx.iv);
         tls_sw_release_resources_rx(sk);
  }
  
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c

index 54c3a758f2a7d9bf32f9ab13c7dbe1389e54ecc3..a3ebd4b02714c9ea46c2622e3b8742d637a2541c 100644 (file)
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -194,6 +194,9 @@ static void update_chksum(struct sk_buff *skb, int headln)
  
  static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
  {
+       struct sock *sk = skb->sk;
+       int delta;
+
         skb_copy_header(nskb, skb);
  
         skb_put(nskb, skb->len);
@@ -201,11 +204,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
         update_chksum(nskb, headln);
  
         nskb->destructor = skb->destructor;
-       nskb->sk = skb->sk;
+       nskb->sk = sk;
         skb->destructor = NULL;
         skb->sk = NULL;
-       refcount_add(nskb->truesize - skb->truesize,
-                    &nskb->sk->sk_wmem_alloc);
+
+       delta = nskb->truesize - skb->truesize;
+       if (likely(delta < 0))
+               WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+       else if (delta)
+               refcount_add(delta, &sk->sk_wmem_alloc);
  }
  
  /* This function may be called after the user socket is already
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c

index 9547cea0ce3b078b4ca79fec7b24232b046b74c6..478603f43964d557146ae141ba45d4b0cae538fd 100644 (file)
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -293,11 +293,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
  #endif
         }
  
-       if (ctx->rx_conf == TLS_SW) {
-               kfree(ctx->rx.rec_seq);
-               kfree(ctx->rx.iv);
+       if (ctx->rx_conf == TLS_SW)
                 tls_sw_free_resources_rx(sk);
-       }
  
  #ifdef CONFIG_TLS_DEVICE
         if (ctx->rx_conf == TLS_HW)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c

index b50ced862f6f9a9f9d959950dcee4adbdd59cb6e..29d6af43dd249dd72c175ac7401a308ef6193c2c 100644 (file)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2078,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk)
         struct tls_context *tls_ctx = tls_get_ctx(sk);
         struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
  
+       kfree(tls_ctx->rx.rec_seq);
+       kfree(tls_ctx->rx.iv);
+
         if (ctx->aead_recv) {
                 kfree_skb(ctx->recv_pkt);
                 ctx->recv_pkt = NULL;
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests

index 2dc95fda7ef76e7b723fb91d9a68f44bcb6a2897..ea5938ec009a5eb9e28cb1778e081a568e66fd65 100755 (executable)
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then
         exit 0
  fi
  
+ret=0
  echo "--------------------"
  echo "running psock_fanout test"
  echo "--------------------"
  ./in_netns.sh ./psock_fanout
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
@@ -22,6 +24,7 @@ echo "--------------------"
  ./in_netns.sh ./psock_tpacket
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
@@ -32,6 +35,8 @@ echo "--------------------"
  ./in_netns.sh ./txring_overwrite
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
+exit $ret
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests

index b093f39c298c3f4d7ee43eed7c58772860f55da4..14e41faf2c5740633f9dd30e500543647b736734 100755 (executable)
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -7,7 +7,7 @@ echo "--------------------"
  ./socket
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       exit 1
  else
         echo "[PASS]"
  fi
-
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile

index c9ff2b47bd1ca3a2f70ee0683cb2b79b170c74f5..a37cb1192c6a6bc6080c829b63768e6ba52f8dd1 100644 (file)
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  # Makefile for netfilter selftests
  
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
  
  include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh

new file mode 100755 (executable)

index 0000000..b48e183
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig errors are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+cleanup() {
+       for i in 1 2;do ip netns del nsclient$i;done
+       for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+    echo -n 192.168.$1.2
+}
+
+ipv6 () {
+    echo -n dead:$1::2
+}
+
+check_counter()
+{
+       ns=$1
+       name=$2
+       expect=$3
+       local lret=0
+
+       cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+       if [ $? -ne 0 ]; then
+               echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+               ip netns exec $ns nft list counter inet filter "$name" 1>&2
+               lret=1
+       fi
+
+       return $lret
+}
+
+check_unknown()
+{
+       expect="packets 0 bytes 0"
+       for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+               check_counter $n "unknown" "$expect"
+               if [ $? -ne 0 ] ;then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+  ip netns add $n
+  ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+    ip -net nsclient$i link set $DEV up
+    ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+    ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+       ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+       ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter related { }
+       chain forward {
+               type filter hook forward priority 0; policy accept;
+               meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+               meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+               meta l4proto { icmp, icmpv6 } ct state new,established accept
+               counter name "unknown" drop
+       }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter related { }
+       chain input {
+               type filter hook input priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+               counter name "unknown" drop
+       }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter new { }
+       counter established { }
+
+       chain input {
+               type filter hook input priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+               meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+               counter name "unknown" drop
+       }
+       chain output {
+               type filter hook output priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+               meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+               counter name "unknown" drop
+       }
+}
+EOF
+
+
+# make sure NAT core rewrites address of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               ip protocol icmp oifname "veth0" counter masquerade
+       }
+}
+table ip6 nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+       }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1  mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+       echo "ERROR: netns ip routing/connectivity broken" 1>&2
+       cleanup
+       exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+       echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+       cleanup
+       exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+       echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+       ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (it's in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+       echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+       ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+if [ $ret -eq 0 ];then
+       echo "PASS: icmp mtu error had RELATED state"
+else
+       echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh

index 8ec76681605cca08f8cad14720ada2986d74f76c..3194007cf8d1bf3f456d9e4594417ffe2f9d56fd 100755 (executable)
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -321,6 +321,7 @@ EOF
  
  test_masquerade6()
  {
+       local natflags=$1
         local lret=0
  
         ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - <<EOF
  table ip6 nat {
         chain postrouting {
                 type nat hook postrouting priority 0; policy accept;
-               meta oif veth0 masquerade
+               meta oif veth0 masquerade $natflags
         }
  }
  EOF
         ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags"
                 lret=1
         fi
  
@@ -397,19 +398,26 @@ EOF
                 fi
         done
  
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+               lret=1
+       fi
+
         ip netns exec ns0 nft flush chain ip6 nat postrouting
         if [ $? -ne 0 ]; then
                 echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
                 lret=1
         fi
  
-       test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+       test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2"
  
         return $lret
  }
  
  test_masquerade()
  {
+       local natflags=$1
         local lret=0
  
         ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
@@ -417,7 +425,7 @@ test_masquerade()
  
         ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: canot ping ns1 from ns2"
+               echo "ERROR: cannot ping ns1 from ns2 $natflags"
                 lret=1
         fi
  
@@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - <<EOF
  table ip nat {
         chain postrouting {
                 type nat hook postrouting priority 0; policy accept;
-               meta oif veth0 masquerade
+               meta oif veth0 masquerade $natflags
         }
  }
  EOF
         ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+               echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags"
                 lret=1
         fi
  
@@ -485,13 +493,19 @@ EOF
                 fi
         done
  
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+               lret=1
+       fi
+
         ip netns exec ns0 nft flush chain ip nat postrouting
         if [ $? -ne 0 ]; then
                 echo "ERROR: Could not flush nat postrouting" 1>&2
                 lret=1
         fi
  
-       test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+       test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2"
  
         return $lret
  }
@@ -750,8 +764,12 @@ test_local_dnat
  test_local_dnat6
  
  reset_counters
-test_masquerade
-test_masquerade6
+test_masquerade ""
+test_masquerade6 ""
+
+reset_counters
+test_masquerade "fully-random"
+test_masquerade6 "fully-random"
  
  reset_counters
  test_redirect
author	Joerg Roedel <jroedel@suse.de>
	Tue, 7 May 2019 07:40:12 +0000 (09:40 +0200)
committer	Joerg Roedel <jroedel@suse.de>
	Tue, 7 May 2019 07:40:12 +0000 (09:40 +0200)
Documentation/devicetree/bindings/net/davinci_emac.txt		patch \| blob \| blame \| history
Documentation/devicetree/bindings/net/ethernet.txt		patch \| blob \| blame \| history
Documentation/devicetree/bindings/net/macb.txt		patch \| blob \| blame \| history
Documentation/networking/decnet.txt		patch \| blob \| blame \| history
Documentation/networking/ip-sysctl.txt		patch \| blob \| blame \| history
Documentation/sysctl/vm.txt		patch \| blob \| blame \| history
MAINTAINERS		patch \| blob \| blame \| history
Makefile		patch \| blob \| blame \| history
arch/alpha/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/arm/Kconfig		patch \| blob \| blame \| history
arch/arm/Kconfig.debug		patch \| blob \| blame \| history
arch/arm/boot/compressed/head.S		patch \| blob \| blame \| history
arch/arm/kernel/head-nommu.S		patch \| blob \| blame \| history
arch/arm/tools/syscall.tbl		patch \| blob \| blame \| history
arch/arm64/include/asm/unistd.h		patch \| blob \| blame \| history
arch/arm64/include/asm/unistd32.h		patch \| blob \| blame \| history
arch/arm64/kernel/ftrace.c		patch \| blob \| blame \| history
arch/arm64/mm/init.c		patch \| blob \| blame \| history
arch/ia64/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/m68k/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/microblaze/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/mips/ath79/setup.c		patch \| blob \| blame \| history
arch/mips/kernel/scall64-o32.S		patch \| blob \| blame \| history
arch/mips/kernel/syscalls/syscall_n32.tbl		patch \| blob \| blame \| history
arch/mips/kernel/syscalls/syscall_n64.tbl		patch \| blob \| blame \| history
arch/mips/kernel/syscalls/syscall_o32.tbl		patch \| blob \| blame \| history
arch/parisc/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/powerpc/configs/skiroot_defconfig		patch \| blob \| blame \| history
arch/powerpc/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/powerpc/mm/mmu_context_iommu.c		patch \| blob \| blame \| history
arch/powerpc/platforms/Kconfig.cputype		patch \| blob \| blame \| history
arch/s390/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/sh/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/sparc/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
arch/x86/boot/compressed/misc.c		patch \| blob \| blame \| history
arch/x86/events/intel/cstate.c		patch \| blob \| blame \| history
arch/x86/mm/init.c		patch \| blob \| blame \| history
arch/xtensa/kernel/syscalls/syscall.tbl		patch \| blob \| blame \| history
crypto/lrw.c		patch \| blob \| blame \| history
crypto/xts.c		patch \| blob \| blame \| history
drivers/acpi/arm64/iort.c		patch \| blob \| blame \| history
drivers/atm/firestream.c		patch \| blob \| blame \| history
drivers/block/zram/zram_drv.c		patch \| blob \| blame \| history
drivers/dma/bcm2835-dma.c		patch \| blob \| blame \| history
drivers/dma/mediatek/mtk-cqdma.c		patch \| blob \| blame \| history
drivers/dma/sh/rcar-dmac.c		patch \| blob \| blame \| history
drivers/gpio/gpio-eic-sprd.c		patch \| blob \| blame \| history
drivers/gpio/gpiolib.c		patch \| blob \| blame \| history
drivers/gpu/drm/bridge/synopsys/dw-hdmi.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_ddi.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_dp.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_fbdev.c		patch \| blob \| blame \| history
drivers/gpu/drm/imx/ipuv3-crtc.c		patch \| blob \| blame \| history
drivers/gpu/drm/scheduler/sched_main.c		patch \| blob \| blame \| history
drivers/gpu/drm/sun4i/sun4i_drv.c		patch \| blob \| blame \| history
drivers/gpu/drm/ttm/ttm_bo.c		patch \| blob \| blame \| history
drivers/gpu/drm/ttm/ttm_memory.c		patch \| blob \| blame \| history
drivers/gpu/drm/vc4/vc4_crtc.c		patch \| blob \| blame \| history
drivers/gpu/drm/virtio/virtgpu_drv.c		patch \| blob \| blame \| history
drivers/gpu/drm/virtio/virtgpu_drv.h		patch \| blob \| blame \| history
drivers/gpu/drm/virtio/virtgpu_prime.c		patch \| blob \| blame \| history
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c		patch \| blob \| blame \| history
drivers/gpu/ipu-v3/ipu-dp.c		patch \| blob \| blame \| history
drivers/infiniband/core/uverbs.h		patch \| blob \| blame \| history
drivers/infiniband/core/uverbs_main.c		patch \| blob \| blame \| history
drivers/infiniband/hw/hns/hns_roce_qp.c		patch \| blob \| blame \| history
drivers/infiniband/hw/mlx5/main.c		patch \| blob \| blame \| history
drivers/infiniband/hw/mlx5/qp.c		patch \| blob \| blame \| history
drivers/infiniband/sw/rdmavt/mr.c		patch \| blob \| blame \| history
drivers/input/keyboard/Kconfig		patch \| blob \| blame \| history
drivers/input/rmi4/rmi_driver.c		patch \| blob \| blame \| history
drivers/input/rmi4/rmi_f11.c		patch \| blob \| blame \| history
drivers/iommu/Kconfig		patch \| blob \| blame \| history
drivers/iommu/amd_iommu.c		patch \| blob \| blame \| history
drivers/iommu/amd_iommu_init.c		patch \| blob \| blame \| history
drivers/iommu/amd_iommu_types.h		patch \| blob \| blame \| history
drivers/iommu/arm-smmu-regs.h		patch \| blob \| blame \| history
drivers/iommu/arm-smmu-v3.c		patch \| blob \| blame \| history
drivers/iommu/arm-smmu.c		patch \| blob \| blame \| history
drivers/iommu/dmar.c		patch \| blob \| blame \| history
drivers/iommu/intel-iommu.c		patch \| blob \| blame \| history
drivers/iommu/intel-pasid.c		patch \| blob \| blame \| history
drivers/iommu/intel-svm.c		patch \| blob \| blame \| history
drivers/iommu/intel_irq_remapping.c		patch \| blob \| blame \| history
drivers/iommu/iommu.c		patch \| blob \| blame \| history
drivers/iommu/tegra-smmu.c		patch \| blob \| blame \| history
drivers/irqchip/irq-ath79-misc.c		patch \| blob \| blame \| history
drivers/net/ethernet/atheros/atlx/atl1.c		patch \| blob \| blame \| history
drivers/net/ethernet/atheros/atlx/atl1.h		patch \| blob \| blame \| history
drivers/net/ethernet/atheros/atlx/atl2.c		patch \| blob \| blame \| history
drivers/net/ethernet/atheros/atlx/atl2.h		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlx5/core/en_main.c		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlx5/core/port.c		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h		patch \| blob \| blame \| history
drivers/net/ethernet/mellanox/mlxsw/spectrum.c		patch \| blob \| blame \| history
drivers/net/ethernet/netronome/nfp/abm/cls.c		patch \| blob \| blame \| history
drivers/net/ethernet/socionext/netsec.c		patch \| blob \| blame \| history
drivers/net/ethernet/stmicro/stmmac/norm_desc.c		patch \| blob \| blame \| history
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c		patch \| blob \| blame \| history
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c		patch \| blob \| blame \| history
drivers/net/phy/spi_ks8995.c		patch \| blob \| blame \| history
drivers/net/slip/slhc.c		patch \| blob \| blame \| history
drivers/net/team/team.c		patch \| blob \| blame \| history
drivers/net/vrf.c		patch \| blob \| blame \| history
drivers/nfc/st95hf/core.c		patch \| blob \| blame \| history
drivers/of/of_net.c		patch \| blob \| blame \| history
drivers/s390/net/ctcm_main.c		patch \| blob \| blame \| history
drivers/vfio/mdev/mdev_core.c		patch \| blob \| blame \| history
drivers/vfio/mdev/mdev_private.h		patch \| blob \| blame \| history
drivers/vfio/vfio_iommu_type1.c		patch \| blob \| blame \| history
fs/btrfs/file-item.c		patch \| blob \| blame \| history
fs/btrfs/ordered-data.c		patch \| blob \| blame \| history
fs/ceph/dir.c		patch \| blob \| blame \| history
fs/ceph/inode.c		patch \| blob \| blame \| history
fs/ceph/mds_client.c		patch \| blob \| blame \| history
fs/ceph/snap.c		patch \| blob \| blame \| history
fs/cifs/file.c		patch \| blob \| blame \| history
fs/cifs/inode.c		patch \| blob \| blame \| history
fs/cifs/misc.c		patch \| blob \| blame \| history
fs/cifs/smb2pdu.c		patch \| blob \| blame \| history
fs/io_uring.c		patch \| blob \| blame \| history
fs/nfsd/nfs3proc.c		patch \| blob \| blame \| history
fs/nfsd/nfs3xdr.c		patch \| blob \| blame \| history
fs/nfsd/nfs4callback.c		patch \| blob \| blame \| history
fs/nfsd/nfs4state.c		patch \| blob \| blame \| history
fs/nfsd/state.h		patch \| blob \| blame \| history
fs/proc/proc_sysctl.c		patch \| blob \| blame \| history
fs/splice.c		patch \| blob \| blame \| history
include/drm/ttm/ttm_bo_driver.h		patch \| blob \| blame \| history
include/linux/etherdevice.h		patch \| blob \| blame \| history
include/linux/intel-iommu.h		patch \| blob \| blame \| history
include/linux/iommu.h		patch \| blob \| blame \| history
include/linux/iova.h		patch \| blob \| blame \| history
include/linux/mdev.h		patch \| blob \| blame \| history
include/linux/pci.h		patch \| blob \| blame \| history
include/linux/pipe_fs_i.h		patch \| blob \| blame \| history
include/net/netfilter/nf_conntrack.h		patch \| blob \| blame \| history
include/net/netfilter/nf_conntrack_l4proto.h		patch \| blob \| blame \| history
include/uapi/rdma/mlx5-abi.h		patch \| blob \| blame \| history
kernel/sched/fair.c		patch \| blob \| blame \| history
kernel/trace/ring_buffer.c		patch \| blob \| blame \| history
kernel/trace/trace.c		patch \| blob \| blame \| history
lib/Kconfig.debug		patch \| blob \| blame \| history
lib/test_vmalloc.c		patch \| blob \| blame \| history
mm/memory_hotplug.c		patch \| blob \| blame \| history
mm/page_alloc.c		patch \| blob \| blame \| history
net/bridge/netfilter/ebtables.c		patch \| blob \| blame \| history
net/ipv4/route.c		patch \| blob \| blame \| history
net/ipv4/sysctl_net_ipv4.c		patch \| blob \| blame \| history
net/ipv6/addrlabel.c		patch \| blob \| blame \| history
net/ncsi/ncsi-rsp.c		patch \| blob \| blame \| history
net/netfilter/ipvs/ip_vs_core.c		patch \| blob \| blame \| history
net/netfilter/nf_conntrack_core.c		patch \| blob \| blame \| history
net/netfilter/nf_conntrack_netlink.c		patch \| blob \| blame \| history
net/netfilter/nf_conntrack_proto.c		patch \| blob \| blame \| history
net/netfilter/nf_conntrack_proto_icmp.c		patch \| blob \| blame \| history
net/netfilter/nf_conntrack_proto_icmpv6.c		patch \| blob \| blame \| history
net/netfilter/nf_nat_core.c		patch \| blob \| blame \| history
net/netfilter/nf_tables_api.c		patch \| blob \| blame \| history
net/netfilter/nfnetlink_log.c		patch \| blob \| blame \| history
net/netfilter/nfnetlink_queue.c		patch \| blob \| blame \| history
net/netfilter/xt_time.c		patch \| blob \| blame \| history
net/rds/ib_fmr.c		patch \| blob \| blame \| history
net/rds/ib_rdma.c		patch \| blob \| blame \| history
net/rose/rose_loopback.c		patch \| blob \| blame \| history
net/rxrpc/input.c		patch \| blob \| blame \| history
net/rxrpc/local_object.c		patch \| blob \| blame \| history
net/sunrpc/cache.c		patch \| blob \| blame \| history
net/tls/tls_device.c		patch \| blob \| blame \| history
net/tls/tls_device_fallback.c		patch \| blob \| blame \| history
net/tls/tls_main.c		patch \| blob \| blame \| history
net/tls/tls_sw.c		patch \| blob \| blame \| history
tools/testing/selftests/net/run_afpackettests		patch \| blob \| blame \| history
tools/testing/selftests/net/run_netsocktests		patch \| blob \| blame \| history
tools/testing/selftests/netfilter/Makefile		patch \| blob \| blame \| history
tools/testing/selftests/netfilter/conntrack_icmp_related.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/netfilter/nft_nat.sh		patch \| blob \| blame \| history