Merge branch 'mm-rst' into docs-next

author Jonathan Corbet <corbet@lwn.net>

Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)

committer Jonathan Corbet <corbet@lwn.net>

Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)
author Jonathan Corbet <corbet@lwn.net>
Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)
committer Jonathan Corbet <corbet@lwn.net>
Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)
diff --combined Documentation/admin-guide/kernel-parameters.txt

index 11fc28ecdb6d9f2ea1ce28807caf59c96c93c164,5d6e5509c04901bcf5f8fa0d3bfed4bb73289462..3487be79847c26c676a22091c60ecd75657d89ad
--- 1/Documentation/admin-guide/kernel-parameters.txt
--- 2/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@@ -389,15 -389,15 +389,15 @@@
                         Use software keyboard repeat
   
         audit=          [KNL] Enable the audit sub-system
- -                      Format: { "0" | "1" } (0 = disabled, 1 = enabled)
- -                      0 - kernel audit is disabled and can not be enabled
- -                          until the next reboot
+ +                      Format: { "0" | "1" | "off" | "on" }
+ +                      0 | off - kernel audit is disabled and can not be
+ +                          enabled until the next reboot
                         unset - kernel audit is initialized but disabled and
                             will be fully enabled by the userspace auditd.
- -                      1 - kernel audit is initialized and partially enabled,
- -                          storing at most audit_backlog_limit messages in
- -                          RAM until it is fully enabled by the userspace
- -                          auditd.
+ +                      1 | on - kernel audit is initialized and partially
+ +                          enabled, storing at most audit_backlog_limit
+ +                          messages in RAM until it is fully enabled by the
+ +                          userspace auditd.
                         Default: unset
   
         audit_backlog_limit= [KNL] Set the audit queue size limit.
@@@ -1025,7 -1025,7 +1025,7 @@@
                         address. The serial port must already be setup
                         and configured. Options are not yet supported.
   
- -      earlyprintk=    [X86,SH,BLACKFIN,ARM,M68k,S390]
+ +      earlyprintk=    [X86,SH,ARM,M68k,S390]
                         earlyprintk=vga
                         earlyprintk=efi
                         earlyprintk=sclp
@@@ -1347,6 -1347,10 +1347,6 @@@
                                If specified, z/VM IUCV HVC accepts connections
                                from listed z/VM user IDs only.
   
- -      hwthread_map=   [METAG] Comma-separated list of Linux cpu id to
- -                              hardware thread id mappings.
- -                              Format: <cpu>:<hwthread>
- -
         keep_bootcon    [KNL]
                         Do not unregister boot console at start. This is only
                         useful for debugging when something happens in the window
@@@ -1521,8 -1525,7 +1521,8 @@@
   
         ima_policy=     [IMA]
                         The builtin policies to load during IMA setup.
- -                      Format: "tcb | appraise_tcb | secure_boot"
+ +                      Format: "tcb | appraise_tcb | secure_boot |
+ +                               fail_securely"
   
                         The "tcb" policy measures all programs exec'd, files
                         mmap'd for exec, and all files opened with the read
@@@ -1537,11 -1540,6 +1537,11 @@@
                         of files (eg. kexec kernel image, kernel modules,
                         firmware, policy, etc) based on file signatures.
   
+ +                      The "fail_securely" policy forces file signature
+ +                      verification failure also on privileged mounted
+ +                      filesystems with the SB_I_UNVERIFIABLE_SIGNATURE
+ +                      flag.
+ +
         ima_tcb         [IMA] Deprecated.  Use ima_policy= instead.
                         Load a policy which meets the needs of the Trusted
                         Computing Base.  This means IMA will measure all
@@@ -1745,14 -1743,6 +1745,14 @@@
                         of a GICv2 controller even if the memory range
                         exposed by the device tree is too small.
   
+ +      irqchip.gicv3_nolpi=
+ +                      [ARM, ARM64]
+ +                      Force the kernel to ignore the availability of
+ +                      LPIs (and by consequence ITSs). Intended for systems
+ +                      that use the kernel as a bootloader, and thus want
+ +                      to leave secondary kernels in charge of setting up
+ +                      LPIs.
+ +
         irqfixup        [HW]
                         When an interrupt is not handled search all handlers
                         for it. Intended to get systems with badly broken
@@@ -1776,17 -1766,6 +1776,17 @@@
   
                         nohz
                           Disable the tick when a single task runs.
+ +
+ +                        A residual 1Hz tick is offloaded to workqueues, which you
+ +                        need to affine to housekeeping through the global
+ +                        workqueue's affinity configured via the
+ +                        /sys/devices/virtual/workqueue/cpumask sysfs file, or
+ +                        by using the 'domain' flag described below.
+ +
+ +                        NOTE: by default the global workqueue runs on all CPUs,
+ +                        so to protect individual CPUs the 'cpumask' file has to
+ +                        be configured manually after bootup.
+ +
                         domain
                           Isolate from the general SMP balancing and scheduling
                           algorithms. Note that performing domain isolation this way
@@@ -1846,29 -1825,30 +1846,29 @@@
         keepinitrd      [HW,ARM]
   
         kernelcore=     [KNL,X86,IA-64,PPC]
- -                      Format: nn[KMGTPE] | "mirror"
- -                      This parameter
- -                      specifies the amount of memory usable by the kernel
- -                      for non-movable allocations.  The requested amount is
- -                      spread evenly throughout all nodes in the system. The
- -                      remaining memory in each node is used for Movable
- -                      pages. In the event, a node is too small to have both
- -                      kernelcore and Movable pages, kernelcore pages will
- -                      take priority and other nodes will have a larger number
- -                      of Movable pages.  The Movable zone is used for the
- -                      allocation of pages that may be reclaimed or moved
- -                      by the page migration subsystem.  This means that
- -                      HugeTLB pages may not be allocated from this zone.
- -                      Note that allocations like PTEs-from-HighMem still
- -                      use the HighMem zone if it exists, and the Normal
+ +                      Format: nn[KMGTPE] | nn% | "mirror"
+ +                      This parameter specifies the amount of memory usable by
+ +                      the kernel for non-movable allocations.  The requested
+ +                      amount is spread evenly throughout all nodes in the
+ +                      system as ZONE_NORMAL.  The remaining memory is used for
+ +                      movable memory in its own zone, ZONE_MOVABLE.  In the
+ +                      event a node is too small to have both ZONE_NORMAL and
+ +                      ZONE_MOVABLE, kernelcore memory will take priority and
+ +                      other nodes will have a larger ZONE_MOVABLE.
+ +
+ +                      ZONE_MOVABLE is used for the allocation of pages that
+ +                      may be reclaimed or moved by the page migration
+ +                      subsystem.  Note that allocations like PTEs-from-HighMem
+ +                      still use the HighMem zone if it exists, and the Normal
                         zone if it does not.
   
- -                      Instead of specifying the amount of memory (nn[KMGTPE]),
- -                      you can specify "mirror" option. In case "mirror"
+ +                      It is possible to specify the exact amount of memory in
+ +                      the form of "nn[KMGTPE]", a percentage of total system
+ +                      memory in the form of "nn%", or "mirror".  If "mirror"
                         option is specified, mirrored (reliable) memory is used
                         for non-movable allocations and remaining memory is used
- -                      for Movable pages. nn[KMGTPE] and "mirror" are exclusive,
- -                      so you can NOT specify nn[KMGTPE] and "mirror" at the same
- -                      time.
+ +                      for Movable pages.  "nn[KMGTPE]", "nn%", and "mirror"
+ +                      are exclusive, so you cannot specify multiple forms.
   
         kgdbdbgp=       [KGDB,HW] kgdb over EHCI usb debug port.
                         Format: <Controller#>[,poll interval]
@@@ -1907,9 -1887,6 +1907,9 @@@
         kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
                         Default is 0 (don't ignore, but inject #GP)
   
+ +      kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
+ +                                 Default is false (don't support).
+ +
         kvm.mmu_audit=  [KVM] This is a R/W parameter which allows audit
                         KVM MMU at runtime.
                         Default is 0 (off)
@@@ -2260,15 -2237,6 +2260,15 @@@
                         The memory region may be marked as e820 type 12 (0xc)
                         and is NVDIMM or ADR memory.
   
+ +      memmap=<size>%<offset>-<oldtype>+<newtype>
+ +                      [KNL,ACPI] Convert memory within the specified region
+ +                      from <oldtype> to <newtype>. If "-<oldtype>" is left
+ +                      out, the whole region will be marked as <newtype>,
+ +                      even if previously unavailable. If "+<newtype>" is left
+ +                      out, matching memory will be removed. Types are
+ +                      specified as e820 types, e.g., 1 = RAM, 2 = reserved,
+ +                      3 = ACPI, 12 = PRAM.
+ +
         memory_corruption_check=0/1 [X86]
                         Some BIOSes seem to corrupt the first 64k of
                         memory when doing things like suspend/resume.
@@@ -2385,14 -2353,13 +2385,14 @@@
         mousedev.yres=  [MOUSE] Vertical screen resolution, used for devices
                         reporting absolute coordinates, such as tablets
   
- -      movablecore=nn[KMG]     [KNL,X86,IA-64,PPC] This parameter
- -                      is similar to kernelcore except it specifies the
- -                      amount of memory used for migratable allocations.
- -                      If both kernelcore and movablecore is specified,
- -                      then kernelcore will be at *least* the specified
- -                      value but may be more. If movablecore on its own
- -                      is specified, the administrator must be careful
+ +      movablecore=    [KNL,X86,IA-64,PPC]
+ +                      Format: nn[KMGTPE] | nn%
+ +                      This parameter is the complement to kernelcore=; it
+ +                      specifies the amount of memory used for migratable
+ +                      allocations.  If both kernelcore and movablecore are
+ +                      specified, then kernelcore will be at *least* the
+ +                      specified value but may be more.  If movablecore on its
+ +                      own is specified, the administrator must be careful
                         that the amount of memory usable for all allocations
                         is not too small.
   
@@@ -3163,13 -3130,18 +3163,13 @@@
                 force   Enable ASPM even on devices that claim not to support it.
                         WARNING: Forcing ASPM on may cause system lockups.
   
- -      pcie_hp=        [PCIE] PCI Express Hotplug driver options:
- -              nomsi   Do not use MSI for PCI Express Native Hotplug (this
- -                      makes all PCIe ports use INTx for hotplug services).
- -
- -      pcie_ports=     [PCIE] PCIe ports handling:
- -              auto    Ask the BIOS whether or not to use native PCIe services
- -                      associated with PCIe ports (PME, hot-plug, AER).  Use
- -                      them only if that is allowed by the BIOS.
- -              native  Use native PCIe services associated with PCIe ports
- -                      unconditionally.
- -              compat  Treat PCIe ports as PCI-to-PCI bridges, disable the PCIe
- -                      ports driver.
+ +      pcie_ports=     [PCIE] PCIe port services handling:
+ +              native  Use native PCIe services (PME, AER, DPC, PCIe hotplug)
+ +                      even if the platform doesn't give the OS permission to
+ +                      use them.  This may cause conflicts if the platform
+ +                      also tries to use these services.
+ +              compat  Disable native PCIe services (PME, AER, DPC, PCIe
+ +                      hotplug).
   
         pcie_port_pm=   [PCIE] PCIe port power management handling:
                 off     Disable power management of all PCIe ports
@@@ -3915,7 -3887,7 +3915,7 @@@
                         cache (risks via metadata attacks are mostly
                         unchanged). Debug options disable merging on their
                         own.
-                       For more information see Documentation/vm/slub.txt.
+                       For more information see Documentation/vm/slub.rst.
   
         slab_max_order= [MM, SLAB]
                         Determines the maximum allowed order for slabs.
@@@ -3929,7 -3901,7 +3929,7 @@@
                         slub_debug can create guard zones around objects and
                         may poison objects when not in use. Also tracks the
                         last alloc / free. For more information see
-                       Documentation/vm/slub.txt.
+                       Documentation/vm/slub.rst.
   
         slub_memcg_sysfs=       [MM, SLUB]
                         Determines whether to enable sysfs directories for
@@@ -3943,7 -3915,7 +3943,7 @@@
                         Determines the maximum allowed order for slabs.
                         A high setting may cause OOMs due to memory
                         fragmentation. For more information see
-                       Documentation/vm/slub.txt.
+                       Documentation/vm/slub.rst.
   
         slub_min_objects=       [MM, SLUB]
                         The minimum number of objects per slab. SLUB will
@@@ -3952,12 -3924,12 +3952,12 @@@
                         the number of objects indicated. The higher the number
                         of objects the smaller the overhead of tracking slabs
                         and the less frequently locks need to be acquired.
-                       For more information see Documentation/vm/slub.txt.
+                       For more information see Documentation/vm/slub.rst.
   
         slub_min_order= [MM, SLUB]
                         Determines the minimum page order for slabs. Must be
                         lower than slub_max_order.
-                       For more information see Documentation/vm/slub.txt.
+                       For more information see Documentation/vm/slub.rst.
   
         slub_nomerge    [MM, SLUB]
                         Same with slab_nomerge. This is supported for legacy.
@@@ -4313,7 -4285,7 +4313,7 @@@
                         Format: [always|madvise|never]
                         Can be used to control the default behavior of the system
                         with respect to transparent hugepages.
-                       See Documentation/vm/transhuge.txt for more details.
+                       See Documentation/vm/transhuge.rst for more details.
   
         tsc=            Disable clocksource stability checks for TSC.
                         Format: <string>
@@@ -4396,73 -4368,12 +4396,73 @@@
   
         usbcore.nousb   [USB] Disable the USB subsystem
   
+ +      usbcore.quirks=
+ +                      [USB] A list of quirk entries to augment the built-in
+ +                      usb core quirk list. List entries are separated by
+ +                      commas. Each entry has the form
+ +                      VendorID:ProductID:Flags. The IDs are 4-digit hex
+ +                      numbers and Flags is a set of letters. Each letter
+ +                      will change the built-in quirk; setting it if it is
+ +                      clear and clearing it if it is set. The letters have
+ +                      the following meanings:
+ +                              a = USB_QUIRK_STRING_FETCH_255 (string
+ +                                      descriptors must not be fetched using
+ +                                      a 255-byte read);
+ +                              b = USB_QUIRK_RESET_RESUME (device can't resume
+ +                                      correctly so reset it instead);
+ +                              c = USB_QUIRK_NO_SET_INTF (device can't handle
+ +                                      Set-Interface requests);
+ +                              d = USB_QUIRK_CONFIG_INTF_STRINGS (device can't
+ +                                      handle its Configuration or Interface
+ +                                      strings);
+ +                              e = USB_QUIRK_RESET (device can't be reset
+ +                                      (e.g. morph devices), don't use reset);
+ +                              f = USB_QUIRK_HONOR_BNUMINTERFACES (device has
+ +                                      more interface descriptions than the
+ +                                      bNumInterfaces count, and can't handle
+ +                                      talking to these interfaces);
+ +                              g = USB_QUIRK_DELAY_INIT (device needs a pause
+ +                                      during initialization, after we read
+ +                                      the device descriptor);
+ +                              h = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL (For
+ +                                      high speed and super speed interrupt
+ +                                      endpoints, the USB 2.0 and USB 3.0 spec
+ +                                      require the interval in microframes (1
+ +                                      microframe = 125 microseconds) to be
+ +                                      calculated as interval = 2 ^
+ +                                      (bInterval-1).
+ +                                      Devices with this quirk report their
+ +                                      bInterval as the result of this
+ +                                      calculation instead of the exponent
+ +                                      variable used in the calculation);
+ +                              i = USB_QUIRK_DEVICE_QUALIFIER (device can't
+ +                                      handle device_qualifier descriptor
+ +                                      requests);
+ +                              j = USB_QUIRK_IGNORE_REMOTE_WAKEUP (device
+ +                                      generates spurious wakeup, ignore
+ +                                      remote wakeup capability);
+ +                              k = USB_QUIRK_NO_LPM (device can't handle Link
+ +                                      Power Management);
+ +                              l = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL
+ +                                      (Device reports its bInterval as linear
+ +                                      frames instead of the USB 2.0
+ +                                      calculation);
+ +                              m = USB_QUIRK_DISCONNECT_SUSPEND (Device needs
+ +                                      to be disconnected before suspend to
+ +                                      prevent spurious wakeup);
+ +                              n = USB_QUIRK_DELAY_CTRL_MSG (Device needs a
+ +                                      pause after every control message);
+ +                      Example: quirks=0781:5580:bk,0a5c:5834:gij
+ +
         usbhid.mousepoll=
                         [USBHID] The interval which mice are to be polled at.
   
         usbhid.jspoll=
                         [USBHID] The interval which joysticks are to be polled at.
   
+ +      usbhid.kbpoll=
+ +                      [USBHID] The interval which keyboards are to be polled at.
+ +
         usb-storage.delay_use=
                         [UMS] The delay in seconds before a new device is
                         scanned for Logical Units (default 1).
diff --combined Documentation/sysctl/vm.txt

index 17256f2ad919aa99cd2271eb8b05d09d5c8e2b62,ef581a940439bb476950514af7ec8dedb14b3fab..c8e6d5b031e4864ab501b3b60d31c76f305fb494
--- 1/Documentation/sysctl/vm.txt
--- 2/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@@ -312,6 -312,8 +312,6 @@@ The lowmem_reserve_ratio is an array. Y
   % cat /proc/sys/vm/lowmem_reserve_ratio
   256     256     32
   -
- -Note: # of this elements is one fewer than number of zones. Because the highest
- -      zone's value is not necessary for following calculation.
   
   But, these values are not used directly. The kernel calculates # of protection
   pages for each zones from them. These are shown as array of protection pages
@@@ -362,8 -364,7 +362,8 @@@ As above expression, they are reciproca
   pages of higher zones on the node.
   
   If you would like to protect more pages, smaller values are effective.
- -The minimum value is 1 (1/1 -> 100%).
+ +The minimum value is 1 (1/1 -> 100%). A value less than 1 completely
+ +disables protection of the pages.
   
   ==============================================================
   
@@@ -515,7 -516,7 +515,7 @@@ nr_hugepage
   
   Change the minimum size of the hugepage pool.
   
- See Documentation/vm/hugetlbpage.txt
+ See Documentation/vm/hugetlbpage.rst
   
   ==============================================================
   
@@@ -524,7 -525,7 +524,7 @@@ nr_overcommit_hugepage
   Change the maximum size of the hugepage pool. The maximum is
   nr_hugepages + nr_overcommit_hugepages.
   
- See Documentation/vm/hugetlbpage.txt
+ See Documentation/vm/hugetlbpage.rst
   
   ==============================================================
   
@@@ -667,7 -668,7 +667,7 @@@ and don't use much of it
   
   The default value is 0.
   
- See Documentation/vm/overcommit-accounting and
+ See Documentation/vm/overcommit-accounting.rst and
   mm/mmap.c::__vm_enough_memory() for more information.
   
   ==============================================================
diff --combined Documentation/vm/hmm.rst

index 0000000000000000000000000000000000000000,3fafa3381730e4fdcc66da25252f98f57d4b78e3..cdf3911582c863403f4f08dd68404a642c9d285a

mode 000000,100644..100644
--- /dev/null
--- 2/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@@ -1,0 -1,374 +1,386 @@@
- -Transparently allow any component of a program to use any memory region of said
- -program with a device without using device specific memory allocator. This is
- -becoming a requirement to simplify the use of advance heterogeneous computing
- -where GPU, DSP or FPGA are use to perform various computations.
- -
- -This document is divided as follow, in the first section i expose the problems
- -related to the use of a device specific allocator. The second section i expose
- -the hardware limitations that are inherent to many platforms. The third section
- -gives an overview of HMM designs. The fourth section explains how CPU page-
- -table mirroring works and what is HMM purpose in this context. Fifth section
- -deals with how device memory is represented inside the kernel. Finaly the last
- -section present the new migration helper that allow to leverage the device DMA
- -engine.
+ .. _hmm:
+ 
+ =====================================
+ Heterogeneous Memory Management (HMM)
+ =====================================
+ 
- -Problems of using device specific memory allocator
- -==================================================
- -
- -Device with large amount of on board memory (several giga bytes) like GPU have
- -historically manage their memory through dedicated driver specific API. This
- -creates a disconnect between memory allocated and managed by device driver and
- -regular application memory (private anonymous, share memory or regular file
- -back memory). From here on i will refer to this aspect as split address space.
- -I use share address space to refer to the opposite situation ie one in which
- -any memory region can be use by device transparently.
- -
- -Split address space because device can only access memory allocated through the
- -device specific API. This imply that all memory object in a program are not
- -equal from device point of view which complicate large program that rely on a
- -wide set of libraries.
- -
- -Concretly this means that code that wants to leverage device like GPU need to
- -copy object between genericly allocated memory (malloc, mmap private/share/)
- -and memory allocated through the device driver API (this still end up with an
- -mmap but of the device file).
- -
- -For flat dataset (array, grid, image, ...) this isn't too hard to achieve but
- -complex data-set (list, tree, ...) are hard to get right. Duplicating a complex
- -data-set need to re-map all the pointer relations between each of its elements.
- -This is error prone and program gets harder to debug because of the duplicate
- -data-set.
- -
- -Split address space also means that library can not transparently use data they
- -are getting from core program or other library and thus each library might have
- -to duplicate its input data-set using specific memory allocator. Large project
- -suffer from this and waste resources because of the various memory copy.
- -
- -Duplicating each library API to accept as input or output memory allocted by
++Provide infrastructure and helpers to integrate non-conventional memory (device
++memory like GPU on board memory) into regular kernel path, with the cornerstone
++of this being specialized struct page for such memory (see sections 5 to 7 of
++this document).
++
++HMM also provides optional helpers for SVM (Shared Virtual Memory), i.e.,
++allowing a device to transparently access program addresses coherently with
++the CPU meaning that any valid pointer on the CPU is also a valid pointer
++for the device. This is becoming mandatory to simplify the use of advanced
++heterogeneous computing where GPU, DSP, or FPGA are used to perform various
++computations on behalf of a process.
++
++This document is divided as follows: in the first section I expose the problems
++related to using device specific memory allocators. In the second section, I
++expose the hardware limitations that are inherent to many platforms. The third
++section gives an overview of the HMM design. The fourth section explains how
++CPU page-table mirroring works and the purpose of HMM in this context. The
++fifth section deals with how device memory is represented inside the kernel.
++Finally, the last section presents a new migration helper that allows lever-
++aging the device DMA engine.
+ 
+ .. contents:: :local:
+ 
- -combinatorial explosions in the library entry points.
++Problems of using a device specific memory allocator
++====================================================
++
++Devices with a large amount of on board memory (several gigabytes) like GPUs
++have historically managed their memory through dedicated driver specific APIs.
++This creates a disconnect between memory allocated and managed by a device
++driver and regular application memory (private anonymous, shared memory, or
++regular file backed memory). From here on I will refer to this aspect as split
++address space. I use shared address space to refer to the opposite situation:
++i.e., one in which any application memory region can be used by a device
++transparently.
++
++Split address space happens because device can only access memory allocated
++through device specific API. This implies that all memory objects in a program
++are not equal from the device point of view which complicates large programs
++that rely on a wide set of libraries.
++
++Concretely this means that code that wants to leverage devices like GPUs needs
++to copy objects between generically allocated memory (malloc, mmap private, mmap
++shared) and memory allocated through the device driver API (this still ends up
++with an mmap but of the device file).
++
++For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
++complex data sets (list, tree, ...) are hard to get right. Duplicating a
++complex data set needs to re-map all the pointer relations between each of its
++elements. This is error prone and program gets harder to debug because of the
++duplicate data set and addresses.
++
++Split address space also means that libraries cannot transparently use data
++they are getting from the core program or another library and thus each library
++might have to duplicate its input data set using the device specific memory
++allocator. Large projects suffer from this and waste resources because of the
++various memory copies.
++
++Duplicating each library API to accept as input or output memory allocated by
+ each device specific allocator is not a viable option. It would lead to a
- -Finaly with the advance of high level language constructs (in C++ but in other
- -language too) it is now possible for compiler to leverage GPU or other devices
- -without even the programmer knowledge. Some of compiler identified patterns are
- -only do-able with a share address. It is as well more reasonable to use a share
- -address space for all the other patterns.
++combinatorial explosion in the library entry points.
+ 
- -System bus, device memory characteristics
- -=========================================
++Finally, with the advance of high level language constructs (in C++ but in
++other languages too) it is now possible for the compiler to leverage GPUs and
++other devices without programmer knowledge. Some compiler identified patterns
++are only do-able with a shared address space. It is also more reasonable to use
++a shared address space for all other patterns.
+ 
+ 
- -System bus cripple share address due to few limitations. Most system bus only
- -allow basic memory access from device to main memory, even cache coherency is
- -often optional. Access to device memory from CPU is even more limited, most
- -often than not it is not cache coherent.
++I/O bus, device memory characteristics
++======================================
+ 
- -If we only consider the PCIE bus than device can access main memory (often
- -through an IOMMU) and be cache coherent with the CPUs. However it only allows
- -a limited set of atomic operation from device on main memory. This is worse
- -in the other direction the CPUs can only access a limited range of the device
- -memory and can not perform atomic operations on it. Thus device memory can not
- -be consider like regular memory from kernel point of view.
++I/O buses cripple shared address spaces due to a few limitations. Most I/O
++buses only allow basic memory access from device to main memory; even cache
++coherency is often optional. Access to device memory from CPU is even more
++limited. More often than not, it is not cache coherent.
+ 
- -and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s).
- -The final limitation is latency, access to main memory from the device has an
- -order of magnitude higher latency than when the device access its own memory.
++If we only consider the PCIE bus, then a device can access main memory (often
++through an IOMMU) and be cache coherent with the CPUs. However, it only allows
++a limited set of atomic operations from device on main memory. This is worse
++in the other direction: the CPU can only access a limited range of the device
++memory and cannot perform atomic operations on it. Thus device memory cannot
++be considered the same as regular memory from the kernel point of view.
+ 
+ Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
- -Some platform are developing new system bus or additions/modifications to PCIE
- -to address some of those limitations (OpenCAPI, CCIX). They mainly allow two
++and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
++The final limitation is latency. Access to main memory from the device has an
++order of magnitude higher latency than when the device accesses its own memory.
+ 
- -architecture supports. Saddly not all platform are following this trends and
- -some major architecture are left without hardware solutions to those problems.
++Some platforms are developing new I/O buses or additions/modifications to PCIE
++to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
+ way cache coherency between CPU and device and allow all atomic operations the
- -So for share address space to make sense not only we must allow device to
- -access any memory memory but we must also permit any memory to be migrated to
- -device memory while device is using it (blocking CPU access while it happens).
++architecture supports. Sadly, not all platforms are following this trend and
++some major architectures are left without hardware solutions to these problems.
+ 
- -Share address space and migration
- -=================================
++So for shared address space to make sense, not only must we allow devices to
++access any memory but we must also permit any memory to be migrated to device
++memory while device is using it (blocking CPU access while it happens).
+ 
+ 
- -space by duplication the CPU page table into the device page table so same
- -address point to same memory and this for any valid main memory address in
++Shared address space and migration
++==================================
+ 
+ HMM intends to provide two main features. First one is to share the address
- -To achieve this, HMM offer a set of helpers to populate the device page table
++space by duplicating the CPU page table in the device page table so the same
++address points to the same physical memory for any valid main memory address in
+ the process address space.
+ 
- -not as easy as CPU page table updates. To update the device page table you must
- -allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics
- -commands in it to perform the update (unmap, cache invalidations and flush,
- -...). This can not be done through common code for all device. Hence why HMM
- -provides helpers to factor out everything that can be while leaving the gory
- -details to the device driver.
- -
- -The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does
- -allow to allocate a struct page for each page of the device memory. Those page
- -are special because the CPU can not map them. They however allow to migrate
- -main memory to device memory using exhisting migration mechanism and everything
- -looks like if page was swap out to disk from CPU point of view. Using a struct
- -page gives the easiest and cleanest integration with existing mm mechanisms.
- -Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory
- -for the device memory and second to perform migration. Policy decision of what
- -and when to migrate things is left to the device driver.
- -
- -Note that any CPU access to a device page trigger a page fault and a migration
- -back to main memory ie when a page backing an given address A is migrated from
- -a main memory page to a device page then any CPU access to address A trigger a
- -page fault and initiate a migration back to main memory.
- -
- -
- -With this two features, HMM not only allow a device to mirror a process address
- -space and keeps both CPU and device page table synchronize, but also allow to
- -leverage device memory by migrating part of data-set that is actively use by a
- -device.
++To achieve this, HMM offers a set of helpers to populate the device page table
+ while keeping track of CPU page table updates. Device page table updates are
- -Address space mirroring main objective is to allow to duplicate range of CPU
- -page table into a device page table and HMM helps keeping both synchronize. A
- -device driver that want to mirror a process address space must start with the
++not as easy as CPU page table updates. To update the device page table, you must
++allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
++specific commands in it to perform the update (unmap, cache invalidations, and
++flush, ...). This cannot be done through common code for all devices. Hence
++why HMM provides helpers to factor out everything that can be while leaving the
++hardware specific details to the device driver.
++
++The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
++allows allocating a struct page for each page of the device memory. Those pages
++are special because the CPU cannot map them. However, they allow migrating
++main memory to device memory using existing migration mechanisms and everything
++looks like a page is swapped out to disk from the CPU point of view. Using a
++struct page gives the easiest and cleanest integration with existing mm mech-
++anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
++memory for the device memory and second to perform migration. Policy decisions
++of what and when to migrate things are left to the device driver.
++
++Note that any CPU access to a device page triggers a page fault and a migration
++back to main memory. For example, when a page backing a given CPU address A is
++migrated from a main memory page to a device page, then any CPU access to
++address A triggers a page fault and initiates a migration back to main memory.
++
++With these two features, HMM not only allows a device to mirror process address
++space and keeps both CPU and device page tables synchronized, but also lever-
++ages device memory by migrating the part of the data set that is actively being
++used by the device.
+ 
+ 
+ Address space mirroring implementation and API
+ ==============================================
+ 
- -The locked variant is to be use when the driver is already holding the mmap_sem
- -of the mm in write mode. The mirror struct has a set of callback that are use
- -to propagate CPU page table::
++Address space mirroring's main objective is to allow duplication of a range of
++CPU page table into a device page table; HMM helps keep both synchronized. A
++device driver that wants to mirror a process address space must start with the
+ registration of an hmm_mirror struct::
+ 
+  int hmm_mirror_register(struct hmm_mirror *mirror,
+                          struct mm_struct *mm);
+  int hmm_mirror_register_locked(struct hmm_mirror *mirror,
+                                 struct mm_struct *mm);
+ 
- -Device driver must perform update to the range following action (turn range
- -read only, or fully unmap, ...). Once driver callback returns the device must
- -be done with the update.
- -
++
++The locked variant is to be used when the driver is already holding mmap_sem
++of the mm in write mode. The mirror struct has a set of callbacks that are used
++to propagate CPU page tables::
+ 
+  struct hmm_mirror_ops {
+      /* sync_cpu_device_pagetables() - synchronize page tables
+       *
+       * @mirror: pointer to struct hmm_mirror
+       * @update_type: type of update that occurred to the CPU page table
+       * @start: virtual start address of the range to update
+       * @end: virtual end address of the range to update
+       *
+       * This callback ultimately originates from mmu_notifiers when the CPU
+       * page table is updated. The device driver must update its page table
+       * in response to this callback. The update argument tells what action
+       * to perform.
+       *
+       * The device driver must not return from this callback until the device
+       * page tables are completely updated (TLBs flushed, etc); this is a
+       * synchronous call.
+       */
+       void (*update)(struct hmm_mirror *mirror,
+                      enum hmm_update action,
+                      unsigned long start,
+                      unsigned long end);
+  };
+ 
- -When device driver wants to populate a range of virtual address it can use
- -either::
++The device driver must perform the update action to the range (mark range
++read only, or fully unmap, ...). The device must be done with the update before
++the driver callback returns.
+ 
- - int hmm_vma_get_pfns(struct vm_area_struct *vma,
++When the device driver wants to populate a range of virtual addresses, it can
++use either::
+ 
- -First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and
- -will not trigger a page fault on missing or non present entry. The second one
- -do trigger page fault on missing or read only entry if write parameter is true.
- -Page fault use the generic mm page fault code path just like a CPU page fault.
++  int hmm_vma_get_pfns(struct vm_area_struct *vma,
+                       struct hmm_range *range,
+                       unsigned long start,
+                       unsigned long end,
+                       hmm_pfn_t *pfns);
+  int hmm_vma_fault(struct vm_area_struct *vma,
+                    struct hmm_range *range,
+                    unsigned long start,
+                    unsigned long end,
+                    hmm_pfn_t *pfns,
+                    bool write,
+                    bool block);
+ 
- -Both function copy CPU page table into their pfns array argument. Each entry in
- -that array correspond to an address in the virtual range. HMM provide a set of
- -flags to help driver identify special CPU page table entries.
++The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
++entries and will not trigger a page fault on missing or non-present entries.
++The second one does trigger a page fault on missing or read-only entry if the
++write parameter is true. Page faults use the generic mm page fault code path
++just like a CPU page fault.
+ 
- -respect in order to keep things properly synchronize. The usage pattern is::
++Both functions copy CPU page table entries into their pfns array argument. Each
++entry in that array corresponds to an address in the virtual range. HMM
++provides a set of flags to help the driver identify special CPU page table
++entries.
+ 
+ Locking with the update() callback is the most important aspect the driver must
- -The driver->update lock is the same lock that driver takes inside its update()
- -callback. That lock must be call before hmm_vma_range_done() to avoid any race
- -with a concurrent CPU page table update.
++respect in order to keep things properly synchronized. The usage pattern is::
+ 
+  int driver_populate_range(...)
+  {
+       struct hmm_range range;
+       ...
+  again:
+       ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
+       if (ret)
+           return ret;
+       take_lock(driver->update);
+       if (!hmm_vma_range_done(vma, &range)) {
+           release_lock(driver->update);
+           goto again;
+       }
+ 
+       // Use pfns array content to update device page table
+ 
+       release_lock(driver->update);
+       return 0;
+  }
+ 
- -HMM implements all this on top of the mmu_notifier API because we wanted to a
- -simpler API and also to be able to perform optimization latter own like doing
- -concurrent device update in multi-devices scenario.
++The driver->update lock is the same lock that the driver takes inside its
++update() callback. That lock must be held before hmm_vma_range_done() to avoid
++any race with a concurrent CPU page table update.
+ 
- -HMM also serve as an impedence missmatch between how CPU page table update are
- -done (by CPU write to the page table and TLB flushes) from how device update
- -their own page table. Device update is a multi-step process, first appropriate
- -commands are write to a buffer, then this buffer is schedule for execution on
- -the device. It is only once the device has executed commands in the buffer that
- -the update is done. Creating and scheduling update command buffer can happen
- -concurrently for multiple devices. Waiting for each device to report commands
- -as executed is serialize (there is no point in doing this concurrently).
++HMM implements all this on top of the mmu_notifier API because we wanted a
++simpler API and also to be able to perform optimizations later on like doing
++concurrent device updates in multi-device scenarios.
+ 
- -Several differents design were try to support device memory. First one use
- -device specific data structure to keep information about migrated memory and
- -HMM hooked itself in various place of mm code to handle any access to address
- -that were back by device memory. It turns out that this ended up replicating
- -most of the fields of struct page and also needed many kernel code path to be
- -updated to understand this new kind of memory.
++HMM also bridges the impedance mismatch between how CPU page table updates
++are done (by CPU write to the page table and TLB flushes) and how devices
++update their own page table. Device updates are a multi-step process. First,
++appropriate commands are written to a buffer, then this buffer is scheduled for
++execution on the device. It is only once the device has executed commands in
++the buffer that the update is done. Creating and scheduling the update command
++buffer can happen concurrently for multiple devices. Waiting for each device to
++report commands as executed is serialized (there is no point in doing this
++concurrently).
+ 
+ 
+ Represent and manage device memory from core kernel point of view
+ =================================================================
+ 
- -Thing is most kernel code path never try to access the memory behind a page
- -but only care about struct page contents. Because of this HMM switchted to
- -directly using struct page for device memory which left most kernel code path
- -un-aware of the difference. We only need to make sure that no one ever try to
- -map those page from the CPU side.
++Several different designs were tried to support device memory. First one used
++a device specific data structure to keep information about migrated memory and
++HMM hooked itself in various places of mm code to handle any access to
++addresses that were backed by device memory. It turns out that this ended up
++replicating most of the fields of struct page and also needed many kernel code
++paths to be updated to understand this new kind of memory.
+ 
- -HMM provide a set of helpers to register and hotplug device memory as a new
- -region needing struct page. This is offer through a very simple API::
++Most kernel code paths never try to access the memory behind a page
++but only care about struct page contents. Because of this, HMM switched to
++directly using struct page for device memory which left most kernel code paths
++unaware of the difference. We only need to make sure that no one ever tries to
++map those pages from the CPU side.
+ 
- -drop. This means the device page is now free and no longer use by anyone. The
- -second callback happens whenever CPU try to access a device page which it can
- -not do. This second callback must trigger a migration back to system memory.
++HMM provides a set of helpers to register and hotplug device memory as a new
++region needing a struct page. This is offered through a very simple API::
+ 
+  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+                                    struct device *device,
+                                    unsigned long size);
+  void hmm_devmem_remove(struct hmm_devmem *devmem);
+ 
+ The hmm_devmem_ops is where most of the important things are::
+ 
+  struct hmm_devmem_ops {
+      void (*free)(struct hmm_devmem *devmem, struct page *page);
+      int (*fault)(struct hmm_devmem *devmem,
+                   struct vm_area_struct *vma,
+                   unsigned long addr,
+                   struct page *page,
+                   unsigned flags,
+                   pmd_t *pmdp);
+  };
+ 
+ The first callback (free()) happens when the last reference on a device page is
- -Migrate to and from device memory
- -=================================
++dropped. This means the device page is now free and no longer used by anyone.
++The second callback happens whenever the CPU tries to access a device page
++which it cannot do. This second callback must trigger a migration back to
++system memory.
+ 
+ 
- -Because CPU can not access device memory, migration must use device DMA engine
- -to perform copy from and to device memory. For this we need a new migration
- -helper::
++Migration to and from device memory
++===================================
+ 
- -Unlike other migration function it works on a range of virtual address, there
- -is two reasons for that. First device DMA copy has a high setup overhead cost
++Because the CPU cannot access device memory, migration must use the device DMA
++engine to perform copy from and to device memory. For this we need a new
++migration helper::
+ 
+  int migrate_vma(const struct migrate_vma_ops *ops,
+                  struct vm_area_struct *vma,
+                  unsigned long mentries,
+                  unsigned long start,
+                  unsigned long end,
+                  unsigned long *src,
+                  unsigned long *dst,
+                  void *private);
+ 
- -make the whole excersie pointless. The second reason is because driver trigger
- -such migration base on range of address the device is actively accessing.
++Unlike other migration functions it works on a range of virtual addresses; there
++are two reasons for that. First, device DMA copy has a high setup overhead cost
+ and thus batching multiple pages is needed as otherwise the migration overhead
- -The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy())
- -control destination memory allocation and copy operation. Second one is there
- -to allow device driver to perform cleanup operation after migration::
++makes the whole exercise pointless. The second reason is because the
++migration might be for a range of addresses the device is actively accessing.
+ 
- -It is important to stress that this migration helpers allow for hole in the
++The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
++controls destination memory allocation and copy operation. Second one is there
++to allow the device driver to perform cleanup operations after migration::
+ 
+  struct migrate_vma_ops {
+      void (*alloc_and_copy)(struct vm_area_struct *vma,
+                             const unsigned long *src,
+                             unsigned long *dst,
+                             unsigned long start,
+                             unsigned long end,
+                             void *private);
+      void (*finalize_and_map)(struct vm_area_struct *vma,
+                               const unsigned long *src,
+                               const unsigned long *dst,
+                               unsigned long start,
+                               unsigned long end,
+                               void *private);
+  };
+ 
- -the usual reasons (page is pin, page is lock, ...). This helper does not fail
- -but just skip over those pages.
++It is important to stress that these migration helpers allow for holes in the
+ virtual address range. Some pages in the range might not be migrated for all
- -The alloc_and_copy() might as well decide to not migrate all pages in the
- -range (for reasons under the callback control). For those the callback just
- -have to leave the corresponding dst entry empty.
++the usual reasons (page is pinned, page is locked, ...). This helper does not
++fail but just skips over those pages.
+ 
- -Finaly the migration of the struct page might fails (for file back page) for
++The alloc_and_copy() might decide to not migrate all pages in the
++range (for reasons under the callback control). For those, the callback just
++has to leave the corresponding dst entry empty.
+ 
- -that happens then the finalize_and_map() can catch any pages that was not
- -migrated. Note those page were still copied to new page and thus we wasted
++Finally, the migration of the struct page might fail (for file backed page) for
+ various reasons (failure to freeze reference, or update page cache, ...). If
- -anonymous if device page is use for anonymous, file if device page is use for
- -file back page or shmem if device page is use for share memory). This is a
- -deliberate choice to keep existing application that might start using device
- -memory without knowing about it to keep runing unimpacted.
- -
- -Drawbacks is that OOM killer might kill an application using a lot of device
- -memory and not a lot of regular system memory and thus not freeing much system
- -memory. We want to gather more real world experience on how application and
- -system react under memory pressure in the presence of device memory before
++that happens, then the finalize_and_map() can catch any pages that were not
++migrated. Note those pages were still copied to a new page and thus we wasted
+ bandwidth but this is considered as a rare event and a price that we are
+ willing to pay to keep all the code simpler.
+ 
+ 
+ Memory cgroup (memcg) and rss accounting
+ ========================================
+ 
+ For now device memory is accounted as any regular page in rss counters (either
- -Same decision was made for memory cgroup. Device memory page are accounted
++anonymous if device page is used for anonymous, file if device page is used for
++file backed page or shmem if device page is used for shared memory). This is a
++deliberate choice to keep existing applications, that might start using device
++memory without knowing about it, running unimpacted.
++
++A drawback is that the OOM killer might kill an application using a lot of
++device memory and not a lot of regular system memory and thus not freeing much
++system memory. We want to gather more real world experience on how applications
++and system react under memory pressure in the presence of device memory before
+ deciding to account device memory differently.
+ 
+ 
- -back from device memory to regular memory can not fail because it would
++Same decision was made for memory cgroup. Device memory pages are accounted
+ against same memory cgroup a regular page would be accounted to. This does
+ simplify migration to and from device memory. This also means that migration
- -get more experience in how device memory is use and its impact on memory
++back from device memory to regular memory cannot fail because it would
+ go above memory cgroup limit. We might revisit this choice later on once we
- -Note that device memory can never be pin nor by device driver nor through GUP
++get more experience in how device memory is used and its impact on memory
+ resource control.
+ 
+ 
- -is drop in case of share memory or file back memory.
++Note that device memory can never be pinned by a device driver or through GUP
+ and thus such memory is always freed upon process exit, or when the last
++reference is dropped in case of shared memory or file backed memory.
diff --combined Documentation/vm/page_migration.rst

index 0000000000000000000000000000000000000000,07b67a821a12f6e80c63eca19019602303a97395..f68d61335abb6aac22ea4686f6a9cecd1b822fea

mode 000000,100644..100644
--- /dev/null
--- 2/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@@ -1,0 -1,257 +1,257 @@@
- -2. Insure that writeback is complete.
+ .. _page_migration:
+ 
+ ==============
+ Page migration
+ ==============
+ 
+ Page migration allows the moving of the physical location of pages between
+ nodes in a numa system while the process is running. This means that the
+ virtual addresses that the process sees do not change. However, the
+ system rearranges the physical location of those pages.
+ 
+ The main intent of page migration is to reduce the latency of memory access
+ by moving pages near to the processor where the process accessing that memory
+ is running.
+ 
+ Page migration allows a process to manually relocate the node on which its
+ pages are located through the MF_MOVE and MF_MOVE_ALL options while setting
+ a new memory policy via mbind(). The pages of a process can also be relocated
+ from another process using the sys_migrate_pages() function call. The
+ migrate_pages function call takes two sets of nodes and moves pages of a
+ process that are located on the from nodes to the destination nodes.
+ Page migration functions are provided by the numactl package by Andi Kleen
+ (a version later than 0.9.3 is required. Get it from
+ ftp://oss.sgi.com/www/projects/libnuma/download/). numactl provides libnuma
+ which provides an interface similar to other numa functionality for page
+ migration.  cat ``/proc/<pid>/numa_maps`` allows an easy review of where the
+ pages of a process are located. See also the numa_maps documentation in the
+ proc(5) man page.
+ 
+ Manual migration is useful if for example the scheduler has relocated
+ a process to a processor on a distant node. A batch scheduler or an
+ administrator may detect the situation and move the pages of the process
+ nearer to the new processor. The kernel itself does only provide
+ manual page migration support. Automatic page migration may be implemented
+ through user space processes that move pages. A special function call
+ "move_pages" allows the moving of individual pages within a process.
+ A NUMA profiler may f.e. obtain a log showing frequent off node
+ accesses and may use the result to move pages to more advantageous
+ locations.
+ 
+ Larger installations usually partition the system using cpusets into
+ sections of nodes. Paul Jackson has equipped cpusets with the ability to
+ move pages when a task is moved to another cpuset (See
+ Documentation/cgroup-v1/cpusets.txt).
+ Cpusets allows the automation of process locality. If a task is moved to
+ a new cpuset then also all its pages are moved with it so that the
+ performance of the process does not sink dramatically. Also the pages
+ of processes in a cpuset are moved if the allowed memory nodes of a
+ cpuset are changed.
+ 
+ Page migration allows the preservation of the relative location of pages
+ within a group of nodes for all migration techniques which will preserve a
+ particular memory allocation pattern generated even after migrating a
+ process. This is necessary in order to preserve the memory latencies.
+ Processes will run with similar performance after migration.
+ 
+ Page migration occurs in several steps. First a high level
+ description for those trying to use migrate_pages() from the kernel
+ (for userspace usage see Andi Kleen's numactl package mentioned above)
+ and then a low level description of how the low level details work.
+ 
+ In kernel use of migrate_pages()
+ ================================
+ 
+ 1. Remove pages from the LRU.
+ 
+    Lists of pages to be migrated are generated by scanning over
+    pages and moving them into lists. This is done by
+    calling isolate_lru_page().
+    Calling isolate_lru_page increases the references to the page
+    so that it cannot vanish while the page migration occurs.
+    It also prevents the swapper or other scans from encountering
+    the page.
+ 
+ 2. We need to have a function of type new_page_t that can be
+    passed to migrate_pages(). This function should figure out
+    how to allocate the correct new page given the old page.
+ 
+ 3. The migrate_pages() function is called which attempts
+    to do the migration. It will call the function to allocate
+    the new page for each page that is considered for
+    moving.
+ 
+ How migrate_pages() works
+ =========================
+ 
+ migrate_pages() does several passes over its list of pages. A page is moved
+ if all references to a page are removable at the time. The page has
+ already been removed from the LRU via isolate_lru_page() and the refcount
+ is increased so that the page cannot be freed while page migration occurs.
+ 
+ Steps:
+ 
+ 1. Lock the page to be migrated
+ 
- -5. The radix tree lock is taken. This will cause all processes trying
- -   to access the page via the mapping to block on the radix tree spinlock.
++2. Ensure that writeback is complete.
+ 
+ 3. Lock the new page that we want to move to. It is locked so that accesses to
+    this (not yet uptodate) page immediately block while the move is in progress.
+ 
+ 4. All the page table references to the page are converted to migration
+    entries. This decreases the mapcount of a page. If the resulting
+    mapcount is not zero then we do not migrate the page. All user space
+    processes that attempt to access the page will now wait on the page lock.
+ 
- -10. The reference count of the old page is dropped because the radix tree
++5. The i_pages lock is taken. This will cause all processes trying
++   to access the page via the mapping to block on the spinlock.
+ 
+ 6. The refcount of the page is examined and we back out if references remain
+    otherwise we know that we are the only one referencing this page.
+ 
+ 7. The radix tree is checked and if it does not contain the pointer to this
+    page then we back out because someone else modified the radix tree.
+ 
+ 8. The new page is prepped with some settings from the old page so that
+    accesses to the new page will discover a page with the correct settings.
+ 
+ 9. The radix tree is changed to point to the new page.
+ 
- -    the new page is referenced to by the radix tree.
++10. The reference count of the old page is dropped because the address space
+     reference is gone. A reference to the new page is established because
- -11. The radix tree lock is dropped. With that lookups in the mapping
- -    become possible again. Processes will move from spinning on the tree_lock
++    the new page is referenced by the address space.
+ 
++11. The i_pages lock is dropped. With that lookups in the mapping
++    become possible again. Processes will move from spinning on the lock
+     to sleeping on the locked new page.
+ 
+ 12. The page contents are copied to the new page.
+ 
+ 13. The remaining page flags are copied to the new page.
+ 
+ 14. The old page flags are cleared to indicate that the page does
+     not provide any information anymore.
+ 
+ 15. Queued up writeback on the new page is triggered.
+ 
+ 16. If migration entries were inserted into the page table, then replace them
+     with real ptes. Doing so will enable access for user space processes not
+     already waiting for the page lock.
+ 
+ 17. The page locks are dropped from the old and new page.
+     Processes waiting on the page lock will redo their page faults
+     and will reach the new page.
+ 
+ 18. The new page is moved to the LRU and can be scanned by the swapper
+     etc again.
+ 
+ Non-LRU page migration
+ ======================
+ 
+ Although original migration aimed for reducing the latency of memory access
+ for NUMA, compaction, which wants to create high-order pages, is also a main customer.
+ 
+ Current problem of the implementation is that it is designed to migrate only
+ *LRU* pages. However, there are potential non-lru pages which can be migrated
+ in drivers, for example, zsmalloc, virtio-balloon pages.
+ 
+ For virtio-balloon pages, some parts of migration code path have been hooked
+ up and added virtio-balloon specific functions to intercept migration logics.
+ It's too specific to a driver so other drivers who want to make their pages
+ movable would have to add own specific hooks in migration path.
+ 
+ To overcome the problem, VM supports non-LRU page migration which provides
+ generic functions for non-LRU movable pages without driver specific hooks
+ in the migration path.
+ 
+ If a driver wants to make its own pages movable, it should define three functions
+ which are function pointers of struct address_space_operations.
+ 
+ 1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
+ 
+    What VM expects on isolate_page function of driver is to return *true*
+    if driver isolates page successfully. On returning true, VM marks the page
+    as PG_isolated so concurrent isolation in several CPUs skip the page
+    for isolation. If a driver cannot isolate the page, it should return *false*.
+ 
+    Once page is successfully isolated, VM uses page.lru fields so driver
+    shouldn't expect to preserve values in those fields.
+ 
+ 2. ``int (*migratepage) (struct address_space *mapping,``
+ |     ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
+ 
+    After isolation, VM calls migratepage of driver with isolated page.
+    The function of migratepage is to move content of the old page to new page
+    and set up fields of struct page newpage. Keep in mind that you should
+    indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
+    under page_lock if you migrated the oldpage successfully and returned
+    MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
+    can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
+    because VM interprets -EAGAIN as "temporary migration failure". On returning
+    any error except -EAGAIN, VM will give up the page migration without retrying
+    in this time.
+ 
+    Driver shouldn't touch the page.lru field, which VM is using, in the functions.
+ 
+ 3. ``void (*putback_page)(struct page *);``
+ 
+    If migration fails on isolated page, VM should return the isolated page
+    to the driver so VM calls driver's putback_page with migration failed page.
+    In this function, driver should put the isolated page back to the own data
+    structure.
+ 
+ 4. non-lru movable page flags
+ 
+    There are two page flags for supporting non-lru movable page.
+ 
+    * PG_movable
+ 
+      Driver should use the below function to make page movable under page_lock::
+ 
+       void __SetPageMovable(struct page *page, struct address_space *mapping)
+ 
+      It needs argument of address_space for registering migration
+      family functions which will be called by VM. Exactly speaking,
+      PG_movable is not a real flag of struct page. Rather, VM
+      reuses page->mapping's lower bits to represent it.
+ 
+ ::
+ 
+       #define PAGE_MAPPING_MOVABLE 0x2
+       page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
+ 
+      so driver shouldn't access page->mapping directly. Instead, driver should
+      use page_mapping which masks off the low two bits of page->mapping under
+      page lock so it can get right struct address_space.
+ 
+      For testing of non-lru movable page, VM supports __PageMovable function.
+      However, it doesn't guarantee to identify non-lru movable page because
+      page->mapping field is unified with other variables in struct page.
+      As well, if driver releases the page after isolation by VM, page->mapping
+      doesn't have stable value although it has PAGE_MAPPING_MOVABLE
+      (Look at __ClearPageMovable). But __PageMovable is cheap to catch whether
+      page is LRU or non-lru movable once the page has been isolated. Because
+      LRU pages never can have PAGE_MAPPING_MOVABLE in page->mapping. It is also
+      good for just peeking to test non-lru movable pages before more expensive
+      checking with lock_page in pfn scanning to select victim.
+ 
+      For guaranteeing non-lru movable page, VM provides PageMovable function.
+      Unlike __PageMovable, PageMovable function validates page->mapping and
+      mapping->a_ops->isolate_page under lock_page. The lock_page prevents sudden
+      destroying of page->mapping.
+ 
+      Driver using __SetPageMovable should clear the flag via __ClearPageMovable
+      under page_lock before releasing the page.
+ 
+    * PG_isolated
+ 
+      To prevent concurrent isolation among several CPUs, VM marks isolated page
+      as PG_isolated under lock_page. So if a CPU encounters PG_isolated non-lru
+      movable page, it can skip it. Driver doesn't need to manipulate the flag
+      because VM will set/clear it automatically. Keep in mind that if driver
+      sees PG_isolated page, it means the page has been isolated by VM so it
+      shouldn't touch page.lru field.
+      PG_isolated is an alias of PG_reclaim flag so driver shouldn't use the flag
+      for own purpose.
+ 
+ Christoph Lameter, May 8, 2006.
+ Minchan Kim, Mar 28, 2016.
diff --combined MAINTAINERS

index 0a1410d5a621835ded529ac2d6ade31c306e086f,575849a8343e0fcd07f81846adf7289f591081af..89b8ab9826d07a31596db5bd593ce3851dea5a7c
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -766,8 -766,6 +766,8 @@@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_am
   F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
   F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
   F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+ +F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+ +F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
   F:    drivers/gpu/drm/amd/amdkfd/
   F:    drivers/gpu/drm/amd/include/cik_structs.h
   F:    drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@@ -843,6 -841,13 +843,6 @@@ F:        sound/soc/codecs/ad7
   F:    sound/soc/codecs/ssm*
   F:    sound/soc/codecs/sigmadsp.*
   
- -ANALOG DEVICES INC ASOC DRIVERS
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org/
- -S:    Supported
- -F:    sound/soc/blackfin/*
- -
   ANALOG DEVICES INC DMA DRIVERS
   M:    Lars-Peter Clausen <lars@metafoo.de>
   W:    http://ez.analog.com/community/linux-device-drivers
@@@ -861,17 -866,7 +861,17 @@@ F:       drivers/iio/*/ad
   F:    drivers/iio/adc/ltc2497*
   X:    drivers/iio/*/adjd*
   F:    drivers/staging/iio/*/ad*
- -F:    drivers/staging/iio/trigger/iio-trig-bfin-timer.c
+ +
+ +ANDES ARCHITECTURE
+ +M:    Greentime Hu <green.hu@gmail.com>
+ +M:    Vincent Chen <deanbo422@gmail.com>
+ +T:    git https://github.com/andestech/linux.git
+ +S:    Supported
+ +F:    arch/nds32/
+ +F:    Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
+ +F:    Documentation/devicetree/bindings/nds32/
+ +K:    nds32
+ +N:    nds32
   
   ANDROID CONFIG FRAGMENTS
   M:    Rob Herring <robh@kernel.org>
@@@ -934,8 -929,8 +934,8 @@@ F: drivers/char/apm-emulation.
   APPARMOR SECURITY MODULE
   M:    John Johansen <john.johansen@canonical.com>
   L:    apparmor@lists.ubuntu.com (subscribers-only, general discussion)
- -W:    apparmor.wiki.kernel.org
- -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
+ +W:    wiki.apparmor.net
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor
   S:    Supported
   F:    security/apparmor/
   F:    Documentation/admin-guide/LSM/apparmor.rst
@@@ -1065,42 -1060,41 +1065,42 @@@ ARM POR
   M:    Russell King <linux@armlinux.org.uk>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   W:    http://www.armlinux.org.uk/
- -S:    Maintained
+ +S:    Odd Fixes
   T:    git git://git.armlinux.org.uk/~rmk/linux-arm.git
   F:    arch/arm/
+ +X:    arch/arm/boot/dts/
   
   ARM PRIMECELL AACI PL041 DRIVER
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    sound/arm/aaci.*
   
   ARM PRIMECELL BUS SUPPORT
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    drivers/amba/
   F:    include/linux/amba/bus.h
   
   ARM PRIMECELL CLCD PL110 DRIVER
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    drivers/video/fbdev/amba-clcd.*
   
   ARM PRIMECELL KMI PL050 DRIVER
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    drivers/input/serio/ambakmi.*
   F:    include/linux/amba/kmi.h
   
   ARM PRIMECELL MMCI PL180/1 DRIVER
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    drivers/mmc/host/mmci.*
   F:    include/linux/amba/mmci.h
   
   ARM PRIMECELL UART PL010 AND PL011 DRIVERS
   M:    Russell King <linux@armlinux.org.uk>
- -S:    Maintained
+ +S:    Odd Fixes
   F:    drivers/tty/serial/amba-pl01*.c
   F:    include/linux/amba/serial.h
   
@@@ -1158,7 -1152,7 +1158,7 @@@ S:      Maintaine
   F:    drivers/clk/sunxi/
   
   ARM/Allwinner sunXi SoC support
- -M:    Maxime Ripard <maxime.ripard@free-electrons.com>
+ +M:    Maxime Ripard <maxime.ripard@bootlin.com>
   M:    Chen-Yu Tsai <wens@csie.org>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
@@@ -1232,21 -1226,37 +1232,21 @@@ F:   Documentation/devicetree/bindings/i2
   
   ARM/ASPEED MACHINE SUPPORT
   M:    Joel Stanley <joel@jms.id.au>
- -S:    Maintained
+ +R:    Andrew Jeffery <andrew@aj.id.au>
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +L:    linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+ +Q:    https://patchwork.ozlabs.org/project/linux-aspeed/list/
+ +S:    Supported
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/joel/aspeed.git
   F:    arch/arm/mach-aspeed/
   F:    arch/arm/boot/dts/aspeed-*
- -F:    drivers/*/*aspeed*
+ +N:    aspeed
   
   ARM/ATMEL AT91 Clock Support
- -M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +M:    Boris Brezillon <boris.brezillon@bootlin.com>
   S:    Maintained
   F:    drivers/clk/at91
   
- -ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
- -M:    Nicolas Ferre <nicolas.ferre@microchip.com>
- -M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
- -L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
- -W:    http://www.linux4sam.org
- -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
- -S:    Supported
- -N:    at91
- -N:    atmel
- -F:    arch/arm/mach-at91/
- -F:    include/soc/at91/
- -F:    arch/arm/boot/dts/at91*.dts
- -F:    arch/arm/boot/dts/at91*.dtsi
- -F:    arch/arm/boot/dts/sama*.dts
- -F:    arch/arm/boot/dts/sama*.dtsi
- -F:    arch/arm/include/debug/at91.S
- -F:    drivers/memory/atmel*
- -F:    drivers/watchdog/sama5d4_wdt.c
- -X:    drivers/input/touchscreen/atmel_mxt_ts.c
- -X:    drivers/net/wireless/atmel/
- -
   ARM/CALXEDA HIGHBANK ARCHITECTURE
   M:    Rob Herring <robh@kernel.org>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1567,11 -1577,20 +1567,11 @@@ ARM/MAGICIAN MACHINE SUPPOR
   M:    Philipp Zabel <philipp.zabel@gmail.com>
   S:    Maintained
   
- -ARM/Marvell Berlin SoC support
- -M:    Jisheng Zhang <jszhang@marvell.com>
- -M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
- -L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
- -S:    Maintained
- -F:    arch/arm/mach-berlin/
- -F:    arch/arm/boot/dts/berlin*
- -F:    arch/arm64/boot/dts/marvell/berlin*
- -
   ARM/Marvell Dove/MV78xx0/Orion SOC support
   M:    Jason Cooper <jason@lakedaemon.net>
   M:    Andrew Lunn <andrew@lunn.ch>
   M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
- -M:    Gregory Clement <gregory.clement@free-electrons.com>
+ +M:    Gregory Clement <gregory.clement@bootlin.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
   F:    Documentation/devicetree/bindings/soc/dove/
@@@ -1585,7 -1604,7 +1585,7 @@@ F:      arch/arm/boot/dts/orion5x
   ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
   M:    Jason Cooper <jason@lakedaemon.net>
   M:    Andrew Lunn <andrew@lunn.ch>
- -M:    Gregory Clement <gregory.clement@free-electrons.com>
+ +M:    Gregory Clement <gregory.clement@bootlin.com>
   M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
@@@ -1637,27 -1656,6 +1637,27 @@@ L:    linux-arm-kernel@lists.infradead.or
   F:    arch/arm/mach-ks8695/
   S:    Odd Fixes
   
+ +ARM/Microchip (AT91) SoC support
+ +M:    Nicolas Ferre <nicolas.ferre@microchip.com>
+ +M:    Alexandre Belloni <alexandre.belloni@bootlin.com>
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +W:    http://www.linux4sam.org
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
+ +S:    Supported
+ +N:    at91
+ +N:    atmel
+ +F:    arch/arm/mach-at91/
+ +F:    include/soc/at91/
+ +F:    arch/arm/boot/dts/at91*.dts
+ +F:    arch/arm/boot/dts/at91*.dtsi
+ +F:    arch/arm/boot/dts/sama*.dts
+ +F:    arch/arm/boot/dts/sama*.dtsi
+ +F:    arch/arm/include/debug/at91.S
+ +F:    drivers/memory/atmel*
+ +F:    drivers/watchdog/sama5d4_wdt.c
+ +X:    drivers/input/touchscreen/atmel_mxt_ts.c
+ +X:    drivers/net/wireless/atmel/
+ +
   ARM/MIOA701 MACHINE SUPPORT
   M:    Robert Jarzmik <robert.jarzmik@free.fr>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1702,20 -1700,6 +1702,20 @@@ F:    Documentation/devicetree/bindings/ar
   F:    Documentation/devicetree/bindings/arm/ux500/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
   
+ +ARM/NUVOTON NPCM ARCHITECTURE
+ +M:    Avi Fishman <avifishman70@gmail.com>
+ +M:    Tomer Maimon <tmaimon77@gmail.com>
+ +R:    Patrick Venture <venture@google.com>
+ +R:    Nancy Yuen <yuenn@google.com>
+ +R:    Brendan Higgins <brendanhiggins@google.com>
+ +L:    openbmc@lists.ozlabs.org (moderated for non-subscribers)
+ +S:    Supported
+ +F:    arch/arm/mach-npcm/
+ +F:    arch/arm/boot/dts/nuvoton-npcm*
+ +F:    include/dt-bindings/clock/nuvoton,npcm7xx-clks.h
+ +F:    drivers/*/*npcm*
+ +F:    Documentation/*/*npcm*
+ +
   ARM/NUVOTON W90X900 ARM ARCHITECTURE
   M:    Wan ZongShun <mcuos.com@gmail.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1726,7 -1710,7 +1726,7 @@@ F:      drivers/input/keyboard/w90p910_keypa
   F:    drivers/input/touchscreen/w90p910_ts.c
   F:    drivers/watchdog/nuc900_wdt.c
   F:    drivers/net/ethernet/nuvoton/w90p910_ether.c
- -F:    drivers/mtd/nand/nuc900_nand.c
+ +F:    drivers/mtd/nand/raw/nuc900_nand.c
   F:    drivers/rtc/rtc-nuc900.c
   F:    drivers/spi/spi-nuc900.c
   F:    drivers/usb/host/ehci-w90x900.c
@@@ -1748,7 -1732,7 +1748,7 @@@ F:      arch/arm/mach-orion5x/ts78xx-
   ARM/OXNAS platform support
   M:    Neil Armstrong <narmstrong@baylibre.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
- -L:    linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
+ +L:    linux-oxnas@groups.io (moderated for non-subscribers)
   S:    Maintained
   F:    arch/arm/mach-oxnas/
   F:    arch/arm/boot/dts/ox8*.dts*
@@@ -1879,6 -1863,7 +1879,6 @@@ Q:      https://patchwork.kernel.org/project
   S:    Maintained
   F:    arch/arm/boot/dts/s3c*
   F:    arch/arm/boot/dts/s5p*
- -F:    arch/arm/boot/dts/samsung*
   F:    arch/arm/boot/dts/exynos*
   F:    arch/arm64/boot/dts/exynos/
   F:    arch/arm/plat-samsung/
@@@ -1978,14 -1963,6 +1978,14 @@@ M:    Thor Thayer <thor.thayer@linux.intel
   S:    Maintained
   F:    drivers/edac/altera_edac.
   
+ +ARM/SPREADTRUM SoC SUPPORT
+ +M:    Orson Zhai <orsonzhai@gmail.com>
+ +M:    Baolin Wang <baolin.wang@linaro.org>
+ +M:    Chunyan Zhang <zhang.lyra@gmail.com>
+ +S:    Maintained
+ +F:    arch/arm64/boot/dts/sprd
+ +N:    sprd
+ +
   ARM/STI ARCHITECTURE
   M:    Patrice Chotard <patrice.chotard@st.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -2022,21 -1999,10 +2022,21 @@@ M:   Maxime Coquelin <mcoquelin.stm32@gma
   M:    Alexandre Torgue <alexandre.torgue@st.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
- -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next
   N:    stm32
+ +F:    arch/arm/boot/dts/stm32*
+ +F:    arch/arm/mach-stm32/
   F:    drivers/clocksource/armv7m_systick.c
   
+ +ARM/Synaptics Berlin SoC support
+ +M:    Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+ +M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +S:    Maintained
+ +F:    arch/arm/mach-berlin/
+ +F:    arch/arm/boot/dts/berlin*
+ +F:    arch/arm64/boot/dts/marvell/berlin*
+ +
   ARM/TANGO ARCHITECTURE
   M:    Marc Gonzalez <marc.w.gonzalez@free.fr>
   M:    Mans Rullgard <mans@mansr.com>
@@@ -2426,6 -2392,7 +2426,6 @@@ T:      git git://github.com/ndyer/linux.gi
   S:    Maintained
   F:    Documentation/devicetree/bindings/input/atmel,maxtouch.txt
   F:    drivers/input/touchscreen/atmel_mxt_ts.c
- -F:    include/linux/platform_data/atmel_mxt_ts.h
   
   ATMEL SAMA5D2 ADC DRIVER
   M:    Ludovic Desroches <ludovic.desroches@microchip.com>
@@@ -2508,6 -2475,7 +2508,6 @@@ M:      Paul Moore <paul@paul-moore.com
   M:    Eric Paris <eparis@redhat.com>
   L:    linux-audit@redhat.com (moderated for non-subscribers)
   W:    https://github.com/linux-audit
- -W:    https://people.redhat.com/sgrubb/audit
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
   S:    Supported
   F:    include/linux/audit.h
@@@ -2659,6 -2627,51 +2659,6 @@@ F:     Documentation/filesystems/bfs.tx
   F:    fs/bfs/
   F:    include/uapi/linux/bfs_fs.h
   
- -BLACKFIN ARCHITECTURE
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -T:    git git://git.code.sf.net/p/adi-linux/code
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    arch/blackfin/
- -
- -BLACKFIN EMAC DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    drivers/net/ethernet/adi/
- -
- -BLACKFIN MEDIA DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org/
- -S:    Orphan
- -F:    drivers/media/platform/blackfin/
- -F:    drivers/media/i2c/adv7183*
- -F:    drivers/media/i2c/vs6624*
- -
- -BLACKFIN RTC DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    drivers/rtc/rtc-bfin.c
- -
- -BLACKFIN SDH DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    drivers/mmc/host/bfin_sdh.c
- -
- -BLACKFIN SERIAL DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    drivers/tty/serial/bfin_uart.c
- -
- -BLACKFIN WATCHDOG DRIVER
- -L:    adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
- -W:    http://blackfin.uclinux.org
- -S:    Orphan
- -F:    drivers/watchdog/bfin_wdt.c
- -
   BLINKM RGB LED DRIVER
   M:    Jan-Simon Moeller <jansimon.moeller@gmx.de>
   S:    Maintained
@@@ -2670,7 -2683,6 +2670,7 @@@ L:      linux-block@vger.kernel.or
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
   S:    Maintained
   F:    block/
+ +F:    drivers/block/
   F:    kernel/trace/blktrace.c
   F:    lib/sbitmap.c
   
@@@ -3002,7 -3014,7 +3002,7 @@@ M:      Kamal Dasu <kdasu.kdev@gmail.com
   L:    linux-mtd@lists.infradead.org
   L:    bcm-kernel-feedback-list@broadcom.com
   S:    Maintained
- -F:    drivers/mtd/nand/brcmnand/
+ +F:    drivers/mtd/nand/raw/brcmnand/
   
   BROADCOM STB DPFE DRIVER
   M:    Markus Mayer <mmayer@broadcom.com>
@@@ -3270,11 -3282,12 +3270,11 @@@ F:   drivers/net/ieee802154/cc2520.
   F:    include/linux/spi/cc2520.h
   F:    Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
   
- -CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+ +CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER
   M:    Gilad Ben-Yossef <gilad@benyossef.com>
   L:    linux-crypto@vger.kernel.org
- -L:    driverdev-devel@linuxdriverproject.org
   S:    Supported
- -F:    drivers/staging/ccree/
+ +F:    drivers/crypto/ccree/
   W:    https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
   
   CEC FRAMEWORK
@@@ -3292,7 -3305,6 +3292,7 @@@ F:      include/media/cec-notifier.
   F:    include/uapi/linux/cec.h
   F:    include/uapi/linux/cec-funcs.h
   F:    Documentation/devicetree/bindings/media/cec.txt
+ +F:    Documentation/ABI/testing/debugfs-cec-error-inj
   
   CEC GPIO DRIVER
   M:    Hans Verkuil <hans.verkuil@cisco.com>
@@@ -3735,6 -3747,16 +3735,6 @@@ S:     Maintaine
   F:    Documentation/filesystems/cramfs.txt
   F:    fs/cramfs/
   
- -CRIS PORT
- -M:    Mikael Starvik <starvik@axis.com>
- -M:    Jesper Nilsson <jesper.nilsson@axis.com>
- -L:    linux-cris-kernel@axis.com
- -W:    http://developer.axis.com
- -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jesper/cris.git
- -S:    Maintained
- -F:    arch/cris/
- -F:    drivers/tty/serial/crisv10.*
- -
   CRYPTO API
   M:    Herbert Xu <herbert@gondor.apana.org.au>
   M:    "David S. Miller" <davem@davemloft.net>
@@@ -4094,10 -4116,10 +4094,10 @@@ DENALI NAND DRIVE
   M:    Masahiro Yamada <yamada.masahiro@socionext.com>
   L:    linux-mtd@lists.infradead.org
   S:    Supported
- -F:    drivers/mtd/nand/denali*
+ +F:    drivers/mtd/nand/raw/denali*
   
   DESIGNWARE USB2 DRD IP DRIVER
- -M:    John Youn <johnyoun@synopsys.com>
+ +M:    Minas Harutyunyan <hminas@synopsys.com>
   L:    linux-usb@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
   S:    Maintained
@@@ -4311,7 -4333,6 +4311,7 @@@ Q:      https://patchwork.kernel.org/project
   S:    Maintained
   F:    drivers/dma/
   F:    include/linux/dmaengine.h
+ +F:    include/linux/of_dma.h
   F:    Documentation/devicetree/bindings/dma/
   F:    Documentation/driver-api/dmaengine/
   T:    git git://git.infradead.org/users/vkoul/slave-dma.git
@@@ -4389,14 -4410,8 +4389,14 @@@ L:    linux-kernel@vger.kernel.or
   S:    Maintained
   F:    drivers/staging/fsl-dpaa2/ethernet
   
+ +DPAA2 ETHERNET SWITCH DRIVER
+ +M:    Razvan Stefanescu <razvan.stefanescu@nxp.com>
+ +L:    linux-kernel@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/staging/fsl-dpaa2/ethsw
+ +
   DPT_I2O SCSI RAID DRIVER
- -M:    Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+ +M:    Adaptec OEM Raid Solutions <aacraid@microsemi.com>
   L:    linux-scsi@vger.kernel.org
   W:    http://www.adaptec.com/
   S:    Maintained
@@@ -4441,13 -4456,6 +4441,13 @@@ T:    git git://anongit.freedesktop.org/dr
   S:    Supported
   F:    drivers/gpu/drm/pl111/
   
+ +DRM DRIVER FOR ARM VERSATILE TFT PANELS
+ +M:    Linus Walleij <linus.walleij@linaro.org>
+ +T:    git git://anongit.freedesktop.org/drm/drm-misc
+ +S:    Maintained
+ +F:    drivers/gpu/drm/panel/panel-arm-versatile.c
+ +F:    Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
+ +
   DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
   M:    Dave Airlie <airlied@redhat.com>
   S:    Odd Fixes
@@@ -4602,8 -4610,8 +4602,8 @@@ F:      include/uapi/drm
   F:    include/linux/vga*
   
   DRM DRIVERS AND MISC GPU PATCHES
- -M:    Daniel Vetter <daniel.vetter@intel.com>
   M:    Gustavo Padovan <gustavo@padovan.org>
+ +M:    Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
   M:    Sean Paul <seanpaul@chromium.org>
   W:    https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
   S:    Maintained
@@@ -4616,7 -4624,7 +4616,7 @@@ F:      include/uapi/drm/drm
   F:    include/linux/vga*
   
   DRM DRIVERS FOR ALLWINNER A10
- -M:    Maxime Ripard  <maxime.ripard@free-electrons.com>
+ +M:    Maxime Ripard  <maxime.ripard@bootlin.com>
   L:    dri-devel@lists.freedesktop.org
   S:    Supported
   F:    drivers/gpu/drm/sun4i/
@@@ -4636,7 -4644,7 +4636,7 @@@ F:      Documentation/gpu/meson.rs
   T:    git git://anongit.freedesktop.org/drm/drm-misc
   
   DRM DRIVERS FOR ATMEL HLCDC
- -M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +M:    Boris Brezillon <boris.brezillon@bootlin.com>
   L:    dri-devel@lists.freedesktop.org
   S:    Supported
   F:    drivers/gpu/drm/atmel-hlcdc/
@@@ -4729,7 -4737,6 +4729,7 @@@ F:      drivers/gpu/drm/rcar-du
   F:    drivers/gpu/drm/shmobile/
   F:    include/linux/platform_data/shmob_drm.h
   F:    Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+ +F:    Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
   F:    Documentation/devicetree/bindings/display/renesas,du.txt
   
   DRM DRIVERS FOR ROCKCHIP
@@@ -4996,6 -5003,12 +4996,6 @@@ T:     git git://linuxtv.org/anttip/media_t
   S:    Maintained
   F:    drivers/media/tuners/e4000*
   
- -EATA ISA/EISA/PCI SCSI DRIVER
- -M:    Dario Ballabio <ballabio_dario@emc.com>
- -L:    linux-scsi@vger.kernel.org
- -S:    Maintained
- -F:    drivers/scsi/eata.c
- -
   EC100 MEDIA DRIVER
   M:    Antti Palosaari <crope@iki.fi>
   L:    linux-media@vger.kernel.org
@@@ -5537,7 -5550,7 +5537,7 @@@ M:      Luis R. Rodriguez <mcgrof@kernel.org
   L:    linux-kernel@vger.kernel.org
   S:    Maintained
   F:    Documentation/firmware_class/
- -F:    drivers/base/firmware*.c
+ +F:    drivers/base/firmware_loader/
   F:    include/linux/firmware.h
   
   FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
@@@ -5622,7 -5635,7 +5622,7 @@@ S:      Maintaine
   F:    drivers/dma/fsldma.*
   
   FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
- -M:    Claudiu Manoil <claudiu.manoil@freescale.com>
+ +M:    Claudiu Manoil <claudiu.manoil@nxp.com>
   L:    netdev@vger.kernel.org
   S:    Maintained
   F:    drivers/net/ethernet/freescale/gianfar*
@@@ -5633,7 -5646,7 +5633,7 @@@ FREESCALE GPMI NAND DRIVE
   M:    Han Xu <han.xu@nxp.com>
   L:    linux-mtd@lists.infradead.org
   S:    Maintained
- -F:    drivers/mtd/nand/gpmi-nand/*
+ +F:    drivers/mtd/nand/raw/gpmi-nand/*
   
   FREESCALE I2C CPM DRIVER
   M:    Jochen Friedrich <jochen@scram.de>
@@@ -5784,6 -5797,10 +5784,6 @@@ F:     fs/crypto
   F:    include/linux/fscrypt*.h
   F:    Documentation/filesystems/fscrypt.rst
   
- -FUJITSU FR-V (FRV) PORT
- -S:    Orphan
- -F:    arch/frv/
- -
   FUJITSU LAPTOP EXTRAS
   M:    Jonathan Woithe <jwoithe@just42.net>
   L:    platform-driver-x86@vger.kernel.org
@@@ -5831,6 -5848,12 +5831,6 @@@ F:     tools/testing/selftests/futex
   F:    tools/perf/bench/futex*
   F:    Documentation/*futex*
   
- -FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
- -M:    Rik Faith <faith@cs.unc.edu>
- -L:    linux-scsi@vger.kernel.org
- -S:    Odd Fixes (e.g., new signatures)
- -F:    drivers/scsi/fdomain.*
- -
   GCC PLUGINS
   M:    Kees Cook <keescook@chromium.org>
   R:    Emese Revfy <re.emese@gmail.com>
@@@ -5842,7 -5865,7 +5842,7 @@@ F:      scripts/Makefile.gcc-plugin
   F:    Documentation/gcc-plugins.txt
   
   GCOV BASED KERNEL PROFILING
- -M:    Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ +M:    Peter Oberparleiter <oberpar@linux.ibm.com>
   S:    Maintained
   F:    kernel/gcov/
   F:    Documentation/dev-tools/gcov.rst
@@@ -5910,11 -5933,6 +5910,11 @@@ S:    Supporte
   F:    drivers/phy/
   F:    include/linux/phy/
   
+ +GENERIC PINCTRL I2C DEMULTIPLEXER DRIVER
+ +M:    Wolfram Sang <wsa+renesas@sang-engineering.com>
+ +S:    Supported
+ +F:    drivers/i2c/muxes/i2c-demux-pinctrl.c
+ +
   GENERIC PM DOMAINS
   M:    "Rafael J. Wysocki" <rjw@rjwysocki.net>
   M:    Kevin Hilman <khilman@kernel.org>
@@@ -5996,7 -6014,7 +5996,7 @@@ S:      Maintaine
   F:    drivers/media/rc/gpio-ir-tx.c
   
   GPIO MOCKUP DRIVER
- -M:    Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
+ +M:    Bamvor Jian Zhang <bamv2005@gmail.com>
   R:    Bartosz Golaszewski <brgl@bgdev.pl>
   L:    linux-gpio@vger.kernel.org
   S:    Maintained
@@@ -6009,14 -6027,12 +6009,14 @@@ L:   linux-gpio@vger.kernel.or
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
   S:    Maintained
   F:    Documentation/devicetree/bindings/gpio/
+ +F:    Documentation/driver-api/gpio/
   F:    Documentation/gpio/
   F:    Documentation/ABI/testing/gpio-cdev
   F:    Documentation/ABI/obsolete/sysfs-gpio
   F:    drivers/gpio/
   F:    include/linux/gpio/
   F:    include/linux/gpio.h
+ +F:    include/linux/of_gpio.h
   F:    include/asm-generic/gpio.h
   F:    include/uapi/linux/gpio.h
   F:    tools/gpio/
@@@ -6226,11 -6242,6 +6226,11 @@@ F:    Documentation/hw_random.tx
   F:    drivers/char/hw_random/
   F:    include/linux/hw_random.h
   
+ +HARDWARE TRACING FACILITIES
+ +M:    Alexander Shishkin <alexander.shishkin@linux.intel.com>
+ +S:    Maintained
+ +F:    drivers/hwtracing/
+ +
   HARDWARE SPINLOCK CORE
   M:    Ohad Ben-Cohen <ohad@wizery.com>
   M:    Bjorn Andersson <bjorn.andersson@linaro.org>
@@@ -6375,13 -6386,6 +6375,13 @@@ W:    http://www.hisilicon.co
   S:    Maintained
   F:    drivers/net/ethernet/hisilicon/hns3/
   
+ +HISILICON LPC BUS DRIVER
+ +M:    John Garry <john.garry@huawei.com>
+ +W:    http://www.hisilicon.com
+ +S:    Maintained
+ +F:    drivers/bus/hisi_lpc.c
+ +F:    Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt
+ +
   HISILICON NETWORK SUBSYSTEM DRIVER
   M:    Yisen Zhuang <yisen.zhuang@huawei.com>
   M:    Salil Mehta <salil.mehta@huawei.com>
@@@ -6419,7 -6423,6 +6419,7 @@@ L:      linux-mm@kvack.or
   S:    Maintained
   F:    mm/hmm*
   F:    include/linux/hmm*
+ +F:    Documentation/vm/hmm.txt
   
   HOST AP DRIVER
   M:    Jouni Malinen <j@w1.fi>
@@@ -6526,7 -6529,7 +6526,7 @@@ S:      Maintaine
   F:    Documentation/networking/netvsc.txt
   F:    arch/x86/include/asm/mshyperv.h
   F:    arch/x86/include/asm/trace/hyperv.h
- -F:    arch/x86/include/uapi/asm/hyperv.h
+ +F:    arch/x86/include/asm/hyperv-tlfs.h
   F:    arch/x86/kernel/cpu/mshyperv.c
   F:    arch/x86/hyperv
   F:    drivers/hid/hid-hyperv.c
@@@ -6569,7 -6572,7 +6569,7 @@@ F:      drivers/i2c/muxes
   F:    include/linux/i2c-mux.h
   
   I2C MV64XXX MARVELL AND ALLWINNER DRIVER
- -M:    Gregory CLEMENT <gregory.clement@free-electrons.com>
+ +M:    Gregory CLEMENT <gregory.clement@bootlin.com>
   L:    linux-i2c@vger.kernel.org
   S:    Maintained
   F:    drivers/i2c/busses/i2c-mv64xxx.c
@@@ -6590,25 -6593,15 +6590,25 @@@ W:   https://i2c.wiki.kernel.org
   Q:    https://patchwork.ozlabs.org/project/linux-i2c/list/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
   S:    Maintained
- -F:    Documentation/devicetree/bindings/i2c/
+ +F:    Documentation/devicetree/bindings/i2c/i2c.txt
   F:    Documentation/i2c/
- -F:    drivers/i2c/
- -F:    drivers/i2c/*/
+ +F:    drivers/i2c/*
   F:    include/linux/i2c.h
- -F:    include/linux/i2c-*.h
+ +F:    include/linux/i2c-dev.h
+ +F:    include/linux/i2c-smbus.h
   F:    include/uapi/linux/i2c.h
   F:    include/uapi/linux/i2c-*.h
   
+ +I2C SUBSYSTEM HOST DRIVERS
+ +L:    linux-i2c@vger.kernel.org
+ +W:    https://i2c.wiki.kernel.org/
+ +Q:    https://patchwork.ozlabs.org/project/linux-i2c/list/
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+ +S:    Odd Fixes
+ +F:    Documentation/devicetree/bindings/i2c/
+ +F:    drivers/i2c/algos/
+ +F:    drivers/i2c/busses/
+ +
   I2C-TAOS-EVM DRIVER
   M:    Jean Delvare <jdelvare@suse.com>
   L:    linux-i2c@vger.kernel.org
@@@ -6908,13 -6901,6 +6908,13 @@@ M:    James Hogan <jhogan@kernel.org
   S:    Maintained
   F:    drivers/media/rc/img-ir/
   
+ +IMON SOUNDGRAPH USB IR RECEIVER
+ +M:    Sean Young <sean@mess.org>
+ +L:    linux-media@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/media/rc/imon_raw.c
+ +F:    drivers/media/rc/imon.c
+ +
   IMS TWINTURBO FRAMEBUFFER DRIVER
   L:    linux-fbdev@vger.kernel.org
   S:    Orphan
@@@ -6969,7 -6955,7 +6969,7 @@@ INGENIC JZ4780 NAND DRIVE
   M:    Harvey Hunt <harveyhuntnexus@gmail.com>
   L:    linux-mtd@lists.infradead.org
   S:    Maintained
- -F:    drivers/mtd/nand/jz4780_*
+ +F:    drivers/mtd/nand/raw/jz4780_*
   
   INOTIFY
   M:    Jan Kara <jack@suse.cz>
@@@ -7004,7 -6990,7 +7004,7 @@@ F:      drivers/input/input-mt.
   K:    \b(ABS|SYN)_MT_
   
   INSIDE SECURE CRYPTO DRIVER
- -M:    Antoine Tenart <antoine.tenart@free-electrons.com>
+ +M:    Antoine Tenart <antoine.tenart@bootlin.com>
   F:    drivers/crypto/inside-secure/
   S:    Maintained
   L:    linux-crypto@vger.kernel.org
@@@ -7075,7 -7061,6 +7075,7 @@@ F:      Documentation/networking/ixgbe.tx
   F:    Documentation/networking/ixgbevf.txt
   F:    Documentation/networking/i40e.txt
   F:    Documentation/networking/i40evf.txt
+ +F:    Documentation/networking/ice.txt
   F:    drivers/net/ethernet/intel/
   F:    drivers/net/ethernet/intel/*/
   F:    include/linux/avf/virtchnl.h
@@@ -7241,15 -7226,6 +7241,15 @@@ M:    Shiraz Saleem <shiraz.saleem@intel.c
   L:    linux-rdma@vger.kernel.org
   S:    Supported
   F:    drivers/infiniband/hw/i40iw/
+ +F:    include/uapi/rdma/i40iw-abi.h
+ +
+ +INTEL SHA MULTIBUFFER DRIVER
+ +M:    Megha Dey <megha.dey@linux.intel.com>
+ +R:    Tim Chen <tim.c.chen@linux.intel.com>
+ +L:    linux-crypto@vger.kernel.org
+ +S:    Supported
+ +F:    arch/x86/crypto/sha*-mb
+ +F:    crypto/mcryptd.c
   
   INTEL TELEMETRY DRIVER
   M:    Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
@@@ -7345,7 -7321,6 +7345,7 @@@ S:      Maintaine
   F:    Documentation/devicetree/bindings/iommu/
   F:    drivers/iommu/
   F:    include/linux/iommu.h
+ +F:    include/linux/of_iommu.h
   F:    include/linux/iova.h
   
   IP MASQUERADING
@@@ -7364,7 -7339,7 +7364,7 @@@ F:      include/linux/ipmi
   F:    include/uapi/linux/ipmi*
   
   IPS SCSI RAID DRIVER
- -M:    Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+ +M:    Adaptec OEM Raid Solutions <aacraid@microsemi.com>
   L:    linux-scsi@vger.kernel.org
   W:    http://www.adaptec.com/
   S:    Maintained
@@@ -7546,13 -7521,6 +7546,13 @@@ Q:    http://patchwork.linuxtv.org/project
   S:    Maintained
   F:    drivers/media/dvb-frontends/ix2505v*
   
+ +JAILHOUSE HYPERVISOR INTERFACE
+ +M:    Jan Kiszka <jan.kiszka@siemens.com>
+ +L:    jailhouse-dev@googlegroups.com
+ +S:    Maintained
+ +F:    arch/x86/kernel/jailhouse.c
+ +F:    arch/x86/include/asm/jailhouse_para.h
+ +
   JC42.4 TEMPERATURE SENSOR DRIVER
   M:    Guenter Roeck <linux@roeck-us.net>
   L:    linux-hwmon@vger.kernel.org
@@@ -7632,10 -7600,8 +7632,10 @@@ F:    mm/kasan
   F:    scripts/Makefile.kasan
   
   KCONFIG
+ +M:    Masahiro Yamada <yamada.masahiro@socionext.com>
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
   L:    linux-kbuild@vger.kernel.org
- -S:    Orphan
+ +S:    Maintained
   F:    Documentation/kbuild/kconfig-language.txt
   F:    scripts/kconfig/
   
@@@ -7782,7 -7748,7 +7782,7 @@@ F:      arch/powerpc/kernel/kvm
   
   KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
   M:    Christian Borntraeger <borntraeger@de.ibm.com>
- -M:    Janosch Frank <frankja@linux.vnet.ibm.com>
+ +M:    Janosch Frank <frankja@linux.ibm.com>
   R:    David Hildenbrand <david@redhat.com>
   R:    Cornelia Huck <cohuck@redhat.com>
   L:    linux-s390@vger.kernel.org
@@@ -7939,13 -7905,11 +7939,13 @@@ F:   drivers/scsi/53c700
   
   LEAKING_ADDRESSES
   M:    Tobin C. Harding <me@tobin.cc>
+ +M:    Tycho Andersen <tycho@tycho.ws>
+ +L:    kernel-hardening@lists.openwall.com
   S:    Maintained
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tobin/leaks.git
   F:    scripts/leaking_addresses.pl
   
   LED SUBSYSTEM
- -M:    Richard Purdie <rpurdie@rpsys.net>
   M:    Jacek Anaszewski <jacek.anaszewski@gmail.com>
   M:    Pavel Machek <pavel@ucw.cz>
   L:    linux-leds@vger.kernel.org
@@@ -8068,14 -8032,6 +8068,14 @@@ Q:    https://patchwork.kernel.org/project
   S:    Supported
   F:    drivers/nvdimm/pmem*
   
+ +LIBNVDIMM: DEVICETREE BINDINGS
+ +M:    Oliver O'Halloran <oohall@gmail.com>
+ +L:    linux-nvdimm@lists.01.org
+ +Q:    https://patchwork.kernel.org/project/linux-nvdimm/list/
+ +S:    Supported
+ +F:    drivers/nvdimm/of_pmem.c
+ +F:    Documentation/devicetree/bindings/pmem/pmem-region.txt
+ +
   LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
   M:    Dan Williams <dan.j.williams@intel.com>
   L:    linux-nvdimm@lists.01.org
@@@ -8190,25 -8146,7 +8190,25 @@@ F:    drivers/*/*/*pasemi
   LINUX KERNEL DUMP TEST MODULE (LKDTM)
   M:    Kees Cook <keescook@chromium.org>
   S:    Maintained
- -F:    drivers/misc/lkdtm*
+ +F:    drivers/misc/lkdtm/*
+ +
+ +LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
+ +M:    Alan Stern <stern@rowland.harvard.edu>
+ +M:    Andrea Parri <parri.andrea@gmail.com>
+ +M:    Will Deacon <will.deacon@arm.com>
+ +M:    Peter Zijlstra <peterz@infradead.org>
+ +M:    Boqun Feng <boqun.feng@gmail.com>
+ +M:    Nicholas Piggin <npiggin@gmail.com>
+ +M:    David Howells <dhowells@redhat.com>
+ +M:    Jade Alglave <j.alglave@ucl.ac.uk>
+ +M:    Luc Maranget <luc.maranget@inria.fr>
+ +M:    "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+ +R:    Akira Yokosawa <akiyks@gmail.com>
+ +L:    linux-kernel@vger.kernel.org
+ +S:    Supported
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+ +F:    tools/memory-model/
+ +F:    Documentation/memory-barriers.txt
   
   LINUX SECURITY MODULE (LSM) FRAMEWORK
   M:    Chris Wright <chrisw@sous-sol.org>
@@@ -8371,6 -8309,11 +8371,6 @@@ W:     http://linux-test-project.github.io
   T:    git git://github.com/linux-test-project/ltp.git
   S:    Maintained
   
- -M32R ARCHITECTURE
- -W:    http://www.linux-m32r.org/
- -S:    Orphan
- -F:    arch/m32r/
- -
   M68K ARCHITECTURE
   M:    Geert Uytterhoeven <geert@linux-m68k.org>
   L:    linux-m68k@lists.linux-m68k.org
@@@ -8469,7 -8412,7 +8469,7 @@@ F:      include/uapi/drm/armada_drm.
   F:    Documentation/devicetree/bindings/display/armada/
   
   MARVELL CRYPTO DRIVER
- -M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +M:    Boris Brezillon <boris.brezillon@bootlin.com>
   M:    Arnaud Ebalard <arno@natisbad.org>
   F:    drivers/crypto/marvell/
   S:    Maintained
@@@ -8488,7 -8431,7 +8488,7 @@@ S:      Orpha
   F:    drivers/net/wireless/marvell/libertas/
   
   MARVELL MACCHIATOBIN SUPPORT
- -M:    Russell King <rmk@armlinux.org.uk>
+ +M:    Russell King <linux@armlinux.org.uk>
   L:    linux-arm-kernel@lists.infradead.org
   S:    Maintained
   F:    arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
@@@ -8501,7 -8444,7 +8501,7 @@@ F:      drivers/net/ethernet/marvell/mv643xx
   F:    include/linux/mv643xx.h
   
   MARVELL MV88X3310 PHY DRIVER
- -M:    Russell King <rmk@armlinux.org.uk>
+ +M:    Russell King <linux@armlinux.org.uk>
   L:    netdev@vger.kernel.org
   S:    Maintained
   F:    drivers/net/phy/marvell10g.c
@@@ -8528,10 -8471,10 +8528,10 @@@ S:   Odd Fixe
   F:    drivers/net/wireless/marvell/mwl8k.c
   
   MARVELL NAND CONTROLLER DRIVER
- -M:    Miquel Raynal <miquel.raynal@free-electrons.com>
+ +M:    Miquel Raynal <miquel.raynal@bootlin.com>
   L:    linux-mtd@lists.infradead.org
   S:    Maintained
- -F:    drivers/mtd/nand/marvell_nand.c
+ +F:    drivers/mtd/nand/raw/marvell_nand.c
   F:    Documentation/devicetree/bindings/mtd/marvell-nand.txt
   
   MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER
@@@ -8643,23 -8586,13 +8643,23 @@@ W:   https://linuxtv.or
   S:    Maintained
   F:    drivers/media/radio/radio-maxiradio*
   
- -MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVER
+ +MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS
   M:    Peter Rosin <peda@axentia.se>
   L:    linux-iio@vger.kernel.org
   S:    Maintained
   F:    Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
+ +F:    drivers/iio/potentiometer/mcp4018.c
   F:    drivers/iio/potentiometer/mcp4531.c
   
+ +MCR20A IEEE-802.15.4 RADIO DRIVER
+ +M:    Xue Liu <liuxuenetmail@gmail.com>
+ +L:    linux-wpan@vger.kernel.org
+ +W:    https://github.com/xueliu/mcr20a-linux
+ +S:    Maintained
+ +F:    drivers/net/ieee802154/mcr20a.c
+ +F:    drivers/net/ieee802154/mcr20a.h
+ +F:    Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+ +
   MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
   M:    William Breathitt Gray <vilhelm.gray@gmail.com>
   L:    linux-iio@vger.kernel.org
@@@ -8676,14 -8609,6 +8676,14 @@@ T:    git git://linuxtv.org/media_tree.gi
   S:    Supported
   F:    drivers/media/dvb-frontends/ascot2e*
   
+ +MEDIA DRIVERS FOR CXD2099AR CI CONTROLLERS
+ +M:    Jasmin Jessich <jasmin@anw.at>
+ +L:    linux-media@vger.kernel.org
+ +W:    https://linuxtv.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Maintained
+ +F:    drivers/media/dvb-frontends/cxd2099*
+ +
   MEDIA DRIVERS FOR CXD2841ER
   M:    Sergey Kozlov <serjk@netup.ru>
   M:    Abylay Ospan <aospan@netup.ru>
@@@ -8694,15 -8619,6 +8694,15 @@@ T:    git git://linuxtv.org/media_tree.gi
   S:    Supported
   F:    drivers/media/dvb-frontends/cxd2841er*
   
+ +MEDIA DRIVERS FOR CXD2880
+ +M:    Yasunari Takiguchi <Yasunari.Takiguchi@sony.com>
+ +L:    linux-media@vger.kernel.org
+ +W:    http://linuxtv.org/
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Supported
+ +F:    drivers/media/dvb-frontends/cxd2880/*
+ +F:    drivers/media/spi/cxd2880*
+ +
   MEDIA DRIVERS FOR DIGITAL DEVICES PCIE DEVICES
   M:    Daniel Scheller <d.scheller.oss@gmail.com>
   L:    linux-media@vger.kernel.org
@@@ -8770,16 -8686,6 +8770,16 @@@ T:    git git://linuxtv.org/media_tree.gi
   S:    Supported
   F:    drivers/media/pci/netup_unidvb/*
   
+ +MEDIA DRIVERS FOR RENESAS - CEU
+ +M:    Jacopo Mondi <jacopo@jmondi.org>
+ +L:    linux-media@vger.kernel.org
+ +L:    linux-renesas-soc@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Supported
+ +F:    Documentation/devicetree/bindings/media/renesas,ceu.txt
+ +F:    drivers/media/platform/renesas-ceu.c
+ +F:    include/media/drv-intf/renesas-ceu.h
+ +
   MEDIA DRIVERS FOR RENESAS - DRIF
   M:    Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
   L:    linux-media@vger.kernel.org
@@@ -8879,15 -8785,6 +8879,15 @@@ M:    Sean Wang <sean.wang@mediatek.com
   S:    Maintained
   F:    drivers/media/rc/mtk-cir.c
   
+ +MEDIATEK DMA DRIVER
+ +M:    Sean Wang <sean.wang@mediatek.com>
+ +L:    dmaengine@vger.kernel.org
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +L:    linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+ +S:    Maintained
+ +F:    Documentation/devicetree/bindings/dma/mtk-*
+ +F:    drivers/dma/mediatek/
+ +
   MEDIATEK PMIC LED DRIVER
   M:    Sean Wang <sean.wang@mediatek.com>
   S:    Maintained
@@@ -8981,13 -8878,6 +8981,13 @@@ W:    http://www.melexis.co
   S:    Supported
   F:    drivers/iio/temperature/mlx90614.c
   
+ +MELEXIS MLX90632 DRIVER
+ +M:    Crt Mori <cmo@melexis.com>
+ +L:    linux-iio@vger.kernel.org
+ +W:    http://www.melexis.com
+ +S:    Supported
+ +F:    drivers/iio/temperature/mlx90632.c
+ +
   MELFAS MIP4 TOUCHSCREEN DRIVER
   M:    Sangwon Jee <jeesw@melfas.com>
   W:    http://www.melfas.com
@@@ -9113,7 -9003,6 +9113,7 @@@ M:      Vadim Pasternak <vadimp@mellanox.com
   L:    linux-leds@vger.kernel.org
   S:    Supported
   F:    drivers/leds/leds-mlxcpld.c
+ +F:    drivers/leds/leds-mlxreg.c
   F:    Documentation/leds/leds-mlxcpld.txt
   
   MELLANOX PLATFORM DRIVER
@@@ -9145,9 -9034,10 +9145,9 @@@ F:     mm
   MEMORY TECHNOLOGY DEVICES (MTD)
   M:    David Woodhouse <dwmw2@infradead.org>
   M:    Brian Norris <computersforpeace@gmail.com>
- -M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +M:    Boris Brezillon <boris.brezillon@bootlin.com>
   M:    Marek Vasut <marek.vasut@gmail.com>
   M:    Richard Weinberger <richard@nod.at>
- -M:    Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
   L:    linux-mtd@lists.infradead.org
   W:    http://www.linux-mtd.infradead.org/
   Q:    http://patchwork.ozlabs.org/project/linux-mtd/list/
@@@ -9191,6 -9081,20 +9191,6 @@@ F:     drivers/media/platform/meson/ao-cec.
   F:    Documentation/devicetree/bindings/media/meson-ao-cec.txt
   T:    git git://linuxtv.org/media_tree.git
   
- -METAG ARCHITECTURE
- -M:    James Hogan <jhogan@kernel.org>
- -L:    linux-metag@vger.kernel.org
- -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag.git
- -S:    Odd Fixes
- -F:    arch/metag/
- -F:    Documentation/metag/
- -F:    Documentation/devicetree/bindings/metag/
- -F:    Documentation/devicetree/bindings/interrupt-controller/img,*
- -F:    drivers/clocksource/metag_generic.c
- -F:    drivers/irqchip/irq-metag.c
- -F:    drivers/irqchip/irq-metag-ext.c
- -F:    drivers/tty/metag_da.c
- -
   MICROBLAZE ARCHITECTURE
   M:    Michal Simek <monstr@monstr.eu>
   W:    http://www.monstr.eu/fdt/
@@@ -9232,7 -9136,7 +9232,7 @@@ M:      Wenyou Yang <wenyou.yang@microchip.c
   M:    Josh Wu <rainyfeeling@outlook.com>
   L:    linux-mtd@lists.infradead.org
   S:    Supported
- -F:    drivers/mtd/nand/atmel/*
+ +F:    drivers/mtd/nand/raw/atmel/*
   F:    Documentation/devicetree/bindings/mtd/atmel-nand.txt
   
   MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
@@@ -9245,13 -9149,6 +9245,13 @@@ F:    drivers/net/dsa/microchip/
   F:    include/linux/platform_data/microchip-ksz.h
   F:    Documentation/devicetree/bindings/net/dsa/ksz.txt
   
+ +MICROCHIP LAN743X ETHERNET DRIVER
+ +M:    Bryan Whitehead <bryan.whitehead@microchip.com>
+ +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+ +L:    netdev@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/net/ethernet/microchip/lan743x_*
+ +
   MICROCHIP USB251XB DRIVER
   M:    Richard Leitner <richard.leitner@skidata.com>
   L:    linux-usb@vger.kernel.org
@@@ -9259,15 -9156,6 +9259,15 @@@ S:    Maintaine
   F:    drivers/usb/misc/usb251xb.c
   F:    Documentation/devicetree/bindings/usb/usb251xb.txt
   
+ +MICROSEMI MIPS SOCS
+ +M:    Alexandre Belloni <alexandre.belloni@bootlin.com>
+ +L:    linux-mips@linux-mips.org
+ +S:    Maintained
+ +F:    arch/mips/generic/board-ocelot.c
+ +F:    arch/mips/configs/generic/board-ocelot.config
+ +F:    arch/mips/boot/dts/mscc/
+ +F:    Documentation/devicetree/bindings/mips/mscc.txt
+ +
   MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
   M:    Don Brace <don.brace@microsemi.com>
   L:    esc.storagedev@microsemi.com
@@@ -9318,7 -9206,6 +9318,7 @@@ MIPS GENERIC PLATFOR
   M:    Paul Burton <paul.burton@mips.com>
   L:    linux-mips@linux-mips.org
   S:    Supported
+ +F:    Documentation/devicetree/bindings/power/mti,mips-cpc.txt
   F:    arch/mips/generic/
   F:    arch/mips/tools/generic-board-config.sh
   
@@@ -9488,14 -9375,6 +9488,14 @@@ S:    Maintaine
   F:    drivers/media/i2c/mt9t001.c
   F:    include/media/i2c/mt9t001.h
   
+ +MT9T112 APTINA CAMERA SENSOR
+ +M:    Jacopo Mondi <jacopo@jmondi.org>
+ +L:    linux-media@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Odd Fixes
+ +F:    drivers/media/i2c/mt9t112.c
+ +F:    include/media/i2c/mt9t112.h
+ +
   MT9V032 APTINA CAMERA SENSOR
   M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
   L:    linux-media@vger.kernel.org
@@@ -9573,7 -9452,7 +9573,7 @@@ S:      Supporte
   F:    drivers/net/ethernet/myricom/myri10ge/
   
   NAND FLASH SUBSYSTEM
- -M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +M:    Boris Brezillon <boris.brezillon@bootlin.com>
   R:    Richard Weinberger <richard@nod.at>
   L:    linux-mtd@lists.infradead.org
   W:    http://www.linux-mtd.infradead.org/
@@@ -10042,13 -9921,6 +10042,13 @@@ F:  Documentation/ABI/stable/sysfs-bus-n
   F:    include/linux/nvmem-consumer.h
   F:    include/linux/nvmem-provider.h
   
+ +NXP SGTL5000 DRIVER
+ +M:    Fabio Estevam <fabio.estevam@nxp.com>
+ +L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
+ +S:    Maintained
+ +F:    Documentation/devicetree/bindings/sound/sgtl5000.txt
+ +F:    sound/soc/codecs/sgtl5000*
+ +
   NXP TDA998X DRM DRIVER
   M:    Russell King <linux@armlinux.org.uk>
   S:    Supported
@@@ -10073,7 -9945,6 +10073,7 @@@ F:    drivers/nfc/nxp-nc
   
   OBJTOOL
   M:    Josh Poimboeuf <jpoimboe@redhat.com>
+ +M:    Peter Zijlstra <peterz@infradead.org>
   S:    Supported
   F:    tools/objtool/
   
@@@ -10301,13 -10172,6 +10301,13 @@@ T: git git://linuxtv.org/media_tree.gi
   S:    Maintained
   F:    drivers/media/i2c/ov13858.c
   
+ +OMNIVISION OV2685 SENSOR DRIVER
+ +M:    Shunqian Zheng <zhengsq@rock-chips.com>
+ +L:    linux-media@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Maintained
+ +F:    drivers/media/i2c/ov2685.c
+ +
   OMNIVISION OV5640 SENSOR DRIVER
   M:    Steve Longerbeam <slongerbeam@gmail.com>
   L:    linux-media@vger.kernel.org
@@@ -10322,13 -10186,6 +10322,13 @@@ T: git git://linuxtv.org/media_tree.gi
   S:    Maintained
   F:    drivers/media/i2c/ov5647.c
   
+ +OMNIVISION OV5695 SENSOR DRIVER
+ +M:    Shunqian Zheng <zhengsq@rock-chips.com>
+ +L:    linux-media@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Maintained
+ +F:    drivers/media/i2c/ov5695.c
+ +
   OMNIVISION OV7670 SENSOR DRIVER
   M:    Jonathan Corbet <corbet@lwn.net>
   L:    linux-media@vger.kernel.org
@@@ -10337,14 -10194,6 +10337,14 @@@ S: Maintaine
   F:    drivers/media/i2c/ov7670.c
   F:    Documentation/devicetree/bindings/media/i2c/ov7670.txt
   
+ +OMNIVISION OV772x SENSOR DRIVER
+ +M:    Jacopo Mondi <jacopo@jmondi.org>
+ +L:    linux-media@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Odd Fixes
+ +F:    drivers/media/i2c/ov772x.c
+ +F:    include/media/i2c/ov772x.h
+ +
   OMNIVISION OV7740 SENSOR DRIVER
   M:    Wenyou Yang <wenyou.yang@microchip.com>
   L:    linux-media@vger.kernel.org
@@@ -10353,21 -10202,11 +10353,21 @@@ S:        Maintaine
   F:    drivers/media/i2c/ov7740.c
   F:    Documentation/devicetree/bindings/media/i2c/ov7740.txt
   
+ +OMNIVISION OV9650 SENSOR DRIVER
+ +M:    Sakari Ailus <sakari.ailus@linux.intel.com>
+ +R:    Akinobu Mita <akinobu.mita@gmail.com>
+ +R:    Sylwester Nawrocki <s.nawrocki@samsung.com>
+ +L:    linux-media@vger.kernel.org
+ +T:    git git://linuxtv.org/media_tree.git
+ +S:    Maintained
+ +F:    drivers/media/i2c/ov9650.c
+ +F:    Documentation/devicetree/bindings/media/i2c/ov9650.txt
+ +
   ONENAND FLASH DRIVER
   M:    Kyungmin Park <kyungmin.park@samsung.com>
   L:    linux-mtd@lists.infradead.org
   S:    Maintained
- -F:    drivers/mtd/onenand/
+ +F:    drivers/mtd/nand/onenand/
   F:    include/linux/mtd/onenand*.h
   
   ONSTREAM SCSI TAPE DRIVER
@@@ -10483,7 -10322,7 +10483,7 @@@ F:   drivers/oprofile
   F:    include/linux/oprofile.h
   
   ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
- -M:    Mark Fasheh <mfasheh@versity.com>
+ +M:    Mark Fasheh <mark@fasheh.com>
   M:    Joel Becker <jlbec@evilplan.org>
   L:    ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
   W:    http://ocfs2.wiki.kernel.org
@@@ -10564,6 -10403,14 +10564,6 @@@ L:  platform-driver-x86@vger.kernel.or
   S:    Maintained
   F:    drivers/platform/x86/panasonic-laptop.c
   
- -PANASONIC MN10300/AM33/AM34 PORT
- -M:    David Howells <dhowells@redhat.com>
- -L:    linux-am33-list@redhat.com (moderated for non-subscribers)
- -W:    ftp://ftp.redhat.com/pub/redhat/gnupro/AM33/
- -S:    Maintained
- -F:    Documentation/mn10300/
- -F:    arch/mn10300/
- -
   PARALLEL LCD/KEYPAD PANEL DRIVER
   M:    Willy Tarreau <willy@haproxy.com>
   M:    Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
@@@ -10864,7 -10711,6 +10864,7 @@@ F:   drivers/acpi/pci
   F:    drivers/pci/
   F:    include/asm-generic/pci*
   F:    include/linux/pci*
+ +F:    include/linux/of_pci.h
   F:    include/uapi/linux/pci*
   F:    lib/pci*
   F:    arch/x86/pci/
@@@ -10876,7 -10722,6 +10876,7 @@@ L:   linux-pci@vger.kernel.or
   Q:    http://patchwork.ozlabs.org/project/linux-pci/list/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
   S:    Supported
+ +F:    drivers/pci/cadence/
   F:    drivers/pci/host/
   F:    drivers/pci/dwc/
   
@@@ -10987,7 -10832,6 +10987,7 @@@ F:   drivers/platform/x86/peaq-wmi.
   PER-CPU MEMORY ALLOCATOR
   M:    Tejun Heo <tj@kernel.org>
   M:    Christoph Lameter <cl@linux.com>
+ +M:    Dennis Zhou <dennisszhou@gmail.com>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
   S:    Maintained
   F:    include/linux/percpu*.h
@@@ -11081,17 -10925,6 +11081,17 @@@ L: linux-gpio@vger.kernel.or
   S:    Supported
   F:    drivers/pinctrl/pinctrl-at91-pio4.*
   
+ +PIN CONTROLLER - FREESCALE
+ +M:    Dong Aisheng <aisheng.dong@nxp.com>
+ +M:    Fabio Estevam <festevam@gmail.com>
+ +M:    Shawn Guo <shawnguo@kernel.org>
+ +M:    Stefan Agner <stefan@agner.ch>
+ +R:    Pengutronix Kernel Team <kernel@pengutronix.de>
+ +L:    linux-gpio@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/pinctrl/freescale/
+ +F:    Documentation/devicetree/bindings/pinctrl/fsl,*
+ +
   PIN CONTROLLER - INTEL
   M:    Mika Westerberg <mika.westerberg@linux.intel.com>
   M:    Heikki Krogerus <heikki.krogerus@linux.intel.com>
@@@ -11168,7 -11001,7 +11168,7 @@@ F:   include/linux/pktcdvd.
   F:    include/uapi/linux/pktcdvd.h
   
   PKUNITY SOC DRIVERS
- -M:    Guan Xuetao <gxt@mprc.pku.edu.cn>
+ +M:    Guan Xuetao <gxt@pku.edu.cn>
   W:    http://mprc.pku.edu.cn/~guanxuetao/linux
   S:    Maintained
   T:    git git://github.com/gxt/linux.git
@@@ -11494,6 -11327,12 +11494,6 @@@ F:  include/sound/pxa2xx-lib.
   F:    sound/arm/pxa*
   F:    sound/soc/pxa/
   
- -PXA3xx NAND FLASH DRIVER
- -M:    Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
- -L:    linux-mtd@lists.infradead.org
- -S:    Maintained
- -F:    drivers/mtd/nand/pxa3xx_nand.c
- -
   QAT DRIVER
   M:    Giovanni Cabiddu <giovanni.cabiddu@intel.com>
   L:    qat-linux@intel.com
@@@ -11513,7 -11352,6 +11513,7 @@@ M:   "Michael S. Tsirkin" <mst@redhat.com
   L:    qemu-devel@nongnu.org
   S:    Maintained
   F:    drivers/firmware/qemu_fw_cfg.c
+ +F:    include/uapi/linux/qemu_fw_cfg.h
   
   QIB DRIVER
   M:    Dennis Dalessandro <dennis.dalessandro@intel.com>
@@@ -11607,9 -11445,8 +11607,9 @@@ M:   Stuart Yoder <stuyoder@gmail.com
   M:    Laurentiu Tudor <laurentiu.tudor@nxp.com>
   L:    linux-kernel@vger.kernel.org
   S:    Maintained
- -F:    drivers/staging/fsl-mc/
+ +F:    drivers/bus/fsl-mc/
   F:    Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+ +F:    Documentation/networking/dpaa2/overview.rst
   
   QT1010 MEDIA DRIVER
   M:    Antti Palosaari <crope@iki.fi>
@@@ -11782,7 -11619,7 +11782,7 @@@ F:   drivers/char/random.
   
   RAPIDIO SUBSYSTEM
   M:    Matt Porter <mporter@kernel.crashing.org>
- -M:    Alexandre Bounine <alexandre.bounine@idt.com>
+ +M:    Alexandre Bounine <alex.bou9@gmail.com>
   S:    Maintained
   F:    drivers/rapidio/
   
@@@ -11856,7 -11693,7 +11856,7 @@@ X:   kernel/torture.
   
   REAL TIME CLOCK (RTC) SUBSYSTEM
   M:    Alessandro Zummo <a.zummo@towertech.it>
- -M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
+ +M:    Alexandre Belloni <alexandre.belloni@bootlin.com>
   L:    linux-rtc@vger.kernel.org
   Q:    http://patchwork.ozlabs.org/project/rtc-linux/list/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
@@@ -11920,11 -11757,6 +11920,11 @@@ T: git git://git.kernel.org/pub/scm/lin
   S:    Supported
   F:    drivers/clk/renesas/
   
+ +RENESAS EMEV2 I2C DRIVER
+ +M:    Wolfram Sang <wsa+renesas@sang-engineering.com>
+ +S:    Supported
+ +F:    drivers/i2c/busses/i2c-emev2.c
+ +
   RENESAS ETHERNET DRIVERS
   R:    Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
   L:    netdev@vger.kernel.org
@@@ -11940,12 -11772,6 +11940,12 @@@ L: linux-iio@vger.kernel.or
   S:    Supported
   F:    drivers/iio/adc/rcar_gyro_adc.c
   
+ +RENESAS R-CAR I2C DRIVERS
+ +M:    Wolfram Sang <wsa+renesas@sang-engineering.com>
+ +S:    Supported
+ +F:    drivers/i2c/busses/i2c-rcar.c
+ +F:    drivers/i2c/busses/i2c-sh_mobile.c
+ +
   RENESAS USB PHY DRIVER
   M:    Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
   L:    linux-renesas-soc@vger.kernel.org
@@@ -11989,8 -11815,8 +11989,8 @@@ F:   drivers/memstick/host/r592.
   RICOH SMARTMEDIA/XD DRIVER
   M:    Maxim Levitsky <maximlevitsky@gmail.com>
   S:    Maintained
- -F:    drivers/mtd/nand/r852.c
- -F:    drivers/mtd/nand/r852.h
+ +F:    drivers/mtd/nand/raw/r852.c
+ +F:    drivers/mtd/nand/raw/r852.h
   
   RISC-V ARCHITECTURE
   M:    Palmer Dabbelt <palmer@sifive.com>
@@@ -12149,16 -11975,16 +12149,16 @@@ F:        Documentation/s390
   F:    Documentation/driver-api/s390-drivers.rst
   
   S390 COMMON I/O LAYER
- -M:    Sebastian Ott <sebott@linux.vnet.ibm.com>
- -M:    Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ +M:    Sebastian Ott <sebott@linux.ibm.com>
+ +M:    Peter Oberparleiter <oberpar@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
   F:    drivers/s390/cio/
   
   S390 DASD DRIVER
- -M:    Stefan Haberland <sth@linux.vnet.ibm.com>
- -M:    Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
+ +M:    Stefan Haberland <sth@linux.ibm.com>
+ +M:    Jan Hoeppner <hoeppner@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
@@@ -12173,8 -11999,8 +12173,8 @@@ S:   Supporte
   F:    drivers/iommu/s390-iommu.c
   
   S390 IUCV NETWORK LAYER
- -M:    Julian Wiedmann <jwi@linux.vnet.ibm.com>
- -M:    Ursula Braun <ubraun@linux.vnet.ibm.com>
+ +M:    Julian Wiedmann <jwi@linux.ibm.com>
+ +M:    Ursula Braun <ubraun@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
@@@ -12183,15 -12009,15 +12183,15 @@@ F:        include/net/iucv
   F:    net/iucv/
   
   S390 NETWORK DRIVERS
- -M:    Julian Wiedmann <jwi@linux.vnet.ibm.com>
- -M:    Ursula Braun <ubraun@linux.vnet.ibm.com>
+ +M:    Julian Wiedmann <jwi@linux.ibm.com>
+ +M:    Ursula Braun <ubraun@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
   F:    drivers/s390/net/
   
   S390 PCI SUBSYSTEM
- -M:    Sebastian Ott <sebott@linux.vnet.ibm.com>
+ +M:    Sebastian Ott <sebott@linux.ibm.com>
   M:    Gerald Schaefer <gerald.schaefer@de.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
@@@ -12201,8 -12027,8 +12201,8 @@@ F:   drivers/pci/hotplug/s390_pci_hpc.
   
   S390 VFIO-CCW DRIVER
   M:    Cornelia Huck <cohuck@redhat.com>
- -M:    Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
- -M:    Halil Pasic <pasic@linux.vnet.ibm.com>
+ +M:    Dong Jia Shi <bjsdjshi@linux.ibm.com>
+ +M:    Halil Pasic <pasic@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   L:    kvm@vger.kernel.org
   S:    Supported
@@@ -12218,8 -12044,8 +12218,8 @@@ S:   Supporte
   F:    drivers/s390/crypto/
   
   S390 ZFCP DRIVER
- -M:    Steffen Maier <maier@linux.vnet.ibm.com>
- -M:    Benjamin Block <bblock@linux.vnet.ibm.com>
+ +M:    Steffen Maier <maier@linux.ibm.com>
+ +M:    Benjamin Block <bblock@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
@@@ -12265,7 -12091,6 +12265,7 @@@ M:   Sylwester Nawrocki <s.nawrocki@samsu
   L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
   S:    Supported
   F:    sound/soc/samsung/
+ +F:    Documentation/devicetree/bindings/sound/samsung*
   
   SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
   M:    Krzysztof Kozlowski <krzk@kernel.org>
@@@ -12364,7 -12189,6 +12364,7 @@@ M:   Tomasz Figa <tomasz.figa@gmail.com
   M:    Chanwoo Choi <cw00.choi@samsung.com>
   S:    Supported
   L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
   F:    drivers/clk/samsung/
   F:    include/dt-bindings/clock/exynos*.h
   F:    Documentation/devicetree/bindings/clock/exynos*.txt
@@@ -12372,7 -12196,7 +12372,7 @@@
   SAMSUNG SPI DRIVERS
   M:    Kukjin Kim <kgene@kernel.org>
   M:    Krzysztof Kozlowski <krzk@kernel.org>
- -M:    Andi Shyti <andi.shyti@samsung.com>
+ +M:    Andi Shyti <andi@etezian.org>
   L:    linux-spi@vger.kernel.org
   L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
   S:    Maintained
@@@ -12426,6 -12250,13 +12426,6 @@@ F:  include/linux/sched.
   F:    include/uapi/linux/sched.h
   F:    include/linux/wait.h
   
- -SCORE ARCHITECTURE
- -M:    Chen Liqin <liqin.linux@gmail.com>
- -M:    Lennox Wu <lennox.wu@gmail.com>
- -W:    http://www.sunplus.com
- -S:    Supported
- -F:    arch/score/
- -
   SCR24X CHIP CARD INTERFACE DRIVER
   M:    Lubomir Rintel <lkundrak@v3.sk>
   S:    Supported
@@@ -12656,7 -12487,7 +12656,7 @@@ S:   Maintaine
   F:    drivers/misc/sgi-xp/
   
   SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
- -M:    Ursula Braun <ubraun@linux.vnet.ibm.com>
+ +M:    Ursula Braun <ubraun@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
@@@ -12953,20 -12784,14 +12953,20 @@@ S:        Maintaine
   F:    drivers/net/ethernet/smsc/smsc9420.*
   
   SOC-CAMERA V4L2 SUBSYSTEM
- -M:    Guennadi Liakhovetski <g.liakhovetski@gmx.de>
   L:    linux-media@vger.kernel.org
   T:    git git://linuxtv.org/media_tree.git
- -S:    Maintained
+ +S:    Orphan
   F:    include/media/soc*
   F:    drivers/media/i2c/soc_camera/
   F:    drivers/media/platform/soc_camera/
   
+ +SOCIONEXT SYNQUACER I2C DRIVER
+ +M:    Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ +L:    linux-i2c@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/i2c/busses/i2c-synquacer.c
+ +F:    Documentation/devicetree/bindings/i2c/i2c-synquacer.txt
+ +
   SOCIONEXT UNIPHIER SOUND DRIVER
   M:    Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
   L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -13025,19 -12850,6 +13025,19 @@@ S: Maintaine
   F:    drivers/net/ethernet/socionext/netsec.c
   F:    Documentation/devicetree/bindings/net/socionext-netsec.txt
   
+ +SOLIDRUN CLEARFOG SUPPORT
+ +M:    Russell King <linux@armlinux.org.uk>
+ +S:    Maintained
+ +F:    arch/arm/boot/dts/armada-388-clearfog*
+ +F:    arch/arm/boot/dts/armada-38x-solidrun-*
+ +
+ +SOLIDRUN CUBOX-I/HUMMINGBOARD SUPPORT
+ +M:    Russell King <linux@armlinux.org.uk>
+ +S:    Maintained
+ +F:    arch/arm/boot/dts/imx6*-cubox-i*
+ +F:    arch/arm/boot/dts/imx6*-hummingboard*
+ +F:    arch/arm/boot/dts/imx6*-sr-*
+ +
   SONIC NETWORK DRIVER
   M:    Thomas Bogendoerfer <tsbogend@alpha.franken.de>
   L:    netdev@vger.kernel.org
@@@ -13194,6 -13006,7 +13194,6 @@@ F:   arch/arm/boot/dts/spear
   F:    arch/arm/mach-spear/
   
   SPI NOR SUBSYSTEM
- -M:    Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
   M:    Marek Vasut <marek.vasut@gmail.com>
   L:    linux-mtd@lists.infradead.org
   W:    http://www.linux-mtd.infradead.org/
@@@ -13523,12 -13336,6 +13523,12 @@@ S: Maintaine
   F:    drivers/gpio/gpio-dwapb.c
   F:    Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
   
+ +SYNOPSYS DESIGNWARE AXI DMAC DRIVER
+ +M:    Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ +S:    Maintained
+ +F:    drivers/dma/dwi-axi-dmac/
+ +F:    Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
+ +
   SYNOPSYS DESIGNWARE DMAC DRIVER
   M:    Viresh Kumar <vireshk@kernel.org>
   R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
@@@ -13572,16 -13379,15 +13572,16 @@@ T:        git git://git.kernel.org/pub/scm/lin
   S:    Supported
   F:    drivers/mfd/syscon.c
   
- -SYSTEM CONTROL & POWER INTERFACE (SCPI) Message Protocol drivers
+ +SYSTEM CONTROL & POWER/MANAGEMENT INTERFACE (SCPI/SCMI) Message Protocol drivers
   M:    Sudeep Holla <sudeep.holla@arm.com>
   L:    linux-arm-kernel@lists.infradead.org
   S:    Maintained
- -F:    Documentation/devicetree/bindings/arm/arm,scpi.txt
- -F:    drivers/clk/clk-scpi.c
- -F:    drivers/cpufreq/scpi-cpufreq.c
+ +F:    Documentation/devicetree/bindings/arm/arm,sc[mp]i.txt
+ +F:    drivers/clk/clk-sc[mp]i.c
+ +F:    drivers/cpufreq/sc[mp]i-cpufreq.c
   F:    drivers/firmware/arm_scpi.c
- -F:    include/linux/scpi_protocol.h
+ +F:    drivers/firmware/arm_scmi/
+ +F:    include/linux/sc[mp]i_protocol.h
   
   SYSTEM RESET/SHUTDOWN DRIVERS
   M:    Sebastian Reichel <sre@kernel.org>
@@@ -13697,14 -13503,6 +13697,14 @@@ T: git git://linuxtv.org/mkrufky/tuners
   S:    Maintained
   F:    drivers/media/tuners/tda18271*
   
+ +TDA1997x MEDIA DRIVER
+ +M:    Tim Harvey <tharvey@gateworks.com>
+ +L:    linux-media@vger.kernel.org
+ +W:    https://linuxtv.org
+ +Q:    http://patchwork.linuxtv.org/project/linux-media/list/
+ +S:    Maintained
+ +F:    drivers/media/i2c/tda1997x.*
+ +
   TDA827x MEDIA DRIVER
   M:    Michael Krufky <mkrufky@linuxtv.org>
   L:    linux-media@vger.kernel.org
@@@ -13786,12 -13584,6 +13786,12 @@@ L: linux-media@vger.kernel.or
   S:    Maintained
   F:    drivers/media/rc/ttusbir.c
   
+ +TECHWELL TW9910 VIDEO DECODER
+ +L:    linux-media@vger.kernel.org
+ +S:    Orphan
+ +F:    drivers/media/i2c/tw9910.c
+ +F:    include/media/i2c/tw9910.h
+ +
   TEE SUBSYSTEM
   M:    Jens Wiklander <jens.wiklander@linaro.org>
   S:    Maintained
@@@ -13827,8 -13619,7 +13827,8 @@@ S:   Supporte
   F:    drivers/i2c/busses/i2c-tegra.c
   
   TEGRA IOMMU DRIVERS
- -M:    Hiroshi Doyu <hdoyu@nvidia.com>
+ +M:    Thierry Reding <thierry.reding@gmail.com>
+ +L:    linux-tegra@vger.kernel.org
   S:    Supported
   F:    drivers/iommu/tegra*
   
@@@ -14001,13 -13792,6 +14001,13 @@@ F: arch/arm/mach-davinci
   F:    drivers/i2c/busses/i2c-davinci.c
   F:    arch/arm/boot/dts/da850*
   
+ +TI DAVINCI SERIES CLOCK DRIVER
+ +M:    David Lechner <david@lechnology.com>
+ +R:    Sekhar Nori <nsekhar@ti.com>
+ +S:    Maintained
+ +F:    Documentation/devicetree/bindings/clock/ti/davinci/
+ +F:    drivers/clk/davinci/
+ +
   TI DAVINCI SERIES GPIO DRIVER
   M:    Keerthy <j-keerthy@ti.com>
   L:    linux-gpio@vger.kernel.org
@@@ -14123,6 -13907,19 +14123,6 @@@ S:  Orpha
   F:    drivers/net/wireless/ti/
   F:    include/linux/wl12xx.h
   
- -TILE ARCHITECTURE
- -W:    http://www.mellanox.com/repository/solutions/tile-scm/
- -S:    Orphan
- -F:    arch/tile/
- -F:    drivers/char/tile-srom.c
- -F:    drivers/edac/tile_edac.c
- -F:    drivers/net/ethernet/tile/
- -F:    drivers/rtc/rtc-tile.c
- -F:    drivers/tty/hvc/hvc_tile.c
- -F:    drivers/tty/serial/tilegx.c
- -F:    drivers/usb/host/*-tilegx.c
- -F:    include/linux/usb/tilegx.h
- -
   TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
   M:    John Stultz <john.stultz@linaro.org>
   M:    Thomas Gleixner <tglx@linutronix.de>
@@@ -14448,7 -14245,7 +14448,7 @@@ F:   include/linux/uwb.
   F:    include/linux/uwb/
   
   UNICORE32 ARCHITECTURE:
- -M:    Guan Xuetao <gxt@mprc.pku.edu.cn>
+ +M:    Guan Xuetao <gxt@pku.edu.cn>
   W:    http://mprc.pku.edu.cn/~guanxuetao/linux
   S:    Maintained
   T:    git git://github.com/gxt/linux.git
@@@ -14587,12 -14384,6 +14587,12 @@@ S: Maintaine
   F:    Documentation/hid/hiddev.txt
   F:    drivers/hid/usbhid/
   
+ +USB INTEL XHCI ROLE MUX DRIVER
+ +M:    Hans de Goede <hdegoede@redhat.com>
+ +L:    linux-usb@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/usb/roles/intel-xhci-usb-role-switch.c
+ +
   USB ISP116X DRIVER
   M:    Olav Kongas <ok@artecdesign.ee>
   L:    linux-usb@vger.kernel.org
@@@ -14723,12 -14514,6 +14723,12 @@@ F: drivers/usb
   F:    include/linux/usb.h
   F:    include/linux/usb/
   
+ +USB TYPEC PI3USB30532 MUX DRIVER
+ +M:    Hans de Goede <hdegoede@redhat.com>
+ +L:    linux-usb@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/usb/typec/mux/pi3usb30532.c
+ +
   USB TYPEC SUBSYSTEM
   M:    Heikki Krogerus <heikki.krogerus@linux.intel.com>
   L:    linux-usb@vger.kernel.org
@@@ -14850,7 -14635,7 +14850,7 @@@ VF610 NAND DRIVE
   M:    Stefan Agner <stefan@agner.ch>
   L:    linux-mtd@lists.infradead.org
   S:    Supported
- -F:    drivers/mtd/nand/vf610_nfc.c
+ +F:    drivers/mtd/nand/raw/vf610_nfc.c
   
   VFAT/FAT/MSDOS FILESYSTEM
   M:    OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
@@@ -14878,7 -14663,7 +14878,7 @@@ F:   include/linux/mdev.
   F:    samples/vfio-mdev/
   
   VFIO PLATFORM DRIVER
- -M:    Baptiste Reynal <b.reynal@virtualopensystems.com>
+ +M:    Eric Auger <eric.auger@redhat.com>
   L:    kvm@vger.kernel.org
   S:    Maintained
   F:    drivers/vfio/platform/
@@@ -14998,7 -14783,7 +14998,7 @@@ F:   include/uapi/linux/virtio_crypto.
   
   VIRTIO DRIVERS FOR S390
   M:    Cornelia Huck <cohuck@redhat.com>
- -M:    Halil Pasic <pasic@linux.vnet.ibm.com>
+ +M:    Halil Pasic <pasic@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   L:    virtualization@lists.linux-foundation.org
   L:    kvm@vger.kernel.org
@@@ -15113,7 -14898,7 +15113,7 @@@ F:   drivers/input/mouse/vmmouse.
   F:    drivers/input/mouse/vmmouse.h
   
   VMWARE VMXNET3 ETHERNET DRIVER
- -M:    Shrikrishna Khare <skhare@vmware.com>
+ +M:    Ronak Doshi <doshir@vmware.com>
   M:    "VMware, Inc." <pv-drivers@vmware.com>
   L:    netdev@vger.kernel.org
   S:    Maintained
@@@ -15621,7 -15406,7 +15621,7 @@@ L:   linux-mm@kvack.or
   S:    Maintained
   F:    mm/zsmalloc.c
   F:    include/linux/zsmalloc.h
- F:    Documentation/vm/zsmalloc.txt
+ F:    Documentation/vm/zsmalloc.rst
   
   ZSWAP COMPRESSED SWAP CACHING
   M:    Seth Jennings <sjenning@redhat.com>
diff --combined arch/alpha/Kconfig

index b2022885ced8ab05f9f1837b2c6cfde62f37a249,f53e5060afe78de9c8513551c4fadadbd8df5035..0ed6592d27bfbf4392460f9497ea585542dc5e76
--- 1/arch/alpha/Kconfig
--- 2/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@@ -18,7 -18,6 +18,7 @@@ config ALPH
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
         select AUDIT_ARCH
         select GENERIC_CLOCKEVENTS
+ +      select GENERIC_CPU_VULNERABILITIES
         select GENERIC_SMP_IDLE_THREAD
         select GENERIC_STRNCPY_FROM_USER
         select GENERIC_STRNLEN_USER
@@@ -585,7 -584,7 +585,7 @@@ config ARCH_DISCONTIGMEM_ENABL
           Say Y to support efficient handling of discontiguous physical memory,
           for architectures which are either NUMA (Non-Uniform Memory Access)
           or have huge holes in the physical address space for other reasons.
-         See <file:Documentation/vm/numa> for more.
+         See <file:Documentation/vm/numa.rst> for more.
   
   source "mm/Kconfig"
   
diff --combined arch/mips/Kconfig

index 225c95da23ce62e8844ff663c30d56f39cd44d15,4562810857ebca6962e87bcfdafc36bb90588f6e..33ea5e865d1c5e186b8af61124d3b634ff630736
--- 1/arch/mips/Kconfig
--- 2/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@@ -200,7 -200,6 +200,7 @@@ config ATH7
         select SYS_SUPPORTS_MIPS16
         select SYS_SUPPORTS_ZBOOT_UART_PROM
         select USE_OF
+ +      select USB_EHCI_ROOT_HUB_TT if USB_EHCI_HCD_PLATFORM
         help
           Support for the Atheros AR71XX/AR724X/AR913X SoCs.
   
@@@ -2029,7 -2028,6 +2029,7 @@@ config CPU_MIPSR
         select CPU_HAS_RIXI
         select HAVE_ARCH_BITREVERSE
         select MIPS_ASID_BITS_VARIABLE
+ +      select MIPS_CRC_SUPPORT
         select MIPS_SPRAM
   
   config EVA
@@@ -2503,9 -2501,6 +2503,9 @@@ config MIPS_ASID_BIT
   config MIPS_ASID_BITS_VARIABLE
         bool
   
+ +config MIPS_CRC_SUPPORT
+ +      bool
+ +
   #
   # - Highmem only makes sense for the 32-bit kernel.
   # - The current highmem code will only work properly on physically indexed
@@@ -2556,7 -2551,7 +2556,7 @@@ config ARCH_DISCONTIGMEM_ENABL
           Say Y to support efficient handling of discontiguous physical memory,
           for architectures which are either NUMA (Non-Uniform Memory Access)
           or have huge holes in the physical address space for other reasons.
-         See <file:Documentation/vm/numa> for more.
+         See <file:Documentation/vm/numa.rst> for more.
   
   config ARCH_SPARSEMEM_ENABLE
         bool
@@@ -2854,7 -2849,8 +2854,7 @@@ config CRASH_DUM
   
   config PHYSICAL_START
         hex "Physical address where the kernel is loaded"
- -      default "0xffffffff84000000" if 64BIT
- -      default "0x84000000" if 32BIT
+ +      default "0xffffffff84000000"
         depends on CRASH_DUMP
         help
           This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
diff --combined arch/powerpc/Kconfig

index c32a181a7cbbfc30ab9c55d925d89a282857bbf3,f8c0f10949eab572de1649b9ed2b988e7d110edc..2618a9170a52a63f54ae2ed70cfed2be5a498dfe
--- 1/arch/powerpc/Kconfig
--- 2/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@@ -552,9 -552,6 +552,9 @@@ config KEXEC_FIL
           for kernel and initramfs as opposed to a list of segments as is the
           case for the older kexec call.
   
+ +config ARCH_HAS_KEXEC_PURGATORY
+ +      def_bool KEXEC_FILE
+ +
   config RELOCATABLE
         bool "Build a relocatable kernel"
         depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
@@@ -883,7 -880,7 +883,7 @@@ config PPC_MEM_KEY
           page-based protections, but without requiring modification of the
           page tables when an application changes protection domains.
   
-         For details, see Documentation/vm/protection-keys.txt
+         For details, see Documentation/vm/protection-keys.rst
   
           If unsure, say y.
   
diff --combined fs/dax.c

index aaec72ded1b63c9a2944a228d201be239f77ada1,0eb65c34d5a6b198a57d67671d67fd5bef2bfa72..aa86d9f971a4b9fd993addfe46a50a3f0644b61b
--- 1/fs/dax.c
--- 2/fs/dax.c
+++ b/fs/dax.c
@@@ -73,15 -73,16 +73,15 @@@ fs_initcall(init_dax_wait_table)
   #define RADIX_DAX_ZERO_PAGE   (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
   #define RADIX_DAX_EMPTY               (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
   
- -static unsigned long dax_radix_sector(void *entry)
+ +static unsigned long dax_radix_pfn(void *entry)
   {
         return (unsigned long)entry >> RADIX_DAX_SHIFT;
   }
   
- -static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+ +static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
   {
         return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
- -                      ((unsigned long)sector << RADIX_DAX_SHIFT) |
- -                      RADIX_DAX_ENTRY_LOCK);
+ +                      (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
   }
   
   static unsigned int dax_radix_order(void *entry)
@@@ -158,9 -159,11 +158,9 @@@ static int wake_exceptional_entry_func(
   }
   
   /*
- - * We do not necessarily hold the mapping->tree_lock when we call this
- - * function so it is possible that 'entry' is no longer a valid item in the
- - * radix tree.  This is okay because all we really need to do is to find the
- - * correct waitqueue where tasks might be waiting for that old 'entry' and
- - * wake them.
+ + * @entry may no longer be the entry at the index in the mapping.
+ + * The important information it's conveying is whether the entry at
+ + * this index used to be a PMD entry.
    */
   static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                 pgoff_t index, void *entry, bool wake_all)
@@@ -172,7 -175,7 +172,7 @@@
   
         /*
          * Checking for locked entry and prepare_to_wait_exclusive() happens
- -       * under mapping->tree_lock, ditto for entry handling in our callers.
+ +       * under the i_pages lock, ditto for entry handling in our callers.
          * So at this point all tasks that could have seen our entry locked
          * must be in the waitqueue and the following check will see them.
          */
@@@ -181,39 -184,41 +181,39 @@@
   }
   
   /*
- - * Check whether the given slot is locked. The function must be called with
- - * mapping->tree_lock held
+ + * Check whether the given slot is locked.  Must be called with the i_pages
+ + * lock held.
    */
   static inline int slot_locked(struct address_space *mapping, void **slot)
   {
         unsigned long entry = (unsigned long)
- -              radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ +              radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
         return entry & RADIX_DAX_ENTRY_LOCK;
   }
   
   /*
- - * Mark the given slot is locked. The function must be called with
- - * mapping->tree_lock held
+ + * Mark the given slot as locked.  Must be called with the i_pages lock held.
    */
   static inline void *lock_slot(struct address_space *mapping, void **slot)
   {
         unsigned long entry = (unsigned long)
- -              radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ +              radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
   
         entry |= RADIX_DAX_ENTRY_LOCK;
- -      radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+ +      radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
         return (void *)entry;
   }
   
   /*
- - * Mark the given slot is unlocked. The function must be called with
- - * mapping->tree_lock held
+ + * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
    */
   static inline void *unlock_slot(struct address_space *mapping, void **slot)
   {
         unsigned long entry = (unsigned long)
- -              radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ +              radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
   
         entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
- -      radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+ +      radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
         return (void *)entry;
   }
   
@@@ -224,7 -229,7 +224,7 @@@
    * put_locked_mapping_entry() when he locked the entry and now wants to
    * unlock it.
    *
- - * The function must be called with mapping->tree_lock held.
+ + * Must be called with the i_pages lock held.
    */
   static void *get_unlocked_mapping_entry(struct address_space *mapping,
                                         pgoff_t index, void ***slotp)
@@@ -237,7 -242,7 +237,7 @@@
         ewait.wait.func = wake_exceptional_entry_func;
   
         for (;;) {
- -              entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+ +              entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
                                           &slot);
                 if (!entry ||
                     WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@@@ -250,10 -255,10 +250,10 @@@
                 wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
                 prepare_to_wait_exclusive(wq, &ewait.wait,
                                           TASK_UNINTERRUPTIBLE);
- -              spin_unlock_irq(&mapping->tree_lock);
+ +              xa_unlock_irq(&mapping->i_pages);
                 schedule();
                 finish_wait(wq, &ewait.wait);
- -              spin_lock_irq(&mapping->tree_lock);
+ +              xa_lock_irq(&mapping->i_pages);
         }
   }
   
@@@ -262,15 -267,15 +262,15 @@@ static void dax_unlock_mapping_entry(st
   {
         void *entry, **slot;
   
- -      spin_lock_irq(&mapping->tree_lock);
- -      entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+ +      xa_lock_irq(&mapping->i_pages);
+ +      entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
         if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
                          !slot_locked(mapping, slot))) {
- -              spin_unlock_irq(&mapping->tree_lock);
+ +              xa_unlock_irq(&mapping->i_pages);
                 return;
         }
         unlock_slot(mapping, slot);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(&mapping->i_pages);
         dax_wake_mapping_entry_waiter(mapping, index, entry, false);
   }
   
@@@ -294,63 -299,6 +294,63 @@@ static void put_unlocked_mapping_entry(
         dax_wake_mapping_entry_waiter(mapping, index, entry, false);
   }
   
+ +static unsigned long dax_entry_size(void *entry)
+ +{
+ +      if (dax_is_zero_entry(entry))
+ +              return 0;
+ +      else if (dax_is_empty_entry(entry))
+ +              return 0;
+ +      else if (dax_is_pmd_entry(entry))
+ +              return PMD_SIZE;
+ +      else
+ +              return PAGE_SIZE;
+ +}
+ +
+ +static unsigned long dax_radix_end_pfn(void *entry)
+ +{
+ +      return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+ +}
+ +
+ +/*
+ + * Iterate through all mapped pfns represented by an entry, i.e. skip
+ + * 'empty' and 'zero' entries.
+ + */
+ +#define for_each_mapped_pfn(entry, pfn) \
+ +      for (pfn = dax_radix_pfn(entry); \
+ +                      pfn < dax_radix_end_pfn(entry); pfn++)
+ +
+ +static void dax_associate_entry(void *entry, struct address_space *mapping)
+ +{
+ +      unsigned long pfn;
+ +
+ +      if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+ +              return;
+ +
+ +      for_each_mapped_pfn(entry, pfn) {
+ +              struct page *page = pfn_to_page(pfn);
+ +
+ +              WARN_ON_ONCE(page->mapping);
+ +              page->mapping = mapping;
+ +      }
+ +}
+ +
+ +static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+ +              bool trunc)
+ +{
+ +      unsigned long pfn;
+ +
+ +      if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+ +              return;
+ +
+ +      for_each_mapped_pfn(entry, pfn) {
+ +              struct page *page = pfn_to_page(pfn);
+ +
+ +              WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+ +              WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+ +              page->mapping = NULL;
+ +      }
+ +}
+ +
   /*
    * Find radix tree entry at given index. If it points to an exceptional entry,
    * return it with the radix tree entry locked. If the radix tree doesn't
@@@ -384,7 -332,7 +384,7 @@@ static void *grab_mapping_entry(struct 
         void *entry, **slot;
   
   restart:
- -      spin_lock_irq(&mapping->tree_lock);
+ +      xa_lock_irq(&mapping->i_pages);
         entry = get_unlocked_mapping_entry(mapping, index, &slot);
   
         if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@@@ -416,12 -364,12 +416,12 @@@
                 if (pmd_downgrade) {
                         /*
                          * Make sure 'entry' remains valid while we drop
- -                       * mapping->tree_lock.
+ +                       * the i_pages lock.
                          */
                         entry = lock_slot(mapping, slot);
                 }
   
- -              spin_unlock_irq(&mapping->tree_lock);
+ +              xa_unlock_irq(&mapping->i_pages);
                 /*
                  * Besides huge zero pages the only other thing that gets
                  * downgraded are empty entries which don't need to be
@@@ -438,27 -386,26 +438,27 @@@
                                 put_locked_mapping_entry(mapping, index);
                         return ERR_PTR(err);
                 }
- -              spin_lock_irq(&mapping->tree_lock);
+ +              xa_lock_irq(&mapping->i_pages);
   
                 if (!entry) {
                         /*
- -                       * We needed to drop the page_tree lock while calling
+ +                       * We needed to drop the i_pages lock while calling
                          * radix_tree_preload() and we didn't have an entry to
                          * lock.  See if another thread inserted an entry at
                          * our index during this time.
                          */
- -                      entry = __radix_tree_lookup(&mapping->page_tree, index,
+ +                      entry = __radix_tree_lookup(&mapping->i_pages, index,
                                         NULL, &slot);
                         if (entry) {
                                 radix_tree_preload_end();
- -                              spin_unlock_irq(&mapping->tree_lock);
+ +                              xa_unlock_irq(&mapping->i_pages);
                                 goto restart;
                         }
                 }
   
                 if (pmd_downgrade) {
- -                      radix_tree_delete(&mapping->page_tree, index);
+ +                      dax_disassociate_entry(entry, mapping, false);
+ +                      radix_tree_delete(&mapping->i_pages, index);
                         mapping->nrexceptional--;
                         dax_wake_mapping_entry_waiter(mapping, index, entry,
                                         true);
@@@ -466,11 -413,11 +466,11 @@@
   
                 entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
   
- -              err = __radix_tree_insert(&mapping->page_tree, index,
+ +              err = __radix_tree_insert(&mapping->i_pages, index,
                                 dax_radix_order(entry), entry);
                 radix_tree_preload_end();
                 if (err) {
- -                      spin_unlock_irq(&mapping->tree_lock);
+ +                      xa_unlock_irq(&mapping->i_pages);
                         /*
                          * Our insertion of a DAX entry failed, most likely
                          * because we were inserting a PMD entry and it
@@@ -483,12 -430,12 +483,12 @@@
                 }
                 /* Good, we have inserted empty locked entry into the tree. */
                 mapping->nrexceptional++;
- -              spin_unlock_irq(&mapping->tree_lock);
+ +              xa_unlock_irq(&mapping->i_pages);
                 return entry;
         }
         entry = lock_slot(mapping, slot);
    out_unlock:
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(&mapping->i_pages);
         return entry;
   }
   
@@@ -497,23 -444,22 +497,23 @@@ static int __dax_invalidate_mapping_ent
   {
         int ret = 0;
         void *entry;
- -      struct radix_tree_root *page_tree = &mapping->page_tree;
+ +      struct radix_tree_root *pages = &mapping->i_pages;
   
- -      spin_lock_irq(&mapping->tree_lock);
+ +      xa_lock_irq(pages);
         entry = get_unlocked_mapping_entry(mapping, index, NULL);
         if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
                 goto out;
         if (!trunc &&
- -          (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
- -           radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+ +          (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
+ +           radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
                 goto out;
- -      radix_tree_delete(page_tree, index);
+ +      dax_disassociate_entry(entry, mapping, trunc);
+ +      radix_tree_delete(pages, index);
         mapping->nrexceptional--;
         ret = 1;
   out:
         put_unlocked_mapping_entry(mapping, index, entry);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(pages);
         return ret;
   }
   /*
@@@ -580,13 -526,12 +580,13 @@@ static int copy_user_dax(struct block_d
    */
   static void *dax_insert_mapping_entry(struct address_space *mapping,
                                       struct vm_fault *vmf,
- -                                    void *entry, sector_t sector,
+ +                                    void *entry, pfn_t pfn_t,
                                       unsigned long flags, bool dirty)
   {
- -      struct radix_tree_root *page_tree = &mapping->page_tree;
- -      void *new_entry;
+ +      struct radix_tree_root *pages = &mapping->i_pages;
+ +      unsigned long pfn = pfn_t_to_pfn(pfn_t);
         pgoff_t index = vmf->pgoff;
+ +      void *new_entry;
   
         if (dirty)
                 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@@ -600,12 -545,8 +600,12 @@@
                         unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
         }
   
- -      spin_lock_irq(&mapping->tree_lock);
- -      new_entry = dax_radix_locked_entry(sector, flags);
+ +      xa_lock_irq(pages);
+ +      new_entry = dax_radix_locked_entry(pfn, flags);
+ +      if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+ +              dax_disassociate_entry(entry, mapping, false);
+ +              dax_associate_entry(new_entry, mapping);
+ +      }
   
         if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
                 /*
@@@ -620,17 -561,17 +620,17 @@@
                 void **slot;
                 void *ret;
   
- -              ret = __radix_tree_lookup(page_tree, index, &node, &slot);
+ +              ret = __radix_tree_lookup(pages, index, &node, &slot);
                 WARN_ON_ONCE(ret != entry);
- -              __radix_tree_replace(page_tree, node, slot,
+ +              __radix_tree_replace(pages, node, slot,
                                      new_entry, NULL);
                 entry = new_entry;
         }
   
         if (dirty)
- -              radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ +              radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
   
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(pages);
         return entry;
   }
   
@@@ -677,7 -618,7 +677,7 @@@ static void dax_mapping_entry_mkclean(s
                  * downgrading page table protection not changing it to point
                  * to a new page.
                  *
-                * See Documentation/vm/mmu_notifier.txt
+                * See Documentation/vm/mmu_notifier.rst
                  */
                 if (pmdp) {
   #ifdef CONFIG_FS_DAX_PMD
@@@ -716,14 -657,17 +716,14 @@@ unlock_pte
         i_mmap_unlock_read(mapping);
   }
   
- -static int dax_writeback_one(struct block_device *bdev,
- -              struct dax_device *dax_dev, struct address_space *mapping,
- -              pgoff_t index, void *entry)
+ +static int dax_writeback_one(struct dax_device *dax_dev,
+ +              struct address_space *mapping, pgoff_t index, void *entry)
   {
- -      struct radix_tree_root *page_tree = &mapping->page_tree;
- -      void *entry2, **slot, *kaddr;
- -      long ret = 0, id;
- -      sector_t sector;
- -      pgoff_t pgoff;
+ +      struct radix_tree_root *pages = &mapping->i_pages;
+ +      void *entry2, **slot;
+ +      unsigned long pfn;
+ +      long ret = 0;
         size_t size;
- -      pfn_t pfn;
   
         /*
          * A page got tagged dirty in DAX mapping? Something is seriously
@@@ -732,17 -676,17 +732,17 @@@
         if (WARN_ON(!radix_tree_exceptional_entry(entry)))
                 return -EIO;
   
- -      spin_lock_irq(&mapping->tree_lock);
+ +      xa_lock_irq(pages);
         entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
         /* Entry got punched out / reallocated? */
         if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
                 goto put_unlocked;
         /*
          * Entry got reallocated elsewhere? No need to writeback. We have to
- -       * compare sectors as we must not bail out due to difference in lockbit
+ +       * compare pfns as we must not bail out due to difference in lockbit
          * or entry type.
          */
- -      if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+ +      if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
                 goto put_unlocked;
         if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
                                 dax_is_zero_entry(entry))) {
@@@ -751,7 -695,7 +751,7 @@@
         }
   
         /* Another fsync thread may have already written back this entry */
- -      if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+ +      if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
                 goto put_unlocked;
         /* Lock the entry to serialize with page faults */
         entry = lock_slot(mapping, slot);
@@@ -759,40 -703,60 +759,40 @@@
          * We can clear the tag now but we have to be careful so that concurrent
          * dax_writeback_one() calls for the same index cannot finish before we
          * actually flush the caches. This is achieved as the calls will look
- -       * at the entry only under tree_lock and once they do that they will
- -       * see the entry locked and wait for it to unlock.
+ +       * at the entry only under the i_pages lock and once they do that
+ +       * they will see the entry locked and wait for it to unlock.
          */
- -      radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
+ +      xa_unlock_irq(pages);
   
         /*
          * Even if dax_writeback_mapping_range() was given a wbc->range_start
          * in the middle of a PMD, the 'index' we are given will be aligned to
- -       * the start index of the PMD, as will the sector we pull from
- -       * 'entry'.  This allows us to flush for PMD_SIZE and not have to
- -       * worry about partial PMD writebacks.
+ +       * the start index of the PMD, as will the pfn we pull from 'entry'.
+ +       * This allows us to flush for PMD_SIZE and not have to worry about
+ +       * partial PMD writebacks.
          */
- -      sector = dax_radix_sector(entry);
+ +      pfn = dax_radix_pfn(entry);
         size = PAGE_SIZE << dax_radix_order(entry);
   
- -      id = dax_read_lock();
- -      ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
- -      if (ret)
- -              goto dax_unlock;
- -
- -      /*
- -       * dax_direct_access() may sleep, so cannot hold tree_lock over
- -       * its invocation.
- -       */
- -      ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
- -      if (ret < 0)
- -              goto dax_unlock;
- -
- -      if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
- -              ret = -EIO;
- -              goto dax_unlock;
- -      }
- -
- -      dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
- -      dax_flush(dax_dev, kaddr, size);
+ +      dax_mapping_entry_mkclean(mapping, index, pfn);
+ +      dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
         /*
          * After we have flushed the cache, we can clear the dirty tag. There
          * cannot be new dirty data in the pfn after the flush has completed as
          * the pfn mappings are writeprotected and fault waits for mapping
          * entry lock.
          */
- -      spin_lock_irq(&mapping->tree_lock);
- -      radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_lock_irq(pages);
+ +      radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
+ +      xa_unlock_irq(pages);
         trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- - dax_unlock:
- -      dax_read_unlock(id);
         put_locked_mapping_entry(mapping, index);
         return ret;
   
    put_unlocked:
         put_unlocked_mapping_entry(mapping, index, entry2);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(pages);
         return ret;
   }
   
@@@ -844,8 -808,8 +844,8 @@@ int dax_writeback_mapping_range(struct 
                                 break;
                         }
   
- -                      ret = dax_writeback_one(bdev, dax_dev, mapping,
- -                                      indices[i], pvec.pages[i]);
+ +                      ret = dax_writeback_one(dax_dev, mapping, indices[i],
+ +                                      pvec.pages[i]);
                         if (ret < 0) {
                                 mapping_set_error(mapping, ret);
                                 goto out;
@@@ -913,7 -877,6 +913,7 @@@ static int dax_load_hole(struct address
         int ret = VM_FAULT_NOPAGE;
         struct page *zero_page;
         void *entry2;
+ +      pfn_t pfn;
   
         zero_page = ZERO_PAGE(0);
         if (unlikely(!zero_page)) {
@@@ -921,15 -884,14 +921,15 @@@
                 goto out;
         }
   
- -      entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+ +      pfn = page_to_pfn_t(zero_page);
+ +      entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                         RADIX_DAX_ZERO_PAGE, false);
         if (IS_ERR(entry2)) {
                 ret = VM_FAULT_SIGBUS;
                 goto out;
         }
   
- -      vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+ +      vm_insert_mixed(vmf->vma, vaddr, pfn);
   out:
         trace_dax_load_hole(inode, vmf, ret);
         return ret;
@@@ -1238,7 -1200,8 +1238,7 @@@ static int dax_iomap_pte_fault(struct v
                 if (error < 0)
                         goto error_finish_iomap;
   
- -              entry = dax_insert_mapping_entry(mapping, vmf, entry,
- -                                               dax_iomap_sector(&iomap, pos),
+ +              entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                  0, write && !sync);
                 if (IS_ERR(entry)) {
                         error = PTR_ERR(entry);
@@@ -1317,15 -1280,13 +1317,15 @@@ static int dax_pmd_load_hole(struct vm_
         void *ret = NULL;
         spinlock_t *ptl;
         pmd_t pmd_entry;
+ +      pfn_t pfn;
   
         zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
   
         if (unlikely(!zero_page))
                 goto fallback;
   
- -      ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+ +      pfn = page_to_pfn_t(zero_page);
+ +      ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                         RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
         if (IS_ERR(ret))
                 goto fallback;
@@@ -1448,7 -1409,8 +1448,7 @@@ static int dax_iomap_pmd_fault(struct v
                 if (error < 0)
                         goto finish_iomap;
   
- -              entry = dax_insert_mapping_entry(mapping, vmf, entry,
- -                                              dax_iomap_sector(&iomap, pos),
+ +              entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                 RADIX_DAX_PMD, write && !sync);
                 if (IS_ERR(entry))
                         goto finish_iomap;
@@@ -1562,21 -1524,21 +1562,21 @@@ static int dax_insert_pfn_mkwrite(struc
         pgoff_t index = vmf->pgoff;
         int vmf_ret, error;
   
- -      spin_lock_irq(&mapping->tree_lock);
+ +      xa_lock_irq(&mapping->i_pages);
         entry = get_unlocked_mapping_entry(mapping, index, &slot);
         /* Did we race with someone splitting entry or so? */
         if (!entry ||
             (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
             (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
                 put_unlocked_mapping_entry(mapping, index, entry);
- -              spin_unlock_irq(&mapping->tree_lock);
+ +              xa_unlock_irq(&mapping->i_pages);
                 trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
                                                       VM_FAULT_NOPAGE);
                 return VM_FAULT_NOPAGE;
         }
- -      radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
+ +      radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
         entry = lock_slot(mapping, slot);
- -      spin_unlock_irq(&mapping->tree_lock);
+ +      xa_unlock_irq(&mapping->i_pages);
         switch (pe_size) {
         case PE_SIZE_PTE:
                 error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
diff --combined fs/proc/task_mmu.c

index 65ae54659833888142d376835d9b4a8a70c7218e,91d14c4ac04a19bd8f00248aa8e0fb3521652ede..333cda80c3ddc33f0da92bed999068b213f55464
--- 1/fs/proc/task_mmu.c
--- 2/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@@ -24,8 -24,6 +24,8 @@@
   #include <asm/tlbflush.h>
   #include "internal.h"
   
+ +#define SEQ_PUT_DEC(str, val) \
+ +              seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
   void task_mem(struct seq_file *m, struct mm_struct *mm)
   {
         unsigned long text, lib, swap, anon, file, shmem;
@@@ -55,28 -53,39 +55,28 @@@
         lib = (mm->exec_vm << PAGE_SHIFT) - text;
   
         swap = get_mm_counter(mm, MM_SWAPENTS);
- -      seq_printf(m,
- -              "VmPeak:\t%8lu kB\n"
- -              "VmSize:\t%8lu kB\n"
- -              "VmLck:\t%8lu kB\n"
- -              "VmPin:\t%8lu kB\n"
- -              "VmHWM:\t%8lu kB\n"
- -              "VmRSS:\t%8lu kB\n"
- -              "RssAnon:\t%8lu kB\n"
- -              "RssFile:\t%8lu kB\n"
- -              "RssShmem:\t%8lu kB\n"
- -              "VmData:\t%8lu kB\n"
- -              "VmStk:\t%8lu kB\n"
- -              "VmExe:\t%8lu kB\n"
- -              "VmLib:\t%8lu kB\n"
- -              "VmPTE:\t%8lu kB\n"
- -              "VmSwap:\t%8lu kB\n",
- -              hiwater_vm << (PAGE_SHIFT-10),
- -              total_vm << (PAGE_SHIFT-10),
- -              mm->locked_vm << (PAGE_SHIFT-10),
- -              mm->pinned_vm << (PAGE_SHIFT-10),
- -              hiwater_rss << (PAGE_SHIFT-10),
- -              total_rss << (PAGE_SHIFT-10),
- -              anon << (PAGE_SHIFT-10),
- -              file << (PAGE_SHIFT-10),
- -              shmem << (PAGE_SHIFT-10),
- -              mm->data_vm << (PAGE_SHIFT-10),
- -              mm->stack_vm << (PAGE_SHIFT-10),
- -              text >> 10,
- -              lib >> 10,
- -              mm_pgtables_bytes(mm) >> 10,
- -              swap << (PAGE_SHIFT-10));
+ +      SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+ +      SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+ +      SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+ +      SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+ +      SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+ +      SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+ +      SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+ +      SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+ +      SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+ +      SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+ +      SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+ +      seq_put_decimal_ull_width(m,
+ +                  " kB\nVmExe:\t", text >> 10, 8);
+ +      seq_put_decimal_ull_width(m,
+ +                  " kB\nVmLib:\t", lib >> 10, 8);
+ +      seq_put_decimal_ull_width(m,
+ +                  " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+ +      SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+ +      seq_puts(m, " kB\n");
         hugetlb_report_usage(m, mm);
   }
+ +#undef SEQ_PUT_DEC
   
   unsigned long task_vsize(struct mm_struct *mm)
   {
@@@ -278,18 -287,15 +278,18 @@@ static void show_vma_header_prefix(stru
                                    dev_t dev, unsigned long ino)
   {
         seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
- -      seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
- -                 start,
- -                 end,
- -                 flags & VM_READ ? 'r' : '-',
- -                 flags & VM_WRITE ? 'w' : '-',
- -                 flags & VM_EXEC ? 'x' : '-',
- -                 flags & VM_MAYSHARE ? 's' : 'p',
- -                 pgoff,
- -                 MAJOR(dev), MINOR(dev), ino);
+ +      seq_put_hex_ll(m, NULL, start, 8);
+ +      seq_put_hex_ll(m, "-", end, 8);
+ +      seq_putc(m, ' ');
+ +      seq_putc(m, flags & VM_READ ? 'r' : '-');
+ +      seq_putc(m, flags & VM_WRITE ? 'w' : '-');
+ +      seq_putc(m, flags & VM_EXEC ? 'x' : '-');
+ +      seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
+ +      seq_put_hex_ll(m, " ", pgoff, 8);
+ +      seq_put_hex_ll(m, " ", MAJOR(dev), 2);
+ +      seq_put_hex_ll(m, ":", MINOR(dev), 2);
+ +      seq_put_decimal_ull(m, " ", ino);
+ +      seq_putc(m, ' ');
   }
   
   static void
@@@ -688,9 -694,8 +688,9 @@@ static void show_smap_vma_flags(struct 
                 if (!mnemonics[i][0])
                         continue;
                 if (vma->vm_flags & (1UL << i)) {
- -                      seq_printf(m, "%c%c ",
- -                                 mnemonics[i][0], mnemonics[i][1]);
+ +                      seq_putc(m, mnemonics[i][0]);
+ +                      seq_putc(m, mnemonics[i][1]);
+ +                      seq_putc(m, ' ');
                 }
         }
         seq_putc(m, '\n');
@@@ -731,8 -736,6 +731,8 @@@ void __weak arch_show_smap(struct seq_f
   {
   }
   
+ +#define SEQ_PUT_DEC(str, val) \
+ +              seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
   static int show_smap(struct seq_file *m, void *v, int is_pid)
   {
         struct proc_maps_private *priv = m->private;
@@@ -806,34 -809,51 +806,34 @@@
                 ret = SEQ_SKIP;
         }
   
- -      if (!rollup_mode)
- -              seq_printf(m,
- -                         "Size:           %8lu kB\n"
- -                         "KernelPageSize: %8lu kB\n"
- -                         "MMUPageSize:    %8lu kB\n",
- -                         (vma->vm_end - vma->vm_start) >> 10,
- -                         vma_kernel_pagesize(vma) >> 10,
- -                         vma_mmu_pagesize(vma) >> 10);
- -
- -
- -      if (!rollup_mode || last_vma)
- -              seq_printf(m,
- -                         "Rss:            %8lu kB\n"
- -                         "Pss:            %8lu kB\n"
- -                         "Shared_Clean:   %8lu kB\n"
- -                         "Shared_Dirty:   %8lu kB\n"
- -                         "Private_Clean:  %8lu kB\n"
- -                         "Private_Dirty:  %8lu kB\n"
- -                         "Referenced:     %8lu kB\n"
- -                         "Anonymous:      %8lu kB\n"
- -                         "LazyFree:       %8lu kB\n"
- -                         "AnonHugePages:  %8lu kB\n"
- -                         "ShmemPmdMapped: %8lu kB\n"
- -                         "Shared_Hugetlb: %8lu kB\n"
- -                         "Private_Hugetlb: %7lu kB\n"
- -                         "Swap:           %8lu kB\n"
- -                         "SwapPss:        %8lu kB\n"
- -                         "Locked:         %8lu kB\n",
- -                         mss->resident >> 10,
- -                         (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
- -                         mss->shared_clean  >> 10,
- -                         mss->shared_dirty  >> 10,
- -                         mss->private_clean >> 10,
- -                         mss->private_dirty >> 10,
- -                         mss->referenced >> 10,
- -                         mss->anonymous >> 10,
- -                         mss->lazyfree >> 10,
- -                         mss->anonymous_thp >> 10,
- -                         mss->shmem_thp >> 10,
- -                         mss->shared_hugetlb >> 10,
- -                         mss->private_hugetlb >> 10,
- -                         mss->swap >> 10,
- -                         (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
- -                         (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+ +      if (!rollup_mode) {
+ +              SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+ +              SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+ +              SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+ +              seq_puts(m, " kB\n");
+ +      }
   
+ +      if (!rollup_mode || last_vma) {
+ +              SEQ_PUT_DEC("Rss:            ", mss->resident);
+ +              SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+ +              SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+ +              SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+ +              SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+ +              SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+ +              SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+ +              SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+ +              SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+ +              SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+ +              SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ +              SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+ +              seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+ +                                        mss->private_hugetlb >> 10, 7);
+ +              SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+ +              SEQ_PUT_DEC(" kB\nSwapPss:        ",
+ +                                              mss->swap_pss >> PSS_SHIFT);
+ +              SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+ +              seq_puts(m, " kB\n");
+ +      }
         if (!rollup_mode) {
                 arch_show_smap(m, vma);
                 show_smap_vma_flags(m, vma);
@@@ -841,7 -861,6 +841,7 @@@
         m_cache_vma(m, vma);
         return ret;
   }
+ +#undef SEQ_PUT_DEC
   
   static int show_pid_smap(struct seq_file *m, void *v)
   {
@@@ -937,7 -956,7 +937,7 @@@ static inline void clear_soft_dirty(str
         /*
          * The soft-dirty tracker uses #PF-s to catch writes
          * to pages, so write-protect the pte as well. See the
-        * Documentation/vm/soft-dirty.txt for full description
+        * Documentation/vm/soft-dirty.rst for full description
          * of how soft-dirty works.
          */
         pte_t ptent = *pte;
@@@ -1417,7 -1436,7 +1417,7 @@@ static int pagemap_hugetlb_range(pte_t 
    * Bits 0-54  page frame number (PFN) if present
    * Bits 0-4   swap type if swapped
    * Bits 5-54  swap offset if swapped
-  * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+  * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.rst)
    * Bit  56    page exclusively mapped
    * Bits 57-60 zero
    * Bit  61    page is file-page or shared-anon
diff --combined include/linux/hmm.h

index 39988924de3aa08f5c3c6d0c2ac0773d4d78a269,77be87c095f25fae744179abdf33cfd7d0d773db..2f1327c37a63d68335baa06441c3e9505ff7719c
--- 1/include/linux/hmm.h
--- 2/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@@ -16,7 -16,7 +16,7 @@@
   /*
    * Heterogeneous Memory Management (HMM)
    *
-  * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it
+  * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and it
    * is for. Here we focus on the HMM API description, with some explanation of
    * the underlying implementation.
    *
@@@ -80,145 -80,76 +80,145 @@@
   struct hmm;
   
   /*
- - * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ + * hmm_pfn_flag_e - HMM flag enums
    *
    * Flags:
- - * HMM_PFN_VALID: pfn is valid
- - * HMM_PFN_READ:  CPU page table has read permission set
+ + * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
    * HMM_PFN_WRITE: CPU page table has write permission set
+ + * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
+ + *
+ + * The driver provides a flags array. If the driver valid bit for an entry is
+ + * bit 3, ie (entry & (1 << 3)) is true if the entry is valid, then the driver
+ + * must provide an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID]
+ + * == 1 << 3. The same logic applies to all flags. This is like vm_page_prot in
+ + * a vma, except that it is per device driver rather than per architecture.
+ + */
+ +enum hmm_pfn_flag_e {
+ +      HMM_PFN_VALID = 0,
+ +      HMM_PFN_WRITE,
+ +      HMM_PFN_DEVICE_PRIVATE,
+ +      HMM_PFN_FLAG_MAX
+ +};
+ +
+ +/*
+ + * hmm_pfn_value_e - HMM pfn special value
+ + *
+ + * Flags:
    * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- - * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
+ + * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
    * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
    *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
    *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
    *      set and the pfn value is undefined.
- - * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
+ + *
+ + * The driver provides the values for none, error and special entries. The
+ + * driver may alias values (e.g. use the same value for error and special), but
+ + * it should not alias none with error or special.
+ + *
+ + * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
+ + * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
+ + * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
+ + * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
    */
- -typedef unsigned long hmm_pfn_t;
+ +enum hmm_pfn_value_e {
+ +      HMM_PFN_ERROR,
+ +      HMM_PFN_NONE,
+ +      HMM_PFN_SPECIAL,
+ +      HMM_PFN_VALUE_MAX
+ +};
   
- -#define HMM_PFN_VALID (1 << 0)
- -#define HMM_PFN_READ (1 << 1)
- -#define HMM_PFN_WRITE (1 << 2)
- -#define HMM_PFN_ERROR (1 << 3)
- -#define HMM_PFN_EMPTY (1 << 4)
- -#define HMM_PFN_SPECIAL (1 << 5)
- -#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
- -#define HMM_PFN_SHIFT 7
+ +/*
+ + * struct hmm_range - track invalidation lock on virtual address range
+ + *
+ + * @vma: the vm area struct for the range
+ + * @list: all range lock are on a list
+ + * @start: range virtual start address (inclusive)
+ + * @end: range virtual end address (exclusive)
+ + * @pfns: array of pfns (big enough for the range)
+ + * @flags: pfn flags to match device driver page table
+ + * @values: pfn value for some special case (none, special, error, ...)
+ + * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
+ + * @valid: pfns array did not change since it has been filled by an HMM function
+ + */
+ +struct hmm_range {
+ +      struct vm_area_struct   *vma;
+ +      struct list_head        list;
+ +      unsigned long           start;
+ +      unsigned long           end;
+ +      uint64_t                *pfns;
+ +      const uint64_t          *flags;
+ +      const uint64_t          *values;
+ +      uint8_t                 pfn_shift;
+ +      bool                    valid;
+ +};
   
   /*
- - * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
- - * @pfn: hmm_pfn_t to convert to struct page
- - * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
+ + * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
+ + * @range: range used to decode HMM pfn value
+ + * @pfn: HMM pfn value to get corresponding struct page from
+ + * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
    *
- - * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page
- - * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL.
+ + * If the HMM pfn is valid (ie valid flag set) then return the struct page
+ + * matching the pfn value stored in the HMM pfn. Otherwise return NULL.
    */
- -static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn)
+ +static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
+ +                                         uint64_t pfn)
   {
- -      if (!(pfn & HMM_PFN_VALID))
+ +      if (pfn == range->values[HMM_PFN_NONE])
+ +              return NULL;
+ +      if (pfn == range->values[HMM_PFN_ERROR])
                 return NULL;
- -      return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+ +      if (pfn == range->values[HMM_PFN_SPECIAL])
+ +              return NULL;
+ +      if (!(pfn & range->flags[HMM_PFN_VALID]))
+ +              return NULL;
+ +      return pfn_to_page(pfn >> range->pfn_shift);
   }
   
   /*
- - * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t
- - * @pfn: hmm_pfn_t to extract pfn from
- - * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise
+ + * hmm_pfn_to_pfn() - return pfn value stored in an HMM pfn
+ + * @range: range used to decode HMM pfn value
+ + * @pfn: HMM pfn value to extract pfn from
+ + * Returns: pfn value if HMM pfn is valid, -1UL otherwise
    */
- -static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn)
+ +static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
+ +                                         uint64_t pfn)
   {
- -      if (!(pfn & HMM_PFN_VALID))
+ +      if (pfn == range->values[HMM_PFN_NONE])
+ +              return -1UL;
+ +      if (pfn == range->values[HMM_PFN_ERROR])
+ +              return -1UL;
+ +      if (pfn == range->values[HMM_PFN_SPECIAL])
+ +              return -1UL;
+ +      if (!(pfn & range->flags[HMM_PFN_VALID]))
                 return -1UL;
- -      return (pfn >> HMM_PFN_SHIFT);
+ +      return (pfn >> range->pfn_shift);
   }
   
   /*
- - * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page
- - * @page: struct page pointer for which to create the hmm_pfn_t
- - * Returns: valid hmm_pfn_t for the page
+ + * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
+ + * @range: range used to encode HMM pfn value
+ + * @page: struct page pointer for which to create the HMM pfn
+ + * Returns: valid HMM pfn for the page
    */
- -static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page)
+ +static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
+ +                                       struct page *page)
   {
- -      return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+ +      return (page_to_pfn(page) << range->pfn_shift) |
+ +              range->flags[HMM_PFN_VALID];
   }
   
   /*
- - * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn
- - * @pfn: pfn value for which to create the hmm_pfn_t
- - * Returns: valid hmm_pfn_t for the pfn
+ + * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
+ + * @range: range used to encode HMM pfn value
+ + * @pfn: pfn value for which to create the HMM pfn
+ + * Returns: valid HMM pfn for the pfn
    */
- -static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
+ +static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
+ +                                      unsigned long pfn)
   {
- -      return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+ +      return (pfn << range->pfn_shift) |
+ +              range->flags[HMM_PFN_VALID];
   }
   
   
@@@ -287,16 -218,6 +287,16 @@@ enum hmm_update_type 
    * @update: callback to update range on a device
    */
   struct hmm_mirror_ops {
+ +      /* release() - release hmm_mirror
+ +       *
+ +       * @mirror: pointer to struct hmm_mirror
+ +       *
+ +       * This is called when the mm_struct is being released.
+ +       * The callback should make sure no references to the mirror occur
+ +       * after the callback returns.
+ +       */
+ +      void (*release)(struct hmm_mirror *mirror);
+ +
         /* sync_cpu_device_pagetables() - synchronize page tables
          *
          * @mirror: pointer to struct hmm_mirror
@@@ -340,6 -261,23 +340,6 @@@ int hmm_mirror_register(struct hmm_mirr
   void hmm_mirror_unregister(struct hmm_mirror *mirror);
   
   
- -/*
- - * struct hmm_range - track invalidation lock on virtual address range
- - *
- - * @list: all range lock are on a list
- - * @start: range virtual start address (inclusive)
- - * @end: range virtual end address (exclusive)
- - * @pfns: array of pfns (big enough for the range)
- - * @valid: pfns array did not change since it has been fill by an HMM function
- - */
- -struct hmm_range {
- -      struct list_head        list;
- -      unsigned long           start;
- -      unsigned long           end;
- -      hmm_pfn_t               *pfns;
- -      bool                    valid;
- -};
- -
   /*
    * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
    * driver lock that serializes device page table updates, then call
@@@ -353,13 -291,17 +353,13 @@@
    *
    * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
    */
- -int hmm_vma_get_pfns(struct vm_area_struct *vma,
- -                   struct hmm_range *range,
- -                   unsigned long start,
- -                   unsigned long end,
- -                   hmm_pfn_t *pfns);
- -bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
+ +int hmm_vma_get_pfns(struct hmm_range *range);
+ +bool hmm_vma_range_done(struct hmm_range *range);
   
   
   /*
    * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
- - * not migrate any device memory back to system memory. The hmm_pfn_t array will
+ + * not migrate any device memory back to system memory. The HMM pfn array will
    * be updated with the fault result and current snapshot of the CPU page table
    * for the range.
    *
@@@ -368,26 -310,22 +368,26 @@@
    * function returns -EAGAIN.
    *
    * Return value does not reflect if the fault was successful for every single
- - * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to
+ + * address or not. Therefore, the caller must inspect the HMM pfn array to
    * determine fault status for each address.
    *
    * Trying to fault inside an invalid vma will result in -EINVAL.
    *
    * See the function description in mm/hmm.c for further documentation.
    */
- -int hmm_vma_fault(struct vm_area_struct *vma,
- -                struct hmm_range *range,
- -                unsigned long start,
- -                unsigned long end,
- -                hmm_pfn_t *pfns,
- -                bool write,
- -                bool block);
- -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+ +int hmm_vma_fault(struct hmm_range *range, bool block);
   
+ +/* Below are for HMM internal use only! Not to be used by device driver! */
+ +void hmm_mm_destroy(struct mm_struct *mm);
+ +
+ +static inline void hmm_mm_init(struct mm_struct *mm)
+ +{
+ +      mm->hmm = NULL;
+ +}
+ +#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+ +static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+ +static inline void hmm_mm_init(struct mm_struct *mm) {}
+ +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
   
   #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
   struct hmm_devmem;
@@@ -560,9 -498,23 +560,9 @@@ struct hmm_device 
   struct hmm_device *hmm_device_new(void *drvdata);
   void hmm_device_put(struct hmm_device *hmm_device);
   #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
- -#endif /* IS_ENABLED(CONFIG_HMM) */
- -
- -/* Below are for HMM internal use only! Not to be used by device driver! */
- -#if IS_ENABLED(CONFIG_HMM_MIRROR)
- -void hmm_mm_destroy(struct mm_struct *mm);
- -
- -static inline void hmm_mm_init(struct mm_struct *mm)
- -{
- -      mm->hmm = NULL;
- -}
- -#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
- -static inline void hmm_mm_destroy(struct mm_struct *mm) {}
- -static inline void hmm_mm_init(struct mm_struct *mm) {}
- -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
- -
- -
   #else /* IS_ENABLED(CONFIG_HMM) */
   static inline void hmm_mm_destroy(struct mm_struct *mm) {}
   static inline void hmm_mm_init(struct mm_struct *mm) {}
+ +#endif /* IS_ENABLED(CONFIG_HMM) */
+ +
   #endif /* LINUX_HMM_H */
diff --combined include/linux/sched/mm.h

index 2c570cd934af54c14dcddbf971271c087571887d,df2c7d11f496c12cf6a316e099ee810ce42501ee..4e1411bbbcfcdf253c6bd75eec3d1cb2705eb8aa
--- 1/include/linux/sched/mm.h
--- 2/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@@ -28,7 -28,7 +28,7 @@@ extern struct mm_struct *mm_alloc(void)
    *
    * Use mmdrop() to release the reference acquired by mmgrab().
    *
-  * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+  * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
    * of &mm_struct.mm_count vs &mm_struct.mm_users.
    */
   static inline void mmgrab(struct mm_struct *mm)
@@@ -36,18 -36,7 +36,18 @@@
         atomic_inc(&mm->mm_count);
   }
   
- -extern void mmdrop(struct mm_struct *mm);
+ +extern void __mmdrop(struct mm_struct *mm);
+ +
+ +static inline void mmdrop(struct mm_struct *mm)
+ +{
+ +      /*
+ +       * The implicit full barrier implied by atomic_dec_and_test() is
+ +       * required by the membarrier system call before returning to
+ +       * user-space, after storing to rq->curr.
+ +       */
+ +      if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ +              __mmdrop(mm);
+ +}
   
   /**
    * mmget() - Pin the address space associated with a &struct mm_struct.
@@@ -62,7 -51,7 +62,7 @@@
    *
    * Use mmput() to release the reference acquired by mmget().
    *
-  * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+  * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
    * of &mm_struct.mm_count vs &mm_struct.mm_users.
    */
   static inline void mmget(struct mm_struct *mm)
@@@ -104,8 -93,7 +104,8 @@@ static inline void mm_update_next_owner
   #endif /* CONFIG_MEMCG */
   
   #ifdef CONFIG_MMU
- -extern void arch_pick_mmap_layout(struct mm_struct *mm);
+ +extern void arch_pick_mmap_layout(struct mm_struct *mm,
+ +                                struct rlimit *rlim_stack);
   extern unsigned long
   arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
                        unsigned long, unsigned long);
@@@ -114,8 -102,7 +114,8 @@@ arch_get_unmapped_area_topdown(struct f
                           unsigned long len, unsigned long pgoff,
                           unsigned long flags);
   #else
- -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+ +static inline void arch_pick_mmap_layout(struct mm_struct *mm,
+ +                                       struct rlimit *rlim_stack) {}
   #endif
   
   static inline bool in_vfork(struct task_struct *tsk)
diff --combined include/linux/swap.h

index 2417d288e016609a4e844ba6b7e3b4c3ed3526d2,4003973deff4f71e558fd8619eb5626f3e8d7573..c063443d86381eabac4cde76d723dd0991091271
--- 1/include/linux/swap.h
--- 2/include/linux/swap.h
+++ b/include/linux/swap.h
@@@ -53,7 -53,7 +53,7 @@@ static inline int current_is_kswapd(voi
   
   /*
    * Unaddressable device memory support. See include/linux/hmm.h and
-  * Documentation/vm/hmm.txt. Short description is we need struct pages for
+  * Documentation/vm/hmm.rst. Short description is we need struct pages for
    * device memory that is unaddressable (inaccessible) by CPU, so that we can
    * migrate part of a process memory to device memory.
    *
@@@ -337,6 -337,8 +337,6 @@@ extern void deactivate_file_page(struc
   extern void mark_page_lazyfree(struct page *page);
   extern void swap_setup(void);
   
- -extern void add_page_to_unevictable_list(struct page *page);
- -
   extern void lru_cache_add_active_or_unevictable(struct page *page,
                                                 struct vm_area_struct *vma);
   
@@@ -400,6 -402,7 +400,6 @@@ int generic_swapfile_activate(struct sw
   #define SWAP_ADDRESS_SPACE_SHIFT      14
   #define SWAP_ADDRESS_SPACE_PAGES      (1 << SWAP_ADDRESS_SPACE_SHIFT)
   extern struct address_space *swapper_spaces[];
- -extern bool swap_vma_readahead;
   #define swap_address_space(entry)                         \
         (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
                 >> SWAP_ADDRESS_SPACE_SHIFT])
@@@ -421,10 -424,14 +421,10 @@@ extern struct page *read_swap_cache_asy
   extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
                         struct vm_area_struct *vma, unsigned long addr,
                         bool *new_page_allocated);
- -extern struct page *swapin_readahead(swp_entry_t, gfp_t,
- -                      struct vm_area_struct *vma, unsigned long addr);
- -
- -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
- -                                        struct vma_swap_readahead *swap_ra);
- -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
- -                                         struct vm_fault *vmf,
- -                                         struct vma_swap_readahead *swap_ra);
+ +extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
+ +                              struct vm_fault *vmf);
+ +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
+ +                              struct vm_fault *vmf);
   
   /* linux/mm/swapfile.c */
   extern atomic_long_t nr_swap_pages;
@@@ -432,6 -439,11 +432,6 @@@ extern long total_swap_pages
   extern atomic_t nr_rotate_swap;
   extern bool has_usable_swap(void);
   
- -static inline bool swap_use_vma_readahead(void)
- -{
- -      return READ_ONCE(swap_vma_readahead) && !atomic_read(&nr_rotate_swap);
- -}
- -
   /* Swap 50% full? Release swapcache more aggressively.. */
   static inline bool vm_swap_full(void)
   {
@@@ -527,14 -539,26 +527,14 @@@ static inline void put_swap_page(struc
   {
   }
   
- -static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
- -                      struct vm_area_struct *vma, unsigned long addr)
- -{
- -      return NULL;
- -}
- -
- -static inline bool swap_use_vma_readahead(void)
- -{
- -      return false;
- -}
- -
- -static inline struct page *swap_readahead_detect(
- -      struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
+ +static inline struct page *swap_cluster_readahead(swp_entry_t entry,
+ +                              gfp_t gfp_mask, struct vm_fault *vmf)
   {
         return NULL;
   }
   
- -static inline struct page *do_swap_page_readahead(
- -      swp_entry_t fentry, gfp_t gfp_mask,
- -      struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
+ +static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
+ +                      struct vm_fault *vmf)
   {
         return NULL;
   }
diff --combined mm/Kconfig

index d5004d82a1d6d7f9b7ec8bc994bd9320f7ea8ced,b9f04213a353fb2a1158dc613770f2dcc7de3707..9bdb0189caafbdeb1d47ab703b0a657853942e58
--- 1/mm/Kconfig
--- 2/mm/Kconfig
+++ b/mm/Kconfig
@@@ -278,6 -278,13 +278,6 @@@ config BOUNC
           by default when ZONE_DMA or HIGHMEM is selected, but you
           may say n to override this.
   
- -# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often
- -# have more than 4GB of memory, but we don't currently use the IOTLB to present
- -# a 32-bit address to OHCI.  So we need to use a bounce pool instead.
- -config NEED_BOUNCE_POOL
- -      bool
- -      default y if TILE && USB_OHCI_HCD
- -
   config NR_QUICK
         int
         depends on QUICKLIST
@@@ -305,7 -312,7 +305,7 @@@ config KS
           the many instances by a single page with that content, so
           saving memory until one or another app needs to modify the content.
           Recommended for use with KVM, or with other duplicative applications.
-         See Documentation/vm/ksm.txt for more information: KSM is inactive
+         See Documentation/vm/ksm.rst for more information: KSM is inactive
           until a program has madvised that an area is MADV_MERGEABLE, and
           root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
   
@@@ -530,7 -537,7 +530,7 @@@ config MEM_SOFT_DIRT
           into a page just as regular dirty bit, but unlike the latter
           it can be cleared by hands.
   
-         See Documentation/vm/soft-dirty.txt for more details.
+         See Documentation/vm/soft-dirty.rst for more details.
   
   config ZSWAP
         bool "Compressed cache for swap pages (EXPERIMENTAL)"
@@@ -620,14 -627,15 +620,14 @@@ config GENERIC_EARLY_IOREMA
   config MAX_STACK_SIZE_MB
         int "Maximum user stack size for 32-bit processes (MB)"
         default 80
- -      range 8 256 if METAG
         range 8 2048
         depends on STACK_GROWSUP && (!64BIT || COMPAT)
         help
           This is the maximum stack size in Megabytes in the VM layout of 32-bit
           user processes when the stack grows upwards (currently only on parisc
- -        and metag arch). The stack will be located at the highest memory
- -        address minus the given value, unless the RLIMIT_STACK hard limit is
- -        changed to a smaller value in which case that is used.
+ +        arch). The stack will be located at the highest memory address minus
+ +        the given value, unless the RLIMIT_STACK hard limit is changed to a
+ +        smaller value in which case that is used.
   
           A sane initial value is 80 MB.
   
@@@ -656,7 -664,7 +656,7 @@@ config IDLE_PAGE_TRACKIN
           be useful to tune memory cgroup limits and/or for job placement
           within a compute cluster.
   
-         See Documentation/vm/idle_page_tracking.txt for more details.
+         See Documentation/vm/idle_page_tracking.rst for more details.
   
   # arch_add_memory() comprehends device memory
   config ARCH_HAS_ZONE_DEVICE
diff --combined mm/hmm.c

index 486dc394a5a3cd1fe226e215717631619c8a4195,af176c6820cf4e4b29647617ac90afae1a05fdef..e63e353830e8f6bb32e9600fcbbd5b5e4c3be438
--- 1/mm/hmm.c
--- 2/mm/hmm.c
+++ b/mm/hmm.c
@@@ -37,7 -37,7 +37,7 @@@
   
   #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
   /*
-  * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
+  * Device private memory see HMM (Documentation/vm/hmm.rst) or hmm.h
    */
   DEFINE_STATIC_KEY_FALSE(device_private_key);
   EXPORT_SYMBOL(device_private_key);
@@@ -160,32 -160,6 +160,32 @@@ static void hmm_invalidate_range(struc
         up_read(&hmm->mirrors_sem);
   }
   
+ +static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+ +{
+ +      struct hmm_mirror *mirror;
+ +      struct hmm *hmm = mm->hmm;
+ +
+ +      down_write(&hmm->mirrors_sem);
+ +      mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
+ +                                        list);
+ +      while (mirror) {
+ +              list_del_init(&mirror->list);
+ +              if (mirror->ops->release) {
+ +                      /*
+ +                       * Drop mirrors_sem so callback can wait on any pending
+ +                       * work that might itself trigger mmu_notifier callback
+ +                       * and thus would deadlock with us.
+ +                       */
+ +                      up_write(&hmm->mirrors_sem);
+ +                      mirror->ops->release(mirror);
+ +                      down_write(&hmm->mirrors_sem);
+ +              }
+ +              mirror = list_first_entry_or_null(&hmm->mirrors,
+ +                                                struct hmm_mirror, list);
+ +      }
+ +      up_write(&hmm->mirrors_sem);
+ +}
+ +
   static void hmm_invalidate_range_start(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start,
@@@ -211,7 -185,6 +211,7 @@@ static void hmm_invalidate_range_end(st
   }
   
   static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+ +      .release                = hmm_release,
         .invalidate_range_start = hmm_invalidate_range_start,
         .invalidate_range_end   = hmm_invalidate_range_end,
   };
@@@ -233,24 -206,13 +233,24 @@@ int hmm_mirror_register(struct hmm_mirr
         if (!mm || !mirror || !mirror->ops)
                 return -EINVAL;
   
+ +again:
         mirror->hmm = hmm_register(mm);
         if (!mirror->hmm)
                 return -ENOMEM;
   
         down_write(&mirror->hmm->mirrors_sem);
- -      list_add(&mirror->list, &mirror->hmm->mirrors);
- -      up_write(&mirror->hmm->mirrors_sem);
+ +      if (mirror->hmm->mm == NULL) {
+ +              /*
+ +               * A racing hmm_mirror_unregister() is about to destroy the hmm
+ +               * struct. Try again to allocate a new one.
+ +               */
+ +              up_write(&mirror->hmm->mirrors_sem);
+ +              mirror->hmm = NULL;
+ +              goto again;
+ +      } else {
+ +              list_add(&mirror->list, &mirror->hmm->mirrors);
+ +              up_write(&mirror->hmm->mirrors_sem);
+ +      }
   
         return 0;
   }
@@@ -265,32 -227,11 +265,32 @@@ EXPORT_SYMBOL(hmm_mirror_register)
    */
   void hmm_mirror_unregister(struct hmm_mirror *mirror)
   {
- -      struct hmm *hmm = mirror->hmm;
+ +      bool should_unregister = false;
+ +      struct mm_struct *mm;
+ +      struct hmm *hmm;
   
+ +      if (mirror->hmm == NULL)
+ +              return;
+ +
+ +      hmm = mirror->hmm;
         down_write(&hmm->mirrors_sem);
- -      list_del(&mirror->list);
+ +      list_del_init(&mirror->list);
+ +      should_unregister = list_empty(&hmm->mirrors);
+ +      mirror->hmm = NULL;
+ +      mm = hmm->mm;
+ +      hmm->mm = NULL;
         up_write(&hmm->mirrors_sem);
+ +
+ +      if (!should_unregister || mm == NULL)
+ +              return;
+ +
+ +      spin_lock(&mm->page_table_lock);
+ +      if (mm->hmm == hmm)
+ +              mm->hmm = NULL;
+ +      spin_unlock(&mm->page_table_lock);
+ +
+ +      mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+ +      kfree(hmm);
   }
   EXPORT_SYMBOL(hmm_mirror_unregister);
   
@@@ -299,275 -240,110 +299,275 @@@ struct hmm_vma_walk 
         unsigned long           last;
         bool                    fault;
         bool                    block;
- -      bool                    write;
   };
   
- -static int hmm_vma_do_fault(struct mm_walk *walk,
- -                          unsigned long addr,
- -                          hmm_pfn_t *pfn)
+ +static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
+ +                          bool write_fault, uint64_t *pfn)
   {
         unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
         struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ +      struct hmm_range *range = hmm_vma_walk->range;
         struct vm_area_struct *vma = walk->vma;
         int r;
   
         flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
- -      flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+ +      flags |= write_fault ? FAULT_FLAG_WRITE : 0;
         r = handle_mm_fault(vma, addr, flags);
         if (r & VM_FAULT_RETRY)
                 return -EBUSY;
         if (r & VM_FAULT_ERROR) {
- -              *pfn = HMM_PFN_ERROR;
+ +              *pfn = range->values[HMM_PFN_ERROR];
                 return -EFAULT;
         }
   
         return -EAGAIN;
   }
   
- -static void hmm_pfns_special(hmm_pfn_t *pfns,
- -                           unsigned long addr,
- -                           unsigned long end)
- -{
- -      for (; addr < end; addr += PAGE_SIZE, pfns++)
- -              *pfns = HMM_PFN_SPECIAL;
- -}
- -
   static int hmm_pfns_bad(unsigned long addr,
                         unsigned long end,
                         struct mm_walk *walk)
   {
- -      struct hmm_range *range = walk->private;
- -      hmm_pfn_t *pfns = range->pfns;
+ +      struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ +      struct hmm_range *range = hmm_vma_walk->range;
+ +      uint64_t *pfns = range->pfns;
         unsigned long i;
   
         i = (addr - range->start) >> PAGE_SHIFT;
         for (; addr < end; addr += PAGE_SIZE, i++)
- -              pfns[i] = HMM_PFN_ERROR;
+ +              pfns[i] = range->values[HMM_PFN_ERROR];
   
         return 0;
   }
   
- -static void hmm_pfns_clear(hmm_pfn_t *pfns,
- -                         unsigned long addr,
- -                         unsigned long end)
- -{
- -      for (; addr < end; addr += PAGE_SIZE, pfns++)
- -              *pfns = 0;
- -}
- -
- -static int hmm_vma_walk_hole(unsigned long addr,
- -                           unsigned long end,
- -                           struct mm_walk *walk)
+ +/*
+ + * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
+ + * @addr: range virtual start address (inclusive)
+ + * @end: range virtual end address (exclusive)
+ + * @fault: should we fault or not ?
+ + * @write_fault: write fault ?
+ + * @walk: mm_walk structure
+ + * Returns: 0 on success, -EAGAIN after page fault, or page fault error
+ + *
+ + * This function will be called whenever pmd_none() or pte_none() returns true,
+ + * or whenever there is no page directory covering the virtual address range.
+ + */
+ +static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
+ +                            bool fault, bool write_fault,
+ +                            struct mm_walk *walk)
   {
         struct hmm_vma_walk *hmm_vma_walk = walk->private;
         struct hmm_range *range = hmm_vma_walk->range;
- -      hmm_pfn_t *pfns = range->pfns;
+ +      uint64_t *pfns = range->pfns;
         unsigned long i;
   
         hmm_vma_walk->last = addr;
         i = (addr - range->start) >> PAGE_SHIFT;
         for (; addr < end; addr += PAGE_SIZE, i++) {
- -              pfns[i] = HMM_PFN_EMPTY;
- -              if (hmm_vma_walk->fault) {
+ +              pfns[i] = range->values[HMM_PFN_NONE];
+ +              if (fault || write_fault) {
                         int ret;
   
- -                      ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+ +                      ret = hmm_vma_do_fault(walk, addr, write_fault,
+ +                                             &pfns[i]);
                         if (ret != -EAGAIN)
                                 return ret;
                 }
         }
   
- -      return hmm_vma_walk->fault ? -EAGAIN : 0;
+ +      return (fault || write_fault) ? -EAGAIN : 0;
   }
   
- -static int hmm_vma_walk_clear(unsigned long addr,
- -                            unsigned long end,
- -                            struct mm_walk *walk)
+ +static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+ +                                    uint64_t pfns, uint64_t cpu_flags,
+ +                                    bool *fault, bool *write_fault)
   {
- -      struct hmm_vma_walk *hmm_vma_walk = walk->private;
         struct hmm_range *range = hmm_vma_walk->range;
- -      hmm_pfn_t *pfns = range->pfns;
+ +
+ +      *fault = *write_fault = false;
+ +      if (!hmm_vma_walk->fault)
+ +              return;
+ +
+ +      /* We aren't asked to do anything ... */
+ +      if (!(pfns & range->flags[HMM_PFN_VALID]))
+ +              return;
+ +      /* If this is device memory then only fault if explicitly requested */
+ +      if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
+ +              /* Do we fault on device memory ? */
+ +              if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
+ +                      *write_fault = pfns & range->flags[HMM_PFN_WRITE];
+ +                      *fault = true;
+ +              }
+ +              return;
+ +      }
+ +
+ +      /* If CPU page table is not valid then we need to fault */
+ +      *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
+ +      /* Need to write fault ? */
+ +      if ((pfns & range->flags[HMM_PFN_WRITE]) &&
+ +          !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
+ +              *write_fault = true;
+ +              *fault = true;
+ +      }
+ +}
+ +
+ +static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+ +                               const uint64_t *pfns, unsigned long npages,
+ +                               uint64_t cpu_flags, bool *fault,
+ +                               bool *write_fault)
+ +{
         unsigned long i;
   
- -      hmm_vma_walk->last = addr;
+ +      if (!hmm_vma_walk->fault) {
+ +              *fault = *write_fault = false;
+ +              return;
+ +      }
+ +
+ +      for (i = 0; i < npages; ++i) {
+ +              hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
+ +                                 fault, write_fault);
+ +              if ((*fault) || (*write_fault))
+ +                      return;
+ +      }
+ +}
+ +
+ +static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
+ +                           struct mm_walk *walk)
+ +{
+ +      struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ +      struct hmm_range *range = hmm_vma_walk->range;
+ +      bool fault, write_fault;
+ +      unsigned long i, npages;
+ +      uint64_t *pfns;
+ +
         i = (addr - range->start) >> PAGE_SHIFT;
- -      for (; addr < end; addr += PAGE_SIZE, i++) {
- -              pfns[i] = 0;
- -              if (hmm_vma_walk->fault) {
- -                      int ret;
+ +      npages = (end - addr) >> PAGE_SHIFT;
+ +      pfns = &range->pfns[i];
+ +      hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+ +                           0, &fault, &write_fault);
+ +      return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+ +}
   
- -                      ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
- -                      if (ret != -EAGAIN)
- -                              return ret;
+ +static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+ +{
+ +      if (pmd_protnone(pmd))
+ +              return 0;
+ +      return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
+ +                              range->flags[HMM_PFN_WRITE] :
+ +                              range->flags[HMM_PFN_VALID];
+ +}
+ +
+ +static int hmm_vma_handle_pmd(struct mm_walk *walk,
+ +                            unsigned long addr,
+ +                            unsigned long end,
+ +                            uint64_t *pfns,
+ +                            pmd_t pmd)
+ +{
+ +      struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ +      struct hmm_range *range = hmm_vma_walk->range;
+ +      unsigned long pfn, npages, i;
+ +      bool fault, write_fault;
+ +      uint64_t cpu_flags;
+ +
+ +      npages = (end - addr) >> PAGE_SHIFT;
+ +      cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
+ +      hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
+ +                           &fault, &write_fault);
+ +
+ +      if (pmd_protnone(pmd) || fault || write_fault)
+ +              return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+ +
+ +      pfn = pmd_pfn(pmd) + pte_index(addr);
+ +      for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
+ +              pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+ +      hmm_vma_walk->last = end;
+ +      return 0;
+ +}
+ +
+ +static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+ +{
+ +      if (pte_none(pte) || !pte_present(pte))
+ +              return 0;
+ +      return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
+ +                              range->flags[HMM_PFN_WRITE] :
+ +                              range->flags[HMM_PFN_VALID];
+ +}
+ +
+ +static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+ +                            unsigned long end, pmd_t *pmdp, pte_t *ptep,
+ +                            uint64_t *pfn)
+ +{
+ +      struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ +      struct hmm_range *range = hmm_vma_walk->range;
+ +      struct vm_area_struct *vma = walk->vma;
+ +      bool fault, write_fault;
+ +      uint64_t cpu_flags;
+ +      pte_t pte = *ptep;
+ +      uint64_t orig_pfn = *pfn;
+ +
+ +      *pfn = range->values[HMM_PFN_NONE];
+ +      cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+ +      hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+ +                         &fault, &write_fault);
+ +
+ +      if (pte_none(pte)) {
+ +              if (fault || write_fault)
+ +                      goto fault;
+ +              return 0;
+ +      }
+ +
+ +      if (!pte_present(pte)) {
+ +              swp_entry_t entry = pte_to_swp_entry(pte);
+ +
+ +              if (!non_swap_entry(entry)) {
+ +                      if (fault || write_fault)
+ +                              goto fault;
+ +                      return 0;
+ +              }
+ +
+ +              /*
+ +               * This is a special swap entry, ignore migration, use
+ +               * device and report anything else as error.
+ +               */
+ +              if (is_device_private_entry(entry)) {
+ +                      cpu_flags = range->flags[HMM_PFN_VALID] |
+ +                              range->flags[HMM_PFN_DEVICE_PRIVATE];
+ +                      cpu_flags |= is_write_device_private_entry(entry) ?
+ +                              range->flags[HMM_PFN_WRITE] : 0;
+ +                      hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+ +                                         &fault, &write_fault);
+ +                      if (fault || write_fault)
+ +                              goto fault;
+ +                      *pfn = hmm_pfn_from_pfn(range, swp_offset(entry));
+ +                      *pfn |= cpu_flags;
+ +                      return 0;
+ +              }
+ +
+ +              if (is_migration_entry(entry)) {
+ +                      if (fault || write_fault) {
+ +                              pte_unmap(ptep);
+ +                              hmm_vma_walk->last = addr;
+ +                              migration_entry_wait(vma->vm_mm,
+ +                                                   pmdp, addr);
+ +                              return -EAGAIN;
+ +                      }
+ +                      return 0;
                 }
+ +
+ +              /* Report error for everything else */
+ +              *pfn = range->values[HMM_PFN_ERROR];
+ +              return -EFAULT;
         }
   
- -      return hmm_vma_walk->fault ? -EAGAIN : 0;
+ +      if (fault || write_fault)
+ +              goto fault;
+ +
+ +      *pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+ +      return 0;
+ +
+ +fault:
+ +      pte_unmap(ptep);
+ +      /* Fault any virtual address we were asked to fault */
+ +      return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
   }
   
   static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@@ -577,20 -353,26 +577,20 @@@
   {
         struct hmm_vma_walk *hmm_vma_walk = walk->private;
         struct hmm_range *range = hmm_vma_walk->range;
- -      struct vm_area_struct *vma = walk->vma;
- -      hmm_pfn_t *pfns = range->pfns;
+ +      uint64_t *pfns = range->pfns;
         unsigned long addr = start, i;
- -      bool write_fault;
- -      hmm_pfn_t flag;
         pte_t *ptep;
   
         i = (addr - range->start) >> PAGE_SHIFT;
- -      flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
- -      write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
   
   again:
         if (pmd_none(*pmdp))
                 return hmm_vma_walk_hole(start, end, walk);
   
- -      if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB)
+ +      if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
                 return hmm_pfns_bad(start, end, walk);
   
         if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
- -              unsigned long pfn;
                 pmd_t pmd;
   
                 /*
@@@ -606,8 -388,17 +606,8 @@@
                 barrier();
                 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
                         goto again;
- -              if (pmd_protnone(pmd))
- -                      return hmm_vma_walk_clear(start, end, walk);
- -
- -              if (write_fault && !pmd_write(pmd))
- -                      return hmm_vma_walk_clear(start, end, walk);
   
- -              pfn = pmd_pfn(pmd) + pte_index(addr);
- -              flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
- -              for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
- -                      pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
- -              return 0;
+ +              return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
         }
   
         if (pmd_bad(*pmdp))
@@@ -615,43 -406,79 +615,43 @@@
   
         ptep = pte_offset_map(pmdp, addr);
         for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
- -              pte_t pte = *ptep;
- -
- -              pfns[i] = 0;
- -
- -              if (pte_none(pte)) {
- -                      pfns[i] = HMM_PFN_EMPTY;
- -                      if (hmm_vma_walk->fault)
- -                              goto fault;
- -                      continue;
- -              }
- -
- -              if (!pte_present(pte)) {
- -                      swp_entry_t entry = pte_to_swp_entry(pte);
- -
- -                      if (!non_swap_entry(entry)) {
- -                              if (hmm_vma_walk->fault)
- -                                      goto fault;
- -                              continue;
- -                      }
+ +              int r;
   
- -                      /*
- -                       * This is a special swap entry, ignore migration, use
- -                       * device and report anything else as error.
- -                       */
- -                      if (is_device_private_entry(entry)) {
- -                              pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry));
- -                              if (is_write_device_private_entry(entry)) {
- -                                      pfns[i] |= HMM_PFN_WRITE;
- -                              } else if (write_fault)
- -                                      goto fault;
- -                              pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
- -                              pfns[i] |= flag;
- -                      } else if (is_migration_entry(entry)) {
- -                              if (hmm_vma_walk->fault) {
- -                                      pte_unmap(ptep);
- -                                      hmm_vma_walk->last = addr;
- -                                      migration_entry_wait(vma->vm_mm,
- -                                                           pmdp, addr);
- -                                      return -EAGAIN;
- -                              }
- -                              continue;
- -                      } else {
- -                              /* Report error for everything else */
- -                              pfns[i] = HMM_PFN_ERROR;
- -                      }
- -                      continue;
+ +              r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+ +              if (r) {
+ +                      /* hmm_vma_handle_pte() did unmap pte directory */
+ +                      hmm_vma_walk->last = addr;
+ +                      return r;
                 }
- -
- -              if (write_fault && !pte_write(pte))
- -                      goto fault;
- -
- -              pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
- -              pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
- -              continue;
- -
- -fault:
- -              pte_unmap(ptep);
- -              /* Fault all pages in range */
- -              return hmm_vma_walk_clear(start, end, walk);
         }
         pte_unmap(ptep - 1);
   
+ +      hmm_vma_walk->last = addr;
         return 0;
   }
   
+ +static void hmm_pfns_clear(struct hmm_range *range,
+ +                         uint64_t *pfns,
+ +                         unsigned long addr,
+ +                         unsigned long end)
+ +{
+ +      for (; addr < end; addr += PAGE_SIZE, pfns++)
+ +              *pfns = range->values[HMM_PFN_NONE];
+ +}
+ +
+ +static void hmm_pfns_special(struct hmm_range *range)
+ +{
+ +      unsigned long addr = range->start, i = 0;
+ +
+ +      for (; addr < range->end; addr += PAGE_SIZE, i++)
+ +              range->pfns[i] = range->values[HMM_PFN_SPECIAL];
+ +}
+ +
   /*
    * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
- - * @vma: virtual memory area containing the virtual address range
- - * @range: used to track snapshot validity
- - * @start: range virtual start address (inclusive)
- - * @end: range virtual end address (exclusive)
- - * @entries: array of hmm_pfn_t: provided by the caller, filled in by function
- - * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ + * @range: range being snapshotted
+ + * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
+ + *          vma permission, 0 success
    *
    * This snapshots the CPU page table for a range of virtual addresses. Snapshot
    * validity is tracked by range struct. See hmm_vma_range_done() for further
@@@ -664,17 -491,26 +664,17 @@@
    * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
    * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
    */
- -int hmm_vma_get_pfns(struct vm_area_struct *vma,
- -                   struct hmm_range *range,
- -                   unsigned long start,
- -                   unsigned long end,
- -                   hmm_pfn_t *pfns)
+ +int hmm_vma_get_pfns(struct hmm_range *range)
   {
+ +      struct vm_area_struct *vma = range->vma;
         struct hmm_vma_walk hmm_vma_walk;
         struct mm_walk mm_walk;
         struct hmm *hmm;
   
- -      /* FIXME support hugetlb fs */
- -      if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
- -              hmm_pfns_special(pfns, start, end);
- -              return -EINVAL;
- -      }
- -
         /* Sanity check, this really should not happen ! */
- -      if (start < vma->vm_start || start >= vma->vm_end)
+ +      if (range->start < vma->vm_start || range->start >= vma->vm_end)
                 return -EINVAL;
- -      if (end < vma->vm_start || end > vma->vm_end)
+ +      if (range->end < vma->vm_start || range->end > vma->vm_end)
                 return -EINVAL;
   
         hmm = hmm_register(vma->vm_mm);
@@@ -684,24 -520,10 +684,24 @@@
         if (!hmm->mmu_notifier.ops)
                 return -EINVAL;
   
+ +      /* FIXME support hugetlb fs */
+ +      if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+ +              hmm_pfns_special(range);
+ +              return -EINVAL;
+ +      }
+ +
+ +      if (!(vma->vm_flags & VM_READ)) {
+ +              /*
+ +               * If the vma does not allow read access, then assume that it
+ +               * does not allow write access either. Architectures that allow
+ +               * write without read access are not supported by HMM, because
+ +               * operations such as atomic access would not work.
+ +               */
+ +              hmm_pfns_clear(range, range->pfns, range->start, range->end);
+ +              return -EPERM;
+ +      }
+ +
         /* Initialize range to track CPU page table update */
- -      range->start = start;
- -      range->pfns = pfns;
- -      range->end = end;
         spin_lock(&hmm->lock);
         range->valid = true;
         list_add_rcu(&range->list, &hmm->ranges);
@@@ -719,13 -541,14 +719,13 @@@
         mm_walk.pmd_entry = hmm_vma_walk_pmd;
         mm_walk.pte_hole = hmm_vma_walk_hole;
   
- -      walk_page_range(start, end, &mm_walk);
+ +      walk_page_range(range->start, range->end, &mm_walk);
         return 0;
   }
   EXPORT_SYMBOL(hmm_vma_get_pfns);
   
   /*
    * hmm_vma_range_done() - stop tracking change to CPU page table over a range
- - * @vma: virtual memory area containing the virtual address range
    * @range: range being tracked
    * Returns: false if range data has been invalidated, true otherwise
    *
@@@ -745,10 -568,10 +745,10 @@@
    *
    * There are two ways to use this :
    * again:
- - *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ + *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
    *   trans = device_build_page_table_update_transaction(pfns);
    *   device_page_table_lock();
- - *   if (!hmm_vma_range_done(vma, range)) {
+ + *   if (!hmm_vma_range_done(range)) {
    *     device_page_table_unlock();
    *     goto again;
    *   }
@@@ -756,13 -579,13 +756,13 @@@
    *   device_page_table_unlock();
    *
    * Or:
- - *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ + *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
    *   device_page_table_lock();
- - *   hmm_vma_range_done(vma, range);
- - *   device_update_page_table(pfns);
+ + *   hmm_vma_range_done(range);
+ + *   device_update_page_table(range->pfns);
    *   device_page_table_unlock();
    */
- -bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
+ +bool hmm_vma_range_done(struct hmm_range *range)
   {
         unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
         struct hmm *hmm;
@@@ -772,7 -595,7 +772,7 @@@
                 return false;
         }
   
- -      hmm = hmm_register(vma->vm_mm);
+ +      hmm = hmm_register(range->vma->vm_mm);
         if (!hmm) {
                 memset(range->pfns, 0, sizeof(*range->pfns) * npages);
                 return false;
@@@ -788,34 -611,36 +788,34 @@@ EXPORT_SYMBOL(hmm_vma_range_done)
   
   /*
    * hmm_vma_fault() - try to fault some address in a virtual address range
- - * @vma: virtual memory area containing the virtual address range
- - * @range: use to track pfns array content validity
- - * @start: fault range virtual start address (inclusive)
- - * @end: fault range virtual end address (exclusive)
- - * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted
- - * @write: is it a write fault
+ + * @range: range being faulted
    * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
    * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
    *
    * This is similar to a regular CPU page fault except that it will not trigger
    * any memory migration if the memory being faulted is not accessible by CPUs.
    *
- - * On error, for one virtual address in the range, the function will set the
- - * hmm_pfn_t error flag for the corresponding pfn entry.
+ + * On error, for one virtual address in the range, the function will mark the
+ + * corresponding HMM pfn entry with an error flag.
    *
    * Expected use pattern:
    * retry:
    *   down_read(&mm->mmap_sem);
    *   // Find vma and address device wants to fault, initialize hmm_pfn_t
    *   // array accordingly
- - *   ret = hmm_vma_fault(vma, start, end, pfns, allow_retry);
+ + *   ret = hmm_vma_fault(range, block);
    *   switch (ret) {
    *   case -EAGAIN:
- - *     hmm_vma_range_done(vma, range);
+ + *     hmm_vma_range_done(range);
    *     // You might want to rate limit or yield to play nicely, you may
    *     // also commit any valid pfn in the array assuming that you are
    *     // getting true from hmm_vma_range_monitor_end()
    *     goto retry;
    *   case 0:
    *     break;
+ + *   case -ENOMEM:
+ + *   case -EINVAL:
+ + *   case -EPERM:
    *   default:
    *     // Handle error !
    *     up_read(&mm->mmap_sem)
@@@ -823,7 -648,7 +823,7 @@@
    *   }
    *   // Take device driver lock that serialize device page table update
    *   driver_lock_device_page_table_update();
- - *   hmm_vma_range_done(vma, range);
+ + *   hmm_vma_range_done(range);
    *   // Commit pfns we got from hmm_vma_fault()
    *   driver_unlock_device_page_table_update();
    *   up_read(&mm->mmap_sem)
@@@ -833,54 -658,51 +833,54 @@@
    *
    * YOU HAVE BEEN WARNED !
    */
- -int hmm_vma_fault(struct vm_area_struct *vma,
- -                struct hmm_range *range,
- -                unsigned long start,
- -                unsigned long end,
- -                hmm_pfn_t *pfns,
- -                bool write,
- -                bool block)
+ +int hmm_vma_fault(struct hmm_range *range, bool block)
   {
+ +      struct vm_area_struct *vma = range->vma;
+ +      unsigned long start = range->start;
         struct hmm_vma_walk hmm_vma_walk;
         struct mm_walk mm_walk;
         struct hmm *hmm;
         int ret;
   
         /* Sanity check, this really should not happen ! */
- -      if (start < vma->vm_start || start >= vma->vm_end)
+ +      if (range->start < vma->vm_start || range->start >= vma->vm_end)
                 return -EINVAL;
- -      if (end < vma->vm_start || end > vma->vm_end)
+ +      if (range->end < vma->vm_start || range->end > vma->vm_end)
                 return -EINVAL;
   
         hmm = hmm_register(vma->vm_mm);
         if (!hmm) {
- -              hmm_pfns_clear(pfns, start, end);
+ +              hmm_pfns_clear(range, range->pfns, range->start, range->end);
                 return -ENOMEM;
         }
         /* Caller must have registered a mirror using hmm_mirror_register() */
         if (!hmm->mmu_notifier.ops)
                 return -EINVAL;
   
+ +      /* FIXME support hugetlb fs */
+ +      if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+ +              hmm_pfns_special(range);
+ +              return -EINVAL;
+ +      }
+ +
+ +      if (!(vma->vm_flags & VM_READ)) {
+ +              /*
+ +               * If the vma does not allow read access, then assume that it
+ +               * does not allow write access either. Architectures that allow
+ +               * write without read access are not supported by HMM, because
+ +               * operations such as atomic access would not work.
+ +               */
+ +              hmm_pfns_clear(range, range->pfns, range->start, range->end);
+ +              return -EPERM;
+ +      }
+ +
         /* Initialize range to track CPU page table update */
- -      range->start = start;
- -      range->pfns = pfns;
- -      range->end = end;
         spin_lock(&hmm->lock);
         range->valid = true;
         list_add_rcu(&range->list, &hmm->ranges);
         spin_unlock(&hmm->lock);
   
- -      /* FIXME support hugetlb fs */
- -      if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
- -              hmm_pfns_special(pfns, start, end);
- -              return 0;
- -      }
- -
         hmm_vma_walk.fault = true;
- -      hmm_vma_walk.write = write;
         hmm_vma_walk.block = block;
         hmm_vma_walk.range = range;
         mm_walk.private = &hmm_vma_walk;
@@@ -895,7 -717,7 +895,7 @@@
         mm_walk.pte_hole = hmm_vma_walk_hole;
   
         do {
- -              ret = walk_page_range(start, end, &mm_walk);
+ +              ret = walk_page_range(start, range->end, &mm_walk);
                 start = hmm_vma_walk.last;
         } while (ret == -EAGAIN);
   
@@@ -903,9 -725,8 +903,9 @@@
                 unsigned long i;
   
                 i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
- -              hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end);
- -              hmm_vma_range_done(vma, range);
+ +              hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
+ +                             range->end);
+ +              hmm_vma_range_done(range);
         }
         return ret;
   }
@@@ -1024,6 -845,13 +1024,6 @@@ static void hmm_devmem_release(struct d
         hmm_devmem_radix_release(resource);
   }
   
- -static struct hmm_devmem *hmm_devmem_find(resource_size_t phys)
- -{
- -      WARN_ON_ONCE(!rcu_read_lock_held());
- -
- -      return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT);
- -}
- -
   static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
   {
         resource_size_t key, align_start, align_size, align_end;
@@@ -1064,8 -892,9 +1064,8 @@@
         for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) {
                 struct hmm_devmem *dup;
   
- -              rcu_read_lock();
- -              dup = hmm_devmem_find(key);
- -              rcu_read_unlock();
+ +              dup = radix_tree_lookup(&hmm_devmem_radix,
+ +                                      key >> PA_SECTION_SHIFT);
                 if (dup) {
                         dev_err(device, "%s: collides with mapping for %s\n",
                                 __func__, dev_name(dup->device));
diff --combined mm/huge_memory.c

index 14ed6ee5e02fc8bc6acc767de9e42ed464ce5675,6d5911673450936d681f5ff0880b350191bbcb05..3e8cda7beb7f1bc267c214678af6bf482c7f49fd
--- 1/mm/huge_memory.c
--- 2/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@@ -1185,7 -1185,7 +1185,7 @@@ static int do_huge_pmd_wp_page_fallback
          * mmu_notifier_invalidate_range_end() happens which can lead to a
          * device seeing memory write in different order than CPU.
          *
-        * See Documentation/vm/mmu_notifier.txt
+        * See Documentation/vm/mmu_notifier.rst
          */
         pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
   
@@@ -2037,7 -2037,7 +2037,7 @@@ static void __split_huge_zero_page_pmd(
          * replacing a zero pmd write protected page with a zero pte write
          * protected page.
          *
-        * See Documentation/vm/mmu_notifier.txt
+        * See Documentation/vm/mmu_notifier.rst
          */
         pmdp_huge_clear_flush(vma, haddr, pmd);
   
@@@ -2355,13 -2355,26 +2355,13 @@@ static void __split_huge_page_tail(stru
         struct page *page_tail = head + tail;
   
         VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
- -      VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
   
         /*
- -       * tail_page->_refcount is zero and not changing from under us. But
- -       * get_page_unless_zero() may be running from under us on the
- -       * tail_page. If we used atomic_set() below instead of atomic_inc() or
- -       * atomic_add(), we would then run atomic_set() concurrently with
- -       * get_page_unless_zero(), and atomic_set() is implemented in C not
- -       * using locked ops. spin_unlock on x86 sometime uses locked ops
- -       * because of PPro errata 66, 92, so unless somebody can guarantee
- -       * atomic_set() here would be safe on all archs (and not only on x86),
- -       * it's safer to use atomic_inc()/atomic_add().
+ +       * Clone page flags before unfreezing refcount.
+ +       *
+ +       * A successful get_page_unless_zero() may be followed by a flags
+ +       * change, for example lock_page(), which sets PG_waiters.
          */
- -      if (PageAnon(head) && !PageSwapCache(head)) {
- -              page_ref_inc(page_tail);
- -      } else {
- -              /* Additional pin to radix tree */
- -              page_ref_add(page_tail, 2);
- -      }
- -
         page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
         page_tail->flags |= (head->flags &
                         ((1L << PG_referenced) |
@@@ -2374,21 -2387,14 +2374,21 @@@
                          (1L << PG_unevictable) |
                          (1L << PG_dirty)));
   
- -      /*
- -       * After clearing PageTail the gup refcount can be released.
- -       * Page flags also must be visible before we make the page non-compound.
- -       */
+ +      /* Page flags must be visible before we make the page non-compound. */
         smp_wmb();
   
+ +      /*
+ +       * Clear PageTail before unfreezing page refcount.
+ +       *
+ +       * A successful get_page_unless_zero() may be followed by put_page(),
+ +       * which needs a correct compound_head().
+ +       */
         clear_compound_head(page_tail);
   
+ +      /* Finally unfreeze refcount. Additional reference from page cache. */
+ +      page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
+ +                                        PageSwapCache(head)));
+ +
         if (page_is_young(head))
                 set_page_young(page_tail);
         if (page_is_idle(head))
@@@ -2401,12 -2407,6 +2401,12 @@@
   
         page_tail->index = head->index + tail;
         page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+ +
+ +      /*
+ +       * always add to the tail because some iterators expect new
+ +       * pages to show after the currently processed elements - e.g.
+ +       * migrate_pages
+ +       */
         lru_add_page_tail(head, page_tail, lruvec, list);
   }
   
@@@ -2450,7 -2450,7 +2450,7 @@@ static void __split_huge_page(struct pa
         } else {
                 /* Additional pin to radix tree */
                 page_ref_add(head, 2);
- -              spin_unlock(&head->mapping->tree_lock);
+ +              xa_unlock(&head->mapping->i_pages);
         }
   
         spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
@@@ -2658,15 -2658,15 +2658,15 @@@ int split_huge_page_to_list(struct pag
         if (mapping) {
                 void **pslot;
   
- -              spin_lock(&mapping->tree_lock);
- -              pslot = radix_tree_lookup_slot(&mapping->page_tree,
+ +              xa_lock(&mapping->i_pages);
+ +              pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                 page_index(head));
                 /*
                  * Check if the head page is present in radix tree.
                  * We assume all tail are present too, if head is there.
                  */
                 if (radix_tree_deref_slot_protected(pslot,
- -                                      &mapping->tree_lock) != head)
+ +                                      &mapping->i_pages.xa_lock) != head)
                         goto fail;
         }
   
@@@ -2700,7 -2700,7 +2700,7 @@@
                 }
                 spin_unlock(&pgdata->split_queue_lock);
   fail:         if (mapping)
- -                      spin_unlock(&mapping->tree_lock);
+ +                      xa_unlock(&mapping->i_pages);
                 spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
                 unfreeze_page(head);
                 ret = -EBUSY;
@@@ -2783,13 -2783,11 +2783,13 @@@ static unsigned long deferred_split_sca
   
         list_for_each_safe(pos, next, &list) {
                 page = list_entry((void *)pos, struct page, mapping);
- -              lock_page(page);
+ +              if (!trylock_page(page))
+ +                      goto next;
                 /* split_huge_page() removes page from list on success */
                 if (!split_huge_page(page))
                         split++;
                 unlock_page(page);
+ +next:
                 put_page(page);
         }
   
diff --combined mm/hugetlb.c

index 218679138255d696e6f1129016337d5220d0ad19,5af974abae460a2e22357311cf2f81a6c7209ae7..129088710510048ae0f48f41bebb79857bdce7e1
--- 1/mm/hugetlb.c
--- 2/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@@ -18,7 -18,6 +18,7 @@@
   #include <linux/bootmem.h>
   #include <linux/sysfs.h>
   #include <linux/slab.h>
+ +#include <linux/mmdebug.h>
   #include <linux/sched/signal.h>
   #include <linux/rmap.h>
   #include <linux/string_helpers.h>
@@@ -637,22 -636,29 +637,22 @@@ EXPORT_SYMBOL_GPL(linear_hugepage_index
    */
   unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
   {
- -      struct hstate *hstate;
- -
- -      if (!is_vm_hugetlb_page(vma))
- -              return PAGE_SIZE;
- -
- -      hstate = hstate_vma(vma);
- -
- -      return 1UL << huge_page_shift(hstate);
+ +      if (vma->vm_ops && vma->vm_ops->pagesize)
+ +              return vma->vm_ops->pagesize(vma);
+ +      return PAGE_SIZE;
   }
   EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
   
   /*
    * Return the page size being used by the MMU to back a VMA. In the majority
    * of cases, the page size used by the kernel matches the MMU size. On
- - * architectures where it differs, an architecture-specific version of this
- - * function is required.
+ + * architectures where it differs, an architecture-specific 'strong'
+ + * version of this symbol is required.
    */
- -#ifndef vma_mmu_pagesize
- -unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+ +__weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
   {
         return vma_kernel_pagesize(vma);
   }
- -#endif
   
   /*
    * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
@@@ -1577,7 -1583,7 +1577,7 @@@ static struct page *alloc_surplus_huge_
                 page = NULL;
         } else {
                 h->surplus_huge_pages++;
- -              h->nr_huge_pages_node[page_to_nid(page)]++;
+ +              h->surplus_huge_pages_node[page_to_nid(page)]++;
         }
   
   out_unlock:
@@@ -3146,13 -3152,6 +3146,13 @@@ static int hugetlb_vm_op_split(struct v
         return 0;
   }
   
+ +static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
+ +{
+ +      struct hstate *hstate = hstate_vma(vma);
+ +
+ +      return 1UL << huge_page_shift(hstate);
+ +}
+ +
   /*
    * We cannot handle pagefaults against hugetlb pages at all.  They cause
    * handle_mm_fault() to try to instantiate regular-sized pages in the
@@@ -3170,7 -3169,6 +3170,7 @@@ const struct vm_operations_struct huget
         .open = hugetlb_vm_op_open,
         .close = hugetlb_vm_op_close,
         .split = hugetlb_vm_op_split,
+ +      .pagesize = hugetlb_vm_op_pagesize,
   };
   
   static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@@ -3291,7 -3289,7 +3291,7 @@@ int copy_hugetlb_page_range(struct mm_s
                                  * table protection not changing it to point
                                  * to a new page.
                                  *
-                                * See Documentation/vm/mmu_notifier.txt
+                                * See Documentation/vm/mmu_notifier.rst
                                  */
                                 huge_ptep_set_wrprotect(src, addr, src_pte);
                         }
@@@ -4357,7 -4355,7 +4357,7 @@@ unsigned long hugetlb_change_protection
          * No need to call mmu_notifier_invalidate_range() we are downgrading
          * page table protection not changing it to point to a new page.
          *
-        * See Documentation/vm/mmu_notifier.txt
+        * See Documentation/vm/mmu_notifier.rst
          */
         i_mmap_unlock_write(vma->vm_file->f_mapping);
         mmu_notifier_invalidate_range_end(mm, start, end);
@@@ -4376,12 -4374,6 +4376,12 @@@ int hugetlb_reserve_pages(struct inode 
         struct resv_map *resv_map;
         long gbl_reserve;
   
+ +      /* This should never happen */
+ +      if (from > to) {
+ +              VM_WARN(1, "%s called with a negative range\n", __func__);
+ +              return -EINVAL;
+ +      }
+ +
         /*
          * Only apply hugepage reservation if asked. At fault time, an
          * attempt will be made for VM_NORESERVE to allocate a page
diff --combined mm/ksm.c

index e3cbf9a92f3cdd9519f7724f152d816cd22f3a44,0b88698a9014457f34a2465f737f0a12ddb1d203..16451a2bf712a8e62c490721dbbdcbc145bcf301
--- 1/mm/ksm.c
--- 2/mm/ksm.c
+++ b/mm/ksm.c
@@@ -1049,7 -1049,7 +1049,7 @@@ static int write_protect_page(struct vm
                  * No need to notify as we are downgrading page table to read
                  * only not changing it to point to a new page.
                  *
-                * See Documentation/vm/mmu_notifier.txt
+                * See Documentation/vm/mmu_notifier.rst
                  */
                 entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
                 /*
@@@ -1131,13 -1131,6 +1131,13 @@@ static int replace_page(struct vm_area_
         } else {
                 newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
                                                vma->vm_page_prot));
+ +              /*
+ +               * We're replacing an anonymous page with a zero page, which is
+ +               * not anonymous. We need to do proper accounting otherwise we
+ +               * will get wrong values in /proc, and a BUG message in dmesg
+ +               * when tearing down the mm.
+ +               */
+ +              dec_mm_counter(mm, MM_ANONPAGES);
         }
   
         flush_cache_page(vma, addr, pte_pfn(*ptep));
@@@ -1145,7 -1138,7 +1145,7 @@@
          * No need to notify as we are replacing a read only page with another
          * read only page with the same content.
          *
-        * See Documentation/vm/mmu_notifier.txt
+        * See Documentation/vm/mmu_notifier.rst
          */
         ptep_clear_flush(vma, addr, ptep);
         set_pte_at_notify(mm, addr, ptep, newpte);
@@@ -1325,10 -1318,10 +1325,10 @@@ bool is_page_sharing_candidate(struct s
         return __is_page_sharing_candidate(stable_node, 0);
   }
   
- -struct page *stable_node_dup(struct stable_node **_stable_node_dup,
- -                           struct stable_node **_stable_node,
- -                           struct rb_root *root,
- -                           bool prune_stale_stable_nodes)
+ +static struct page *stable_node_dup(struct stable_node **_stable_node_dup,
+ +                                  struct stable_node **_stable_node,
+ +                                  struct rb_root *root,
+ +                                  bool prune_stale_stable_nodes)
   {
         struct stable_node *dup, *found = NULL, *stable_node = *_stable_node;
         struct hlist_node *hlist_safe;
@@@ -2089,22 -2082,8 +2089,22 @@@ static void cmp_and_merge_page(struct p
         tree_rmap_item =
                 unstable_tree_search_insert(rmap_item, page, &tree_page);
         if (tree_rmap_item) {
+ +              bool split;
+ +
                 kpage = try_to_merge_two_pages(rmap_item, page,
                                                 tree_rmap_item, tree_page);
+ +              /*
+ +               * If both pages we tried to merge belong to the same compound
+ +               * page, then we actually ended up increasing the reference
+ +               * count of the same compound page twice, and split_huge_page
+ +               * failed.
+ +               * Here we set a flag if that happened, and we use it later to
+ +               * try split_huge_page again. Since we call put_page right
+ +               * afterwards, the reference count will be correct and
+ +               * split_huge_page should succeed.
+ +               */
+ +              split = PageTransCompound(page)
+ +                      && compound_head(page) == compound_head(tree_page);
                 put_page(tree_page);
                 if (kpage) {
                         /*
@@@ -2131,20 -2110,6 +2131,20 @@@
                                 break_cow(tree_rmap_item);
                                 break_cow(rmap_item);
                         }
+ +              } else if (split) {
+ +                      /*
+ +                       * We are here if we tried to merge two pages and
+ +                       * failed because they both belonged to the same
+ +                       * compound page. We will split the page now, but no
+ +                       * merging will take place.
+ +                       * We do not want to add the cost of a full lock; if
+ +                       * the page is locked, it is better to skip it and
+ +                       * perhaps try again later.
+ +                       */
+ +                      if (!trylock_page(page))
+ +                              return;
+ +                      split_huge_page(page);
+ +                      unlock_page(page);
                 }
         }
   }
@@@ -2404,10 -2369,6 +2404,10 @@@ int ksm_madvise(struct vm_area_struct *
                 if (*vm_flags & VM_SAO)
                         return 0;
   #endif
+ +#ifdef VM_SPARC_ADI
+ +              if (*vm_flags & VM_SPARC_ADI)
+ +                      return 0;
+ +#endif
   
                 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
                         err = __ksm_enter(mm);
diff --combined mm/mmap.c

index 188f195883b90b40d8371e8e04ff5acd4d9d1526,39fc51d1639c7ff2e7f6e2775a42773c9ce33891..919cdefacf1533a0f0b62a126a578d22eb0af087
--- 1/mm/mmap.c
--- 2/mm/mmap.c
+++ b/mm/mmap.c
@@@ -1342,10 -1342,6 +1342,10 @@@ unsigned long do_mmap(struct file *file
                 if (!(file && path_noexec(&file->f_path)))
                         prot |= PROT_EXEC;
   
+ +      /* force arch specific MAP_FIXED handling in get_unmapped_area */
+ +      if (flags & MAP_FIXED_NOREPLACE)
+ +              flags |= MAP_FIXED;
+ +
         if (!(flags & MAP_FIXED))
                 addr = round_hint_to_min(addr);
   
@@@ -1369,13 -1365,6 +1369,13 @@@
         if (offset_in_page(addr))
                 return addr;
   
+ +      if (flags & MAP_FIXED_NOREPLACE) {
+ +              struct vm_area_struct *vma = find_vma(mm, addr);
+ +
+ +              if (vma && vma->vm_start <= addr)
+ +                      return -EEXIST;
+ +      }
+ +
         if (prot == PROT_EXEC) {
                 pkey = execute_only_pkey(mm);
                 if (pkey < 0)
@@@ -1499,9 -1488,9 +1499,9 @@@
         return addr;
   }
   
- -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
- -              unsigned long, prot, unsigned long, flags,
- -              unsigned long, fd, unsigned long, pgoff)
+ +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
+ +                            unsigned long prot, unsigned long flags,
+ +                            unsigned long fd, unsigned long pgoff)
   {
         struct file *file = NULL;
         unsigned long retval;
@@@ -1548,13 -1537,6 +1548,13 @@@ out_fput
         return retval;
   }
   
+ +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ +              unsigned long, prot, unsigned long, flags,
+ +              unsigned long, fd, unsigned long, pgoff)
+ +{
+ +      return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+ +}
+ +
   #ifdef __ARCH_WANT_SYS_OLD_MMAP
   struct mmap_arg_struct {
         unsigned long addr;
@@@ -1574,8 -1556,8 +1574,8 @@@ SYSCALL_DEFINE1(old_mmap, struct mmap_a
         if (offset_in_page(a.offset))
                 return -EINVAL;
   
- -      return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
- -                            a.offset >> PAGE_SHIFT);
+ +      return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ +                             a.offset >> PAGE_SHIFT);
   }
   #endif /* __ARCH_WANT_SYS_OLD_MMAP */
   
@@@ -2787,7 -2769,7 +2787,7 @@@ SYSCALL_DEFINE5(remap_file_pages, unsig
         unsigned long ret = -EINVAL;
         struct file *file;
   
-       pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
+       pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
                      current->comm, current->pid);
   
         if (prot)
@@@ -3202,15 -3184,13 +3202,15 @@@ bool may_expand_vm(struct mm_struct *mm
                 if (rlimit(RLIMIT_DATA) == 0 &&
                     mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
                         return true;
- -              if (!ignore_rlimit_data) {
- -                      pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
- -                                   current->comm, current->pid,
- -                                   (mm->data_vm + npages) << PAGE_SHIFT,
- -                                   rlimit(RLIMIT_DATA));
+ +
+ +              pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
+ +                           current->comm, current->pid,
+ +                           (mm->data_vm + npages) << PAGE_SHIFT,
+ +                           rlimit(RLIMIT_DATA),
+ +                           ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
+ +
+ +              if (!ignore_rlimit_data)
                         return false;
- -              }
         }
   
         return true;
diff --combined mm/rmap.c

index f0dd4e4565bc6bc9117fe8ec9b8a2371d7f7f8b4,854b703fbe2a5504f8d69c72e27be8612c664e78..0562133266512297d9303c5de132e50b9e6e77e0
--- 1/mm/rmap.c
--- 2/mm/rmap.c
+++ b/mm/rmap.c
@@@ -32,11 -32,11 +32,11 @@@
    *                 mmlist_lock (in mmput, drain_mmlist and others)
    *                 mapping->private_lock (in __set_page_dirty_buffers)
    *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- - *                     mapping->tree_lock (widely used)
+ + *                     i_pages lock (widely used)
    *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
    *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
    *                   sb_lock (within inode_lock in fs/fs-writeback.c)
- - *                   mapping->tree_lock (widely used, in set_page_dirty,
+ + *                   i_pages lock (widely used, in set_page_dirty,
    *                             in arch-dependent flush_dcache_mmap_lock,
    *                             within bdi.wb->list_lock in __sync_single_inode)
    *
@@@ -942,7 -942,7 +942,7 @@@ static bool page_mkclean_one(struct pag
                  * downgrading page table protection not changing it to point
                  * to a new page.
                  *
-                * See Documentation/vm/mmu_notifier.txt
+                * See Documentation/vm/mmu_notifier.rst
                  */
                 if (ret)
                         (*cleaned)++;
@@@ -1171,7 -1171,6 +1171,7 @@@ void page_add_new_anon_rmap(struct pag
   /**
    * page_add_file_rmap - add pte mapping to a file page
    * @page: the page to add the mapping to
+ + * @compound: charge the page as compound or small page
    *
    * The caller needs to hold the pte lock.
    */
@@@ -1498,14 -1497,6 +1498,14 @@@ static bool try_to_unmap_one(struct pag
                                 (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
                         swp_entry_t entry;
                         pte_t swp_pte;
+ +
+ +                      if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ +                              set_pte_at(mm, address, pvmw.pte, pteval);
+ +                              ret = false;
+ +                              page_vma_mapped_walk_done(&pvmw);
+ +                              break;
+ +                      }
+ +
                         /*
                          * Store the pfn of the page in a special migration
                          * pte. do_swap_page() will wait until the migration
@@@ -1565,12 -1556,6 +1565,12 @@@
                                 page_vma_mapped_walk_done(&pvmw);
                                 break;
                         }
+ +                      if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ +                              set_pte_at(mm, address, pvmw.pte, pteval);
+ +                              ret = false;
+ +                              page_vma_mapped_walk_done(&pvmw);
+ +                              break;
+ +                      }
                         if (list_empty(&mm->mmlist)) {
                                 spin_lock(&mmlist_lock);
                                 if (list_empty(&mm->mmlist))
@@@ -1602,7 -1587,7 +1602,7 @@@
                          * point at new page while a device still is using this
                          * page.
                          *
-                        * See Documentation/vm/mmu_notifier.txt
+                        * See Documentation/vm/mmu_notifier.rst
                          */
                         dec_mm_counter(mm, mm_counter_file(page));
                 }
@@@ -1612,7 -1597,7 +1612,7 @@@ discard
                  * done above for all cases requiring it to happen under page
                  * table lock before mmu_notifier_invalidate_range_end()
                  *
-                * See Documentation/vm/mmu_notifier.txt
+                * See Documentation/vm/mmu_notifier.rst
                  */
                 page_remove_rmap(subpage, PageHuge(page));
                 put_page(page);
diff --combined mm/util.c

index 45fc3169e7b0fdb2677db32083615c12bd5f4b7c,e857c80c6f4a5340c42ea5152d7bbc92ee7cb8e8..c2d0a7cdb1898dfd1c94efb150cd1c2c9abe18bd
--- 1/mm/util.c
--- 2/mm/util.c
+++ b/mm/util.c
@@@ -287,7 -287,7 +287,7 @@@ int vma_is_stack_for_current(struct vm_
   }
   
   #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
- -void arch_pick_mmap_layout(struct mm_struct *mm)
+ +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
   {
         mm->mmap_base = TASK_UNMAPPED_BASE;
         mm->get_unmapped_area = arch_get_unmapped_area;
@@@ -297,10 -297,8 +297,10 @@@
   /*
    * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
    * back to the regular GUP.
- - * If the architecture not support this function, simply return with no
- - * page pinned
+ + * Note a difference with get_user_pages_fast: this always returns the
+ + * number of pages pinned, 0 if no pages were pinned.
+ + * If the architecture does not support this function, simply return with no
+ + * pages pinned.
    */
   int __weak __get_user_pages_fast(unsigned long start,
                                  int nr_pages, int write, struct page **pages)
@@@ -517,16 -515,6 +517,16 @@@ struct address_space *page_mapping(stru
   }
   EXPORT_SYMBOL(page_mapping);
   
+ +/*
+ + * For file cache pages, return the address_space, otherwise return NULL
+ + */
+ +struct address_space *page_mapping_file(struct page *page)
+ +{
+ +      if (unlikely(PageSwapCache(page)))
+ +              return NULL;
+ +      return page_mapping(page);
+ +}
+ +
   /* Slow path of page_mapcount() for compound pages */
   int __page_mapcount(struct page *page)
   {
@@@ -621,7 -609,7 +621,7 @@@ EXPORT_SYMBOL_GPL(vm_memory_committed)
    * succeed and -ENOMEM implies there is not.
    *
    * We currently support three overcommit policies, which are set via the
-  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
+  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting.rst
    *
    * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
    * Additional code 2002 Jul 20 by Robert Love.
@@@ -669,13 -657,6 +669,13 @@@ int __vm_enough_memory(struct mm_struc
                  */
                 free += global_node_page_state(NR_SLAB_RECLAIMABLE);
   
+ +              /*
+ +               * Part of the kernel memory, which can be released
+ +               * under memory pressure.
+ +               */
+ +              free += global_node_page_state(
+ +                      NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+ +
                 /*
                  * Leave reserved pages. The pages are not for anonymous pages.
                  */
author	Jonathan Corbet <corbet@lwn.net>
	Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)
committer	Jonathan Corbet <corbet@lwn.net>
	Mon, 16 Apr 2018 20:25:08 +0000 (14:25 -0600)
		1	2
Documentation/admin-guide/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/sysctl/vm.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/vm/hmm.rst	patch \|	\|	diff2 \|	blob \| history
Documentation/vm/page_migration.rst	patch \|	\|	diff2 \|	blob \| history
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/alpha/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/mips/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
fs/dax.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/task_mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/hmm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/swap.h	patch \|	diff1 \|	diff2 \|	blob \| history
mm/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
mm/hmm.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/huge_memory.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/hugetlb.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/ksm.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/rmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/util.c	patch \|	diff1 \|	diff2 \|	blob \| history