Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 00:22:07 +0000 (17:22 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 00:38:16 +0000 (17:38 -0700)
Pull libnvdimm updates from Dan Williams:

 - Replace pcommit with ADR / directed-flushing.

   The pcommit instruction, which has not shipped on any product, is
   deprecated.  Instead, the requirement is that platforms either
   implement ADR, or provide one or more flush addresses per nvdimm.

   ADR (Asynchronous DRAM Refresh) flushes data pending in the memory
   controller's posted-write buffers to media on a power-fail event.

   Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
   Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
   A flush hint is an mmio address that, when written and fenced,
   assures that all previous posted writes targeting a given dimm have
   been flushed to media.  A rough sketch of the sequence follows this
   list.

 - On-demand ARS (address range scrub).

   Linux uses the results of the ACPI ARS commands to track bad blocks
   in pmem devices.  When latent errors are detected we re-scrub the
   media to refresh the bad block list; userspace can also request a
   re-scrub at any time (see the example after this list).

 - Support for the Microsoft DSM (device specific method) command
   format (a pass-through sketch follows this list).

 - Support for EDK2/OVMF virtual disk device memory ranges.

 - Various fixes and cleanups across the subsystem.
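
As a rough illustration of the directed-flushing model above: a pmem
driver makes writes durable with a cache-bypassing copy, a fence, a
write of any value to the dimm's flush hint, and a final fence.  This
is a sketch only, not the in-tree implementation (the kernel's
nvdimm_flush() iterates the per-dimm hint registers discovered from
the NFIT); 'flush_addr' here is a hypothetical ioremapped flush hint:

    #include <linux/pmem.h>
    #include <linux/io.h>

    /* sketch: flush_addr is a hypothetical mapped flush hint address */
    static void flush_write_to_dimm(void *dst, const void *src, size_t n,
                    void __iomem *flush_addr)
    {
            memcpy_to_pmem(dst, src, n);  /* cache-bypassing copy to pmem */
            wmb();                        /* order the copy before the hint write */
            writeq(1, flush_addr);        /* the write itself triggers the flush */
            wmb();                        /* fence the hint write */
    }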

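As an example of requesting a re-scrub from userspace, the series adds
a 'scrub' sysfs attribute on the nvdimm bus (see the "allow an ARS
scrub to be triggered on demand" commit below).  Writing it kicks off
a new ARS pass; reading it reports the number of completed scrubs.
The path below is illustrative and platform dependent:

    echo 1 > /sys/devices/.../ACPI0012:00/ndbus0/nfit/scrub
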
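The Microsoft DSM support rides on the existing ND_CMD_CALL
pass-through envelope (struct nd_cmd_pkg in <linux/ndctl.h>).  A
minimal userspace sketch, assuming 'fd' is an open /dev/nmemX dimm
device, 'func' is a function index from the Microsoft specification,
and NVDIMM_FAMILY_MSFT is the family constant this update defines:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/ndctl.h>

    /* sketch: wrap an input payload in an nd_cmd_pkg envelope */
    static int call_msft_dsm(int fd, unsigned int func, const void *in,
                    unsigned int in_len, unsigned int out_len)
    {
            struct nd_cmd_pkg *pkg;
            int rc;

            pkg = calloc(1, sizeof(*pkg) + in_len + out_len);
            if (!pkg)
                    return -1;
            pkg->nd_family = NVDIMM_FAMILY_MSFT;  /* family added by this update */
            pkg->nd_command = func;               /* function within the family */
            pkg->nd_size_in = in_len;
            pkg->nd_size_out = out_len;
            if (in_len)
                    memcpy(pkg->nd_payload, in, in_len);
            rc = ioctl(fd, ND_IOCTL_CALL, pkg);   /* kernel relays to the _DSM */
            free(pkg);
            return rc;
    }
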
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
  libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
  nfit: do an ARS scrub on hitting a latent media error
  nfit: move to nfit/ sub-directory
  nfit, libnvdimm: allow an ARS scrub to be triggered on demand
  libnvdimm: register nvdimm_bus devices with an nd_bus driver
  pmem: clarify a debug print in pmem_clear_poison
  x86/insn: remove pcommit
  Revert "KVM: x86: add pcommit support"
  nfit, tools/testing/nvdimm/: unify shutdown paths
  libnvdimm: move ->module to struct nvdimm_bus_descriptor
  nfit: cleanup acpi_nfit_init calling convention
  nfit: fix _FIT evaluation memory leak + use after free
  tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
  tools/testing/nvdimm: add virtual ramdisk range
  acpi, nfit: treat virtual ramdisk SPA as pmem region
  pmem: kill __pmem address space
  pmem: kill wmb_pmem()
  libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
  fs/dax: remove wmb_pmem()
  libnvdimm, pmem: flush posted-write queues on shutdown
  ...

67 files changed:
Documentation/filesystems/Locking
Documentation/nvdimm/btt.txt
arch/powerpc/sysdev/axonram.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/pmem.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/vmx.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/vmx.c
arch/x86/lib/x86-opcode-map.txt
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/nfit.c [deleted file]
drivers/acpi/nfit.h [deleted file]
drivers/acpi/nfit/Kconfig [new file with mode: 0644]
drivers/acpi/nfit/Makefile [new file with mode: 0644]
drivers/acpi/nfit/core.c [new file with mode: 0644]
drivers/acpi/nfit/mce.c [new file with mode: 0644]
drivers/acpi/nfit/nfit.h [new file with mode: 0644]
drivers/block/brd.c
drivers/dax/dax.c
drivers/dax/pmem.c
drivers/md/dm-linear.c
drivers/md/dm-snap.c
drivers/md/dm-stripe.c
drivers/md/dm-target.c
drivers/md/dm.c
drivers/nvdimm/Kconfig
drivers/nvdimm/blk.c
drivers/nvdimm/btt_devs.c
drivers/nvdimm/bus.c
drivers/nvdimm/claim.c
drivers/nvdimm/core.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/e820.c
drivers/nvdimm/nd-core.h
drivers/nvdimm/nd.h
drivers/nvdimm/pmem.c
drivers/nvdimm/pmem.h [new file with mode: 0644]
drivers/nvdimm/region.c
drivers/nvdimm/region_devs.c
drivers/s390/block/dcssblk.c
fs/dax.c
include/linux/blkdev.h
include/linux/compiler.h
include/linux/device-mapper.h
include/linux/libnvdimm.h
include/linux/nd.h
include/linux/pfn_t.h
include/linux/pmem.h
include/uapi/linux/ndctl.h
kernel/memremap.c
scripts/checkpatch.pl
tools/objtool/arch/x86/insn/x86-opcode-map.txt
tools/perf/arch/x86/tests/insn-x86-dat-32.c
tools/perf/arch/x86/tests/insn-x86-dat-64.c
tools/perf/arch/x86/tests/insn-x86-dat-src.c
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/config_check.c
tools/testing/nvdimm/pmem-dax.c [new file with mode: 0644]
tools/testing/nvdimm/test/Kbuild
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h

index ef46d3ac5774445169098e250980fe4a3f21305b..1b3c39a7de627f572ece87d2a1c6ddd2cff27144 100644 (file)
@@ -395,7 +395,7 @@ prototypes:
        int (*release) (struct gendisk *, fmode_t);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+       int (*direct_access) (struct block_device *, sector_t, void **,
                                unsigned long *);
        int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
index b91443f577dcff68aa8961a73d77141ba8a73cd4..e293fb664924faa3494588a27b645ffdf09fdecf 100644 (file)
@@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
 only state using a flag in the info block.
 
 
-5. In-kernel usage
-==================
+5. Usage
+========
 
-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1].
 
-       int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to set up a BTT with a 4k sector size is:
 
-It may register with the BTT after it adds its own gendisk, using btt_init:
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
 
-       struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
-                       u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.
 
-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
-       void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl
 
index f9af6461521ab899396ce101d54e5a61af5a9df0..9144204442eb68438544a2e51af57ae13850af44 100644 (file)
@@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-                      void __pmem **kaddr, pfn_t *pfn, long size)
+                      void **kaddr, pfn_t *pfn, long size)
 {
        struct axon_ram_bank *bank = device->bd_disk->private_data;
        loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
-       *kaddr = (void __pmem __force *) bank->io_addr + offset;
+       *kaddr = (void *) bank->io_addr + offset;
        *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
        return bank->size - offset;
 }
index c64b1e9c5d1a30d916be2d944a3e94134b240fb0..d683993248c8cf485cdc953325acb36a298b73c3 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
index fbc5e92e1ecc43bbf29e07de629d801a37d95ee5..643eba42d6206aa0fbcb57baa150606269577523 100644 (file)
  * @n: length of the copy in bytes
  *
  * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
  */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
-       int unwritten;
+       int rem;
 
        /*
         * We are copying between two kernel buffers, if
@@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
         * fault) we would have already reported a general protection fault
         * before the WARN+BUG.
         */
-       unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-                       (void __user *) src, n);
-       if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-                               __func__, dst, src, unwritten))
+       rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+       if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+                               __func__, dst, src, rem))
                BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-               size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
        if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-               return memcpy_mcsafe(dst, (void __force *) src, n);
-       memcpy(dst, (void __force *) src, n);
+               return memcpy_mcsafe(dst, src, n);
+       memcpy(dst, src, n);
        return 0;
 }
 
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-       /*
-        * wmb() to 'sfence' all previous writes such that they are
-        * architecturally visible to 'pcommit'.  Note, that we've
-        * already arranged for pmem writes to avoid the cache via
-        * arch_memcpy_to_pmem().
-        */
-       wmb();
-       pcommit_sfence();
-}
-
 /**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:     virtual start address
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
  */
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
        u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
        unsigned long clflush_mask = x86_clflush_size - 1;
-       void *vaddr = (void __force *)addr;
-       void *vend = vaddr + size;
+       void *vend = addr + size;
        void *p;
 
-       for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+       for (p = (void *)((unsigned long)addr & ~clflush_mask);
             p < vend; p += x86_clflush_size)
                clwb(p);
 }
@@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
  * @i:         iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
-       void *vaddr = (void __force *)addr;
        size_t len;
 
        /* TODO: skip the write-back by always using non-temporal stores */
-       len = copy_from_iter_nocache(vaddr, bytes, i);
+       len = copy_from_iter_nocache(addr, bytes, i);
 
        if (__iter_needs_pmem_wb(i))
                arch_wb_cache_pmem(addr, bytes);
@@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size:      number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
-       void *vaddr = (void __force *)addr;
-
-       memset(vaddr, 0, size);
+       memset(addr, 0, size);
        arch_wb_cache_pmem(addr, size);
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
-       clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-       /*
-        * We require that wmb() be an 'sfence', that is only guaranteed on
-        * 64-bit builds
-        */
-       return static_cpu_has(X86_FEATURE_PCOMMIT);
+       clflush_cache_range(addr, size);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
index d96d0437776569f5c9c0e6f28d125dbc5671d037..587d7914ea4b56a539d9877b1526139285165419 100644 (file)
@@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
                : [pax] "a" (p));
 }
 
-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM.  The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- *         unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- *         void *vend = vaddr + size;
- *         void *p;
- *
- *         for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- *              p < vend; p += boot_cpu_data.x86_clflush_size)
- *                 clwb(p);
- *
- *         // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- *         // MFENCE via mb() also works
- *         wmb();
- *
- *         // PCOMMIT and the required SFENCE for ordering
- *         pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
-       alternative(ASM_NOP7,
-                   ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
-                   "sfence",
-                   X86_FEATURE_PCOMMIT);
-}
-
 #define nop() asm volatile ("nop")
 
 
index 14c63c7e8337a3d2bdfc98a5b4c14d2892de5580..a002b07a7099c1f5585e1d64989177fe8509a45f 100644 (file)
@@ -72,7 +72,6 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES                  0x00100000
-#define SECONDARY_EXEC_PCOMMIT                 0x00200000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
index 5b15d94a33f818d04ee7ae2a0f5685125bd89a40..37fee272618f1de348a7d5961f1792debba72991 100644 (file)
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
        { EXIT_REASON_INVPCID,               "INVPCID" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-       { EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
index 7597b42a8a883c668ddbcf28ba03cde86cb30924..64356536449782e05dd1fd790bdb44deb8eafd6d 100644 (file)
@@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
 
        /* cpuid 0xD.1.eax */
        const u32 kvm_cpuid_D_1_eax_x86_features =
index e17a74b1d8525708a051c18f93854af5667805be..35058c2c0eeabe0fd9dedd45999478d5cb61fabb 100644 (file)
@@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
        return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
 
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *best;
-
-       best = kvm_find_cpuid_entry(vcpu, 7, 0);
-       return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
index 7758680db20b78abbf35f1b263c5f9d8da33b5e1..df07a0a4611ffa81b059229aaa08d04a2981bc56 100644 (file)
@@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING |
-               SECONDARY_EXEC_XSAVES |
-               SECONDARY_EXEC_PCOMMIT;
+               SECONDARY_EXEC_XSAVES;
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
@@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_SHADOW_VMCS |
                        SECONDARY_EXEC_XSAVES |
                        SECONDARY_EXEC_ENABLE_PML |
-                       SECONDARY_EXEC_PCOMMIT |
                        SECONDARY_EXEC_TSC_SCALING;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml)
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       /* Currently, we allow L1 guest to directly run pcommit instruction. */
-       exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
        return exec_control;
 }
 
@@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
-       if (cpu_has_secondary_exec_ctrls())
+       if (cpu_has_secondary_exec_ctrls()) {
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
                                vmx_secondary_exec_control(vmx));
+       }
 
        if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
                vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -7564,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
-       /* we never catch pcommit instruct for L1 guest. */
-       WARN_ON(1);
-       return 1;
-}
-
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7621,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_XSAVES]                  = handle_xsaves,
        [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
-       [EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7930,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * the XSS exit bitmap in vmcs12.
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
-       case EXIT_REASON_PCOMMIT:
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
        default:
                return true;
        }
@@ -9094,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
        if (cpu_has_secondary_exec_ctrls())
                vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
-       if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
-               if (guest_cpuid_has_pcommit(vcpu))
-                       vmx->nested.nested_vmx_secondary_ctls_high |=
-                               SECONDARY_EXEC_PCOMMIT;
-               else
-                       vmx->nested.nested_vmx_secondary_ctls_high &=
-                               ~SECONDARY_EXEC_PCOMMIT;
-       }
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9715,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                                 SECONDARY_EXEC_PCOMMIT);
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
index ec378cd7b71ee4e067d0a4a9beb59413def3296c..767be7c760340bd33b7e4a18b9a8f3a71d9db33e 100644 (file)
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index acad70a0bb0dcccdc6d263b5767535554b668c7c..aebd944bdaa125e6c0cd52dc475fe366c0feb4ba 100644 (file)
@@ -454,32 +454,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
 
          If you are unsure what to do, do not enable this option.
 
-config ACPI_NFIT
-       tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
-       depends on PHYS_ADDR_T_64BIT
-       depends on BLK_DEV
-       depends on ARCH_HAS_MMIO_FLUSH
-       select LIBNVDIMM
-       help
-         Infrastructure to probe ACPI 6 compliant platforms for
-         NVDIMMs (NFIT) and register a libnvdimm device tree.  In
-         addition to storage devices this also enables libnvdimm to pass
-         ACPI._DSM messages for platform/dimm configuration.
-
-         To compile this driver as a module, choose M here:
-         the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-       bool "NFIT DSM debug"
-       depends on ACPI_NFIT
-       depends on DYNAMIC_DEBUG
-       default n
-       help
-         Enabling this option causes the nfit driver to dump the
-         input and output buffers of _DSM operations on the ACPI0012
-         device and its children.  This can be very verbose, so leave
-         it disabled unless you are debugging a hardware / firmware
-         issue.
+source "drivers/acpi/nfit/Kconfig"
 
 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
index 88f54f03e3d228a0bfabb674cc1a72b9a5177151..35a6ccbe302580ecf713d5ec623168dc7ab48a0d 100644 (file)
@@ -69,7 +69,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT)   += pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)   += processor.o
 obj-$(CONFIG_ACPI)             += container.o
 obj-$(CONFIG_ACPI_THERMAL)     += thermal.o
-obj-$(CONFIG_ACPI_NFIT)                += nfit.o
+obj-$(CONFIG_ACPI_NFIT)                += nfit/
 obj-$(CONFIG_ACPI)             += acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)     += battery.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
deleted file mode 100644 (file)
index 1f0e060..0000000
+++ /dev/null
@@ -1,2713 +0,0 @@
-/*
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/list_sort.h>
-#include <linux/libnvdimm.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/ndctl.h>
-#include <linux/delay.h>
-#include <linux/list.h>
-#include <linux/acpi.h>
-#include <linux/sort.h>
-#include <linux/pmem.h>
-#include <linux/io.h>
-#include <linux/nd.h>
-#include <asm/cacheflush.h>
-#include "nfit.h"
-
-/*
- * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
- * irrelevant.
- */
-#include <linux/io-64-nonatomic-hi-lo.h>
-
-static bool force_enable_dimms;
-module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
-
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-               "Number of times we overflow ARS results before abort");
-
-static bool disable_vendor_specific;
-module_param(disable_vendor_specific, bool, S_IRUGO);
-MODULE_PARM_DESC(disable_vendor_specific,
-               "Limit commands to the publicly specified set\n");
-
-static struct workqueue_struct *nfit_wq;
-
-struct nfit_table_prev {
-       struct list_head spas;
-       struct list_head memdevs;
-       struct list_head dcrs;
-       struct list_head bdws;
-       struct list_head idts;
-       struct list_head flushes;
-};
-
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
-
-const u8 *to_nfit_uuid(enum nfit_uuids id)
-{
-       return nfit_uuid[id];
-}
-EXPORT_SYMBOL(to_nfit_uuid);
-
-static struct acpi_nfit_desc *to_acpi_nfit_desc(
-               struct nvdimm_bus_descriptor *nd_desc)
-{
-       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
-       /*
-        * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
-        * acpi_device.
-        */
-       if (!nd_desc->provider_name
-                       || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
-               return NULL;
-
-       return to_acpi_device(acpi_desc->dev);
-}
-
-static int xlat_status(void *buf, unsigned int cmd)
-{
-       struct nd_cmd_clear_error *clear_err;
-       struct nd_cmd_ars_status *ars_status;
-       struct nd_cmd_ars_start *ars_start;
-       struct nd_cmd_ars_cap *ars_cap;
-       u16 flags;
-
-       switch (cmd) {
-       case ND_CMD_ARS_CAP:
-               ars_cap = buf;
-               if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
-                       return -ENOTTY;
-
-               /* Command failed */
-               if (ars_cap->status & 0xffff)
-                       return -EIO;
-
-               /* No supported scan types for this range */
-               flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
-               if ((ars_cap->status >> 16 & flags) == 0)
-                       return -ENOTTY;
-               break;
-       case ND_CMD_ARS_START:
-               ars_start = buf;
-               /* ARS is in progress */
-               if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
-                       return -EBUSY;
-
-               /* Command failed */
-               if (ars_start->status & 0xffff)
-                       return -EIO;
-               break;
-       case ND_CMD_ARS_STATUS:
-               ars_status = buf;
-               /* Command failed */
-               if (ars_status->status & 0xffff)
-                       return -EIO;
-               /* Check extended status (Upper two bytes) */
-               if (ars_status->status == NFIT_ARS_STATUS_DONE)
-                       return 0;
-
-               /* ARS is in progress */
-               if (ars_status->status == NFIT_ARS_STATUS_BUSY)
-                       return -EBUSY;
-
-               /* No ARS performed for the current boot */
-               if (ars_status->status == NFIT_ARS_STATUS_NONE)
-                       return -EAGAIN;
-
-               /*
-                * ARS interrupted, either we overflowed or some other
-                * agent wants the scan to stop.  If we didn't overflow
-                * then just continue with the returned results.
-                */
-               if (ars_status->status == NFIT_ARS_STATUS_INTR) {
-                       if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
-                               return -ENOSPC;
-                       return 0;
-               }
-
-               /* Unknown status */
-               if (ars_status->status >> 16)
-                       return -EIO;
-               break;
-       case ND_CMD_CLEAR_ERROR:
-               clear_err = buf;
-               if (clear_err->status & 0xffff)
-                       return -EIO;
-               if (!clear_err->cleared)
-                       return -EIO;
-               if (clear_err->length > clear_err->cleared)
-                       return clear_err->cleared;
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, unsigned int cmd, void *buf,
-               unsigned int buf_len, int *cmd_rc)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-       union acpi_object in_obj, in_buf, *out_obj;
-       const struct nd_cmd_desc *desc = NULL;
-       struct device *dev = acpi_desc->dev;
-       struct nd_cmd_pkg *call_pkg = NULL;
-       const char *cmd_name, *dimm_name;
-       unsigned long cmd_mask, dsm_mask;
-       acpi_handle handle;
-       unsigned int func;
-       const u8 *uuid;
-       u32 offset;
-       int rc, i;
-
-       func = cmd;
-       if (cmd == ND_CMD_CALL) {
-               call_pkg = buf;
-               func = call_pkg->nd_command;
-       }
-
-       if (nvdimm) {
-               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-               struct acpi_device *adev = nfit_mem->adev;
-
-               if (!adev)
-                       return -ENOTTY;
-               if (call_pkg && nfit_mem->family != call_pkg->nd_family)
-                       return -ENOTTY;
-
-               dimm_name = nvdimm_name(nvdimm);
-               cmd_name = nvdimm_cmd_name(cmd);
-               cmd_mask = nvdimm_cmd_mask(nvdimm);
-               dsm_mask = nfit_mem->dsm_mask;
-               desc = nd_cmd_dimm_desc(cmd);
-               uuid = to_nfit_uuid(nfit_mem->family);
-               handle = adev->handle;
-       } else {
-               struct acpi_device *adev = to_acpi_dev(acpi_desc);
-
-               cmd_name = nvdimm_bus_cmd_name(cmd);
-               cmd_mask = nd_desc->cmd_mask;
-               dsm_mask = cmd_mask;
-               desc = nd_cmd_bus_desc(cmd);
-               uuid = to_nfit_uuid(NFIT_DEV_BUS);
-               handle = adev->handle;
-               dimm_name = "bus";
-       }
-
-       if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
-               return -ENOTTY;
-
-       if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
-               return -ENOTTY;
-
-       in_obj.type = ACPI_TYPE_PACKAGE;
-       in_obj.package.count = 1;
-       in_obj.package.elements = &in_buf;
-       in_buf.type = ACPI_TYPE_BUFFER;
-       in_buf.buffer.pointer = buf;
-       in_buf.buffer.length = 0;
-
-       /* libnvdimm has already validated the input envelope */
-       for (i = 0; i < desc->in_num; i++)
-               in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
-                               i, buf);
-
-       if (call_pkg) {
-               /* skip over package wrapper */
-               in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
-               in_buf.buffer.length = call_pkg->nd_size_in;
-       }
-
-       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-               dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-                               __func__, dimm_name, cmd, func,
-                               in_buf.buffer.length);
-               print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
-                       in_buf.buffer.pointer,
-                       min_t(u32, 256, in_buf.buffer.length), true);
-       }
-
-       out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
-       if (!out_obj) {
-               dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-                               cmd_name);
-               return -EINVAL;
-       }
-
-       if (call_pkg) {
-               call_pkg->nd_fw_size = out_obj->buffer.length;
-               memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
-                       out_obj->buffer.pointer,
-                       min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
-
-               ACPI_FREE(out_obj);
-               /*
-                * Need to support FW function w/o known size in advance.
-                * Caller can determine required size based upon nd_fw_size.
-                * If we return an error (like elsewhere) then caller wouldn't
-                * be able to rely upon data returned to make calculation.
-                */
-               return 0;
-       }
-
-       if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-               dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-                               __func__, dimm_name, cmd_name, out_obj->type);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-               dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
-                               dimm_name, cmd_name, out_obj->buffer.length);
-               print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
-                               4, out_obj->buffer.pointer, min_t(u32, 128,
-                                       out_obj->buffer.length), true);
-       }
-
-       for (i = 0, offset = 0; i < desc->out_num; i++) {
-               u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
-                               (u32 *) out_obj->buffer.pointer);
-
-               if (offset + out_size > out_obj->buffer.length) {
-                       dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
-                       break;
-               }
-
-               if (in_buf.buffer.length + offset + out_size > buf_len) {
-                       dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
-                       rc = -ENXIO;
-                       goto out;
-               }
-               memcpy(buf + in_buf.buffer.length + offset,
-                               out_obj->buffer.pointer + offset, out_size);
-               offset += out_size;
-       }
-       if (offset + in_buf.buffer.length < buf_len) {
-               if (i >= 1) {
-                       /*
-                        * status valid, return the number of bytes left
-                        * unfilled in the output buffer
-                        */
-                       rc = buf_len - offset - in_buf.buffer.length;
-                       if (cmd_rc)
-                               *cmd_rc = xlat_status(buf, cmd);
-               } else {
-                       dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
-                                       __func__, dimm_name, cmd_name, buf_len,
-                                       offset);
-                       rc = -ENXIO;
-               }
-       } else {
-               rc = 0;
-               if (cmd_rc)
-                       *cmd_rc = xlat_status(buf, cmd);
-       }
-
- out:
-       ACPI_FREE(out_obj);
-
-       return rc;
-}
-
-static const char *spa_type_name(u16 type)
-{
-       static const char *to_name[] = {
-               [NFIT_SPA_VOLATILE] = "volatile",
-               [NFIT_SPA_PM] = "pmem",
-               [NFIT_SPA_DCR] = "dimm-control-region",
-               [NFIT_SPA_BDW] = "block-data-window",
-               [NFIT_SPA_VDISK] = "volatile-disk",
-               [NFIT_SPA_VCD] = "volatile-cd",
-               [NFIT_SPA_PDISK] = "persistent-disk",
-               [NFIT_SPA_PCD] = "persistent-cd",
-
-       };
-
-       if (type > NFIT_SPA_PCD)
-               return "unknown";
-
-       return to_name[type];
-}
-
-static int nfit_spa_type(struct acpi_nfit_system_address *spa)
-{
-       int i;
-
-       for (i = 0; i < NFIT_UUID_MAX; i++)
-               if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
-                       return i;
-       return -1;
-}
-
-static bool add_spa(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_system_address *spa)
-{
-       size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_spa *nfit_spa;
-
-       list_for_each_entry(nfit_spa, &prev->spas, list) {
-               if (memcmp(nfit_spa->spa, spa, length) == 0) {
-                       list_move_tail(&nfit_spa->list, &acpi_desc->spas);
-                       return true;
-               }
-       }
-
-       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
-       if (!nfit_spa)
-               return false;
-       INIT_LIST_HEAD(&nfit_spa->list);
-       nfit_spa->spa = spa;
-       list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-       dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
-                       spa->range_index,
-                       spa_type_name(nfit_spa_type(spa)));
-       return true;
-}
-
-static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_memory_map *memdev)
-{
-       size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_memdev *nfit_memdev;
-
-       list_for_each_entry(nfit_memdev, &prev->memdevs, list)
-               if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
-                       list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-                       return true;
-               }
-
-       nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
-       if (!nfit_memdev)
-               return false;
-       INIT_LIST_HEAD(&nfit_memdev->list);
-       nfit_memdev->memdev = memdev;
-       list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-       dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
-                       __func__, memdev->device_handle, memdev->range_index,
-                       memdev->region_index);
-       return true;
-}
-
-static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_control_region *dcr)
-{
-       size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_dcr *nfit_dcr;
-
-       list_for_each_entry(nfit_dcr, &prev->dcrs, list)
-               if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
-                       list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-                       return true;
-               }
-
-       nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
-       if (!nfit_dcr)
-               return false;
-       INIT_LIST_HEAD(&nfit_dcr->list);
-       nfit_dcr->dcr = dcr;
-       list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-       dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
-                       dcr->region_index, dcr->windows);
-       return true;
-}
-
-static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_data_region *bdw)
-{
-       size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_bdw *nfit_bdw;
-
-       list_for_each_entry(nfit_bdw, &prev->bdws, list)
-               if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
-                       list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
-                       return true;
-               }
-
-       nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
-       if (!nfit_bdw)
-               return false;
-       INIT_LIST_HEAD(&nfit_bdw->list);
-       nfit_bdw->bdw = bdw;
-       list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-       dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
-                       bdw->region_index, bdw->windows);
-       return true;
-}
-
-static bool add_idt(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_interleave *idt)
-{
-       size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_idt *nfit_idt;
-
-       list_for_each_entry(nfit_idt, &prev->idts, list)
-               if (memcmp(nfit_idt->idt, idt, length) == 0) {
-                       list_move_tail(&nfit_idt->list, &acpi_desc->idts);
-                       return true;
-               }
-
-       nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
-       if (!nfit_idt)
-               return false;
-       INIT_LIST_HEAD(&nfit_idt->list);
-       nfit_idt->idt = idt;
-       list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-       dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
-                       idt->interleave_index, idt->line_count);
-       return true;
-}
-
-static bool add_flush(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_flush_address *flush)
-{
-       size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_flush *nfit_flush;
-
-       list_for_each_entry(nfit_flush, &prev->flushes, list)
-               if (memcmp(nfit_flush->flush, flush, length) == 0) {
-                       list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
-                       return true;
-               }
-
-       nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
-       if (!nfit_flush)
-               return false;
-       INIT_LIST_HEAD(&nfit_flush->list);
-       nfit_flush->flush = flush;
-       list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-       dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
-                       flush->device_handle, flush->hint_count);
-       return true;
-}
-
-static void *add_table(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev, void *table, const void *end)
-{
-       struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_header *hdr;
-       void *err = ERR_PTR(-ENOMEM);
-
-       if (table >= end)
-               return NULL;
-
-       hdr = table;
-       if (!hdr->length) {
-               dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
-                       hdr->type);
-               return NULL;
-       }
-
-       switch (hdr->type) {
-       case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
-               if (!add_spa(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_MEMORY_MAP:
-               if (!add_memdev(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_CONTROL_REGION:
-               if (!add_dcr(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_DATA_REGION:
-               if (!add_bdw(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_INTERLEAVE:
-               if (!add_idt(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
-               if (!add_flush(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_SMBIOS:
-               dev_dbg(dev, "%s: smbios\n", __func__);
-               break;
-       default:
-               dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
-               break;
-       }
-
-       return table + hdr->length;
-}
-
-static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem)
-{
-       u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
-       u16 dcr = nfit_mem->dcr->region_index;
-       struct nfit_spa *nfit_spa;
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               u16 range_index = nfit_spa->spa->range_index;
-               int type = nfit_spa_type(nfit_spa->spa);
-               struct nfit_memdev *nfit_memdev;
-
-               if (type != NFIT_SPA_BDW)
-                       continue;
-
-               list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-                       if (nfit_memdev->memdev->range_index != range_index)
-                               continue;
-                       if (nfit_memdev->memdev->device_handle != device_handle)
-                               continue;
-                       if (nfit_memdev->memdev->region_index != dcr)
-                               continue;
-
-                       nfit_mem->spa_bdw = nfit_spa->spa;
-                       return;
-               }
-       }
-
-       dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
-                       nfit_mem->spa_dcr->range_index);
-       nfit_mem->bdw = NULL;
-}
-
-static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
-{
-       u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
-       struct nfit_memdev *nfit_memdev;
-       struct nfit_flush *nfit_flush;
-       struct nfit_bdw *nfit_bdw;
-       struct nfit_idt *nfit_idt;
-       u16 idt_idx, range_index;
-
-       list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
-               if (nfit_bdw->bdw->region_index != dcr)
-                       continue;
-               nfit_mem->bdw = nfit_bdw->bdw;
-               break;
-       }
-
-       if (!nfit_mem->bdw)
-               return;
-
-       nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
-
-       if (!nfit_mem->spa_bdw)
-               return;
-
-       range_index = nfit_mem->spa_bdw->range_index;
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               if (nfit_memdev->memdev->range_index != range_index ||
-                               nfit_memdev->memdev->region_index != dcr)
-                       continue;
-               nfit_mem->memdev_bdw = nfit_memdev->memdev;
-               idt_idx = nfit_memdev->memdev->interleave_index;
-               list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
-                       if (nfit_idt->idt->interleave_index != idt_idx)
-                               continue;
-                       nfit_mem->idt_bdw = nfit_idt->idt;
-                       break;
-               }
-
-               list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
-                       if (nfit_flush->flush->device_handle !=
-                                       nfit_memdev->memdev->device_handle)
-                               continue;
-                       nfit_mem->nfit_flush = nfit_flush;
-                       break;
-               }
-               break;
-       }
-}
-
-static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_mem *nfit_mem, *found;
-       struct nfit_memdev *nfit_memdev;
-       int type = nfit_spa_type(spa);
-
-       switch (type) {
-       case NFIT_SPA_DCR:
-       case NFIT_SPA_PM:
-               break;
-       default:
-               return 0;
-       }
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct nfit_dcr *nfit_dcr;
-               u32 device_handle;
-               u16 dcr;
-
-               if (nfit_memdev->memdev->range_index != spa->range_index)
-                       continue;
-               found = NULL;
-               dcr = nfit_memdev->memdev->region_index;
-               device_handle = nfit_memdev->memdev->device_handle;
-               list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-                       if (__to_nfit_memdev(nfit_mem)->device_handle
-                                       == device_handle) {
-                               found = nfit_mem;
-                               break;
-                       }
-
-               if (found)
-                       nfit_mem = found;
-               else {
-                       nfit_mem = devm_kzalloc(acpi_desc->dev,
-                                       sizeof(*nfit_mem), GFP_KERNEL);
-                       if (!nfit_mem)
-                               return -ENOMEM;
-                       INIT_LIST_HEAD(&nfit_mem->list);
-                       nfit_mem->acpi_desc = acpi_desc;
-                       list_add(&nfit_mem->list, &acpi_desc->dimms);
-               }
-
-               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-                       if (nfit_dcr->dcr->region_index != dcr)
-                               continue;
-                       /*
-                        * Record the control region for the dimm.  For
-                        * the ACPI 6.1 case, where there are separate
-                        * control regions for the pmem vs blk
-                        * interfaces, be sure to record the extended
-                        * blk details.
-                        */
-                       if (!nfit_mem->dcr)
-                               nfit_mem->dcr = nfit_dcr->dcr;
-                       else if (nfit_mem->dcr->windows == 0
-                                       && nfit_dcr->dcr->windows)
-                               nfit_mem->dcr = nfit_dcr->dcr;
-                       break;
-               }
-
-               if (dcr && !nfit_mem->dcr) {
-                       dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
-                                       spa->range_index, dcr);
-                       return -ENODEV;
-               }
-
-               if (type == NFIT_SPA_DCR) {
-                       struct nfit_idt *nfit_idt;
-                       u16 idt_idx;
-
-                       /* multiple dimms may share a SPA when interleaved */
-                       nfit_mem->spa_dcr = spa;
-                       nfit_mem->memdev_dcr = nfit_memdev->memdev;
-                       idt_idx = nfit_memdev->memdev->interleave_index;
-                       list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
-                               if (nfit_idt->idt->interleave_index != idt_idx)
-                                       continue;
-                               nfit_mem->idt_dcr = nfit_idt->idt;
-                               break;
-                       }
-                       nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
-               } else {
-                       /*
-                        * A single dimm may belong to multiple SPA-PM
-                        * ranges, record at least one in addition to
-                        * any SPA-DCR range.
-                        */
-                       nfit_mem->memdev_pmem = nfit_memdev->memdev;
-               }
-       }
-
-       return 0;
-}
-
-static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
-{
-       struct nfit_mem *a = container_of(_a, typeof(*a), list);
-       struct nfit_mem *b = container_of(_b, typeof(*b), list);
-       u32 handleA, handleB;
-
-       handleA = __to_nfit_memdev(a)->device_handle;
-       handleB = __to_nfit_memdev(b)->device_handle;
-       if (handleA < handleB)
-               return -1;
-       else if (handleA > handleB)
-               return 1;
-       return 0;
-}
-
-static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_spa *nfit_spa;
-
-       /*
-        * For each SPA-DCR or SPA-PMEM address range find its
-        * corresponding MEMDEV(s).  From each MEMDEV find the
-        * corresponding DCR.  Then, if we're operating on a SPA-DCR,
-        * try to find a SPA-BDW and a corresponding BDW that references
-        * the DCR.  Throw it all into an nfit_mem object.  Note, that
-        * BDWs are optional.
-        */
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               int rc;
-
-               rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
-               if (rc)
-                       return rc;
-       }
-
-       list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
-
-       return 0;
-}
-
-static ssize_t revision_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-
-       return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
-}
-static DEVICE_ATTR_RO(revision);
-
-static struct attribute *acpi_nfit_attributes[] = {
-       &dev_attr_revision.attr,
-       NULL,
-};
-
-static struct attribute_group acpi_nfit_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_attribute_groups[] = {
-       &nvdimm_bus_attribute_group,
-       &acpi_nfit_attribute_group,
-       NULL,
-};
-
-static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       return __to_nfit_memdev(nfit_mem);
-}
-
-static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       return nfit_mem->dcr;
-}
-
-static ssize_t handle_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
-       return sprintf(buf, "%#x\n", memdev->device_handle);
-}
-static DEVICE_ATTR_RO(handle);
-
-static ssize_t phys_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
-       return sprintf(buf, "%#x\n", memdev->physical_id);
-}
-static DEVICE_ATTR_RO(phys_id);
-
-static ssize_t vendor_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t rev_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
-}
-static DEVICE_ATTR_RO(rev_id);
-
-static ssize_t device_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t subsystem_vendor_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
-}
-static DEVICE_ATTR_RO(subsystem_vendor);
-
-static ssize_t subsystem_rev_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n",
-                       be16_to_cpu(dcr->subsystem_revision_id));
-}
-static DEVICE_ATTR_RO(subsystem_rev_id);
-
-static ssize_t subsystem_device_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
-}
-static DEVICE_ATTR_RO(subsystem_device);
-
-static int num_nvdimm_formats(struct nvdimm *nvdimm)
-{
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-       int formats = 0;
-
-       if (nfit_mem->memdev_pmem)
-               formats++;
-       if (nfit_mem->memdev_bdw)
-               formats++;
-       return formats;
-}
-
-static ssize_t format_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
-}
-static DEVICE_ATTR_RO(format);
-
-static ssize_t format1_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       u32 handle;
-       ssize_t rc = -ENXIO;
-       struct nfit_mem *nfit_mem;
-       struct nfit_memdev *nfit_memdev;
-       struct acpi_nfit_desc *acpi_desc;
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       nfit_mem = nvdimm_provider_data(nvdimm);
-       acpi_desc = nfit_mem->acpi_desc;
-       handle = to_nfit_memdev(dev)->device_handle;
-
-       /* assumes DIMMs have at most 2 published interface codes */
-       mutex_lock(&acpi_desc->init_mutex);
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
-               struct nfit_dcr *nfit_dcr;
-
-               if (memdev->device_handle != handle)
-                       continue;
-
-               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-                       if (nfit_dcr->dcr->region_index != memdev->region_index)
-                               continue;
-                       if (nfit_dcr->dcr->code == dcr->code)
-                               continue;
-                       rc = sprintf(buf, "0x%04x\n",
-                                       le16_to_cpu(nfit_dcr->dcr->code));
-                       break;
-               }
-               if (rc != -ENXIO)
-                       break;
-       }
-       mutex_unlock(&acpi_desc->init_mutex);
-       return rc;
-}
-static DEVICE_ATTR_RO(format1);
-
-static ssize_t formats_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-
-       return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
-}
-static DEVICE_ATTR_RO(formats);
-
-static ssize_t serial_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(serial);
-
-static ssize_t family_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       if (nfit_mem->family < 0)
-               return -ENXIO;
-       return sprintf(buf, "%d\n", nfit_mem->family);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t dsm_mask_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       if (nfit_mem->family < 0)
-               return -ENXIO;
-       return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
-}
-static DEVICE_ATTR_RO(dsm_mask);
-
-static ssize_t flags_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       u16 flags = to_nfit_memdev(dev)->flags;
-
-       return sprintf(buf, "%s%s%s%s%s\n",
-               flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
-               flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
-               flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
-               flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
-               flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
-}
-static DEVICE_ATTR_RO(flags);
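-
-/*
- * A worked decode of the 'flags' format above, using a hypothetical
- * memdev flags value of 0x09 (ACPI_NFIT_MEM_SAVE_FAILED |
- * ACPI_NFIT_MEM_NOT_ARMED): the attribute reads "save_fail not_armed ".
- */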
-
-static ssize_t id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
-               return sprintf(buf, "%04x-%02x-%04x-%08x\n",
-                               be16_to_cpu(dcr->vendor_id),
-                               dcr->manufacturing_location,
-                               be16_to_cpu(dcr->manufacturing_date),
-                               be32_to_cpu(dcr->serial_number));
-       else
-               return sprintf(buf, "%04x-%08x\n",
-                               be16_to_cpu(dcr->vendor_id),
-                               be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(id);
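-
-/*
- * Example renderings of the 'id' format above, with made-up field values:
- * when manufacturing info is valid (vendor 0xabcd, location 0x01, date
- * 0x2016, serial 0x12345678) the attribute reads "abcd-01-2016-12345678";
- * otherwise just "abcd-12345678".
- */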
-
-static struct attribute *acpi_nfit_dimm_attributes[] = {
-       &dev_attr_handle.attr,
-       &dev_attr_phys_id.attr,
-       &dev_attr_vendor.attr,
-       &dev_attr_device.attr,
-       &dev_attr_rev_id.attr,
-       &dev_attr_subsystem_vendor.attr,
-       &dev_attr_subsystem_device.attr,
-       &dev_attr_subsystem_rev_id.attr,
-       &dev_attr_format.attr,
-       &dev_attr_formats.attr,
-       &dev_attr_format1.attr,
-       &dev_attr_serial.attr,
-       &dev_attr_flags.attr,
-       &dev_attr_id.attr,
-       &dev_attr_family.attr,
-       &dev_attr_dsm_mask.attr,
-       NULL,
-};
-
-static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
-               struct attribute *a, int n)
-{
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-
-       if (!to_nfit_dcr(dev))
-               return 0;
-       if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
-               return 0;
-       return a->mode;
-}
-
-static struct attribute_group acpi_nfit_dimm_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_dimm_attributes,
-       .is_visible = acpi_nfit_dimm_attr_visible,
-};
-
-static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
-       &nvdimm_attribute_group,
-       &nd_device_attribute_group,
-       &acpi_nfit_dimm_attribute_group,
-       NULL,
-};
-
-static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
-               u32 device_handle)
-{
-       struct nfit_mem *nfit_mem;
-
-       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-               if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
-                       return nfit_mem->nvdimm;
-
-       return NULL;
-}
-
-static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem, u32 device_handle)
-{
-       struct acpi_device *adev, *adev_dimm;
-       struct device *dev = acpi_desc->dev;
-       unsigned long dsm_mask;
-       const u8 *uuid;
-       int i;
-
-       /* nfit test assumes 1:1 relationship between commands and dsms */
-       nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
-       nfit_mem->family = NVDIMM_FAMILY_INTEL;
-       adev = to_acpi_dev(acpi_desc);
-       if (!adev)
-               return 0;
-
-       adev_dimm = acpi_find_child_device(adev, device_handle, false);
-       nfit_mem->adev = adev_dimm;
-       if (!adev_dimm) {
-               dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
-                               device_handle);
-               return force_enable_dimms ? 0 : -ENODEV;
-       }
-
-       /*
-        * Until standardization materializes we need to consider up to 3
-        * different command sets.  Note that checking for function0 (bit0)
-        * tells us if any commands are reachable through this uuid.
-        */
-       for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-               if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
-                       break;
-
-       /* limit the supported commands to those that are publicly documented */
-       nfit_mem->family = i;
-       if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
-               dsm_mask = 0x3fe;
-               if (disable_vendor_specific)
-                       dsm_mask &= ~(1 << ND_CMD_VENDOR);
-       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
-               dsm_mask = 0x1c3c76;
-       else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
-               dsm_mask = 0x1fe;
-               if (disable_vendor_specific)
-                       dsm_mask &= ~(1 << 8);
-       } else {
-               dev_dbg(dev, "unknown dimm command family\n");
-               nfit_mem->family = -1;
-               /* DSMs are optional, continue loading the driver... */
-               return 0;
-       }
-
-       uuid = to_nfit_uuid(nfit_mem->family);
-       for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-               if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
-                       set_bit(i, &nfit_mem->dsm_mask);
-
-       return 0;
-}
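-
-/*
- * A quick expansion of the dsm_mask constants above: 0x3fe enables DSM
- * functions 1-9 for NVDIMM_FAMILY_INTEL (bit 9 == ND_CMD_VENDOR, hence
- * the disable_vendor_specific clear), and 0x1fe enables functions 1-8
- * for NVDIMM_FAMILY_HPE2, with function 8 as its vendor-specific
- * passthrough.
- */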
-
-static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_mem *nfit_mem;
-       int dimm_count = 0;
-
-       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
-               unsigned long flags = 0, cmd_mask;
-               struct nvdimm *nvdimm;
-               u32 device_handle;
-               u16 mem_flags;
-               int rc;
-
-               device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
-               nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
-               if (nvdimm) {
-                       dimm_count++;
-                       continue;
-               }
-
-               if (nfit_mem->bdw && nfit_mem->memdev_pmem)
-                       flags |= NDD_ALIASING;
-
-               mem_flags = __to_nfit_memdev(nfit_mem)->flags;
-               if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
-                       flags |= NDD_UNARMED;
-
-               rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
-               if (rc)
-                       continue;
-
-               /*
-                * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
-                * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
-                * userspace interface.
-                */
-               cmd_mask = 1UL << ND_CMD_CALL;
-               if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
-                       cmd_mask |= nfit_mem->dsm_mask;
-
-               nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
-                               acpi_nfit_dimm_attribute_groups,
-                               flags, cmd_mask);
-               if (!nvdimm)
-                       return -ENOMEM;
-
-               nfit_mem->nvdimm = nvdimm;
-               dimm_count++;
-
-               if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
-                       continue;
-
-               dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
-                               nvdimm_name(nvdimm),
-                 mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
-                 mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
-                 mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
-                 mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
-
-       }
-
-       return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
-}
-
-static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
-       struct acpi_device *adev;
-       int i;
-
-       nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
-       adev = to_acpi_dev(acpi_desc);
-       if (!adev)
-               return;
-
-       for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-               if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
-                       set_bit(i, &nd_desc->cmd_mask);
-}
-
-static ssize_t range_index_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nd_region *nd_region = to_nd_region(dev);
-       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
-
-       return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
-}
-static DEVICE_ATTR_RO(range_index);
-
-static struct attribute *acpi_nfit_region_attributes[] = {
-       &dev_attr_range_index.attr,
-       NULL,
-};
-
-static struct attribute_group acpi_nfit_region_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_region_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
-       &nd_region_attribute_group,
-       &nd_mapping_attribute_group,
-       &nd_device_attribute_group,
-       &nd_numa_attribute_group,
-       &acpi_nfit_region_attribute_group,
-       NULL,
-};
-
-/* enough info to uniquely specify an interleave set */
-struct nfit_set_info {
-       struct nfit_set_info_map {
-               u64 region_offset;
-               u32 serial_number;
-               u32 pad;
-       } mapping[0];
-};
-
-static size_t sizeof_nfit_set_info(int num_mappings)
-{
-       return sizeof(struct nfit_set_info)
-               + num_mappings * sizeof(struct nfit_set_info_map);
-}
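-
-/*
- * For example (assuming the usual packing), sizeof(struct nfit_set_info)
- * is 0 -- mapping[0] contributes no storage -- and each nfit_set_info_map
- * is 16 bytes (u64 + u32 + u32), so a two-dimm interleave set allocates
- * 32 bytes.
- */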
-
-static int cmp_map(const void *m0, const void *m1)
-{
-       const struct nfit_set_info_map *map0 = m0;
-       const struct nfit_set_info_map *map1 = m1;
-
-       return memcmp(&map0->region_offset, &map1->region_offset,
-                       sizeof(u64));
-}
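-
-/*
- * Note: the memcmp() above yields a byte-wise (i.e. endian-dependent)
- * ordering of region_offset rather than a numeric one.  It is
- * deterministic on a given host, which is what the interleave-set
- * cookie calculation below relies on.
- */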
-
-/* Retrieve the nth entry referencing this spa */
-static struct acpi_nfit_memory_map *memdev_from_spa(
-               struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
-{
-       struct nfit_memdev *nfit_memdev;
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
-               if (nfit_memdev->memdev->range_index == range_index)
-                       if (n-- == 0)
-                               return nfit_memdev->memdev;
-       return NULL;
-}
-
-static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
-               struct nd_region_desc *ndr_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       int i, spa_type = nfit_spa_type(spa);
-       struct device *dev = acpi_desc->dev;
-       struct nd_interleave_set *nd_set;
-       u16 nr = ndr_desc->num_mappings;
-       struct nfit_set_info *info;
-
-       if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
-               /* pass */;
-       else
-               return 0;
-
-       nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
-       if (!nd_set)
-               return -ENOMEM;
-
-       info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
-       for (i = 0; i < nr; i++) {
-               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
-               struct nfit_set_info_map *map = &info->mapping[i];
-               struct nvdimm *nvdimm = nd_mapping->nvdimm;
-               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-               struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
-                               spa->range_index, i);
-
-               if (!memdev || !nfit_mem->dcr) {
-                       dev_err(dev, "%s: failed to find DCR\n", __func__);
-                       return -ENODEV;
-               }
-
-               map->region_offset = memdev->region_offset;
-               map->serial_number = nfit_mem->dcr->serial_number;
-       }
-
-       sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
-                       cmp_map, NULL);
-       nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
-       ndr_desc->nd_set = nd_set;
-       devm_kfree(dev, info);
-
-       return 0;
-}
-
-static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
-{
-       struct acpi_nfit_interleave *idt = mmio->idt;
-       u32 sub_line_offset, line_index, line_offset;
-       u64 line_no, table_skip_count, table_offset;
-
-       line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
-       table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
-       line_offset = idt->line_offset[line_index]
-               * mmio->line_size;
-       table_offset = table_skip_count * mmio->table_size;
-
-       return mmio->base_offset + line_offset + table_offset + sub_line_offset;
-}
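-
-/*
- * A worked example of the translation above, with made-up interleave
- * parameters: line_size = 256, num_lines = 2, interleave_ways = 2 (so
- * table_size = 2 * 2 * 256 = 1024) and idt->line_offset = { 0, 2 }.
- * For offset = 5000:
- *
- *   line_no = 5000 / 256 = 19, sub_line_offset = 136
- *   table_skip_count = 19 / 2 = 9, line_index = 1
- *   line_offset = 2 * 256 = 512, table_offset = 9 * 1024 = 9216
- *
- * yielding base_offset + 512 + 9216 + 136 = base_offset + 9864.
- */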
-
-static void wmb_blk(struct nfit_blk *nfit_blk)
-{
-       if (nfit_blk->nvdimm_flush) {
-               /*
-                * The first wmb() is needed to 'sfence' all previous writes
-                * such that they are architecturally visible for the platform
-                * buffer flush.  Note that we've already arranged for pmem
-                * writes to avoid the cache via arch_memcpy_to_pmem().  The
-                * final wmb() ensures ordering for the NVDIMM flush write.
-                */
-               wmb();
-               writeq(1, nfit_blk->nvdimm_flush);
-               wmb();
-       } else
-               wmb_pmem();
-}
-
-static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
-{
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-       u64 offset = nfit_blk->stat_offset + mmio->size * bw;
-
-       if (mmio->num_lines)
-               offset = to_interleave_offset(offset, mmio);
-
-       return readl(mmio->addr.base + offset);
-}
-
-static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
-               resource_size_t dpa, unsigned int len, unsigned int write)
-{
-       u64 cmd, offset;
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-
-       enum {
-               BCW_OFFSET_MASK = (1ULL << 48) - 1,
-               BCW_LEN_SHIFT = 48,
-               BCW_LEN_MASK = (1ULL << 8) - 1,
-               BCW_CMD_SHIFT = 56,
-       };
-
-       cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
-       len = len >> L1_CACHE_SHIFT;
-       cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
-       cmd |= ((u64) write) << BCW_CMD_SHIFT;
-
-       offset = nfit_blk->cmd_offset + mmio->size * bw;
-       if (mmio->num_lines)
-               offset = to_interleave_offset(offset, mmio);
-
-       writeq(cmd, mmio->addr.base + offset);
-       wmb_blk(nfit_blk);
-
-       if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
-               readq(mmio->addr.base + offset);
-}
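-
-/*
- * Encoding example for the block-control-window command above, assuming
- * x86's L1_CACHE_SHIFT of 6: dpa = 0x2040 and len = 128 give a cache-line
- * offset of 0x81 and a length of 2 lines, so a write command encodes as
- * (1ULL << 56) | (2ULL << 48) | 0x81 == 0x0102000000000081.
- */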
-
-static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
-               resource_size_t dpa, void *iobuf, size_t len, int rw,
-               unsigned int lane)
-{
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-       unsigned int copied = 0;
-       u64 base_offset;
-       int rc;
-
-       base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
-               + lane * mmio->size;
-       write_blk_ctl(nfit_blk, lane, dpa, len, rw);
-       while (len) {
-               unsigned int c;
-               u64 offset;
-
-               if (mmio->num_lines) {
-                       u32 line_offset;
-
-                       offset = to_interleave_offset(base_offset + copied,
-                                       mmio);
-                       div_u64_rem(offset, mmio->line_size, &line_offset);
-                       c = min_t(size_t, len, mmio->line_size - line_offset);
-               } else {
-                       offset = base_offset + nfit_blk->bdw_offset;
-                       c = len;
-               }
-
-               if (rw)
-                       memcpy_to_pmem(mmio->addr.aperture + offset,
-                                       iobuf + copied, c);
-               else {
-                       if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
-                               mmio_flush_range((void __force *)
-                                       mmio->addr.aperture + offset, c);
-
-                       memcpy_from_pmem(iobuf + copied,
-                                       mmio->addr.aperture + offset, c);
-               }
-
-               copied += c;
-               len -= c;
-       }
-
-       if (rw)
-               wmb_blk(nfit_blk);
-
-       rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
-       return rc;
-}
-
-static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
-               resource_size_t dpa, void *iobuf, u64 len, int rw)
-{
-       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-       struct nd_region *nd_region = nfit_blk->nd_region;
-       unsigned int lane, copied = 0;
-       int rc = 0;
-
-       lane = nd_region_acquire_lane(nd_region);
-       while (len) {
-               u64 c = min(len, mmio->size);
-
-               rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
-                               iobuf + copied, c, rw, lane);
-               if (rc)
-                       break;
-
-               copied += c;
-               len -= c;
-       }
-       nd_region_release_lane(nd_region, lane);
-
-       return rc;
-}
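-
-/*
- * I/O larger than one block-data-window aperture is simply split: e.g.,
- * with a hypothetical mmio->size of 8K, a 20K request becomes three
- * acpi_nfit_blk_single_io() calls (8K + 8K + 4K) on the same lane.
- */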
-
-static void nfit_spa_mapping_release(struct kref *kref)
-{
-       struct nfit_spa_mapping *spa_map = to_spa_map(kref);
-       struct acpi_nfit_system_address *spa = spa_map->spa;
-       struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-       dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-       if (spa_map->type == SPA_MAP_APERTURE)
-               memunmap((void __force *)spa_map->addr.aperture);
-       else
-               iounmap(spa_map->addr.base);
-       release_mem_region(spa->address, spa->length);
-       list_del(&spa_map->list);
-       kfree(spa_map);
-}
-
-static struct nfit_spa_mapping *find_spa_mapping(
-               struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_spa_mapping *spa_map;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-       list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
-               if (spa_map->spa == spa)
-                       return spa_map;
-
-       return NULL;
-}
-
-static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_spa_mapping *spa_map;
-
-       mutex_lock(&acpi_desc->spa_map_mutex);
-       spa_map = find_spa_mapping(acpi_desc, spa);
-
-       if (spa_map)
-               kref_put(&spa_map->kref, nfit_spa_mapping_release);
-       mutex_unlock(&acpi_desc->spa_map_mutex);
-}
-
-static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-       resource_size_t start = spa->address;
-       resource_size_t n = spa->length;
-       struct nfit_spa_mapping *spa_map;
-       struct resource *res;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-
-       spa_map = find_spa_mapping(acpi_desc, spa);
-       if (spa_map) {
-               kref_get(&spa_map->kref);
-               return spa_map->addr.base;
-       }
-
-       spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
-       if (!spa_map)
-               return NULL;
-
-       INIT_LIST_HEAD(&spa_map->list);
-       spa_map->spa = spa;
-       kref_init(&spa_map->kref);
-       spa_map->acpi_desc = acpi_desc;
-
-       res = request_mem_region(start, n, dev_name(acpi_desc->dev));
-       if (!res)
-               goto err_mem;
-
-       spa_map->type = type;
-       if (type == SPA_MAP_APERTURE)
-               spa_map->addr.aperture = (void __pmem *)memremap(start, n,
-                                                       ARCH_MEMREMAP_PMEM);
-       else
-               spa_map->addr.base = ioremap_nocache(start, n);
-
-       if (!spa_map->addr.base)
-               goto err_map;
-
-       list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-       return spa_map->addr.base;
-
- err_map:
-       release_mem_region(start, n);
- err_mem:
-       kfree(spa_map);
-       return NULL;
-}
-
-/**
- * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
- * @acpi_desc: nfit driver context that provided the spa table entry
- * @spa: spa table entry to map
- * @type: aperture or control region
- *
- * In the case where block-data-window apertures and
- * dimm-control-regions are interleaved they will end up sharing a
- * single request_mem_region() + ioremap() for the address range.  In
- * the style of devm, nfit_spa_map() mappings are automatically dropped
- * when all region devices referencing the same mapping are disabled /
- * unbound.
- */
-static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-       void __iomem *iomem;
-
-       mutex_lock(&acpi_desc->spa_map_mutex);
-       iomem = __nfit_spa_map(acpi_desc, spa, type);
-       mutex_unlock(&acpi_desc->spa_map_mutex);
-
-       return iomem;
-}
-
-static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
-               struct acpi_nfit_interleave *idt, u16 interleave_ways)
-{
-       if (idt) {
-               mmio->num_lines = idt->line_count;
-               mmio->line_size = idt->line_size;
-               if (interleave_ways == 0)
-                       return -ENXIO;
-               mmio->table_size = mmio->num_lines * interleave_ways
-                       * mmio->line_size;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
-{
-       struct nd_cmd_dimm_flags flags;
-       int rc;
-
-       memset(&flags, 0, sizeof(flags));
-       rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
-                       sizeof(flags), NULL);
-
-       if (rc >= 0 && flags.status == 0)
-               nfit_blk->dimm_flags = flags.flags;
-       else if (rc == -ENOTTY) {
-               /* fall back to a conservative default */
-               nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
-               rc = 0;
-       } else
-               rc = -ENXIO;
-
-       return rc;
-}
-
-static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
-               struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-       struct nfit_flush *nfit_flush;
-       struct nfit_blk_mmio *mmio;
-       struct nfit_blk *nfit_blk;
-       struct nfit_mem *nfit_mem;
-       struct nvdimm *nvdimm;
-       int rc;
-
-       nvdimm = nd_blk_region_to_dimm(ndbr);
-       nfit_mem = nvdimm_provider_data(nvdimm);
-       if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-               dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
-                               nfit_mem ? "" : " nfit_mem",
-                               (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
-                               (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
-               return -ENXIO;
-       }
-
-       nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
-       if (!nfit_blk)
-               return -ENOMEM;
-       nd_blk_region_set_provider_data(ndbr, nfit_blk);
-       nfit_blk->nd_region = to_nd_region(dev);
-
-       /* map block aperture memory */
-       nfit_blk->bdw_offset = nfit_mem->bdw->offset;
-       mmio = &nfit_blk->mmio[BDW];
-       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
-                       SPA_MAP_APERTURE);
-       if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
-                               nvdimm_name(nvdimm));
-               return -ENOMEM;
-       }
-       mmio->size = nfit_mem->bdw->size;
-       mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
-       mmio->idt = nfit_mem->idt_bdw;
-       mmio->spa = nfit_mem->spa_bdw;
-       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
-                       nfit_mem->memdev_bdw->interleave_ways);
-       if (rc) {
-               dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       /* map block control memory */
-       nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
-       nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
-       mmio = &nfit_blk->mmio[DCR];
-       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
-                       SPA_MAP_CONTROL);
-       if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
-                               nvdimm_name(nvdimm));
-               return -ENOMEM;
-       }
-       mmio->size = nfit_mem->dcr->window_size;
-       mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
-       mmio->idt = nfit_mem->idt_dcr;
-       mmio->spa = nfit_mem->spa_dcr;
-       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
-                       nfit_mem->memdev_dcr->interleave_ways);
-       if (rc) {
-               dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
-       if (rc < 0) {
-               dev_dbg(dev, "%s: %s failed to get DIMM flags\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       nfit_flush = nfit_mem->nfit_flush;
-       if (nfit_flush && nfit_flush->flush->hint_count != 0) {
-               nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
-                               nfit_flush->flush->hint_address[0], 8);
-               if (!nfit_blk->nvdimm_flush)
-                       return -ENOMEM;
-       }
-
-       if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
-               dev_warn(dev, "unable to guarantee persistence of writes\n");
-
-       if (mmio->line_size == 0)
-               return 0;
-
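-       /*
-        * The 8-byte control and status registers must not straddle an
-        * interleave line: e.g., with a line_size of 256, an offset of
-        * 252 within the line fails (252 + 8 > 256) because the register
-        * would span two non-contiguous system addresses.
-        */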
-       if ((u32) nfit_blk->cmd_offset % mmio->line_size
-                       + 8 > mmio->line_size) {
-               dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
-               return -ENXIO;
-       } else if ((u32) nfit_blk->stat_offset % mmio->line_size
-                       + 8 > mmio->line_size) {
-               dev_dbg(dev, "stat_offset crosses interleave boundary\n");
-               return -ENXIO;
-       }
-
-       return 0;
-}
-
-static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
-               struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-       int i;
-
-       if (!nfit_blk)
-               return; /* never enabled */
-
-       /* auto-free BLK spa mappings */
-       for (i = 0; i < 2; i++) {
-               struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
-
-               if (mmio->addr.base)
-                       nfit_spa_unmap(acpi_desc, mmio->spa);
-       }
-       nd_blk_region_set_provider_data(ndbr, NULL);
-       /* devm will free nfit_blk */
-}
-
-static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
-               struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       int cmd_rc, rc;
-
-       cmd->address = spa->address;
-       cmd->length = spa->length;
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
-                       sizeof(*cmd), &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
-{
-       int rc;
-       int cmd_rc;
-       struct nd_cmd_ars_start ars_start;
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
-       memset(&ars_start, 0, sizeof(ars_start));
-       ars_start.address = spa->address;
-       ars_start.length = spa->length;
-       if (nfit_spa_type(spa) == NFIT_SPA_PM)
-               ars_start.type = ND_ARS_PERSISTENT;
-       else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
-               ars_start.type = ND_ARS_VOLATILE;
-       else
-               return -ENOTTY;
-
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
-                       sizeof(ars_start), &cmd_rc);
-
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_continue(struct acpi_nfit_desc *acpi_desc)
-{
-       int rc, cmd_rc;
-       struct nd_cmd_ars_start ars_start;
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
-
-       memset(&ars_start, 0, sizeof(ars_start));
-       ars_start.address = ars_status->restart_address;
-       ars_start.length = ars_status->restart_length;
-       ars_start.type = ars_status->type;
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
-                       sizeof(ars_start), &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
-       int rc, cmd_rc;
-
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-                       acpi_desc->ars_status_size, &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
-               struct nd_cmd_ars_status *ars_status)
-{
-       int rc;
-       u32 i;
-
-       for (i = 0; i < ars_status->num_records; i++) {
-               rc = nvdimm_bus_add_poison(nvdimm_bus,
-                               ars_status->records[i].err_address,
-                               ars_status->records[i].length);
-               if (rc)
-                       return rc;
-       }
-
-       return 0;
-}
-
-static void acpi_nfit_remove_resource(void *data)
-{
-       struct resource *res = data;
-
-       remove_resource(res);
-}
-
-static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
-               struct nd_region_desc *ndr_desc)
-{
-       struct resource *res, *nd_res = ndr_desc->res;
-       int is_pmem, ret;
-
-       /* No operation if the region is already registered as PMEM */
-       is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
-                               IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
-       if (is_pmem == REGION_INTERSECTS)
-               return 0;
-
-       res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
-       if (!res)
-               return -ENOMEM;
-
-       res->name = "Persistent Memory";
-       res->start = nd_res->start;
-       res->end = nd_res->end;
-       res->flags = IORESOURCE_MEM;
-       res->desc = IORES_DESC_PERSISTENT_MEMORY;
-
-       ret = insert_resource(&iomem_resource, res);
-       if (ret)
-               return ret;
-
-       ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
-       if (ret) {
-               remove_resource(res);
-               return ret;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
-               struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
-               struct acpi_nfit_memory_map *memdev,
-               struct nfit_spa *nfit_spa)
-{
-       struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
-                       memdev->device_handle);
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nd_blk_region_desc *ndbr_desc;
-       struct nfit_mem *nfit_mem;
-       int blk_valid = 0;
-
-       if (!nvdimm) {
-               dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
-                               spa->range_index, memdev->device_handle);
-               return -ENODEV;
-       }
-
-       nd_mapping->nvdimm = nvdimm;
-       switch (nfit_spa_type(spa)) {
-       case NFIT_SPA_PM:
-       case NFIT_SPA_VOLATILE:
-               nd_mapping->start = memdev->address;
-               nd_mapping->size = memdev->region_size;
-               break;
-       case NFIT_SPA_DCR:
-               nfit_mem = nvdimm_provider_data(nvdimm);
-               if (!nfit_mem || !nfit_mem->bdw) {
-                       dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
-                                       spa->range_index, nvdimm_name(nvdimm));
-               } else {
-                       nd_mapping->size = nfit_mem->bdw->capacity;
-                       nd_mapping->start = nfit_mem->bdw->start_address;
-                       ndr_desc->num_lanes = nfit_mem->bdw->windows;
-                       blk_valid = 1;
-               }
-
-               ndr_desc->nd_mapping = nd_mapping;
-               ndr_desc->num_mappings = blk_valid;
-               ndbr_desc = to_blk_region_desc(ndr_desc);
-               ndbr_desc->enable = acpi_nfit_blk_region_enable;
-               ndbr_desc->disable = acpi_nfit_blk_region_disable;
-               ndbr_desc->do_io = acpi_desc->blk_do_io;
-               nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       return -ENOMEM;
-               break;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nd_blk_region_desc ndbr_desc;
-       struct nd_region_desc *ndr_desc;
-       struct nfit_memdev *nfit_memdev;
-       struct nvdimm_bus *nvdimm_bus;
-       struct resource res;
-       int count = 0, rc;
-
-       if (nfit_spa->nd_region)
-               return 0;
-
-       if (spa->range_index == 0) {
-               dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-                               __func__);
-               return 0;
-       }
-
-       memset(&res, 0, sizeof(res));
-       memset(&nd_mappings, 0, sizeof(nd_mappings));
-       memset(&ndbr_desc, 0, sizeof(ndbr_desc));
-       res.start = spa->address;
-       res.end = res.start + spa->length - 1;
-       ndr_desc = &ndbr_desc.ndr_desc;
-       ndr_desc->res = &res;
-       ndr_desc->provider_data = nfit_spa;
-       ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
-       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
-               ndr_desc->numa_node = acpi_map_pxm_to_online_node(
-                                               spa->proximity_domain);
-       else
-               ndr_desc->numa_node = NUMA_NO_NODE;
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
-               struct nd_mapping *nd_mapping;
-
-               if (memdev->range_index != spa->range_index)
-                       continue;
-               if (count >= ND_MAX_MAPPINGS) {
-                       dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
-                                       spa->range_index, ND_MAX_MAPPINGS);
-                       return -ENXIO;
-               }
-               nd_mapping = &nd_mappings[count++];
-               rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
-                               memdev, nfit_spa);
-               if (rc)
-                       goto out;
-       }
-
-       ndr_desc->nd_mapping = nd_mappings;
-       ndr_desc->num_mappings = count;
-       rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
-       if (rc)
-               goto out;
-
-       nvdimm_bus = acpi_desc->nvdimm_bus;
-       if (nfit_spa_type(spa) == NFIT_SPA_PM) {
-               rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
-               if (rc) {
-                       dev_warn(acpi_desc->dev,
-                               "failed to insert pmem resource to iomem: %d\n",
-                               rc);
-                       goto out;
-               }
-
-               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       rc = -ENOMEM;
-       } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
-               nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       rc = -ENOMEM;
-       }
-
- out:
-       if (rc)
-               dev_err(acpi_desc->dev, "failed to register spa range %d\n",
-                               nfit_spa->spa->range_index);
-       return rc;
-}
-
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-               u32 max_ars)
-{
-       struct device *dev = acpi_desc->dev;
-       struct nd_cmd_ars_status *ars_status;
-
-       if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-               memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
-               return 0;
-       }
-
-       if (acpi_desc->ars_status)
-               devm_kfree(dev, acpi_desc->ars_status);
-       acpi_desc->ars_status = NULL;
-       ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
-       if (!ars_status)
-               return -ENOMEM;
-       acpi_desc->ars_status = ars_status;
-       acpi_desc->ars_status_size = max_ars;
-       return 0;
-}
-
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       int rc;
-
-       if (!nfit_spa->max_ars) {
-               struct nd_cmd_ars_cap ars_cap;
-
-               memset(&ars_cap, 0, sizeof(ars_cap));
-               rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-               if (rc < 0)
-                       return rc;
-               nfit_spa->max_ars = ars_cap.max_ars_out;
-               nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-               /* check that the supported scrub types match the spa type */
-               if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-                               ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-                       return -ENOTTY;
-               else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-                               ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-                       return -ENOTTY;
-       }
-
-       if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
-               return -ENOMEM;
-
-       rc = ars_get_status(acpi_desc);
-       if (rc < 0 && rc != -ENOSPC)
-               return rc;
-
-       if (ars_status_process_records(acpi_desc->nvdimm_bus,
-                               acpi_desc->ars_status))
-               return -ENOMEM;
-
-       return 0;
-}
-
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       unsigned int overflow_retry = scrub_overflow_abort;
-       u64 init_ars_start = 0, init_ars_len = 0;
-       struct device *dev = acpi_desc->dev;
-       unsigned int tmo = scrub_timeout;
-       int rc;
-
-       if (nfit_spa->ars_done || !nfit_spa->nd_region)
-               return;
-
-       rc = ars_start(acpi_desc, nfit_spa);
-       /*
-        * If we timed out the initial scan we'll still be busy here,
-        * and will wait another timeout before giving up permanently.
-        */
-       if (rc < 0 && rc != -EBUSY)
-               return;
-
-       do {
-               u64 ars_start, ars_len;
-
-               if (acpi_desc->cancel)
-                       break;
-               rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-               if (rc == -ENOTTY)
-                       break;
-               if (rc == -EBUSY && !tmo) {
-                       dev_warn(dev, "range %d ars timeout, aborting\n",
-                                       spa->range_index);
-                       break;
-               }
-
-               if (rc == -EBUSY) {
-                       /*
-                        * Note, entries may be appended to the list
-                        * while the lock is dropped, but the workqueue
-                        * being active prevents entries being deleted /
-                        * freed.
-                        */
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       mutex_lock(&acpi_desc->init_mutex);
-                       continue;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       if (!init_ars_len) {
-                               init_ars_len = acpi_desc->ars_status->length;
-                               init_ars_start = acpi_desc->ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-               }
-
-               if (rc < 0) {
-                       dev_warn(dev, "range %d ars continuation failed\n",
-                                       spa->range_index);
-                       break;
-               }
-
-               if (init_ars_len) {
-                       ars_start = init_ars_start;
-                       ars_len = init_ars_len;
-               } else {
-                       ars_start = acpi_desc->ars_status->address;
-                       ars_len = acpi_desc->ars_status->length;
-               }
-               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-                               spa->range_index, ars_start, ars_len);
-               /* notify the region about new poison entries */
-               nvdimm_region_notify(nfit_spa->nd_region,
-                               NVDIMM_REVALIDATE_POISON);
-               break;
-       } while (1);
-}
-
-static void acpi_nfit_scrub(struct work_struct *work)
-{
-       struct device *dev;
-       u64 init_scrub_length = 0;
-       struct nfit_spa *nfit_spa;
-       u64 init_scrub_address = 0;
-       bool init_ars_done = false;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned int tmo = scrub_timeout;
-       unsigned int overflow_retry = scrub_overflow_abort;
-
-       acpi_desc = container_of(work, typeof(*acpi_desc), work);
-       dev = acpi_desc->dev;
-
-       /*
-        * We scrub in 2 phases.  The first phase waits for any platform
-        * firmware initiated scrubs to complete and then we go search for the
-        * affected spa regions to mark them scanned.  In the second phase we
-        * initiate a directed scrub for every range that was not scrubbed in
-        * phase 1.
-        */
-
-       /* process platform firmware initiated scrubs */
- retry:
-       mutex_lock(&acpi_desc->init_mutex);
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct nd_cmd_ars_status *ars_status;
-               struct acpi_nfit_system_address *spa;
-               u64 ars_start, ars_len;
-               int rc;
-
-               if (acpi_desc->cancel)
-                       break;
-
-               if (nfit_spa->nd_region)
-                       continue;
-
-               if (init_ars_done) {
-                       /*
-                        * No need to re-query, we're now just
-                        * reconciling all the ranges covered by the
-                        * initial scrub
-                        */
-                       rc = 0;
-               } else
-                       rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-               if (rc == -ENOTTY) {
-                       /* no ars capability, just register spa and move on */
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-                       continue;
-               }
-
-               if (rc == -EBUSY && !tmo) {
-                       /* fallthrough to directed scrub in phase 2 */
-                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-                       break;
-               } else if (rc == -EBUSY) {
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       goto retry;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       ars_status = acpi_desc->ars_status;
-                       /*
-                        * Record the original scrub range, so that we
-                        * can recall all the ranges impacted by the
-                        * initial scrub.
-                        */
-                       if (!init_scrub_length) {
-                               init_scrub_length = ars_status->length;
-                               init_scrub_address = ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-                       if (rc == 0) {
-                               mutex_unlock(&acpi_desc->init_mutex);
-                               goto retry;
-                       }
-               }
-
-               if (rc < 0) {
-                       /*
-                        * Initial scrub failed, we'll give it one more
-                        * try below...
-                        */
-                       break;
-               }
-
-               /* We got some final results, record completed ranges */
-               ars_status = acpi_desc->ars_status;
-               if (init_scrub_length) {
-                       ars_start = init_scrub_address;
-                       ars_len = init_scrub_length;
-               } else {
-                       ars_start = ars_status->address;
-                       ars_len = ars_status->length;
-               }
-               spa = nfit_spa->spa;
-
-               if (!init_ars_done) {
-                       init_ars_done = true;
-                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-                                       ars_start, ars_len);
-               }
-               if (ars_start <= spa->address && ars_start + ars_len
-                               >= spa->address + spa->length)
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-       }
-
-       /*
-        * For all the ranges not covered by an initial scrub we still
-        * want to see if there are errors, but it's ok to discover them
-        * asynchronously.
-        */
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               /*
-                * Flag all the ranges that still need scrubbing, but
-                * register them now to make data available.
-                */
-               if (nfit_spa->nd_region)
-                       nfit_spa->ars_done = 1;
-               else
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-       }
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-       mutex_unlock(&acpi_desc->init_mutex);
-}
-
-static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_spa *nfit_spa;
-       int rc;
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-                       /* BLK regions don't need to wait for ars results */
-                       rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-                       if (rc)
-                               return rc;
-               }
-
-       queue_work(nfit_wq, &acpi_desc->work);
-       return 0;
-}
-
-static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev)
-{
-       struct device *dev = acpi_desc->dev;
-
-       if (!list_empty(&prev->spas) ||
-                       !list_empty(&prev->memdevs) ||
-                       !list_empty(&prev->dcrs) ||
-                       !list_empty(&prev->bdws) ||
-                       !list_empty(&prev->idts) ||
-                       !list_empty(&prev->flushes)) {
-               dev_err(dev, "new nfit deletes entries (unsupported)\n");
-               return -ENXIO;
-       }
-       return 0;
-}
-
-int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
-{
-       struct device *dev = acpi_desc->dev;
-       struct nfit_table_prev prev;
-       const void *end;
-       u8 *data;
-       int rc;
-
-       mutex_lock(&acpi_desc->init_mutex);
-
-       INIT_LIST_HEAD(&prev.spas);
-       INIT_LIST_HEAD(&prev.memdevs);
-       INIT_LIST_HEAD(&prev.dcrs);
-       INIT_LIST_HEAD(&prev.bdws);
-       INIT_LIST_HEAD(&prev.idts);
-       INIT_LIST_HEAD(&prev.flushes);
-
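-       /*
-        * Move all previously parsed tables to @prev; add_table() below
-        * re-links any entry that reappears in the new NFIT, so anything
-        * left on @prev afterwards is a deletion.
-        */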
-       list_cut_position(&prev.spas, &acpi_desc->spas,
-                               acpi_desc->spas.prev);
-       list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
-                               acpi_desc->memdevs.prev);
-       list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
-                               acpi_desc->dcrs.prev);
-       list_cut_position(&prev.bdws, &acpi_desc->bdws,
-                               acpi_desc->bdws.prev);
-       list_cut_position(&prev.idts, &acpi_desc->idts,
-                               acpi_desc->idts.prev);
-       list_cut_position(&prev.flushes, &acpi_desc->flushes,
-                               acpi_desc->flushes.prev);
-
-       data = (u8 *) acpi_desc->nfit;
-       end = data + sz;
-       while (!IS_ERR_OR_NULL(data))
-               data = add_table(acpi_desc, &prev, data, end);
-
-       if (IS_ERR(data)) {
-               dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-                               PTR_ERR(data));
-               rc = PTR_ERR(data);
-               goto out_unlock;
-       }
-
-       rc = acpi_nfit_check_deletions(acpi_desc, &prev);
-       if (rc)
-               goto out_unlock;
-
-       if (nfit_mem_init(acpi_desc) != 0) {
-               rc = -ENOMEM;
-               goto out_unlock;
-       }
-
-       acpi_nfit_init_dsms(acpi_desc);
-
-       rc = acpi_nfit_register_dimms(acpi_desc);
-       if (rc)
-               goto out_unlock;
-
-       rc = acpi_nfit_register_regions(acpi_desc);
-
- out_unlock:
-       mutex_unlock(&acpi_desc->init_mutex);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_init);
-
-struct acpi_nfit_flush_work {
-       struct work_struct work;
-       struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-       struct acpi_nfit_flush_work *flush;
-
-       flush = container_of(work, typeof(*flush), work);
-       complete(&flush->cmp);
-}
-
-static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-       struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_flush_work flush;
-
-       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
-       device_lock(dev);
-       device_unlock(dev);
-
-       /*
-        * Scrub work could take 10s of seconds, userspace may give up so we
-        * need to be interruptible while waiting.
-        */
-       INIT_WORK_ONSTACK(&flush.work, flush_probe);
-       init_completion(&flush.cmp);
-       queue_work(nfit_wq, &flush.work);
-       return wait_for_completion_interruptible(&flush.cmp);
-}
-
-static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, unsigned int cmd)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-
-       if (nvdimm)
-               return 0;
-       if (cmd != ND_CMD_ARS_START)
-               return 0;
-
-       /*
-        * The kernel and userspace may race to initiate a scrub, but
-        * the scrub thread is prepared to lose that initial race.  It
-        * just needs a guarantee that any ARS it initiates is not
-        * interrupted by an intervening start request from userspace.
-        */
-       if (work_busy(&acpi_desc->work))
-               return -EBUSY;
-
-       return 0;
-}
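From userspace, this gate surfaces as -EBUSY on an ARS start request. A
hypothetical sketch of that caller side, assuming a /dev/ndctl0 bus node and
placeholder range values (none of this is code from the patch):

    /* hypothetical userspace sketch: request an ARS, tolerate the race */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/ndctl.h>

    static int start_ars(unsigned long long address, unsigned long long length)
    {
            struct nd_cmd_ars_start ars = {
                    .address = address,
                    .length = length,
                    .type = ND_ARS_PERSISTENT,
            };
            int fd = open("/dev/ndctl0", O_RDWR);

            if (fd < 0)
                    return -errno;
            if (ioctl(fd, ND_IOCTL_ARS_START, &ars) < 0 && errno == EBUSY)
                    fprintf(stderr, "scrub in flight, retry later\n");
            close(fd);
            return 0;
    }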
-
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc;
-
-       dev_set_drvdata(dev, acpi_desc);
-       acpi_desc->dev = dev;
-       acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
-       nd_desc = &acpi_desc->nd_desc;
-       nd_desc->provider_name = "ACPI.NFIT";
-       nd_desc->ndctl = acpi_nfit_ctl;
-       nd_desc->flush_probe = acpi_nfit_flush_probe;
-       nd_desc->clear_to_send = acpi_nfit_clear_to_send;
-       nd_desc->attr_groups = acpi_nfit_attribute_groups;
-
-       INIT_LIST_HEAD(&acpi_desc->spa_maps);
-       INIT_LIST_HEAD(&acpi_desc->spas);
-       INIT_LIST_HEAD(&acpi_desc->dcrs);
-       INIT_LIST_HEAD(&acpi_desc->bdws);
-       INIT_LIST_HEAD(&acpi_desc->idts);
-       INIT_LIST_HEAD(&acpi_desc->flushes);
-       INIT_LIST_HEAD(&acpi_desc->memdevs);
-       INIT_LIST_HEAD(&acpi_desc->dimms);
-       mutex_init(&acpi_desc->spa_map_mutex);
-       mutex_init(&acpi_desc->init_mutex);
-       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
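Together with acpi_nfit_init(), this export is the whole embedding contract;
acpi_nfit_add() below follows it, and any second consumer looks much the same.
A minimal sketch, assuming a caller-supplied payload (my_nfit_probe and
nfit_blob are illustrative names):

    /* minimal embedder sketch (hypothetical caller) */
    static int my_nfit_probe(struct device *dev, void *nfit_blob, acpi_size sz)
    {
            struct acpi_nfit_desc *acpi_desc;
            int rc;

            acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
            if (!acpi_desc)
                    return -ENOMEM;
            acpi_nfit_desc_init(acpi_desc, dev);
            acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
            if (!acpi_desc->nvdimm_bus)
                    return -ENOMEM;
            acpi_desc->nfit = nfit_blob;    /* payload, minus the ACPI header */
            rc = acpi_nfit_init(acpi_desc, sz);
            if (rc)
                    nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
            return rc;
    }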
-
-static int acpi_nfit_add(struct acpi_device *adev)
-{
-       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-       struct acpi_nfit_desc *acpi_desc;
-       struct device *dev = &adev->dev;
-       struct acpi_table_header *tbl;
-       acpi_status status = AE_OK;
-       acpi_size sz;
-       int rc;
-
-       status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
-       if (ACPI_FAILURE(status)) {
-               /* This is ok, we could have an nvdimm hotplugged later */
-               dev_dbg(dev, "failed to find NFIT at startup\n");
-               return 0;
-       }
-
-       acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
-       if (!acpi_desc)
-               return -ENOMEM;
-       acpi_nfit_desc_init(acpi_desc, &adev->dev);
-       acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-       if (!acpi_desc->nvdimm_bus)
-               return -ENOMEM;
-
-       /*
-        * Save the acpi header for later and then skip it,
-        * making nfit point to the first nfit table header.
-        */
-       acpi_desc->acpi_header = *tbl;
-       acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
-       sz -= sizeof(struct acpi_table_nfit);
-
-       /* Evaluate _FIT and override with that if present */
-       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
-       if (ACPI_SUCCESS(status) && buf.length > 0) {
-               union acpi_object *obj;
-               /*
-                * Adjust for the acpi_object header of the _FIT
-                */
-               obj = buf.pointer;
-               if (obj->type == ACPI_TYPE_BUFFER) {
-                       acpi_desc->nfit =
-                               (struct acpi_nfit_header *)obj->buffer.pointer;
-                       sz = obj->buffer.length;
-               } else
-                       dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-                                __func__, (int) obj->type);
-       }
-
-       rc = acpi_nfit_init(acpi_desc, sz);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-               return rc;
-       }
-       return 0;
-}
-
-static int acpi_nfit_remove(struct acpi_device *adev)
-{
-       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-
-       acpi_desc->cancel = 1;
-       flush_workqueue(nfit_wq);
-       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-       return 0;
-}
-
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
-{
-       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-       struct acpi_nfit_header *nfit_saved;
-       union acpi_object *obj;
-       struct device *dev = &adev->dev;
-       acpi_status status;
-       int ret;
-
-       dev_dbg(dev, "%s: event: %d\n", __func__, event);
-
-       device_lock(dev);
-       if (!dev->driver) {
-               /* dev->driver may be null if we're being removed */
-               dev_dbg(dev, "%s: no driver found for dev\n", __func__);
-               goto out_unlock;
-       }
-
-       if (!acpi_desc) {
-               acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
-               if (!acpi_desc)
-                       goto out_unlock;
-               acpi_nfit_desc_init(acpi_desc, &adev->dev);
-               acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-               if (!acpi_desc->nvdimm_bus)
-                       goto out_unlock;
-       } else {
-               /*
-                * Finish previous registration before considering new
-                * regions.
-                */
-               flush_workqueue(nfit_wq);
-       }
-
-       /* Evaluate _FIT */
-       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
-       if (ACPI_FAILURE(status)) {
-               dev_err(dev, "failed to evaluate _FIT\n");
-               goto out_unlock;
-       }
-
-       nfit_saved = acpi_desc->nfit;
-       obj = buf.pointer;
-       if (obj->type == ACPI_TYPE_BUFFER) {
-               acpi_desc->nfit =
-                       (struct acpi_nfit_header *)obj->buffer.pointer;
-               ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
-               if (ret) {
-                       /* Merge failed, restore old nfit, and exit */
-                       acpi_desc->nfit = nfit_saved;
-                       dev_err(dev, "failed to merge updated NFIT\n");
-               }
-       } else {
-               /* Bad _FIT, restore old nfit */
-               dev_err(dev, "Invalid _FIT\n");
-       }
-       kfree(buf.pointer);
-
- out_unlock:
-       device_unlock(dev);
-}
-
-static const struct acpi_device_id acpi_nfit_ids[] = {
-       { "ACPI0012", 0 },
-       { "", 0 },
-};
-MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
-
-static struct acpi_driver acpi_nfit_driver = {
-       .name = KBUILD_MODNAME,
-       .ids = acpi_nfit_ids,
-       .ops = {
-               .add = acpi_nfit_add,
-               .remove = acpi_nfit_remove,
-               .notify = acpi_nfit_notify,
-       },
-};
-
-static __init int nfit_init(void)
-{
-       BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
-
-       acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-       acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-       acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-       acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-       acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-
-       nfit_wq = create_singlethread_workqueue("nfit");
-       if (!nfit_wq)
-               return -ENOMEM;
-
-       return acpi_bus_register_driver(&acpi_nfit_driver);
-}
-
-static __exit void nfit_exit(void)
-{
-       acpi_bus_unregister_driver(&acpi_nfit_driver);
-       destroy_workqueue(nfit_wq);
-}
-
-module_init(nfit_init);
-module_exit(nfit_exit);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
deleted file mode 100644 (file)
index 02b9ea1..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * NVDIMM Firmware Interface Table - NFIT
- *
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __NFIT_H__
-#define __NFIT_H__
-#include <linux/workqueue.h>
-#include <linux/libnvdimm.h>
-#include <linux/types.h>
-#include <linux/uuid.h>
-#include <linux/acpi.h>
-#include <acpi/acuuid.h>
-
-/* ACPI 6.1 */
-#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
-
-/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
-#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
-
-/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
-#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
-#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
-
-#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
-               | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
-               | ACPI_NFIT_MEM_NOT_ARMED)
-
-enum nfit_uuids {
-       /* for simplicity alias the uuid index with the family id */
-       NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
-       NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
-       NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
-       NFIT_SPA_VOLATILE,
-       NFIT_SPA_PM,
-       NFIT_SPA_DCR,
-       NFIT_SPA_BDW,
-       NFIT_SPA_VDISK,
-       NFIT_SPA_VCD,
-       NFIT_SPA_PDISK,
-       NFIT_SPA_PCD,
-       NFIT_DEV_BUS,
-       NFIT_UUID_MAX,
-};
-
-/*
- * Region format interface codes are stored with the interface as the
- * LSB and the function as the MSB.
- */
-#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
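A short decode sketch of that packing (local names are illustrative; the
driver reads the code via le16_to_cpu(), as in format_show()):

    /* sketch: split a format interface code into its two bytes */
    u16 fic = le16_to_cpu(dcr->code);
    u8 interface = fic & 0xff;      /* interface lives in the LSB */
    u8 function = fic >> 8;         /* function in the MSB: 0x1, 0x2, 0x3 above */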
-
-enum {
-       NFIT_BLK_READ_FLUSH = 1,
-       NFIT_BLK_DCR_LATCH = 2,
-       NFIT_ARS_STATUS_DONE = 0,
-       NFIT_ARS_STATUS_BUSY = 1 << 16,
-       NFIT_ARS_STATUS_NONE = 2 << 16,
-       NFIT_ARS_STATUS_INTR = 3 << 16,
-       NFIT_ARS_START_BUSY = 6,
-       NFIT_ARS_CAP_NONE = 1,
-       NFIT_ARS_F_OVERFLOW = 1,
-       NFIT_ARS_TIMEOUT = 90,
-};
-
-struct nfit_spa {
-       struct acpi_nfit_system_address *spa;
-       struct list_head list;
-       struct nd_region *nd_region;
-       unsigned int ars_done:1;
-       u32 clear_err_unit;
-       u32 max_ars;
-};
-
-struct nfit_dcr {
-       struct acpi_nfit_control_region *dcr;
-       struct list_head list;
-};
-
-struct nfit_bdw {
-       struct acpi_nfit_data_region *bdw;
-       struct list_head list;
-};
-
-struct nfit_idt {
-       struct acpi_nfit_interleave *idt;
-       struct list_head list;
-};
-
-struct nfit_flush {
-       struct acpi_nfit_flush_address *flush;
-       struct list_head list;
-};
-
-struct nfit_memdev {
-       struct acpi_nfit_memory_map *memdev;
-       struct list_head list;
-};
-
-/* assembled tables for a given dimm/memory-device */
-struct nfit_mem {
-       struct nvdimm *nvdimm;
-       struct acpi_nfit_memory_map *memdev_dcr;
-       struct acpi_nfit_memory_map *memdev_pmem;
-       struct acpi_nfit_memory_map *memdev_bdw;
-       struct acpi_nfit_control_region *dcr;
-       struct acpi_nfit_data_region *bdw;
-       struct acpi_nfit_system_address *spa_dcr;
-       struct acpi_nfit_system_address *spa_bdw;
-       struct acpi_nfit_interleave *idt_dcr;
-       struct acpi_nfit_interleave *idt_bdw;
-       struct nfit_flush *nfit_flush;
-       struct list_head list;
-       struct acpi_device *adev;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned long dsm_mask;
-       int family;
-};
-
-struct acpi_nfit_desc {
-       struct nvdimm_bus_descriptor nd_desc;
-       struct acpi_table_header acpi_header;
-       struct acpi_nfit_header *nfit;
-       struct mutex spa_map_mutex;
-       struct mutex init_mutex;
-       struct list_head spa_maps;
-       struct list_head memdevs;
-       struct list_head flushes;
-       struct list_head dimms;
-       struct list_head spas;
-       struct list_head dcrs;
-       struct list_head bdws;
-       struct list_head idts;
-       struct nvdimm_bus *nvdimm_bus;
-       struct device *dev;
-       struct nd_cmd_ars_status *ars_status;
-       size_t ars_status_size;
-       struct work_struct work;
-       unsigned int cancel:1;
-       unsigned long dimm_cmd_force_en;
-       unsigned long bus_cmd_force_en;
-       int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
-                       void *iobuf, u64 len, int rw);
-};
-
-enum nd_blk_mmio_selector {
-       BDW,
-       DCR,
-};
-
-struct nd_blk_addr {
-       union {
-               void __iomem *base;
-               void __pmem  *aperture;
-       };
-};
-
-struct nfit_blk {
-       struct nfit_blk_mmio {
-               struct nd_blk_addr addr;
-               u64 size;
-               u64 base_offset;
-               u32 line_size;
-               u32 num_lines;
-               u32 table_size;
-               struct acpi_nfit_interleave *idt;
-               struct acpi_nfit_system_address *spa;
-       } mmio[2];
-       struct nd_region *nd_region;
-       u64 bdw_offset; /* post interleave offset */
-       u64 stat_offset;
-       u64 cmd_offset;
-       void __iomem *nvdimm_flush;
-       u32 dimm_flags;
-};
-
-enum spa_map_type {
-       SPA_MAP_CONTROL,
-       SPA_MAP_APERTURE,
-};
-
-struct nfit_spa_mapping {
-       struct acpi_nfit_desc *acpi_desc;
-       struct acpi_nfit_system_address *spa;
-       struct list_head list;
-       struct kref kref;
-       enum spa_map_type type;
-       struct nd_blk_addr addr;
-};
-
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
-{
-       return container_of(kref, struct nfit_spa_mapping, kref);
-}
-
-static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
-               struct nfit_mem *nfit_mem)
-{
-       if (nfit_mem->memdev_dcr)
-               return nfit_mem->memdev_dcr;
-       return nfit_mem->memdev_pmem;
-}
-
-static inline struct acpi_nfit_desc *to_acpi_desc(
-               struct nvdimm_bus_descriptor *nd_desc)
-{
-       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
-#endif /* __NFIT_H__ */
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
new file mode 100644 (file)
index 0000000..dd0d53c
--- /dev/null
@@ -0,0 +1,26 @@
+config ACPI_NFIT
+       tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+       depends on PHYS_ADDR_T_64BIT
+       depends on BLK_DEV
+       depends on ARCH_HAS_MMIO_FLUSH
+       select LIBNVDIMM
+       help
+         Infrastructure to probe ACPI 6 compliant platforms for
+         NVDIMMs (NFIT) and register a libnvdimm device tree.  In
+         addition to storage devices, this also enables libnvdimm to pass
+         ACPI._DSM messages for platform/dimm configuration.
+
+         To compile this driver as a module, choose M here:
+         the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+       bool "NFIT DSM debug"
+       depends on ACPI_NFIT
+       depends on DYNAMIC_DEBUG
+       default n
+       help
+         Enabling this option causes the nfit driver to dump the
+         input and output buffers of _DSM operations on the ACPI0012
+         device and its children.  This can be very verbose, so leave
+         it disabled unless you are debugging a hardware / firmware
+         issue.
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
new file mode 100644 (file)
index 0000000..a407e76
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
new file mode 100644 (file)
index 0000000..8c234dd
--- /dev/null
@@ -0,0 +1,2784 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/list_sort.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sysfs.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <linux/sort.h>
+#include <linux/pmem.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include <asm/cacheflush.h>
+#include "nfit.h"
+
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+static bool force_enable_dimms;
+module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+
+static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
+module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
+
+/* after three payloads of overflow, it's dead, Jim */
+static unsigned int scrub_overflow_abort = 3;
+module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_overflow_abort,
+               "Number of times we overflow ARS results before abort");
+
+static bool disable_vendor_specific;
+module_param(disable_vendor_specific, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_vendor_specific,
+               "Limit commands to the publicly specified set\n");
+
+LIST_HEAD(acpi_descs);
+DEFINE_MUTEX(acpi_desc_lock);
+
+static struct workqueue_struct *nfit_wq;
+
+struct nfit_table_prev {
+       struct list_head spas;
+       struct list_head memdevs;
+       struct list_head dcrs;
+       struct list_head bdws;
+       struct list_head idts;
+       struct list_head flushes;
+};
+
+static u8 nfit_uuid[NFIT_UUID_MAX][16];
+
+const u8 *to_nfit_uuid(enum nfit_uuids id)
+{
+       return nfit_uuid[id];
+}
+EXPORT_SYMBOL(to_nfit_uuid);
+
+static struct acpi_nfit_desc *to_acpi_nfit_desc(
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+       /*
+        * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
+        * acpi_device.
+        */
+       if (!nd_desc->provider_name
+                       || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
+               return NULL;
+
+       return to_acpi_device(acpi_desc->dev);
+}
+
+static int xlat_status(void *buf, unsigned int cmd)
+{
+       struct nd_cmd_clear_error *clear_err;
+       struct nd_cmd_ars_status *ars_status;
+       struct nd_cmd_ars_start *ars_start;
+       struct nd_cmd_ars_cap *ars_cap;
+       u16 flags;
+
+       switch (cmd) {
+       case ND_CMD_ARS_CAP:
+               ars_cap = buf;
+               if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+                       return -ENOTTY;
+
+               /* Command failed */
+               if (ars_cap->status & 0xffff)
+                       return -EIO;
+
+               /* No supported scan types for this range */
+               flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
+               if ((ars_cap->status >> 16 & flags) == 0)
+                       return -ENOTTY;
+               break;
+       case ND_CMD_ARS_START:
+               ars_start = buf;
+               /* ARS is in progress */
+               if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+                       return -EBUSY;
+
+               /* Command failed */
+               if (ars_start->status & 0xffff)
+                       return -EIO;
+               break;
+       case ND_CMD_ARS_STATUS:
+               ars_status = buf;
+               /* Command failed */
+               if (ars_status->status & 0xffff)
+                       return -EIO;
+               /* Check the extended status (upper two bytes) */
+               if (ars_status->status == NFIT_ARS_STATUS_DONE)
+                       return 0;
+
+               /* ARS is in progress */
+               if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+                       return -EBUSY;
+
+               /* No ARS performed for the current boot */
+               if (ars_status->status == NFIT_ARS_STATUS_NONE)
+                       return -EAGAIN;
+
+               /*
+                * ARS interrupted, either we overflowed or some other
+                * agent wants the scan to stop.  If we didn't overflow
+                * then just continue with the returned results.
+                */
+               if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+                       if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
+                               return -ENOSPC;
+                       return 0;
+               }
+
+               /* Unknown status */
+               if (ars_status->status >> 16)
+                       return -EIO;
+               break;
+       case ND_CMD_CLEAR_ERROR:
+               clear_err = buf;
+               if (clear_err->status & 0xffff)
+                       return -EIO;
+               if (!clear_err->cleared)
+                       return -EIO;
+               if (clear_err->length > clear_err->cleared)
+                       return clear_err->cleared;
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
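Every branch above tests the same packing of the ACPI status dword; a minimal
sketch of the two fields (local names are illustrative):

    /* sketch: the status dword that xlat_status() decodes */
    u32 status = ars_status->status;
    u16 cmd_status = status & 0xffff; /* 0 == command success */
    u16 ext_status = status >> 16;    /* command-specific: busy/none/intr */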
+
+static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+               unsigned int buf_len, int *cmd_rc)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+       union acpi_object in_obj, in_buf, *out_obj;
+       const struct nd_cmd_desc *desc = NULL;
+       struct device *dev = acpi_desc->dev;
+       struct nd_cmd_pkg *call_pkg = NULL;
+       const char *cmd_name, *dimm_name;
+       unsigned long cmd_mask, dsm_mask;
+       acpi_handle handle;
+       unsigned int func;
+       const u8 *uuid;
+       u32 offset;
+       int rc, i;
+
+       func = cmd;
+       if (cmd == ND_CMD_CALL) {
+               call_pkg = buf;
+               func = call_pkg->nd_command;
+       }
+
+       if (nvdimm) {
+               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+               struct acpi_device *adev = nfit_mem->adev;
+
+               if (!adev)
+                       return -ENOTTY;
+               if (call_pkg && nfit_mem->family != call_pkg->nd_family)
+                       return -ENOTTY;
+
+               dimm_name = nvdimm_name(nvdimm);
+               cmd_name = nvdimm_cmd_name(cmd);
+               cmd_mask = nvdimm_cmd_mask(nvdimm);
+               dsm_mask = nfit_mem->dsm_mask;
+               desc = nd_cmd_dimm_desc(cmd);
+               uuid = to_nfit_uuid(nfit_mem->family);
+               handle = adev->handle;
+       } else {
+               struct acpi_device *adev = to_acpi_dev(acpi_desc);
+
+               cmd_name = nvdimm_bus_cmd_name(cmd);
+               cmd_mask = nd_desc->cmd_mask;
+               dsm_mask = cmd_mask;
+               desc = nd_cmd_bus_desc(cmd);
+               uuid = to_nfit_uuid(NFIT_DEV_BUS);
+               handle = adev->handle;
+               dimm_name = "bus";
+       }
+
+       if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
+               return -ENOTTY;
+
+       if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
+               return -ENOTTY;
+
+       in_obj.type = ACPI_TYPE_PACKAGE;
+       in_obj.package.count = 1;
+       in_obj.package.elements = &in_buf;
+       in_buf.type = ACPI_TYPE_BUFFER;
+       in_buf.buffer.pointer = buf;
+       in_buf.buffer.length = 0;
+
+       /* libnvdimm has already validated the input envelope */
+       for (i = 0; i < desc->in_num; i++)
+               in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
+                               i, buf);
+
+       if (call_pkg) {
+               /* skip over package wrapper */
+               in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
+               in_buf.buffer.length = call_pkg->nd_size_in;
+       }
+
+       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+               dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
+                               __func__, dimm_name, cmd, func,
+                               in_buf.buffer.length);
+               print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
+                       in_buf.buffer.pointer,
+                       min_t(u32, 256, in_buf.buffer.length), true);
+       }
+
+       out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+       if (!out_obj) {
+               dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
+                               cmd_name);
+               return -EINVAL;
+       }
+
+       if (call_pkg) {
+               call_pkg->nd_fw_size = out_obj->buffer.length;
+               memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
+                       out_obj->buffer.pointer,
+                       min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
+
+               ACPI_FREE(out_obj);
+               /*
+                * We need to support firmware functions whose output size
+                * is not known in advance; the caller determines the
+                * required size from nd_fw_size.  If we returned an error
+                * here (as we do elsewhere), the caller could not rely on
+                * the returned data to make that calculation.
+                */
+               return 0;
+       }
+
+       if (out_obj->type != ACPI_TYPE_BUFFER) {
+               dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
+                               __func__, dimm_name, cmd_name, out_obj->type);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+               dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
+                               dimm_name, cmd_name, out_obj->buffer.length);
+               print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+                               4, out_obj->buffer.pointer, min_t(u32, 128,
+                                       out_obj->buffer.length), true);
+       }
+
+       for (i = 0, offset = 0; i < desc->out_num; i++) {
+               u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
+                               (u32 *) out_obj->buffer.pointer);
+
+               if (offset + out_size > out_obj->buffer.length) {
+                       dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
+                                       __func__, dimm_name, cmd_name, i);
+                       break;
+               }
+
+               if (in_buf.buffer.length + offset + out_size > buf_len) {
+                       dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
+                                       __func__, dimm_name, cmd_name, i);
+                       rc = -ENXIO;
+                       goto out;
+               }
+               memcpy(buf + in_buf.buffer.length + offset,
+                               out_obj->buffer.pointer + offset, out_size);
+               offset += out_size;
+       }
+       if (offset + in_buf.buffer.length < buf_len) {
+               if (i >= 1) {
+                       /*
+                        * status valid, return the number of bytes left
+                        * unfilled in the output buffer
+                        */
+                       rc = buf_len - offset - in_buf.buffer.length;
+                       if (cmd_rc)
+                               *cmd_rc = xlat_status(buf, cmd);
+               } else {
+                       dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
+                                       __func__, dimm_name, cmd_name, buf_len,
+                                       offset);
+                       rc = -ENXIO;
+               }
+       } else {
+               rc = 0;
+               if (cmd_rc)
+                       *cmd_rc = xlat_status(buf, cmd);
+       }
+
+ out:
+       ACPI_FREE(out_obj);
+
+       return rc;
+}
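acpi_nfit_ctl() is installed below as nd_desc->ndctl, so a bus-scope caller
reduces to a sketch like the following (not code from the patch):

    /* sketch: bus-scope ARS capability query through the ->ndctl hook */
    static int query_ars_cap(struct nvdimm_bus_descriptor *nd_desc,
                    struct acpi_nfit_system_address *spa)
    {
            struct nd_cmd_ars_cap cap = {
                    .address = spa->address,
                    .length = spa->length,
            };
            int cmd_rc, rc;

            rc = nd_desc->ndctl(nd_desc, NULL /* bus scope */, ND_CMD_ARS_CAP,
                            &cap, sizeof(cap), &cmd_rc);
            if (rc < 0)
                    return rc;      /* envelope / transport failure */
            return cmd_rc;          /* firmware status via xlat_status() */
    }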
+
+static const char *spa_type_name(u16 type)
+{
+       static const char *to_name[] = {
+               [NFIT_SPA_VOLATILE] = "volatile",
+               [NFIT_SPA_PM] = "pmem",
+               [NFIT_SPA_DCR] = "dimm-control-region",
+               [NFIT_SPA_BDW] = "block-data-window",
+               [NFIT_SPA_VDISK] = "volatile-disk",
+               [NFIT_SPA_VCD] = "volatile-cd",
+               [NFIT_SPA_PDISK] = "persistent-disk",
+               [NFIT_SPA_PCD] = "persistent-cd",
+       };
+
+       if (type > NFIT_SPA_PCD)
+               return "unknown";
+
+       return to_name[type];
+}
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa)
+{
+       int i;
+
+       for (i = 0; i < NFIT_UUID_MAX; i++)
+               if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+                       return i;
+       return -1;
+}
+
+static bool add_spa(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_system_address *spa)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
+
+       if (spa->header.length != sizeof(*spa))
+               return false;
+
+       list_for_each_entry(nfit_spa, &prev->spas, list) {
+               if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+                       list_move_tail(&nfit_spa->list, &acpi_desc->spas);
+                       return true;
+               }
+       }
+
+       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+                       GFP_KERNEL);
+       if (!nfit_spa)
+               return false;
+       INIT_LIST_HEAD(&nfit_spa->list);
+       memcpy(nfit_spa->spa, spa, sizeof(*spa));
+       list_add_tail(&nfit_spa->list, &acpi_desc->spas);
+       dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+                       spa->range_index,
+                       spa_type_name(nfit_spa_type(spa)));
+       return true;
+}
+
+static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_memory_map *memdev)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_memdev *nfit_memdev;
+
+       if (memdev->header.length != sizeof(*memdev))
+               return false;
+
+       list_for_each_entry(nfit_memdev, &prev->memdevs, list)
+               if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
+                       list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+                       return true;
+               }
+
+       nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
+                       GFP_KERNEL);
+       if (!nfit_memdev)
+               return false;
+       INIT_LIST_HEAD(&nfit_memdev->list);
+       memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
+       list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+       dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
+                       __func__, memdev->device_handle, memdev->range_index,
+                       memdev->region_index);
+       return true;
+}
+
+/*
+ * An implementation may provide a truncated control region if no block windows
+ * are defined.
+ */
+static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
+{
+       if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
+                               window_size))
+               return 0;
+       if (dcr->windows)
+               return sizeof(*dcr);
+       return offsetof(struct acpi_nfit_control_region, window_size);
+}
+
+static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_control_region *dcr)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_dcr *nfit_dcr;
+
+       if (!sizeof_dcr(dcr))
+               return false;
+
+       list_for_each_entry(nfit_dcr, &prev->dcrs, list)
+               if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
+                       list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+                       return true;
+               }
+
+       nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
+                       GFP_KERNEL);
+       if (!nfit_dcr)
+               return false;
+       INIT_LIST_HEAD(&nfit_dcr->list);
+       memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
+       list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+       dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+                       dcr->region_index, dcr->windows);
+       return true;
+}
+
+static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_data_region *bdw)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_bdw *nfit_bdw;
+
+       if (bdw->header.length != sizeof(*bdw))
+               return false;
+       list_for_each_entry(nfit_bdw, &prev->bdws, list)
+               if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
+                       list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
+                       return true;
+               }
+
+       nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
+                       GFP_KERNEL);
+       if (!nfit_bdw)
+               return false;
+       INIT_LIST_HEAD(&nfit_bdw->list);
+       memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
+       list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
+       dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+                       bdw->region_index, bdw->windows);
+       return true;
+}
+
+static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+{
+       if (idt->header.length < sizeof(*idt))
+               return 0;
+       return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
+}
+
+static bool add_idt(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_interleave *idt)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_idt *nfit_idt;
+
+       if (!sizeof_idt(idt))
+               return false;
+
+       list_for_each_entry(nfit_idt, &prev->idts, list) {
+               if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
+                       continue;
+
+               if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
+                       list_move_tail(&nfit_idt->list, &acpi_desc->idts);
+                       return true;
+               }
+       }
+
+       nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
+                       GFP_KERNEL);
+       if (!nfit_idt)
+               return false;
+       INIT_LIST_HEAD(&nfit_idt->list);
+       memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
+       list_add_tail(&nfit_idt->list, &acpi_desc->idts);
+       dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+                       idt->interleave_index, idt->line_count);
+       return true;
+}
+
+static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
+{
+       if (flush->header.length < sizeof(*flush))
+               return 0;
+       return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
+}
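sizeof_idt() and sizeof_flush() both size ACPICA structures whose trailing
variable-length array declares one element, hence the count - 1 term; for
example, assuming hint_count == 2:

    /* two hints: sizeof(*flush) already covers hint_address[0] */
    size_t sz = sizeof(struct acpi_nfit_flush_address) + sizeof(u64) * (2 - 1);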
+
+static bool add_flush(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_flush_address *flush)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_flush *nfit_flush;
+
+       if (!sizeof_flush(flush))
+               return false;
+
+       list_for_each_entry(nfit_flush, &prev->flushes, list) {
+               if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
+                       continue;
+
+               if (memcmp(nfit_flush->flush, flush,
+                                       sizeof_flush(flush)) == 0) {
+                       list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
+                       return true;
+               }
+       }
+
+       nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
+                       + sizeof_flush(flush), GFP_KERNEL);
+       if (!nfit_flush)
+               return false;
+       INIT_LIST_HEAD(&nfit_flush->list);
+       memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
+       list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
+       dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+                       flush->device_handle, flush->hint_count);
+       return true;
+}
+
+static void *add_table(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev, void *table, const void *end)
+{
+       struct device *dev = acpi_desc->dev;
+       struct acpi_nfit_header *hdr;
+       void *err = ERR_PTR(-ENOMEM);
+
+       if (table >= end)
+               return NULL;
+
+       hdr = table;
+       if (!hdr->length) {
+               dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
+                       hdr->type);
+               return NULL;
+       }
+
+       switch (hdr->type) {
+       case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
+               if (!add_spa(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_MEMORY_MAP:
+               if (!add_memdev(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_CONTROL_REGION:
+               if (!add_dcr(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_DATA_REGION:
+               if (!add_bdw(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_INTERLEAVE:
+               if (!add_idt(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
+               if (!add_flush(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_SMBIOS:
+               dev_dbg(dev, "%s: smbios\n", __func__);
+               break;
+       default:
+               dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
+               break;
+       }
+
+       return table + hdr->length;
+}
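add_table() consumes one sub-table per call and returns the next header, NULL
at the end of the payload, or an ERR_PTR on allocation failure, so the
caller's walk reduces to this sketch:

    /* sketch of the parse loop over the NFIT payload */
    u8 *data = (u8 *) acpi_desc->nfit;
    const void *end = data + sz;

    while (!IS_ERR_OR_NULL(data))
            data = add_table(acpi_desc, &prev, data, end);
    if (IS_ERR(data))
            return PTR_ERR(data);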
+
+static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem)
+{
+       u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+       u16 dcr = nfit_mem->dcr->region_index;
+       struct nfit_spa *nfit_spa;
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               u16 range_index = nfit_spa->spa->range_index;
+               int type = nfit_spa_type(nfit_spa->spa);
+               struct nfit_memdev *nfit_memdev;
+
+               if (type != NFIT_SPA_BDW)
+                       continue;
+
+               list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+                       if (nfit_memdev->memdev->range_index != range_index)
+                               continue;
+                       if (nfit_memdev->memdev->device_handle != device_handle)
+                               continue;
+                       if (nfit_memdev->memdev->region_index != dcr)
+                               continue;
+
+                       nfit_mem->spa_bdw = nfit_spa->spa;
+                       return;
+               }
+       }
+
+       dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
+                       nfit_mem->spa_dcr->range_index);
+       nfit_mem->bdw = NULL;
+}
+
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
+{
+       u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
+       struct nfit_memdev *nfit_memdev;
+       struct nfit_bdw *nfit_bdw;
+       struct nfit_idt *nfit_idt;
+       u16 idt_idx, range_index;
+
+       list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
+               if (nfit_bdw->bdw->region_index != dcr)
+                       continue;
+               nfit_mem->bdw = nfit_bdw->bdw;
+               break;
+       }
+
+       if (!nfit_mem->bdw)
+               return;
+
+       nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
+
+       if (!nfit_mem->spa_bdw)
+               return;
+
+       range_index = nfit_mem->spa_bdw->range_index;
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               if (nfit_memdev->memdev->range_index != range_index ||
+                               nfit_memdev->memdev->region_index != dcr)
+                       continue;
+               nfit_mem->memdev_bdw = nfit_memdev->memdev;
+               idt_idx = nfit_memdev->memdev->interleave_index;
+               list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+                       if (nfit_idt->idt->interleave_index != idt_idx)
+                               continue;
+                       nfit_mem->idt_bdw = nfit_idt->idt;
+                       break;
+               }
+               break;
+       }
+}
+
+static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
+               struct acpi_nfit_system_address *spa)
+{
+       struct nfit_mem *nfit_mem, *found;
+       struct nfit_memdev *nfit_memdev;
+       int type = nfit_spa_type(spa);
+
+       switch (type) {
+       case NFIT_SPA_DCR:
+       case NFIT_SPA_PM:
+               break;
+       default:
+               return 0;
+       }
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct nfit_flush *nfit_flush;
+               struct nfit_dcr *nfit_dcr;
+               u32 device_handle;
+               u16 dcr;
+
+               if (nfit_memdev->memdev->range_index != spa->range_index)
+                       continue;
+               found = NULL;
+               dcr = nfit_memdev->memdev->region_index;
+               device_handle = nfit_memdev->memdev->device_handle;
+               list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+                       if (__to_nfit_memdev(nfit_mem)->device_handle
+                                       == device_handle) {
+                               found = nfit_mem;
+                               break;
+                       }
+
+               if (found)
+                       nfit_mem = found;
+               else {
+                       nfit_mem = devm_kzalloc(acpi_desc->dev,
+                                       sizeof(*nfit_mem), GFP_KERNEL);
+                       if (!nfit_mem)
+                               return -ENOMEM;
+                       INIT_LIST_HEAD(&nfit_mem->list);
+                       nfit_mem->acpi_desc = acpi_desc;
+                       list_add(&nfit_mem->list, &acpi_desc->dimms);
+               }
+
+               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+                       if (nfit_dcr->dcr->region_index != dcr)
+                               continue;
+                       /*
+                        * Record the control region for the dimm.  For
+                        * the ACPI 6.1 case, where there are separate
+                        * control regions for the pmem vs blk
+                        * interfaces, be sure to record the extended
+                        * blk details.
+                        */
+                       if (!nfit_mem->dcr)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       else if (nfit_mem->dcr->windows == 0
+                                       && nfit_dcr->dcr->windows)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       break;
+               }
+
+               list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+                       struct acpi_nfit_flush_address *flush;
+                       u16 i;
+
+                       if (nfit_flush->flush->device_handle != device_handle)
+                               continue;
+                       nfit_mem->nfit_flush = nfit_flush;
+                       flush = nfit_flush->flush;
+                       nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
+                                       flush->hint_count
+                                       * sizeof(struct resource), GFP_KERNEL);
+                       if (!nfit_mem->flush_wpq)
+                               return -ENOMEM;
+                       for (i = 0; i < flush->hint_count; i++) {
+                               struct resource *res = &nfit_mem->flush_wpq[i];
+
+                               res->start = flush->hint_address[i];
+                               res->end = res->start + 8 - 1;
+                       }
+                       break;
+               }
+
+               if (dcr && !nfit_mem->dcr) {
+                       dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+                                       spa->range_index, dcr);
+                       return -ENODEV;
+               }
+
+               if (type == NFIT_SPA_DCR) {
+                       struct nfit_idt *nfit_idt;
+                       u16 idt_idx;
+
+                       /* multiple dimms may share a SPA when interleaved */
+                       nfit_mem->spa_dcr = spa;
+                       nfit_mem->memdev_dcr = nfit_memdev->memdev;
+                       idt_idx = nfit_memdev->memdev->interleave_index;
+                       list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+                               if (nfit_idt->idt->interleave_index != idt_idx)
+                                       continue;
+                               nfit_mem->idt_dcr = nfit_idt->idt;
+                               break;
+                       }
+                       nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
+               } else {
+                       /*
+                        * A single dimm may belong to multiple SPA-PM
+                        * ranges, record at least one in addition to
+                        * any SPA-DCR range.
+                        */
+                       nfit_mem->memdev_pmem = nfit_memdev->memdev;
+               }
+       }
+
+       return 0;
+}
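Each flush_wpq resource recorded above describes one 8-byte flush-hint
register. Conceptually a consumer uses it as in this sketch (the mapping is
shown only for illustration; this is not the driver's literal code):

    /* conceptual sketch: trigger a posted-write-queue flush via a hint */
    void __iomem *hint = devm_ioremap(dev, res->start, resource_size(res));

    wmb();                  /* order prior posted writes toward the dimm */
    writeq(1, hint);        /* the mmio write itself requests the flush */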
+
+static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
+{
+       struct nfit_mem *a = container_of(_a, typeof(*a), list);
+       struct nfit_mem *b = container_of(_b, typeof(*b), list);
+       u32 handleA, handleB;
+
+       handleA = __to_nfit_memdev(a)->device_handle;
+       handleB = __to_nfit_memdev(b)->device_handle;
+       if (handleA < handleB)
+               return -1;
+       else if (handleA > handleB)
+               return 1;
+       return 0;
+}
+
+static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_spa *nfit_spa;
+
+       /*
+        * For each SPA-DCR or SPA-PMEM address range find its
+        * corresponding MEMDEV(s).  From each MEMDEV find the
+        * corresponding DCR.  Then, if we're operating on a SPA-DCR,
+        * try to find a SPA-BDW and a corresponding BDW that references
+        * the DCR.  Throw it all into an nfit_mem object.  Note that
+        * BDWs are optional.
+        */
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               int rc;
+
+               rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
+               if (rc)
+                       return rc;
+       }
+
+       list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
+
+       return 0;
+}
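For reference, the relationships assembled by nfit_mem_dcr_init() and
nfit_mem_init_bdw(), summarized:

    /*
     *   SPA (range_index) <-- MEMDEV --> DCR (region_index)
     *                           |
     *                           +-- device_handle ----> FLUSH hint table
     *                           +-- interleave_index -> IDT
     *
     *   and, for BLK-capable dimms: DCR --> BDW --> SPA-BDW
     */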
+
+static ssize_t revision_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+       return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
+}
+static DEVICE_ATTR_RO(revision);
+
+/*
+ * This shows the number of full Address Range Scrubs that have been
+ * completed since driver load time. Userspace can wait on this using
+ * select/poll etc. A '+' at the end indicates an ARS is in progress.
+ */
+static ssize_t scrub_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+       ssize_t rc = -ENXIO;
+
+       device_lock(dev);
+       nd_desc = dev_get_drvdata(dev);
+       if (nd_desc) {
+               struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+               rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
+                               (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+       }
+       device_unlock(dev);
+       return rc;
+}
+
+static ssize_t scrub_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t size)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+       ssize_t rc;
+       long val;
+
+       rc = kstrtol(buf, 0, &val);
+       if (rc)
+               return rc;
+       if (val != 1)
+               return -EINVAL;
+
+       device_lock(dev);
+       nd_desc = dev_get_drvdata(dev);
+       if (nd_desc) {
+               struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+               rc = acpi_nfit_ars_rescan(acpi_desc);
+       }
+       device_unlock(dev);
+       if (rc)
+               return rc;
+       return size;
+}
+static DEVICE_ATTR_RW(scrub);
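With the attribute group below named "nfit", this file lands under the bus
device. A hypothetical userspace sketch (the sysfs path is assumed, not taken
from the patch):

    /* hypothetical userspace sketch: trigger a rescan, read the count */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static int rescan(void)
    {
            char count[16];
            ssize_t n;
            int fd = open("/sys/bus/nd/devices/ndbus0/nfit/scrub", O_RDWR);

            if (fd < 0)
                    return -1;
            if (write(fd, "1\n", 2) < 0)    /* only the value 1 is accepted */
                    perror("scrub");
            n = pread(fd, count, sizeof(count) - 1, 0);
            if (n > 0) {
                    count[n] = '\0';
                    printf("completed scrubs: %s", count); /* '+' == busy */
            }
            close(fd);
            return 0;
    }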
+
+static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
+{
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
+               | 1 << ND_CMD_ARS_STATUS;
+
+       return (nd_desc->cmd_mask & mask) == mask;
+}
+
+static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+       if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
+               return 0;
+       return a->mode;
+}
+
+static struct attribute *acpi_nfit_attributes[] = {
+       &dev_attr_revision.attr,
+       &dev_attr_scrub.attr,
+       NULL,
+};
+
+static struct attribute_group acpi_nfit_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_attributes,
+       .is_visible = nfit_visible,
+};
+
+static const struct attribute_group *acpi_nfit_attribute_groups[] = {
+       &nvdimm_bus_attribute_group,
+       &acpi_nfit_attribute_group,
+       NULL,
+};
+
+static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       return __to_nfit_memdev(nfit_mem);
+}
+
+static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       return nfit_mem->dcr;
+}
+
+static ssize_t handle_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+       return sprintf(buf, "%#x\n", memdev->device_handle);
+}
+static DEVICE_ATTR_RO(handle);
+
+static ssize_t phys_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+       return sprintf(buf, "%#x\n", memdev->physical_id);
+}
+static DEVICE_ATTR_RO(phys_id);
+
+static ssize_t vendor_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t rev_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
+}
+static DEVICE_ATTR_RO(rev_id);
+
+static ssize_t device_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t subsystem_vendor_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
+}
+static DEVICE_ATTR_RO(subsystem_vendor);
+
+static ssize_t subsystem_rev_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n",
+                       be16_to_cpu(dcr->subsystem_revision_id));
+}
+static DEVICE_ATTR_RO(subsystem_rev_id);
+
+static ssize_t subsystem_device_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
+}
+static DEVICE_ATTR_RO(subsystem_device);
+
+static int num_nvdimm_formats(struct nvdimm *nvdimm)
+{
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+       int formats = 0;
+
+       if (nfit_mem->memdev_pmem)
+               formats++;
+       if (nfit_mem->memdev_bdw)
+               formats++;
+       return formats;
+}
+
+static ssize_t format_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
+}
+static DEVICE_ATTR_RO(format);
+
+static ssize_t format1_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u32 handle;
+       ssize_t rc = -ENXIO;
+       struct nfit_mem *nfit_mem;
+       struct nfit_memdev *nfit_memdev;
+       struct acpi_nfit_desc *acpi_desc;
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       nfit_mem = nvdimm_provider_data(nvdimm);
+       acpi_desc = nfit_mem->acpi_desc;
+       handle = to_nfit_memdev(dev)->device_handle;
+
+       /* assumes DIMMs have at most 2 published interface codes */
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+               struct nfit_dcr *nfit_dcr;
+
+               if (memdev->device_handle != handle)
+                       continue;
+
+               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+                       if (nfit_dcr->dcr->region_index != memdev->region_index)
+                               continue;
+                       if (nfit_dcr->dcr->code == dcr->code)
+                               continue;
+                       rc = sprintf(buf, "0x%04x\n",
+                                       le16_to_cpu(nfit_dcr->dcr->code));
+                       break;
+               }
+               if (rc != -ENXIO)
+                       break;
+       }
+       mutex_unlock(&acpi_desc->init_mutex);
+       return rc;
+}
+static DEVICE_ATTR_RO(format1);
+
+static ssize_t formats_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+
+       return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
+}
+static DEVICE_ATTR_RO(formats);
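+
+/*
+ * Example (interface codes hypothetical): a DIMM publishing both a
+ * pmem and a blk window format would report formats == 2, with
+ * "format" showing one code and "format1" the other, e.g. 0x0201 and
+ * 0x0301.
+ */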
+
+static ssize_t serial_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t family_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       if (nfit_mem->family < 0)
+               return -ENXIO;
+       return sprintf(buf, "%d\n", nfit_mem->family);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t dsm_mask_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       if (nfit_mem->family < 0)
+               return -ENXIO;
+       return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
+static ssize_t flags_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u16 flags = to_nfit_memdev(dev)->flags;
+
+       return sprintf(buf, "%s%s%s%s%s\n",
+               flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
+               flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
+               flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
+               flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
+               flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
+}
+static DEVICE_ATTR_RO(flags);
+
+static ssize_t id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
+               return sprintf(buf, "%04x-%02x-%04x-%08x\n",
+                               be16_to_cpu(dcr->vendor_id),
+                               dcr->manufacturing_location,
+                               be16_to_cpu(dcr->manufacturing_date),
+                               be32_to_cpu(dcr->serial_number));
+       else
+               return sprintf(buf, "%04x-%08x\n",
+                               be16_to_cpu(dcr->vendor_id),
+                               be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(id);
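+
+/*
+ * Sample "id" output (values made up): "8089-a2-1540-00abcdef" when
+ * the manufacturing fields are valid, "8089-00abcdef" otherwise.
+ */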
+
+static struct attribute *acpi_nfit_dimm_attributes[] = {
+       &dev_attr_handle.attr,
+       &dev_attr_phys_id.attr,
+       &dev_attr_vendor.attr,
+       &dev_attr_device.attr,
+       &dev_attr_rev_id.attr,
+       &dev_attr_subsystem_vendor.attr,
+       &dev_attr_subsystem_device.attr,
+       &dev_attr_subsystem_rev_id.attr,
+       &dev_attr_format.attr,
+       &dev_attr_formats.attr,
+       &dev_attr_format1.attr,
+       &dev_attr_serial.attr,
+       &dev_attr_flags.attr,
+       &dev_attr_id.attr,
+       &dev_attr_family.attr,
+       &dev_attr_dsm_mask.attr,
+       NULL,
+};
+
+static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
+               struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+
+       if (!to_nfit_dcr(dev))
+               return 0;
+       if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
+               return 0;
+       return a->mode;
+}
+
+static struct attribute_group acpi_nfit_dimm_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_dimm_attributes,
+       .is_visible = acpi_nfit_dimm_attr_visible,
+};
+
+static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
+       &nvdimm_attribute_group,
+       &nd_device_attribute_group,
+       &acpi_nfit_dimm_attribute_group,
+       NULL,
+};
+
+static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
+               u32 device_handle)
+{
+       struct nfit_mem *nfit_mem;
+
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+               if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
+                       return nfit_mem->nvdimm;
+
+       return NULL;
+}
+
+static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem, u32 device_handle)
+{
+       struct acpi_device *adev, *adev_dimm;
+       struct device *dev = acpi_desc->dev;
+       unsigned long dsm_mask;
+       const u8 *uuid;
+       int i;
+
+       /* nfit test assumes 1:1 relationship between commands and dsms */
+       nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
+       nfit_mem->family = NVDIMM_FAMILY_INTEL;
+       adev = to_acpi_dev(acpi_desc);
+       if (!adev)
+               return 0;
+
+       adev_dimm = acpi_find_child_device(adev, device_handle, false);
+       nfit_mem->adev = adev_dimm;
+       if (!adev_dimm) {
+               dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
+                               device_handle);
+               return force_enable_dimms ? 0 : -ENODEV;
+       }
+
+       /*
+        * Until standardization materializes we need to consider 4
+        * different command sets.  Note that checking for function0 (bit0)
+        * tells us if any commands are reachable through this uuid.
+        */
+       for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
+               if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+                       break;
+
+       /* limit the supported commands to those that are publicly documented */
+       nfit_mem->family = i;
+       if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
+               dsm_mask = 0x3fe;
+               if (disable_vendor_specific)
+                       dsm_mask &= ~(1 << ND_CMD_VENDOR);
+       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
+               dsm_mask = 0x1c3c76;
+       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+               dsm_mask = 0x1fe;
+               if (disable_vendor_specific)
+                       dsm_mask &= ~(1 << 8);
+       } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
+               dsm_mask = 0xffffffff;
+       } else {
+               dev_dbg(dev, "unknown dimm command family\n");
+               nfit_mem->family = -1;
+               /* DSMs are optional, continue loading the driver... */
+               return 0;
+       }
+
+       uuid = to_nfit_uuid(nfit_mem->family);
+       for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
+               if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+                       set_bit(i, &nfit_mem->dsm_mask);
+
+       return 0;
+}
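+
+/*
+ * For reference, the NVDIMM_FAMILY_INTEL mask 0x3fe covers functions 1
+ * through 9 (ND_CMD_SMART through ND_CMD_VENDOR); bit 0 is the _DSM
+ * discovery function and is never included.  disable_vendor_specific
+ * clears only bit ND_CMD_VENDOR from that set.
+ */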
+
+static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_mem *nfit_mem;
+       int dimm_count = 0;
+
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+               struct acpi_nfit_flush_address *flush;
+               unsigned long flags = 0, cmd_mask;
+               struct nvdimm *nvdimm;
+               u32 device_handle;
+               u16 mem_flags;
+               int rc;
+
+               device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+               nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
+               if (nvdimm) {
+                       dimm_count++;
+                       continue;
+               }
+
+               if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+                       flags |= NDD_ALIASING;
+
+               mem_flags = __to_nfit_memdev(nfit_mem)->flags;
+               if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
+                       flags |= NDD_UNARMED;
+
+               rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
+               if (rc)
+                       continue;
+
+               /*
+                * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
+                * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
+                * userspace interface.
+                */
+               cmd_mask = 1UL << ND_CMD_CALL;
+               if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+                       cmd_mask |= nfit_mem->dsm_mask;
+
+               flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
+                       : NULL;
+               nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
+                               acpi_nfit_dimm_attribute_groups,
+                               flags, cmd_mask, flush ? flush->hint_count : 0,
+                               nfit_mem->flush_wpq);
+               if (!nvdimm)
+                       return -ENOMEM;
+
+               nfit_mem->nvdimm = nvdimm;
+               dimm_count++;
+
+               if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
+                       continue;
+
+               dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
+                               nvdimm_name(nvdimm),
+                 mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
+                 mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
+                 mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
+                 mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
+
+       }
+
+       return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+}
+
+static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+       struct acpi_device *adev;
+       int i;
+
+       nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
+       adev = to_acpi_dev(acpi_desc);
+       if (!adev)
+               return;
+
+       for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
+               if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+                       set_bit(i, &nd_desc->cmd_mask);
+}
+
+static ssize_t range_index_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+
+       return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
+}
+static DEVICE_ATTR_RO(range_index);
+
+static struct attribute *acpi_nfit_region_attributes[] = {
+       &dev_attr_range_index.attr,
+       NULL,
+};
+
+static struct attribute_group acpi_nfit_region_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_region_attributes,
+};
+
+static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
+       &nd_region_attribute_group,
+       &nd_mapping_attribute_group,
+       &nd_device_attribute_group,
+       &nd_numa_attribute_group,
+       &acpi_nfit_region_attribute_group,
+       NULL,
+};
+
+/* enough info to uniquely specify an interleave set */
+struct nfit_set_info {
+       struct nfit_set_info_map {
+               u64 region_offset;
+               u32 serial_number;
+               u32 pad;
+       } mapping[0];
+};
+
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+       return sizeof(struct nfit_set_info)
+               + num_mappings * sizeof(struct nfit_set_info_map);
+}
+
+static int cmp_map(const void *m0, const void *m1)
+{
+       const struct nfit_set_info_map *map0 = m0;
+       const struct nfit_set_info_map *map1 = m1;
+
+       return memcmp(&map0->region_offset, &map1->region_offset,
+                       sizeof(u64));
+}
+
+/* Retrieve the nth entry referencing this spa */
+static struct acpi_nfit_memory_map *memdev_from_spa(
+               struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+{
+       struct nfit_memdev *nfit_memdev;
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+               if (nfit_memdev->memdev->range_index == range_index)
+                       if (n-- == 0)
+                               return nfit_memdev->memdev;
+       return NULL;
+}
+
+static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+               struct nd_region_desc *ndr_desc,
+               struct acpi_nfit_system_address *spa)
+{
+       int i, spa_type = nfit_spa_type(spa);
+       struct device *dev = acpi_desc->dev;
+       struct nd_interleave_set *nd_set;
+       u16 nr = ndr_desc->num_mappings;
+       struct nfit_set_info *info;
+
+       if (spa_type != NFIT_SPA_PM && spa_type != NFIT_SPA_VOLATILE)
+               return 0;
+
+       nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+       if (!nd_set)
+               return -ENOMEM;
+
+       info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+       for (i = 0; i < nr; i++) {
+               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+               struct nfit_set_info_map *map = &info->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+               struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
+                               spa->range_index, i);
+
+               if (!memdev || !nfit_mem->dcr) {
+                       dev_err(dev, "%s: failed to find DCR\n", __func__);
+                       return -ENODEV;
+               }
+
+               map->region_offset = memdev->region_offset;
+               map->serial_number = nfit_mem->dcr->serial_number;
+       }
+
+       sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+                       cmp_map, NULL);
+       nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+       ndr_desc->nd_set = nd_set;
+       devm_kfree(dev, info);
+
+       return 0;
+}
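+
+/*
+ * Sorting the mappings by region_offset before the fletcher64 pass
+ * keeps the interleave-set cookie stable across boots regardless of
+ * the order in which memdev entries were enumerated.
+ */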
+
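+/*
+ * Worked example (geometry hypothetical): with line_size = 256,
+ * num_lines = 4 and interleave_ways = 2 (so table_size = 4 * 2 * 256 =
+ * 2048), and idt->line_offset[] = { 0, 2, 4, 6 }, an offset of 1300
+ * decomposes as:
+ *
+ *   line_no = 5, sub_line_offset = 20
+ *   table_skip_count = 1, line_index = 1
+ *   line_offset = 2 * 256 = 512, table_offset = 1 * 2048 = 2048
+ *
+ * yielding base_offset + 2048 + 512 + 20.
+ */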
+static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
+{
+       struct acpi_nfit_interleave *idt = mmio->idt;
+       u32 sub_line_offset, line_index, line_offset;
+       u64 line_no, table_skip_count, table_offset;
+
+       line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
+       table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
+       line_offset = idt->line_offset[line_index]
+               * mmio->line_size;
+       table_offset = table_skip_count * mmio->table_size;
+
+       return mmio->base_offset + line_offset + table_offset + sub_line_offset;
+}
+
+static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+{
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+       u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+
+       if (mmio->num_lines)
+               offset = to_interleave_offset(offset, mmio);
+
+       return readl(mmio->addr.base + offset);
+}
+
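+/*
+ * Illustrative encoding (assuming a 64-byte cache line): a 512-byte
+ * write at dpa 0x1000 becomes cmd = 0x40 | (8ULL << 48) | (1ULL << 56),
+ * i.e. cache-line offset 0x40, length 8 lines, write bit set, before
+ * any interleave translation of the control register offset.
+ */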
+static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
+               resource_size_t dpa, unsigned int len, unsigned int write)
+{
+       u64 cmd, offset;
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+
+       enum {
+               BCW_OFFSET_MASK = (1ULL << 48)-1,
+               BCW_LEN_SHIFT = 48,
+               BCW_LEN_MASK = (1ULL << 8) - 1,
+               BCW_CMD_SHIFT = 56,
+       };
+
+       cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
+       len = len >> L1_CACHE_SHIFT;
+       cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
+       cmd |= ((u64) write) << BCW_CMD_SHIFT;
+
+       offset = nfit_blk->cmd_offset + mmio->size * bw;
+       if (mmio->num_lines)
+               offset = to_interleave_offset(offset, mmio);
+
+       writeq(cmd, mmio->addr.base + offset);
+       nvdimm_flush(nfit_blk->nd_region);
+
+       if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
+               readq(mmio->addr.base + offset);
+}
+
+static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
+               resource_size_t dpa, void *iobuf, size_t len, int rw,
+               unsigned int lane)
+{
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+       unsigned int copied = 0;
+       u64 base_offset;
+       int rc;
+
+       base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
+               + lane * mmio->size;
+       write_blk_ctl(nfit_blk, lane, dpa, len, rw);
+       while (len) {
+               unsigned int c;
+               u64 offset;
+
+               if (mmio->num_lines) {
+                       u32 line_offset;
+
+                       offset = to_interleave_offset(base_offset + copied,
+                                       mmio);
+                       div_u64_rem(offset, mmio->line_size, &line_offset);
+                       c = min_t(size_t, len, mmio->line_size - line_offset);
+               } else {
+                       offset = base_offset + nfit_blk->bdw_offset;
+                       c = len;
+               }
+
+               if (rw)
+                       memcpy_to_pmem(mmio->addr.aperture + offset,
+                                       iobuf + copied, c);
+               else {
+                       if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
+                               mmio_flush_range((void __force *)
+                                       mmio->addr.aperture + offset, c);
+
+                       memcpy_from_pmem(iobuf + copied,
+                                       mmio->addr.aperture + offset, c);
+               }
+
+               copied += c;
+               len -= c;
+       }
+
+       if (rw)
+               nvdimm_flush(nfit_blk->nd_region);
+
+       rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
+       return rc;
+}
+
+static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
+               resource_size_t dpa, void *iobuf, u64 len, int rw)
+{
+       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+       struct nd_region *nd_region = nfit_blk->nd_region;
+       unsigned int lane, copied = 0;
+       int rc = 0;
+
+       lane = nd_region_acquire_lane(nd_region);
+       while (len) {
+               u64 c = min(len, mmio->size);
+
+               rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
+                               iobuf + copied, c, rw, lane);
+               if (rc)
+                       break;
+
+               copied += c;
+               len -= c;
+       }
+       nd_region_release_lane(nd_region, lane);
+
+       return rc;
+}
+
+static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
+               struct acpi_nfit_interleave *idt, u16 interleave_ways)
+{
+       if (idt) {
+               mmio->num_lines = idt->line_count;
+               mmio->line_size = idt->line_size;
+               if (interleave_ways == 0)
+                       return -ENXIO;
+               mmio->table_size = mmio->num_lines * interleave_ways
+                       * mmio->line_size;
+       }
+
+       return 0;
+}
+
+static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
+{
+       struct nd_cmd_dimm_flags flags;
+       int rc;
+
+       memset(&flags, 0, sizeof(flags));
+       rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
+                       sizeof(flags), NULL);
+
+       if (rc >= 0 && flags.status == 0)
+               nfit_blk->dimm_flags = flags.flags;
+       else if (rc == -ENOTTY) {
+               /* fall back to a conservative default */
+               nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
+               rc = 0;
+       } else
+               rc = -ENXIO;
+
+       return rc;
+}
+
+static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
+               struct device *dev)
+{
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+       struct nfit_blk_mmio *mmio;
+       struct nfit_blk *nfit_blk;
+       struct nfit_mem *nfit_mem;
+       struct nvdimm *nvdimm;
+       int rc;
+
+       nvdimm = nd_blk_region_to_dimm(ndbr);
+       nfit_mem = nvdimm_provider_data(nvdimm);
+       if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
+               dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+                               nfit_mem ? "" : " nfit_mem",
+                               (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
+                               (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
+               return -ENXIO;
+       }
+
+       nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
+       if (!nfit_blk)
+               return -ENOMEM;
+       nd_blk_region_set_provider_data(ndbr, nfit_blk);
+       nfit_blk->nd_region = to_nd_region(dev);
+
+       /* map block aperture memory */
+       nfit_blk->bdw_offset = nfit_mem->bdw->offset;
+       mmio = &nfit_blk->mmio[BDW];
+       mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
+                        nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
+       if (!mmio->addr.base) {
+               dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+                               nvdimm_name(nvdimm));
+               return -ENOMEM;
+       }
+       mmio->size = nfit_mem->bdw->size;
+       mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
+       mmio->idt = nfit_mem->idt_bdw;
+       mmio->spa = nfit_mem->spa_bdw;
+       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
+                       nfit_mem->memdev_bdw->interleave_ways);
+       if (rc) {
+               dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       /* map block control memory */
+       nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
+       nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
+       mmio = &nfit_blk->mmio[DCR];
+       mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
+                       nfit_mem->spa_dcr->length);
+       if (!mmio->addr.base) {
+               dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+                               nvdimm_name(nvdimm));
+               return -ENOMEM;
+       }
+       mmio->size = nfit_mem->dcr->window_size;
+       mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
+       mmio->idt = nfit_mem->idt_dcr;
+       mmio->spa = nfit_mem->spa_dcr;
+       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
+                       nfit_mem->memdev_dcr->interleave_ways);
+       if (rc) {
+               dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
+       if (rc < 0) {
+               dev_dbg(dev, "%s: %s failed to get DIMM flags\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
+               dev_warn(dev, "unable to guarantee persistence of writes\n");
+
+       if (mmio->line_size == 0)
+               return 0;
+
+       if ((u32) nfit_blk->cmd_offset % mmio->line_size
+                       + 8 > mmio->line_size) {
+               dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
+               return -ENXIO;
+       } else if ((u32) nfit_blk->stat_offset % mmio->line_size
+                       + 8 > mmio->line_size) {
+               dev_dbg(dev, "stat_offset crosses interleave boundary\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
+               struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       int cmd_rc, rc;
+
+       cmd->address = spa->address;
+       cmd->length = spa->length;
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
+                       sizeof(*cmd), &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
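+
+/*
+ * Note the two error domains in these ars_* helpers: a negative return
+ * from ->ndctl() is a transport/validation failure, while cmd_rc
+ * carries the translated firmware status of the command itself.
+ */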
+
+static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
+{
+       int rc;
+       int cmd_rc;
+       struct nd_cmd_ars_start ars_start;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+       memset(&ars_start, 0, sizeof(ars_start));
+       ars_start.address = spa->address;
+       ars_start.length = spa->length;
+       if (nfit_spa_type(spa) == NFIT_SPA_PM)
+               ars_start.type = ND_ARS_PERSISTENT;
+       else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
+               ars_start.type = ND_ARS_VOLATILE;
+       else
+               return -ENOTTY;
+
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+                       sizeof(ars_start), &cmd_rc);
+
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_continue(struct acpi_nfit_desc *acpi_desc)
+{
+       int rc, cmd_rc;
+       struct nd_cmd_ars_start ars_start;
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+
+       memset(&ars_start, 0, sizeof(ars_start));
+       ars_start.address = ars_status->restart_address;
+       ars_start.length = ars_status->restart_length;
+       ars_start.type = ars_status->type;
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+                       sizeof(ars_start), &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+       int rc, cmd_rc;
+
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
+                       acpi_desc->ars_status_size, &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
+               struct nd_cmd_ars_status *ars_status)
+{
+       int rc;
+       u32 i;
+
+       for (i = 0; i < ars_status->num_records; i++) {
+               rc = nvdimm_bus_add_poison(nvdimm_bus,
+                               ars_status->records[i].err_address,
+                               ars_status->records[i].length);
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
+
+static void acpi_nfit_remove_resource(void *data)
+{
+       struct resource *res = data;
+
+       remove_resource(res);
+}
+
+static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
+               struct nd_region_desc *ndr_desc)
+{
+       struct resource *res, *nd_res = ndr_desc->res;
+       int is_pmem, ret;
+
+       /* No operation if the region is already registered as PMEM */
+       is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
+                               IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
+       if (is_pmem == REGION_INTERSECTS)
+               return 0;
+
+       res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
+       if (!res)
+               return -ENOMEM;
+
+       res->name = "Persistent Memory";
+       res->start = nd_res->start;
+       res->end = nd_res->end;
+       res->flags = IORESOURCE_MEM;
+       res->desc = IORES_DESC_PERSISTENT_MEMORY;
+
+       ret = insert_resource(&iomem_resource, res);
+       if (ret)
+               return ret;
+
+       ret = devm_add_action_or_reset(acpi_desc->dev,
+                                       acpi_nfit_remove_resource,
+                                       res);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
+               struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+               struct acpi_nfit_memory_map *memdev,
+               struct nfit_spa *nfit_spa)
+{
+       struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
+                       memdev->device_handle);
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_blk_region_desc *ndbr_desc;
+       struct nfit_mem *nfit_mem;
+       int blk_valid = 0;
+
+       if (!nvdimm) {
+               dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
+                               spa->range_index, memdev->device_handle);
+               return -ENODEV;
+       }
+
+       nd_mapping->nvdimm = nvdimm;
+       switch (nfit_spa_type(spa)) {
+       case NFIT_SPA_PM:
+       case NFIT_SPA_VOLATILE:
+               nd_mapping->start = memdev->address;
+               nd_mapping->size = memdev->region_size;
+               break;
+       case NFIT_SPA_DCR:
+               nfit_mem = nvdimm_provider_data(nvdimm);
+               if (!nfit_mem || !nfit_mem->bdw) {
+                       dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
+                                       spa->range_index, nvdimm_name(nvdimm));
+               } else {
+                       nd_mapping->size = nfit_mem->bdw->capacity;
+                       nd_mapping->start = nfit_mem->bdw->start_address;
+                       ndr_desc->num_lanes = nfit_mem->bdw->windows;
+                       blk_valid = 1;
+               }
+
+               ndr_desc->nd_mapping = nd_mapping;
+               ndr_desc->num_mappings = blk_valid;
+               ndbr_desc = to_blk_region_desc(ndr_desc);
+               ndbr_desc->enable = acpi_nfit_blk_region_enable;
+               ndbr_desc->do_io = acpi_desc->blk_do_io;
+               nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       return -ENOMEM;
+               break;
+       }
+
+       return 0;
+}
+
+static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
+{
+       return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
+               nfit_spa_type(spa) == NFIT_SPA_VCD   ||
+               nfit_spa_type(spa) == NFIT_SPA_PDISK ||
+               nfit_spa_type(spa) == NFIT_SPA_PCD);
+}
+
+static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_blk_region_desc ndbr_desc;
+       struct nd_region_desc *ndr_desc;
+       struct nfit_memdev *nfit_memdev;
+       struct nvdimm_bus *nvdimm_bus;
+       struct resource res;
+       int count = 0, rc;
+
+       if (nfit_spa->nd_region)
+               return 0;
+
+       if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
+               dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
+                               __func__);
+               return 0;
+       }
+
+       memset(&res, 0, sizeof(res));
+       memset(&nd_mappings, 0, sizeof(nd_mappings));
+       memset(&ndbr_desc, 0, sizeof(ndbr_desc));
+       res.start = spa->address;
+       res.end = res.start + spa->length - 1;
+       ndr_desc = &ndbr_desc.ndr_desc;
+       ndr_desc->res = &res;
+       ndr_desc->provider_data = nfit_spa;
+       ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
+       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+               ndr_desc->numa_node = acpi_map_pxm_to_online_node(
+                                               spa->proximity_domain);
+       else
+               ndr_desc->numa_node = NUMA_NO_NODE;
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+               struct nd_mapping *nd_mapping;
+
+               if (memdev->range_index != spa->range_index)
+                       continue;
+               if (count >= ND_MAX_MAPPINGS) {
+                       dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
+                                       spa->range_index, ND_MAX_MAPPINGS);
+                       return -ENXIO;
+               }
+               nd_mapping = &nd_mappings[count++];
+               rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+                               memdev, nfit_spa);
+               if (rc)
+                       goto out;
+       }
+
+       ndr_desc->nd_mapping = nd_mappings;
+       ndr_desc->num_mappings = count;
+       rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
+       if (rc)
+               goto out;
+
+       nvdimm_bus = acpi_desc->nvdimm_bus;
+       if (nfit_spa_type(spa) == NFIT_SPA_PM) {
+               rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
+               if (rc) {
+                       dev_warn(acpi_desc->dev,
+                               "failed to insert pmem resource to iomem: %d\n",
+                               rc);
+                       goto out;
+               }
+
+               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
+               nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       } else if (nfit_spa_is_virtual(spa)) {
+               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       }
+
+ out:
+       if (rc)
+               dev_err(acpi_desc->dev, "failed to register spa range %d\n",
+                               nfit_spa->spa->range_index);
+       return rc;
+}
+
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
+               u32 max_ars)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nd_cmd_ars_status *ars_status;
+
+       if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
+               memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+               return 0;
+       }
+
+       if (acpi_desc->ars_status)
+               devm_kfree(dev, acpi_desc->ars_status);
+       acpi_desc->ars_status = NULL;
+       ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+       if (!ars_status)
+               return -ENOMEM;
+       acpi_desc->ars_status = ars_status;
+       acpi_desc->ars_status_size = max_ars;
+       return 0;
+}
+
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       int rc;
+
+       if (!nfit_spa->max_ars) {
+               struct nd_cmd_ars_cap ars_cap;
+
+               memset(&ars_cap, 0, sizeof(ars_cap));
+               rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+               if (rc < 0)
+                       return rc;
+               nfit_spa->max_ars = ars_cap.max_ars_out;
+               nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+               /* check that the supported scrub types match the spa type */
+               if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
+                               ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
+                       return -ENOTTY;
+               else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
+                               ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
+                       return -ENOTTY;
+       }
+
+       if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+               return -ENOMEM;
+
+       rc = ars_get_status(acpi_desc);
+       if (rc < 0 && rc != -ENOSPC)
+               return rc;
+
+       if (ars_status_process_records(acpi_desc->nvdimm_bus,
+                               acpi_desc->ars_status))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       unsigned int overflow_retry = scrub_overflow_abort;
+       u64 init_ars_start = 0, init_ars_len = 0;
+       struct device *dev = acpi_desc->dev;
+       unsigned int tmo = scrub_timeout;
+       int rc;
+
+       if (!nfit_spa->ars_required || !nfit_spa->nd_region)
+               return;
+
+       rc = ars_start(acpi_desc, nfit_spa);
+       /*
+        * If we timed out the initial scan, we'll still be busy here
+        * and will wait another timeout before giving up permanently.
+        */
+       if (rc < 0 && rc != -EBUSY)
+               return;
+
+       do {
+               u64 ars_start, ars_len;
+
+               if (acpi_desc->cancel)
+                       break;
+               rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+               if (rc == -ENOTTY)
+                       break;
+               if (rc == -EBUSY && !tmo) {
+                       dev_warn(dev, "range %d ars timeout, aborting\n",
+                                       spa->range_index);
+                       break;
+               }
+
+               if (rc == -EBUSY) {
+                       /*
+                        * Note, entries may be appended to the list
+                        * while the lock is dropped, but the workqueue
+                        * being active prevents entries being deleted /
+                        * freed.
+                        */
+                       mutex_unlock(&acpi_desc->init_mutex);
+                       ssleep(1);
+                       tmo--;
+                       mutex_lock(&acpi_desc->init_mutex);
+                       continue;
+               }
+
+               /* we got some results, but there are more pending... */
+               if (rc == -ENOSPC && overflow_retry--) {
+                       if (!init_ars_len) {
+                               init_ars_len = acpi_desc->ars_status->length;
+                               init_ars_start = acpi_desc->ars_status->address;
+                       }
+                       rc = ars_continue(acpi_desc);
+               }
+
+               if (rc < 0) {
+                       dev_warn(dev, "range %d ars continuation failed\n",
+                                       spa->range_index);
+                       break;
+               }
+
+               if (init_ars_len) {
+                       ars_start = init_ars_start;
+                       ars_len = init_ars_len;
+               } else {
+                       ars_start = acpi_desc->ars_status->address;
+                       ars_len = acpi_desc->ars_status->length;
+               }
+               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
+                               spa->range_index, ars_start, ars_len);
+               /* notify the region about new poison entries */
+               nvdimm_region_notify(nfit_spa->nd_region,
+                               NVDIMM_REVALIDATE_POISON);
+               break;
+       } while (1);
+}
+
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+       struct device *dev;
+       u64 init_scrub_length = 0;
+       struct nfit_spa *nfit_spa;
+       u64 init_scrub_address = 0;
+       bool init_ars_done = false;
+       struct acpi_nfit_desc *acpi_desc;
+       unsigned int tmo = scrub_timeout;
+       unsigned int overflow_retry = scrub_overflow_abort;
+
+       acpi_desc = container_of(work, typeof(*acpi_desc), work);
+       dev = acpi_desc->dev;
+
+       /*
+        * We scrub in 2 phases.  The first phase waits for any platform
+        * firmware initiated scrubs to complete, then searches for the
+        * affected spa regions and marks them scanned.  In the second phase we
+        * initiate a directed scrub for every range that was not scrubbed in
+        * phase 1. If we're called for a 'rescan', we harmlessly pass through
+        * the first phase, but really only care about running phase 2, where
+        * regions can be notified of new poison.
+        */
+
+       /* process platform firmware initiated scrubs */
+ retry:
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               struct nd_cmd_ars_status *ars_status;
+               struct acpi_nfit_system_address *spa;
+               u64 ars_start, ars_len;
+               int rc;
+
+               if (acpi_desc->cancel)
+                       break;
+
+               if (nfit_spa->nd_region)
+                       continue;
+
+               if (init_ars_done) {
+                       /*
+                        * No need to re-query, we're now just
+                        * reconciling all the ranges covered by the
+                        * initial scrub
+                        */
+                       rc = 0;
+               } else
+                       rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+
+               if (rc == -ENOTTY) {
+                       /* no ars capability, just register spa and move on */
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+                       continue;
+               }
+
+               if (rc == -EBUSY && !tmo) {
+                       /* fallthrough to directed scrub in phase 2 */
+                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
+                       break;
+               } else if (rc == -EBUSY) {
+                       mutex_unlock(&acpi_desc->init_mutex);
+                       ssleep(1);
+                       tmo--;
+                       goto retry;
+               }
+
+               /* we got some results, but there are more pending... */
+               if (rc == -ENOSPC && overflow_retry--) {
+                       ars_status = acpi_desc->ars_status;
+                       /*
+                        * Record the original scrub range, so that we
+                        * can recall all the ranges impacted by the
+                        * initial scrub.
+                        */
+                       if (!init_scrub_length) {
+                               init_scrub_length = ars_status->length;
+                               init_scrub_address = ars_status->address;
+                       }
+                       rc = ars_continue(acpi_desc);
+                       if (rc == 0) {
+                               mutex_unlock(&acpi_desc->init_mutex);
+                               goto retry;
+                       }
+               }
+
+               if (rc < 0) {
+                       /*
+                        * Initial scrub failed, we'll give it one more
+                        * try below...
+                        */
+                       break;
+               }
+
+               /* We got some final results, record completed ranges */
+               ars_status = acpi_desc->ars_status;
+               if (init_scrub_length) {
+                       ars_start = init_scrub_address;
+                       ars_len = init_scrub_length;
+               } else {
+                       ars_start = ars_status->address;
+                       ars_len = ars_status->length;
+               }
+               spa = nfit_spa->spa;
+
+               if (!init_ars_done) {
+                       init_ars_done = true;
+                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
+                                       ars_start, ars_len);
+               }
+               if (ars_start <= spa->address && ars_start + ars_len
+                               >= spa->address + spa->length)
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+       }
+
+       /*
+        * For all the ranges not covered by an initial scrub we still
+        * want to see if there are errors, but it's ok to discover them
+        * asynchronously.
+        */
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               /*
+                * Flag all the ranges that still need scrubbing, but
+                * register them now to make data available.
+                */
+               if (!nfit_spa->nd_region) {
+                       nfit_spa->ars_required = 1;
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               }
+       }
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+       acpi_desc->scrub_count++;
+       if (acpi_desc->scrub_count_state)
+               sysfs_notify_dirent(acpi_desc->scrub_count_state);
+       mutex_unlock(&acpi_desc->init_mutex);
+}
+
+static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_spa *nfit_spa;
+       int rc;
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
+                       /* BLK regions don't need to wait for ars results */
+                       rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+                       if (rc)
+                               return rc;
+               }
+
+       queue_work(nfit_wq, &acpi_desc->work);
+       return 0;
+}
+
+static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev)
+{
+       struct device *dev = acpi_desc->dev;
+
+       if (!list_empty(&prev->spas) ||
+                       !list_empty(&prev->memdevs) ||
+                       !list_empty(&prev->dcrs) ||
+                       !list_empty(&prev->bdws) ||
+                       !list_empty(&prev->idts) ||
+                       !list_empty(&prev->flushes)) {
+               dev_err(dev, "new nfit deletes entries (unsupported)\n");
+               return -ENXIO;
+       }
+       return 0;
+}
+
+static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
+{
+       struct device *dev = acpi_desc->dev;
+       struct kernfs_node *nfit;
+       struct device *bus_dev;
+
+       if (!ars_supported(acpi_desc->nvdimm_bus))
+               return 0;
+
+       bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+       nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
+       if (!nfit) {
+               dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
+               return -ENODEV;
+       }
+       acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
+       sysfs_put(nfit);
+       if (!acpi_desc->scrub_count_state) {
+               dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static void acpi_nfit_destruct(void *data)
+{
+       struct acpi_nfit_desc *acpi_desc = data;
+       struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+
+       /*
+        * Destruct under acpi_desc_lock so that nfit_handle_mce does not
+        * race teardown
+        */
+       mutex_lock(&acpi_desc_lock);
+       acpi_desc->cancel = 1;
+       /*
+        * Bounce the nvdimm bus lock to make sure any in-flight
+        * acpi_nfit_ars_rescan() submissions have had a chance to
+        * either submit or see ->cancel set.
+        */
+       device_lock(bus_dev);
+       device_unlock(bus_dev);
+
+       flush_workqueue(nfit_wq);
+       if (acpi_desc->scrub_count_state)
+               sysfs_put(acpi_desc->scrub_count_state);
+       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       acpi_desc->nvdimm_bus = NULL;
+       list_del(&acpi_desc->list);
+       mutex_unlock(&acpi_desc_lock);
+}
+
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_table_prev prev;
+       const void *end;
+       int rc;
+
+       if (!acpi_desc->nvdimm_bus) {
+               acpi_nfit_init_dsms(acpi_desc);
+
+               acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
+                               &acpi_desc->nd_desc);
+               if (!acpi_desc->nvdimm_bus)
+                       return -ENOMEM;
+
+               rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
+                               acpi_desc);
+               if (rc)
+                       return rc;
+
+               rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
+               if (rc)
+                       return rc;
+
+               /* register this acpi_desc for mce notifications */
+               mutex_lock(&acpi_desc_lock);
+               list_add_tail(&acpi_desc->list, &acpi_descs);
+               mutex_unlock(&acpi_desc_lock);
+       }
+
+       mutex_lock(&acpi_desc->init_mutex);
+
+       INIT_LIST_HEAD(&prev.spas);
+       INIT_LIST_HEAD(&prev.memdevs);
+       INIT_LIST_HEAD(&prev.dcrs);
+       INIT_LIST_HEAD(&prev.bdws);
+       INIT_LIST_HEAD(&prev.idts);
+       INIT_LIST_HEAD(&prev.flushes);
+
+       list_cut_position(&prev.spas, &acpi_desc->spas,
+                               acpi_desc->spas.prev);
+       list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
+                               acpi_desc->memdevs.prev);
+       list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
+                               acpi_desc->dcrs.prev);
+       list_cut_position(&prev.bdws, &acpi_desc->bdws,
+                               acpi_desc->bdws.prev);
+       list_cut_position(&prev.idts, &acpi_desc->idts,
+                               acpi_desc->idts.prev);
+       list_cut_position(&prev.flushes, &acpi_desc->flushes,
+                               acpi_desc->flushes.prev);
+
+       end = data + sz;
+       while (!IS_ERR_OR_NULL(data))
+               data = add_table(acpi_desc, &prev, data, end);
+
+       if (IS_ERR(data)) {
+               dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
+                               PTR_ERR(data));
+               rc = PTR_ERR(data);
+               goto out_unlock;
+       }
+
+       rc = acpi_nfit_check_deletions(acpi_desc, &prev);
+       if (rc)
+               goto out_unlock;
+
+       rc = nfit_mem_init(acpi_desc);
+       if (rc)
+               goto out_unlock;
+
+       rc = acpi_nfit_register_dimms(acpi_desc);
+       if (rc)
+               goto out_unlock;
+
+       rc = acpi_nfit_register_regions(acpi_desc);
+
+ out_unlock:
+       mutex_unlock(&acpi_desc->init_mutex);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_init);
+
+struct acpi_nfit_flush_work {
+       struct work_struct work;
+       struct completion cmp;
+};
+
+static void flush_probe(struct work_struct *work)
+{
+       struct acpi_nfit_flush_work *flush;
+
+       flush = container_of(work, typeof(*flush), work);
+       complete(&flush->cmp);
+}
+
+static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+       struct device *dev = acpi_desc->dev;
+       struct acpi_nfit_flush_work flush;
+
+       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+       device_lock(dev);
+       device_unlock(dev);
+
+       /*
+        * Scrub work could take tens of seconds, and userspace may give up,
+        * so we need to be interruptible while waiting.
+        */
+       INIT_WORK_ONSTACK(&flush.work, flush_probe);
+       init_completion(&flush.cmp);
+       queue_work(nfit_wq, &flush.work);
+       return wait_for_completion_interruptible(&flush.cmp);
+}
+
+static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, unsigned int cmd)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+
+       if (nvdimm)
+               return 0;
+       if (cmd != ND_CMD_ARS_START)
+               return 0;
+
+       /*
+        * The kernel and userspace may race to initiate a scrub, but
+        * the scrub thread is prepared to lose that initial race.  It
+        * just needs a guarantee that any ars it initiates is not
+        * interrupted by any intervening start requests from userspace.
+        */
+       if (work_busy(&acpi_desc->work))
+               return -EBUSY;
+
+       return 0;
+}
+
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
+
+       if (work_busy(&acpi_desc->work))
+               return -EBUSY;
+
+       if (acpi_desc->cancel)
+               return 0;
+
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+               if (nfit_spa_type(spa) != NFIT_SPA_PM)
+                       continue;
+
+               nfit_spa->ars_required = 1;
+       }
+       queue_work(nfit_wq, &acpi_desc->work);
+       dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
+       mutex_unlock(&acpi_desc->init_mutex);
+
+       return 0;
+}
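+
+/*
+ * An on-demand rescan (wired up to the bus-level nfit/scrub attribute)
+ * only flags the PM ranges as ars_required and re-queues the scrub
+ * work, so it follows the same phase-2 path as the initial directed
+ * scrub.
+ */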
+
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+
+       dev_set_drvdata(dev, acpi_desc);
+       acpi_desc->dev = dev;
+       acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
+       nd_desc = &acpi_desc->nd_desc;
+       nd_desc->provider_name = "ACPI.NFIT";
+       nd_desc->module = THIS_MODULE;
+       nd_desc->ndctl = acpi_nfit_ctl;
+       nd_desc->flush_probe = acpi_nfit_flush_probe;
+       nd_desc->clear_to_send = acpi_nfit_clear_to_send;
+       nd_desc->attr_groups = acpi_nfit_attribute_groups;
+
+       INIT_LIST_HEAD(&acpi_desc->spas);
+       INIT_LIST_HEAD(&acpi_desc->dcrs);
+       INIT_LIST_HEAD(&acpi_desc->bdws);
+       INIT_LIST_HEAD(&acpi_desc->idts);
+       INIT_LIST_HEAD(&acpi_desc->flushes);
+       INIT_LIST_HEAD(&acpi_desc->memdevs);
+       INIT_LIST_HEAD(&acpi_desc->dimms);
+       INIT_LIST_HEAD(&acpi_desc->list);
+       mutex_init(&acpi_desc->init_mutex);
+       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
+
+static int acpi_nfit_add(struct acpi_device *adev)
+{
+       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct acpi_nfit_desc *acpi_desc;
+       struct device *dev = &adev->dev;
+       struct acpi_table_header *tbl;
+       acpi_status status = AE_OK;
+       acpi_size sz;
+       int rc = 0;
+
+       status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
+       if (ACPI_FAILURE(status)) {
+               /* This is ok; an NVDIMM may be hotplugged later */
+               dev_dbg(dev, "failed to find NFIT at startup\n");
+               return 0;
+       }
+
+       acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+       if (!acpi_desc)
+               return -ENOMEM;
+       acpi_nfit_desc_init(acpi_desc, &adev->dev);
+
+       /* Save the acpi header for exporting the revision via sysfs */
+       acpi_desc->acpi_header = *tbl;
+
+       /* Evaluate _FIT and override with that if present */
+       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+       if (ACPI_SUCCESS(status) && buf.length > 0) {
+               union acpi_object *obj = buf.pointer;
+
+               if (obj->type == ACPI_TYPE_BUFFER)
+                       rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+                                       obj->buffer.length);
+               else
+                       dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
+                                __func__, (int) obj->type);
+               kfree(buf.pointer);
+       } else
+               /* skip over the lead-in header table */
+               rc = acpi_nfit_init(acpi_desc, (void *) tbl
+                               + sizeof(struct acpi_table_nfit),
+                               sz - sizeof(struct acpi_table_nfit));
+       return rc;
+}
+
+static int acpi_nfit_remove(struct acpi_device *adev)
+{
+       /* see acpi_nfit_destruct */
+       return 0;
+}
+
+static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+{
+       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct device *dev = &adev->dev;
+       union acpi_object *obj;
+       acpi_status status;
+       int ret;
+
+       dev_dbg(dev, "%s: event: %d\n", __func__, event);
+
+       device_lock(dev);
+       if (!dev->driver) {
+               /* dev->driver may be null if we're being removed */
+               dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+               goto out_unlock;
+       }
+
+       if (!acpi_desc) {
+               acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+               if (!acpi_desc)
+                       goto out_unlock;
+               acpi_nfit_desc_init(acpi_desc, &adev->dev);
+       } else {
+               /*
+                * Finish previous registration before considering new
+                * regions.
+                */
+               flush_workqueue(nfit_wq);
+       }
+
+       /* Evaluate _FIT */
+       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+       if (ACPI_FAILURE(status)) {
+               dev_err(dev, "failed to evaluate _FIT\n");
+               goto out_unlock;
+       }
+
+       obj = buf.pointer;
+       if (obj->type == ACPI_TYPE_BUFFER) {
+               ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+                               obj->buffer.length);
+               if (ret)
+                       dev_err(dev, "failed to merge updated NFIT\n");
+       } else
+               dev_err(dev, "Invalid _FIT\n");
+       kfree(buf.pointer);
+
+ out_unlock:
+       device_unlock(dev);
+}
+
+static const struct acpi_device_id acpi_nfit_ids[] = {
+       { "ACPI0012", 0 },
+       { "", 0 },
+};
+MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
+
+static struct acpi_driver acpi_nfit_driver = {
+       .name = KBUILD_MODNAME,
+       .ids = acpi_nfit_ids,
+       .ops = {
+               .add = acpi_nfit_add,
+               .remove = acpi_nfit_remove,
+               .notify = acpi_nfit_notify,
+       },
+};
+
+static __init int nfit_init(void)
+{
+       BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
+
+       acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
+       acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
+       acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
+       acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
+       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
+       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
+       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
+       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
+       acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+
+       nfit_wq = create_singlethread_workqueue("nfit");
+       if (!nfit_wq)
+               return -ENOMEM;
+
+       nfit_mce_register();
+
+       return acpi_bus_register_driver(&acpi_nfit_driver);
+}
+
+static __exit void nfit_exit(void)
+{
+       nfit_mce_unregister();
+       acpi_bus_unregister_driver(&acpi_nfit_driver);
+       destroy_workqueue(nfit_wq);
+       WARN_ON(!list_empty(&acpi_descs));
+}
+
+module_init(nfit_init);
+module_exit(nfit_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
new file mode 100644 (file)
index 0000000..4c745bf
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * NFIT - Machine Check Handler
+ *
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/notifier.h>
+#include <linux/acpi.h>
+#include <asm/mce.h>
+#include "nfit.h"
+
+static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
+                       void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct acpi_nfit_desc *acpi_desc;
+       struct nfit_spa *nfit_spa;
+
+       /* We only care about memory errors */
+       if (!(mce->status & MCACOD))
+               return NOTIFY_DONE;
+
+       /*
+        * mce->addr contains the physical address whose access caused
+        * the machine check. Walk the list of NFITs and start a scrub
+        * only if one of them covers that address.
+        */
+       mutex_lock(&acpi_desc_lock);
+       list_for_each_entry(acpi_desc, &acpi_descs, list) {
+               struct device *dev = acpi_desc->dev;
+               int found_match = 0;
+
+               mutex_lock(&acpi_desc->init_mutex);
+               list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+                       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+                       if (nfit_spa_type(spa) != NFIT_SPA_PM)
+                               continue;
+                       /* find the spa that covers the mce addr */
+                       if (spa->address > mce->addr)
+                               continue;
+                       if ((spa->address + spa->length - 1) < mce->addr)
+                               continue;
+                       found_match = 1;
+                       dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
+                               __func__, spa->range_index, spa->address,
+                               spa->length);
+                       /*
+                        * We can break at the first match because we're going
+                        * to rescan all the SPA ranges. There shouldn't be any
+                        * aliasing anyway.
+                        */
+                       break;
+               }
+               mutex_unlock(&acpi_desc->init_mutex);
+
+               /*
+                * We can ignore an -EBUSY here; if an ARS is already in
+                * progress, just let that one be the authoritative scrub.
+                */
+               if (found_match)
+                       acpi_nfit_ars_rescan(acpi_desc);
+       }
+
+       mutex_unlock(&acpi_desc_lock);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block nfit_mce_dec = {
+       .notifier_call  = nfit_handle_mce,
+};
+
+void nfit_mce_register(void)
+{
+       mce_register_decode_chain(&nfit_mce_dec);
+}
+
+void nfit_mce_unregister(void)
+{
+       mce_unregister_decode_chain(&nfit_mce_dec);
+}
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
new file mode 100644 (file)
index 0000000..e894ded
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * NVDIMM Firmware Interface Table - NFIT
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_H__
+#define __NFIT_H__
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/acpi.h>
+#include <acpi/acuuid.h>
+
+/* ACPI 6.1 */
+#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
+
+/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
+#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
+
+/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
+#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
+#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
+
+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
+#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
+               | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
+               | ACPI_NFIT_MEM_NOT_ARMED)
+
+enum nfit_uuids {
+       /* for simplicity alias the uuid index with the family id */
+       NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
+       NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
+       NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+       NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
+       NFIT_SPA_VOLATILE,
+       NFIT_SPA_PM,
+       NFIT_SPA_DCR,
+       NFIT_SPA_BDW,
+       NFIT_SPA_VDISK,
+       NFIT_SPA_VCD,
+       NFIT_SPA_PDISK,
+       NFIT_SPA_PCD,
+       NFIT_DEV_BUS,
+       NFIT_UUID_MAX,
+};
+
+/*
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
+ */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
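+
+/*
+ * Decode sketch (illustrative only): per the encoding above,
+ * NFIT_FIC_BYTE (0x101) splits back out as:
+ *
+ *	interface = le16_to_cpu(fic) & 0xff;	-> 0x01
+ *	function  = le16_to_cpu(fic) >> 8;	-> 0x01
+ */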
+
+enum {
+       NFIT_BLK_READ_FLUSH = 1,
+       NFIT_BLK_DCR_LATCH = 2,
+       NFIT_ARS_STATUS_DONE = 0,
+       NFIT_ARS_STATUS_BUSY = 1 << 16,
+       NFIT_ARS_STATUS_NONE = 2 << 16,
+       NFIT_ARS_STATUS_INTR = 3 << 16,
+       NFIT_ARS_START_BUSY = 6,
+       NFIT_ARS_CAP_NONE = 1,
+       NFIT_ARS_F_OVERFLOW = 1,
+       NFIT_ARS_TIMEOUT = 90,
+};
+
+struct nfit_spa {
+       struct list_head list;
+       struct nd_region *nd_region;
+       unsigned int ars_required:1;
+       u32 clear_err_unit;
+       u32 max_ars;
+       struct acpi_nfit_system_address spa[0];
+};
+
+struct nfit_dcr {
+       struct list_head list;
+       struct acpi_nfit_control_region dcr[0];
+};
+
+struct nfit_bdw {
+       struct list_head list;
+       struct acpi_nfit_data_region bdw[0];
+};
+
+struct nfit_idt {
+       struct list_head list;
+       struct acpi_nfit_interleave idt[0];
+};
+
+struct nfit_flush {
+       struct list_head list;
+       struct acpi_nfit_flush_address flush[0];
+};
+
+struct nfit_memdev {
+       struct list_head list;
+       struct acpi_nfit_memory_map memdev[0];
+};
+
+/* assembled tables for a given dimm/memory-device */
+struct nfit_mem {
+       struct nvdimm *nvdimm;
+       struct acpi_nfit_memory_map *memdev_dcr;
+       struct acpi_nfit_memory_map *memdev_pmem;
+       struct acpi_nfit_memory_map *memdev_bdw;
+       struct acpi_nfit_control_region *dcr;
+       struct acpi_nfit_data_region *bdw;
+       struct acpi_nfit_system_address *spa_dcr;
+       struct acpi_nfit_system_address *spa_bdw;
+       struct acpi_nfit_interleave *idt_dcr;
+       struct acpi_nfit_interleave *idt_bdw;
+       struct nfit_flush *nfit_flush;
+       struct list_head list;
+       struct acpi_device *adev;
+       struct acpi_nfit_desc *acpi_desc;
+       struct resource *flush_wpq;
+       unsigned long dsm_mask;
+       int family;
+};
+
+struct acpi_nfit_desc {
+       struct nvdimm_bus_descriptor nd_desc;
+       struct acpi_table_header acpi_header;
+       struct mutex init_mutex;
+       struct list_head memdevs;
+       struct list_head flushes;
+       struct list_head dimms;
+       struct list_head spas;
+       struct list_head dcrs;
+       struct list_head bdws;
+       struct list_head idts;
+       struct nvdimm_bus *nvdimm_bus;
+       struct device *dev;
+       struct nd_cmd_ars_status *ars_status;
+       size_t ars_status_size;
+       struct work_struct work;
+       struct list_head list;
+       struct kernfs_node *scrub_count_state;
+       unsigned int scrub_count;
+       unsigned int cancel:1;
+       unsigned long dimm_cmd_force_en;
+       unsigned long bus_cmd_force_en;
+       int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+                       void *iobuf, u64 len, int rw);
+};
+
+enum nd_blk_mmio_selector {
+       BDW,
+       DCR,
+};
+
+struct nd_blk_addr {
+       union {
+               void __iomem *base;
+               void *aperture;
+       };
+};
+
+struct nfit_blk {
+       struct nfit_blk_mmio {
+               struct nd_blk_addr addr;
+               u64 size;
+               u64 base_offset;
+               u32 line_size;
+               u32 num_lines;
+               u32 table_size;
+               struct acpi_nfit_interleave *idt;
+               struct acpi_nfit_system_address *spa;
+       } mmio[2];
+       struct nd_region *nd_region;
+       u64 bdw_offset; /* post interleave offset */
+       u64 stat_offset;
+       u64 cmd_offset;
+       u32 dimm_flags;
+};
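+
+/*
+ * Access sketch (illustrative): the two apertures in mmio[] are
+ * indexed by enum nd_blk_mmio_selector, e.g.:
+ *
+ *	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ */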
+
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
+
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
+{
+}
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);
+
+static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
+               struct nfit_mem *nfit_mem)
+{
+       if (nfit_mem->memdev_dcr)
+               return nfit_mem->memdev_dcr;
+       return nfit_mem->memdev_pmem;
+}
+
+static inline struct acpi_nfit_desc *to_acpi_desc(
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+const u8 *to_nfit_uuid(enum nfit_uuids id);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
+#endif /* __NFIT_H__ */
index ba5145d384d8013df86b1f36047303df26708518..3022dad240719138d0a0aca0f7b6a7d2486f862a 100644 (file)
@@ -379,7 +379,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-                       void __pmem **kaddr, pfn_t *pfn, long size)
+                       void **kaddr, pfn_t *pfn, long size)
 {
        struct brd_device *brd = bdev->bd_disk->private_data;
        struct page *page;
@@ -389,7 +389,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
        page = brd_insert_page(brd, sector);
        if (!page)
                return -ENOSPC;
-       *kaddr = (void __pmem *)page_address(page);
+       *kaddr = page_address(page);
        *pfn = page_to_pfn_t(page);
 
        return PAGE_SIZE;
index b891a129b275d56985a436bb30bf5486a5b81ec7..803f3953b341a42aa47adcc4f8405f5dfa501a06 100644 (file)
@@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
        }
        dax_dev->dev = dev;
 
-       rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
-       if (rc) {
-               unregister_dax_dev(dev);
+       rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+       if (rc)
                return rc;
-       }
 
        return 0;
 
index 55d510e36cd1bd2d9236dd6f00d34db640dbb171..dfb168568af1a6d2ba163a7010d611ec0ad3a3f0 100644 (file)
@@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
-       rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
-       if (rc) {
-               dax_pmem_percpu_exit(&dax_pmem->ref);
+       rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+                                                       &dax_pmem->ref);
+       if (rc)
                return rc;
-       }
 
        addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
 
-       rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
-       if (rc) {
-               dax_pmem_percpu_kill(&dax_pmem->ref);
+       rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+                                                       &dax_pmem->ref);
+       if (rc)
                return rc;
-       }
 
        nd_region = to_nd_region(dev->parent);
        dax_region = alloc_dax_region(dev, nd_region->id, &res,
index 6d35dd4e9efbe45384dca916bf447396a30fe02e..4788b0b989a9bac661f07a8deb2c7a86a96c8677 100644 (file)
@@ -142,7 +142,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 }
 
 static long linear_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct linear_c *lc = ti->private;
        struct block_device *bdev = lc->dev->bdev;
index 731e1f5bd89574deb711419ebe8eaf7a544e605d..ce2a910709f722ce065e365fd53a9ee326b6477e 100644 (file)
@@ -2303,7 +2303,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long origin_direct_access(struct dm_target *ti, sector_t sector,
-               void __pmem **kaddr, pfn_t *pfn, long size)
+               void **kaddr, pfn_t *pfn, long size)
 {
        DMWARN("device does not support dax.");
        return -EIO;
index 01bb9cf2a8c2318e1b5cf704728637acf95665c9..83f1d46671953323bd7390d57a6eef4099b61d5d 100644 (file)
@@ -309,7 +309,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long stripe_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct stripe_c *sc = ti->private;
        uint32_t stripe;
index 6eecd6b36f768fb0ea4e056b5742afce02089e28..710ae28fd618256ea0b1da6fc34c40ae6e473066 100644 (file)
@@ -149,7 +149,7 @@ static void io_err_release_clone_rq(struct request *clone)
 }
 
 static long io_err_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        return -EIO;
 }
index ceb69fc0b10b32773bd41d3c0b8beadf442571f7..25d1d97154a8b68c0f847acdfdc74cca4aa711f1 100644 (file)
@@ -906,7 +906,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct mapped_device *md = bdev->bd_disk->private_data;
        struct dm_table *map;
index 7c8a3bf078846ac0bb1410f93e5a91ff3ca4c985..124c2432ac9cb3d6e0a696023507f5131774c282 100644 (file)
@@ -1,6 +1,7 @@
 menuconfig LIBNVDIMM
        tristate "NVDIMM (Non-Volatile Memory Device) Support"
        depends on PHYS_ADDR_T_64BIT
+       depends on HAS_IOMEM
        depends on BLK_DEV
        help
          Generic support for non-volatile memory devices including
@@ -19,7 +20,6 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
        tristate "PMEM: Persistent memory block device support"
        default LIBNVDIMM
-       depends on HAS_IOMEM
        select ND_BTT if BTT
        select ND_PFN if NVDIMM_PFN
        help
index 7e262ef06ede793ebb378770977fd12fefaf79a6..9faaa9694d8741adb64b5aae8754f44d24a01e13 100644 (file)
@@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        q = blk_alloc_queue(GFP_KERNEL);
        if (!q)
                return -ENOMEM;
-       if (devm_add_action(dev, nd_blk_release_queue, q)) {
-               blk_cleanup_queue(q);
+       if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
                return -ENOMEM;
-       }
 
        blk_queue_make_request(q, nd_blk_make_request);
        blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        disk = alloc_disk(0);
        if (!disk)
                return -ENOMEM;
-       if (devm_add_action(dev, nd_blk_release_disk, disk)) {
-               put_disk(disk);
-               return -ENOMEM;
-       }
 
        disk->first_minor       = 0;
        disk->fops              = &nd_blk_fops;
@@ -295,6 +289,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        set_capacity(disk, 0);
        device_add_disk(dev, disk);
 
+       if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+               return -ENOMEM;
+
        if (nsblk_meta_size(nsblk)) {
                int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
 
index 816d0dae63983c8ccf2c4f288e2d841dc74d843d..3fa7919f94a8785860afd3487d803f5b3010acd9 100644 (file)
@@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 {
        struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
 
-       if (dev)
-               __nd_device_register(dev);
+       __nd_device_register(dev);
        return dev;
 }
 
index 5e4e5c772ea54ff9f5696a1f7d2816d0ebff4d5c..458daf9273362a19cc26d6a4f2c9113764f493b8 100644 (file)
@@ -31,6 +31,7 @@
 int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
+static DEFINE_IDA(nd_ida);
 
 static int to_nd_device_type(struct device *dev)
 {
@@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
                        to_nd_device_type(dev));
 }
 
-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
-       struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
-       return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
 static struct module *to_bus_provider(struct device *dev)
 {
        /* pin bus providers while regions are enabled */
        if (is_nd_pmem(dev) || is_nd_blk(dev)) {
                struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 
-               return nvdimm_bus->module;
+               return nvdimm_bus->nd_desc->module;
        }
        return NULL;
 }
@@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
        return rc;
 }
 
+static void nvdimm_bus_shutdown(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nd_device_driver *nd_drv = NULL;
+
+       if (dev->driver)
+               nd_drv = to_nd_device_driver(dev->driver);
+
+       if (nd_drv && nd_drv->shutdown) {
+               nd_drv->shutdown(dev);
+               dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+                               dev->driver->name, dev_name(dev));
+       }
+}
+
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
        device_lock(dev);
@@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
 static struct bus_type nvdimm_bus_type = {
        .name = "nd",
        .uevent = nvdimm_bus_uevent,
        .match = nvdimm_bus_match,
        .probe = nvdimm_bus_probe,
        .remove = nvdimm_bus_remove,
+       .shutdown = nvdimm_bus_shutdown,
+};
+
+static void nvdimm_bus_release(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus;
+
+       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+       ida_simple_remove(&nd_ida, nvdimm_bus->id);
+       kfree(nvdimm_bus);
+}
+
+static bool is_nvdimm_bus(struct device *dev)
+{
+       return dev->release == nvdimm_bus_release;
+}
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+       struct device *dev;
+
+       for (dev = nd_dev; dev; dev = dev->parent)
+               if (is_nvdimm_bus(dev))
+                       break;
+       dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+       if (dev)
+               return to_nvdimm_bus(dev);
+       return NULL;
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus;
+
+       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+       WARN_ON(!is_nvdimm_bus(dev));
+       return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       struct nvdimm_bus *nvdimm_bus;
+       int rc;
+
+       nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+       if (!nvdimm_bus)
+               return NULL;
+       INIT_LIST_HEAD(&nvdimm_bus->list);
+       INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
+       INIT_LIST_HEAD(&nvdimm_bus->poison_list);
+       init_waitqueue_head(&nvdimm_bus->probe_wait);
+       nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+       mutex_init(&nvdimm_bus->reconfig_mutex);
+       if (nvdimm_bus->id < 0) {
+               kfree(nvdimm_bus);
+               return NULL;
+       }
+       nvdimm_bus->nd_desc = nd_desc;
+       nvdimm_bus->dev.parent = parent;
+       nvdimm_bus->dev.release = nvdimm_bus_release;
+       nvdimm_bus->dev.groups = nd_desc->attr_groups;
+       nvdimm_bus->dev.bus = &nvdimm_bus_type;
+       dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+       rc = device_register(&nvdimm_bus->dev);
+       if (rc) {
+               dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+               goto err;
+       }
+
+       return nvdimm_bus;
+ err:
+       put_device(&nvdimm_bus->dev);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+       if (!nvdimm_bus)
+               return;
+       device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+static int child_unregister(struct device *dev, void *data)
+{
+       /*
+        * the singular ndctl class device per bus needs to be
+        * "device_destroy"ed, so skip it here
+        *
+        * i.e. remove classless children
+        */
+       if (dev->class)
+               /* pass */;
+       else
+               nd_device_unregister(dev, ND_SYNC);
+       return 0;
+}
+
+static void free_poison_list(struct list_head *poison_list)
+{
+       struct nd_poison *pl, *next;
+
+       list_for_each_entry_safe(pl, next, poison_list, list) {
+               list_del(&pl->list);
+               kfree(pl);
+       }
+       list_del_init(poison_list);
+}
+
+static int nd_bus_remove(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+       mutex_lock(&nvdimm_bus_list_mutex);
+       list_del_init(&nvdimm_bus->list);
+       mutex_unlock(&nvdimm_bus_list_mutex);
+
+       nd_synchronize();
+       device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+       nvdimm_bus_lock(&nvdimm_bus->dev);
+       free_poison_list(&nvdimm_bus->poison_list);
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+       nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+       return 0;
+}
+
+static int nd_bus_probe(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+       int rc;
+
+       rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+       if (rc)
+               return rc;
+
+       mutex_lock(&nvdimm_bus_list_mutex);
+       list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+       mutex_unlock(&nvdimm_bus_list_mutex);
+
+       /* enable bus provider attributes to look up their local context */
+       dev_set_drvdata(dev, nvdimm_bus->nd_desc);
+
+       return 0;
+}
+
+static struct nd_device_driver nd_bus_driver = {
+       .probe = nd_bus_probe,
+       .remove = nd_bus_remove,
+       .drv = {
+               .name = "nd_bus",
+               .suppress_bind_attrs = true,
+               .bus = &nvdimm_bus_type,
+               .owner = THIS_MODULE,
+               .mod_name = KBUILD_MODNAME,
+       },
 };
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+       struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+       if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+               return true;
+
+       return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
 
 void nd_synchronize(void)
@@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
        dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
                        "ndctl%d", nvdimm_bus->id);
 
-       if (IS_ERR(dev)) {
+       if (IS_ERR(dev))
                dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
                                nvdimm_bus->id, PTR_ERR(dev));
-               return PTR_ERR(dev);
-       }
-       return 0;
+       return PTR_ERR_OR_ZERO(dev);
 }
 
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
                goto err_class;
        }
 
+       rc = driver_register(&nd_bus_driver.drv);
+       if (rc)
+               goto err_nd_bus;
+
        return 0;
 
+ err_nd_bus:
+       class_destroy(nd_class);
  err_class:
        unregister_chrdev(nvdimm_major, "dimmctl");
  err_dimm_chrdev:
@@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)
 
 void nvdimm_bus_exit(void)
 {
+       driver_unregister(&nd_bus_driver.drv);
        class_destroy(nd_class);
        unregister_chrdev(nvdimm_bus_major, "ndctl");
        unregister_chrdev(nvdimm_major, "dimmctl");
        bus_unregister(&nvdimm_bus_type);
+       ida_destroy(&nd_ida);
 }
index 8b2e3c4fb0add718d2dbf167b499516a94612c80..d5dc80c48b4cb36a55c54a2383ce9812ae068260 100644 (file)
@@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
                return memcpy_from_pmem(buf, nsio->addr + offset, size);
        } else {
                memcpy_to_pmem(nsio->addr + offset, buf, size);
-               wmb_pmem();
+               nvdimm_flush(to_nd_region(ndns->dev.parent));
        }
 
        return 0;
@@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
 
        nsio->addr = devm_memremap(dev, res->start, resource_size(res),
                        ARCH_MEMREMAP_PMEM);
-       if (IS_ERR(nsio->addr))
-               return PTR_ERR(nsio->addr);
-       return 0;
+
+       return PTR_ERR_OR_ZERO(nsio->addr);
 }
 EXPORT_SYMBOL_GPL(devm_nsio_enable);
 
index be89764315c2b54d9bbe9548fc3f6183d6032ea4..715583f69d28ae2f413768c81c141e1dd3019d75 100644 (file)
 #include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include "nd-core.h"
 #include "nd.h"
 
 LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);
 
 void nvdimm_bus_lock(struct device *dev)
 {
@@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);
 
+struct nvdimm_map {
+       struct nvdimm_bus *nvdimm_bus;
+       struct list_head list;
+       resource_size_t offset;
+       unsigned long flags;
+       size_t size;
+       union {
+               void *mem;
+               void __iomem *iomem;
+       };
+       struct kref kref;
+};
+
+static struct nvdimm_map *find_nvdimm_map(struct device *dev,
+               resource_size_t offset)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nvdimm_map *nvdimm_map;
+
+       list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
+               if (nvdimm_map->offset == offset)
+                       return nvdimm_map;
+       return NULL;
+}
+
+static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
+               resource_size_t offset, size_t size, unsigned long flags)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
+       if (!nvdimm_map)
+               return NULL;
+
+       INIT_LIST_HEAD(&nvdimm_map->list);
+       nvdimm_map->nvdimm_bus = nvdimm_bus;
+       nvdimm_map->offset = offset;
+       nvdimm_map->flags = flags;
+       nvdimm_map->size = size;
+       kref_init(&nvdimm_map->kref);
+
+       if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+               goto err_request_region;
+
+       if (flags)
+               nvdimm_map->mem = memremap(offset, size, flags);
+       else
+               nvdimm_map->iomem = ioremap(offset, size);
+
+       if (!nvdimm_map->mem)
+               goto err_map;
+
+       dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
+                       __func__);
+       list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
+
+       return nvdimm_map;
+
+ err_map:
+       release_mem_region(offset, size);
+ err_request_region:
+       kfree(nvdimm_map);
+       return NULL;
+}
+
+static void nvdimm_map_release(struct kref *kref)
+{
+       struct nvdimm_bus *nvdimm_bus;
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_map = container_of(kref, struct nvdimm_map, kref);
+       nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+       dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+       list_del(&nvdimm_map->list);
+       if (nvdimm_map->flags)
+               memunmap(nvdimm_map->mem);
+       else
+               iounmap(nvdimm_map->iomem);
+       release_mem_region(nvdimm_map->offset, nvdimm_map->size);
+       kfree(nvdimm_map);
+}
+
+static void nvdimm_map_put(void *data)
+{
+       struct nvdimm_map *nvdimm_map = data;
+       struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+       nvdimm_bus_lock(&nvdimm_bus->dev);
+       kref_put(&nvdimm_map->kref, nvdimm_map_release);
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+/**
+ * devm_nvdimm_memremap - map a resource that is shared across regions
+ * @dev: device that will own a reference to the shared mapping
+ * @offset: physical base address of the mapping
+ * @size: mapping size
+ * @flags: memremap flags; if zero, perform an ioremap instead
+ *
+ * Returns the mapped address, or NULL on failure.
+ */
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags)
+{
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_bus_lock(dev);
+       nvdimm_map = find_nvdimm_map(dev, offset);
+       if (!nvdimm_map)
+               nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
+       else
+               kref_get(&nvdimm_map->kref);
+       nvdimm_bus_unlock(dev);
+
+       if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
+               return NULL;
+
+       return nvdimm_map->mem;
+}
+EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
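+
+/*
+ * Usage sketch (illustrative): a cacheable mapping of a shared
+ * control region vs an mmio mapping of a flush hint page; "res" is a
+ * hypothetical caller-owned resource:
+ *
+ *	void *mem = devm_nvdimm_memremap(dev, res->start,
+ *			resource_size(res), MEMREMAP_WB);
+ *	void __iomem *mmio = devm_nvdimm_ioremap(dev, res->start,
+ *			resource_size(res));
+ */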
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
        u32 *buf = addr;
@@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
 }
 EXPORT_SYMBOL_GPL(nd_fletcher64);
 
-static void nvdimm_bus_release(struct device *dev)
-{
-       struct nvdimm_bus *nvdimm_bus;
-
-       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-       ida_simple_remove(&nd_ida, nvdimm_bus->id);
-       kfree(nvdimm_bus);
-}
-
-struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
-{
-       struct nvdimm_bus *nvdimm_bus;
-
-       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-       WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
-       return nvdimm_bus;
-}
-EXPORT_SYMBOL_GPL(to_nvdimm_bus);
-
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 {
        /* struct nvdimm_bus definition is private to libnvdimm */
@@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 }
 EXPORT_SYMBOL_GPL(to_nd_desc);
 
-struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
 {
-       struct device *dev;
-
-       for (dev = nd_dev; dev; dev = dev->parent)
-               if (dev->release == nvdimm_bus_release)
-                       break;
-       dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
-       if (dev)
-               return to_nvdimm_bus(dev);
-       return NULL;
+       /* struct nvdimm_bus definition is private to libnvdimm */
+       return &nvdimm_bus->dev;
 }
+EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
 
 static bool is_uuid_sep(char sep)
 {
@@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
 
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-               struct nvdimm_bus_descriptor *nd_desc, struct module *module)
-{
-       struct nvdimm_bus *nvdimm_bus;
-       int rc;
-
-       nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
-       if (!nvdimm_bus)
-               return NULL;
-       INIT_LIST_HEAD(&nvdimm_bus->list);
-       INIT_LIST_HEAD(&nvdimm_bus->poison_list);
-       init_waitqueue_head(&nvdimm_bus->probe_wait);
-       nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
-       mutex_init(&nvdimm_bus->reconfig_mutex);
-       if (nvdimm_bus->id < 0) {
-               kfree(nvdimm_bus);
-               return NULL;
-       }
-       nvdimm_bus->nd_desc = nd_desc;
-       nvdimm_bus->module = module;
-       nvdimm_bus->dev.parent = parent;
-       nvdimm_bus->dev.release = nvdimm_bus_release;
-       nvdimm_bus->dev.groups = nd_desc->attr_groups;
-       dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
-       rc = device_register(&nvdimm_bus->dev);
-       if (rc) {
-               dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
-               goto err;
-       }
-
-       rc = nvdimm_bus_create_ndctl(nvdimm_bus);
-       if (rc)
-               goto err;
-
-       mutex_lock(&nvdimm_bus_list_mutex);
-       list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
-       mutex_unlock(&nvdimm_bus_list_mutex);
-
-       return nvdimm_bus;
- err:
-       put_device(&nvdimm_bus->dev);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
-
 static void set_badblock(struct badblocks *bb, sector_t s, int num)
 {
        dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
 
-static void free_poison_list(struct list_head *poison_list)
-{
-       struct nd_poison *pl, *next;
-
-       list_for_each_entry_safe(pl, next, poison_list, list) {
-               list_del(&pl->list);
-               kfree(pl);
-       }
-       list_del_init(poison_list);
-}
-
-static int child_unregister(struct device *dev, void *data)
-{
-       /*
-        * the singular ndctl class device per bus needs to be
-        * "device_destroy"ed, so skip it here
-        *
-        * i.e. remove classless children
-        */
-       if (dev->class)
-               /* pass */;
-       else
-               nd_device_unregister(dev, ND_SYNC);
-       return 0;
-}
-
-void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
-{
-       if (!nvdimm_bus)
-               return;
-
-       mutex_lock(&nvdimm_bus_list_mutex);
-       list_del_init(&nvdimm_bus->list);
-       mutex_unlock(&nvdimm_bus_list_mutex);
-
-       nd_synchronize();
-       device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
-
-       nvdimm_bus_lock(&nvdimm_bus->dev);
-       free_poison_list(&nvdimm_bus->poison_list);
-       nvdimm_bus_unlock(&nvdimm_bus->dev);
-
-       nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
-       device_unregister(&nvdimm_bus->dev);
-}
-EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
@@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
        if (meta_size == 0)
                return 0;
 
-       bi.profile = NULL;
+       memset(&bi, 0, sizeof(bi));
+
        bi.tuple_size = meta_size;
        bi.tag_size = meta_size;
 
@@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
        nvdimm_bus_exit();
        nd_region_devs_exit();
        nvdimm_devs_exit();
-       ida_destroy(&nd_ida);
 }
 
 MODULE_LICENSE("GPL v2");
index bbde28d3dec5cd4bbbb283847d2f736bc81148c9..d9bba5edd8dcf0646cad13a0160534648c52ce8d 100644 (file)
@@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
 
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
                const struct attribute_group **groups, unsigned long flags,
-               unsigned long cmd_mask)
+               unsigned long cmd_mask, int num_flush,
+               struct resource *flush_wpq)
 {
        struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
        struct device *dev;
@@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
        nvdimm->provider_data = provider_data;
        nvdimm->flags = flags;
        nvdimm->cmd_mask = cmd_mask;
+       nvdimm->num_flush = num_flush;
+       nvdimm->flush_wpq = flush_wpq;
        atomic_set(&nvdimm->busy, 0);
        dev = &nvdimm->dev;
        dev_set_name(dev, "nmem%d", nvdimm->id);
index 95825b38559addb8a0dc5cca22bc305e228e65f6..11ea90120542dcbec8410147efca7a502eea9a77 100644 (file)
@@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
 
        nd_desc.attr_groups = e820_pmem_attribute_groups;
        nd_desc.provider_name = "e820";
+       nd_desc.module = THIS_MODULE;
        nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
        if (!nvdimm_bus)
                goto err;
index 284cdaa268cfd8132bc7fcd7299d66eaa4256217..38ce6bbbc170bcdc40982d5f49473bab181e2071 100644 (file)
@@ -26,11 +26,11 @@ extern int nvdimm_major;
 struct nvdimm_bus {
        struct nvdimm_bus_descriptor *nd_desc;
        wait_queue_head_t probe_wait;
-       struct module *module;
        struct list_head list;
        struct device dev;
        int id, probe_active;
        struct list_head poison_list;
+       struct list_head mapping_list;
        struct mutex reconfig_mutex;
 };
 
@@ -40,7 +40,8 @@ struct nvdimm {
        unsigned long cmd_mask;
        struct device dev;
        atomic_t busy;
-       int id;
+       int id, num_flush;
+       struct resource *flush_wpq;
 };
 
 bool is_nvdimm(struct device *dev);
index d0ac93c31dda6adcd01a4be288b3963d5f09f084..40476399d22793aece0438da0f5a0976cef063ab 100644 (file)
@@ -49,9 +49,11 @@ struct nvdimm_drvdata {
        struct kref kref;
 };
 
-struct nd_region_namespaces {
-       int count;
-       int active;
+struct nd_region_data {
+       int ns_count;
+       int ns_active;
+       unsigned int flush_mask;
+       void __iomem *flush_wpq[0][0];
 };
 
 static inline struct nd_namespace_index *to_namespace_index(
@@ -119,7 +121,6 @@ struct nd_region {
 
 struct nd_blk_region {
        int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-       void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
        int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
        void *blk_provider_data;
@@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
 }
 #endif
 int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 {
index 36cb39047d5b77b17eb3d04980b82b36980e7ce4..b511099457db5cbb155e5b6270bc9bb8a3677cc6 100644 (file)
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pmem.h"
 #include "pfn.h"
 #include "nd.h"
 
-struct pmem_device {
-       /* One contiguous memory region per device */
-       phys_addr_t             phys_addr;
-       /* when non-zero this device is hosting a 'pfn' instance */
-       phys_addr_t             data_offset;
-       u64                     pfn_flags;
-       void __pmem             *virt_addr;
-       /* immutable base size of the namespace */
-       size_t                  size;
-       /* trim size when namespace capacity has been section aligned */
-       u32                     pfn_pad;
-       struct badblocks        bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+       /*
+        * nvdimm bus services need a 'dev' parameter, and we record the device
+        * at init in bb.dev.
+        */
+       return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+       return to_nd_region(to_dev(pmem)->parent);
+}
 
 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
                unsigned int len)
 {
-       struct device *dev = pmem->bb.dev;
+       struct device *dev = to_dev(pmem);
        sector_t sector;
        long cleared;
 
@@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
        cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 
        if (cleared > 0 && cleared / 512) {
-               dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+               dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
                                __func__, (unsigned long long) sector,
                                cleared / 512, cleared / 512 > 1 ? "s" : "");
                badblocks_clear(&pmem->bb, sector, cleared / 512);
@@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        bool bad_pmem = false;
        void *mem = kmap_atomic(page);
        phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
-       void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+       void *pmem_addr = pmem->virt_addr + pmem_off;
 
        if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
                bad_pmem = true;
@@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        return rc;
 }
 
+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
        int rc = 0;
@@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
        struct bio_vec bvec;
        struct bvec_iter iter;
        struct pmem_device *pmem = q->queuedata;
+       struct nd_region *nd_region = to_region(pmem);
+
+       if (bio->bi_rw & REQ_FLUSH)
+               nvdimm_flush(nd_region);
 
        do_acct = nd_iostat_start(bio, &start);
        bio_for_each_segment(bvec, bio, iter) {
@@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
        if (do_acct)
                nd_iostat_end(bio, start);
 
-       if (bio_data_dir(bio))
-               wmb_pmem();
+       if (bio->bi_rw & REQ_FUA)
+               nvdimm_flush(nd_region);
 
        bio_endio(bio);
        return BLK_QC_T_NONE;
@@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        int rc;
 
        rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
-       if (rw & WRITE)
-               wmb_pmem();
 
        /*
         * The ->rw_page interface is subtle and tricky.  The core
@@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        return rc;
 }
 
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-                     void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+                     void **kaddr, pfn_t *pfn, long size)
 {
        struct pmem_device *pmem = bdev->bd_queue->queuedata;
        resource_size_t offset = sector * 512 + pmem->data_offset;
@@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
        blk_cleanup_queue(q);
 }
 
-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
 {
        del_gendisk(disk);
        put_disk(disk);
@@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
                struct nd_namespace_common *ndns)
 {
        struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+       struct nd_region *nd_region = to_nd_region(dev->parent);
        struct vmem_altmap __altmap, *altmap = NULL;
        struct resource *res = &nsio->res;
        struct nd_pfn *nd_pfn = NULL;
@@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
        dev_set_drvdata(dev, pmem);
        pmem->phys_addr = res->start;
        pmem->size = resource_size(res);
-       if (!arch_has_wmb_pmem())
+       if (nvdimm_has_flush(nd_region) < 0)
                dev_warn(dev, "unable to guarantee persistence of writes\n");
 
        if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
         * At release time the queue must be dead before
         * devm_memremap_pages is unwound
         */
-       if (devm_add_action(dev, pmem_release_queue, q)) {
-               blk_cleanup_queue(q);
+       if (devm_add_action_or_reset(dev, pmem_release_queue, q))
                return -ENOMEM;
-       }
 
        if (IS_ERR(addr))
                return PTR_ERR(addr);
-       pmem->virt_addr = (void __pmem *) addr;
+       pmem->virt_addr = addr;
 
+       blk_queue_write_cache(q, true, true);
        blk_queue_make_request(q, pmem_make_request);
        blk_queue_physical_block_size(q, PAGE_SIZE);
        blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev,
        disk = alloc_disk_node(0, nid);
        if (!disk)
                return -ENOMEM;
-       if (devm_add_action(dev, pmem_release_disk, disk)) {
-               put_disk(disk);
-               return -ENOMEM;
-       }
 
        disk->fops              = &pmem_fops;
        disk->queue             = q;
@@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
                        / 512);
        if (devm_init_badblocks(dev, &pmem->bb))
                return -ENOMEM;
-       nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+       nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
        disk->bb = &pmem->bb;
        device_add_disk(dev, disk);
+
+       if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+               return -ENOMEM;
+
        revalidate_disk(disk);
 
        return 0;
@@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
 {
        if (is_nd_btt(dev))
                nvdimm_namespace_detach_btt(to_nd_btt(dev));
+       nvdimm_flush(to_nd_region(dev->parent));
+
        return 0;
 }
 
+static void nd_pmem_shutdown(struct device *dev)
+{
+       nvdimm_flush(to_nd_region(dev->parent));
+}
+
 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-       struct nd_region *nd_region = to_nd_region(dev->parent);
        struct pmem_device *pmem = dev_get_drvdata(dev);
+       struct nd_region *nd_region = to_region(pmem);
        resource_size_t offset = 0, end_trunc = 0;
        struct nd_namespace_common *ndns;
        struct nd_namespace_io *nsio;
@@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
        .probe = nd_pmem_probe,
        .remove = nd_pmem_remove,
        .notify = nd_pmem_notify,
+       .shutdown = nd_pmem_shutdown,
        .drv = {
                .name = "nd_pmem",
        },
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
new file mode 100644 (file)
index 0000000..b4ee4f7
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+                     void **kaddr, pfn_t *pfn, long size);
+/* this definition is in its own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+       /* One contiguous memory region per device */
+       phys_addr_t             phys_addr;
+       /* when non-zero this device is hosting a 'pfn' instance */
+       phys_addr_t             data_offset;
+       u64                     pfn_flags;
+       void                    *virt_addr;
+       /* immutable base size of the namespace */
+       size_t                  size;
+       /* trim size when namespace capacity has been section aligned */
+       u32                     pfn_pad;
+       struct badblocks        bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
index 05a91235993969752a8e1edad056a036847923e5..8f241772ec0b24d0a2d297a7549ceac9e072872a 100644 (file)
@@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
 {
        int err, rc;
        static unsigned long once;
-       struct nd_region_namespaces *num_ns;
+       struct nd_region_data *ndrd;
        struct nd_region *nd_region = to_nd_region(dev);
 
        if (nd_region->num_lanes > num_online_cpus()
@@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
                                nd_region->num_lanes);
        }
 
+       rc = nd_region_activate(nd_region);
+       if (rc)
+               return rc;
+
        rc = nd_blk_region_init(nd_region);
        if (rc)
                return rc;
 
        rc = nd_region_register_namespaces(nd_region, &err);
-       num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
-       if (!num_ns)
-               return -ENOMEM;
-
        if (rc < 0)
                return rc;
 
-       num_ns->active = rc;
-       num_ns->count = rc + err;
-       dev_set_drvdata(dev, num_ns);
+       ndrd = dev_get_drvdata(dev);
+       ndrd->ns_active = rc;
+       ndrd->ns_count = rc + err;
 
        if (rc && err && rc == err)
                return -ENODEV;
@@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
 {
        struct nd_region *nd_region = to_nd_region(dev);
 
+       device_for_each_child(dev, NULL, child_unregister);
+
        /* flush attribute readers and disable */
        nvdimm_bus_lock(dev);
        nd_region->ns_seed = NULL;
@@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
        dev_set_drvdata(dev, NULL);
        nvdimm_bus_unlock(dev);
 
-       device_for_each_child(dev, NULL, child_unregister);
        return 0;
 }
 
index 40fcfea26fbbc387a3ecbf235b1d4b011022f1aa..e8d5ba7b29af98f647b119640e79e581996cfdc0 100644 (file)
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
 
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
 static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+               struct nd_region_data *ndrd)
+{
+       int i, j;
+
+       dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+                       nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+       for (i = 0; i < nvdimm->num_flush; i++) {
+               struct resource *res = &nvdimm->flush_wpq[i];
+               unsigned long pfn = PHYS_PFN(res->start);
+               void __iomem *flush_page;
+
+               /* check if flush hints share a page */
+               for (j = 0; j < i; j++) {
+                       struct resource *res_j = &nvdimm->flush_wpq[j];
+                       unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+                       if (pfn == pfn_j)
+                               break;
+               }
+
+               if (j < i)
+                       flush_page = (void __iomem *) ((unsigned long)
+                                       ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+               else
+                       flush_page = devm_nvdimm_ioremap(dev,
+                                       PFN_PHYS(pfn), PAGE_SIZE);
+               if (!flush_page)
+                       return -ENXIO;
+               ndrd->flush_wpq[dimm][i] = flush_page
+                       + (res->start & ~PAGE_MASK);
+       }
+
+       return 0;
+}
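
The inner loop above exists because several flush hints may live in the
same 4K page; in that case only one devm_nvdimm_ioremap() is performed
and later hints reuse it at a different offset.  An illustration with
made-up addresses:

	/*
	 * hint[0] = 0x18000408 and hint[1] = 0x180004c0 both have pfn
	 * 0x18000, so after mapping hint[0] once:
	 *
	 *   flush_wpq[dimm][0] = base + 0x408
	 *   flush_wpq[dimm][1] = base + 0x4c0   (same ioremap'd page)
	 */
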
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+       int i, num_flush = 0;
+       struct nd_region_data *ndrd;
+       struct device *dev = &nd_region->dev;
+       size_t flush_data_size = sizeof(void *);
+
+       nvdimm_bus_lock(&nd_region->dev);
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+               /* at least one null hint slot per-dimm for the "no-hint" case */
+               flush_data_size += sizeof(void *);
+               num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+               if (!nvdimm->num_flush)
+                       continue;
+               flush_data_size += nvdimm->num_flush * sizeof(void *);
+       }
+       nvdimm_bus_unlock(&nd_region->dev);
+
+       ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+       if (!ndrd)
+               return -ENOMEM;
+       dev_set_drvdata(dev, ndrd);
+
+       ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
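
The devm_kzalloc() above sizes struct nd_region_data (defined in
nd-core.h, not in this hunk) plus a trailing per-dimm table of hint
pointers.  Because the buffer is zeroed, a dimm without hints leaves
its first slot NULL, which is exactly what nvdimm_flush() and
nvdimm_has_flush() test for the "no hints" case.  Layout sketch for a
two-dimm region:

	/*
	 *   struct nd_region_data { ... }   header
	 *   flush_wpq[0][0..num_flush]      dimm0 hint mappings + NULL slot
	 *   flush_wpq[1][0..num_flush]      dimm1 hint mappings + NULL slot
	 */
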
 
 static void nd_region_release(struct device *dev)
 {
@@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
 static ssize_t init_namespaces_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
-       struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+       struct nd_region_data *ndrd = dev_get_drvdata(dev);
        ssize_t rc;
 
        nvdimm_bus_lock(dev);
-       if (num_ns)
-               rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+       if (ndrd)
+               rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
        else
                rc = -ENXIO;
        nvdimm_bus_unlock(dev);
@@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
 
                if (is_nd_pmem(dev))
                        return;
-
-               to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
        }
        if (dev->parent && is_nd_blk(dev->parent) && probe) {
                nd_region = to_nd_region(dev->parent);
@@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                if (ndbr) {
                        nd_region = &ndbr->nd_region;
                        ndbr->enable = ndbr_desc->enable;
-                       ndbr->disable = ndbr_desc->disable;
                        ndbr->do_io = ndbr_desc->do_io;
                }
                region_buf = ndbr;
@@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
 
+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+       struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+       int i, idx;
+
+       /*
+        * Try to encourage some diversity in flush hint addresses
+        * across cpus assuming a limited number of flush hints.
+        */
+       idx = this_cpu_read(flush_idx);
+       idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+       /*
+        * The first wmb() is needed to 'sfence' all previous writes
+        * such that they are architecturally visible for the platform
+        * buffer flush.  Note that we've already arranged for pmem
+        * writes to avoid the cache via arch_memcpy_to_pmem().  The
+        * final wmb() ensures ordering for the NVDIMM flush write.
+        */
+       wmb();
+       for (i = 0; i < nd_region->ndr_mappings; i++)
+               if (ndrd->flush_wpq[i][0])
+                       writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+       wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
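
With wmb_pmem() gone, a write path pairs the cache-bypassing copy with
a region flush.  A minimal sketch (names are illustrative, not from the
patch):

	static void example_write_persist(struct nd_region *nd_region,
			void *dst, const void *src, size_t len)
	{
		memcpy_to_pmem(dst, src, len);	/* bypass the cpu cache */
		nvdimm_flush(nd_region);	/* drain posted-write queues */
	}
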
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability cannot be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+       struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+       int i;
+
+       /* no nvdimm == flushing capability unknown */
+       if (nd_region->ndr_mappings == 0)
+               return -ENXIO;
+
+       for (i = 0; i < nd_region->ndr_mappings; i++)
+               /* flush hints present, flushing required */
+               if (ndrd->flush_wpq[i][0])
+                       return 1;
+
+       /*
+       /*
+        * The platform defines dimm devices without hints; assume a
+        * platform persistence mechanism like ADR.
+        */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
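
Callers can consume the tri-state result at attach time; a hedged
sketch of the expected pattern:

	/* illustrative caller, not part of the patch */
	if (nvdimm_has_flush(nd_region) < 0)
		dev_warn(dev, "unable to guarantee persistence of writes\n");
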
+
 void __exit nd_region_devs_exit(void)
 {
        ida_destroy(&region_ida);
index fac1b51ea0dee9a55738d030efed1aaf1a43e5bc..9d66b4fb174b80231a98737bf3f9fafeb4849c66 100644 (file)
@@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static blk_qc_t dcssblk_make_request(struct request_queue *q,
                                                struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-                        void __pmem **kaddr, pfn_t *pfn, long size);
+                        void **kaddr, pfn_t *pfn, long size);
 
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
@@ -884,7 +884,7 @@ fail:
 
 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-                       void __pmem **kaddr, pfn_t *pfn, long size)
+                       void **kaddr, pfn_t *pfn, long size)
 {
        struct dcssblk_dev_info *dev_info;
        unsigned long offset, dev_sz;
@@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
                return -ENODEV;
        dev_sz = dev_info->end - dev_info->start;
        offset = secnum * 512;
-       *kaddr = (void __pmem *) (dev_info->start + offset);
+       *kaddr = (void *) dev_info->start + offset;
        *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
 
        return dev_sz - offset;
index 432b9e6dd63b90b7a98a4820f1c45e257d007517..993dc6fe0416e17e8a0ca5c8a432b8daf574df86 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -75,13 +75,13 @@ static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
        struct request_queue *q = bdev->bd_queue;
        long rc = -EIO;
 
-       dax->addr = (void __pmem *) ERR_PTR(-EIO);
+       dax->addr = ERR_PTR(-EIO);
        if (blk_queue_enter(q, true) != 0)
                return rc;
 
        rc = bdev_direct_access(bdev, dax);
        if (rc < 0) {
-               dax->addr = (void __pmem *) ERR_PTR(rc);
+               dax->addr = ERR_PTR(rc);
                blk_queue_exit(q);
                return rc;
        }
@@ -147,12 +147,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                      struct buffer_head *bh)
 {
        loff_t pos = start, max = start, bh_max = start;
-       bool hole = false, need_wmb = false;
+       bool hole = false;
        struct block_device *bdev = NULL;
        int rw = iov_iter_rw(iter), rc;
        long map_len = 0;
        struct blk_dax_ctl dax = {
-               .addr = (void __pmem *) ERR_PTR(-EIO),
+               .addr = ERR_PTR(-EIO),
        };
        unsigned blkbits = inode->i_blkbits;
        sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
@@ -218,7 +218,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 
                if (iov_iter_rw(iter) == WRITE) {
                        len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-                       need_wmb = true;
                } else if (!hole)
                        len = copy_to_iter((void __force *) dax.addr, max - pos,
                                        iter);
@@ -235,8 +234,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                        dax.addr += len;
        }
 
-       if (need_wmb)
-               wmb_pmem();
        dax_unmap_atomic(bdev, &dax);
 
        return (pos == start) ? rc : pos - start;
@@ -788,7 +785,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
                                return ret;
                }
        }
-       wmb_pmem();
        return 0;
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@@ -1187,7 +1183,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
                if (dax_map_atomic(bdev, &dax) < 0)
                        return PTR_ERR(dax.addr);
                clear_pmem(dax.addr + offset, length);
-               wmb_pmem();
                dax_unmap_atomic(bdev, &dax);
        }
        return 0;
index c96db9c22d1031496d01c0622a65ea0df8e2965a..adf33079771e740a93630305d229ef7d5578b814 100644 (file)
@@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
  */
 struct blk_dax_ctl {
        sector_t sector;
-       void __pmem *addr;
+       void *addr;
        long size;
        pfn_t pfn;
 };
@@ -1676,8 +1676,8 @@ struct block_device_operations {
        int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       long (*direct_access)(struct block_device *, sector_t, void __pmem **,
-                       pfn_t *, long);
+       long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+                       long);
        unsigned int (*check_events) (struct gendisk *disk,
                                      unsigned int clearing);
        /* ->media_changed() is DEPRECATED, use ->check_events() instead */
index 2e853b679a5da9c4061053b714801d7434728154..1bb95484272501bbc8d0603da489f56b4f87714a 100644 (file)
@@ -17,7 +17,6 @@
 # define __release(x)  __context__(x,-1)
 # define __cond_lock(x,c)      ((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu      __attribute__((noderef, address_space(3)))
-# define __pmem                __attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu         __attribute__((noderef, address_space(4)))
 #else /* CONFIG_SPARSE_RCU_POINTER */
@@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
-# define __pmem
 # define __private
 # define ACCESS_PRIVATE(p, member) ((p)->member)
 #endif /* __CHECKER__ */
index b0db857f334b95630a4a05ff5b10a6c8383f0c6c..91acfce74a220010549536edde925ef9e3ec81c0 100644 (file)
@@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  * >= 0 : the number of bytes accessible at the address
  */
 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
-                                    void __pmem **kaddr, pfn_t *pfn, long size);
+                                    void **kaddr, pfn_t *pfn, long size);
 
 void dm_error(const char *message);
 
index 0c3c30cbbea54431cff38dc01f21ba3d831674d4..b519e137b9b7d98ab44aa1409510ad35e841c10f 100644 (file)
@@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 
 struct nd_namespace_label;
 struct nvdimm_drvdata;
+
 struct nd_mapping {
        struct nvdimm *nvdimm;
        struct nd_namespace_label **labels;
@@ -69,6 +70,7 @@ struct nd_mapping {
 struct nvdimm_bus_descriptor {
        const struct attribute_group **attr_groups;
        unsigned long cmd_mask;
+       struct module *module;
        char *provider_name;
        ndctl_fn ndctl;
        int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@@ -99,13 +101,21 @@ struct nd_region_desc {
        unsigned long flags;
 };
 
+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+               resource_size_t offset, size_t size)
+{
+       return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
 struct nvdimm_bus;
 struct module;
 struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
        int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-       void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
        int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
        struct nd_region_desc ndr_desc;
@@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-               struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
-       __nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+               struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
                const struct attribute_group **groups, unsigned long flags,
-               unsigned long cmd_mask);
+               unsigned long cmd_mask, int num_flush,
+               struct resource *flush_wpq);
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
 #endif /* __LIBNVDIMM_H__ */
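
With ->module moved into the descriptor, provider registration reduces
to a plain function call.  A sketch with hypothetical names (only the
fields touched by this series are shown; a real descriptor also fills
in attr_groups, cmd_mask, etc.):

	static struct nvdimm_bus_descriptor example_nd_desc = {
		.provider_name = "example",
		.module = THIS_MODULE,	/* formerly implied by the
					 * nvdimm_bus_register() macro */
		.ndctl = example_ndctl,	/* an ndctl_fn, assumed defined */
	};

	bus = nvdimm_bus_register(parent, &example_nd_desc);
	if (!bus)
		return -ENXIO;
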
index aee2761d294cbc6a06ae32de2c66dd4533cade3d..f1ea426d6a5e9ca742b7ae02ec03548ccaccd34e 100644 (file)
@@ -26,6 +26,7 @@ struct nd_device_driver {
        unsigned long type;
        int (*probe)(struct device *dev);
        int (*remove)(struct device *dev);
+       void (*shutdown)(struct device *dev);
        void (*notify)(struct device *dev, enum nvdimm_event event);
 };
 
@@ -67,7 +68,7 @@ struct nd_namespace_io {
        struct nd_namespace_common common;
        struct resource res;
        resource_size_t size;
-       void __pmem *addr;
+       void *addr;
        struct badblocks bb;
 };
 
index 94994810c7c086e8410f3333d919ef5256595ad5..a3d90b9da18d444f0d53437d1dd5c4bd01c9e845 100644 (file)
@@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
        return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+       return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
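
Being a static inline, phys_to_pfn_t() no longer needs an export; a
typical conversion (field names illustrative) looks like:

	pfn_t pfn = phys_to_pfn_t(pmem->phys_addr + offset,
			PFN_DEV | PFN_MAP);	/* dax-capable device pfn */
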
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
index 57d146fe44dd84e926199d81a8ca0a74a2d012fc..e856c2cb0fe86da91d55e2766b687b66ad02cc1e 100644 (file)
  * calling these symbols with arch_has_pmem_api() and redirect to the
  * implementation in asm/pmem.h.
  */
-static inline bool __arch_has_wmb_pmem(void)
-{
-       return false;
-}
-
-static inline void arch_wmb_pmem(void)
-{
-       BUG();
-}
-
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
        BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-               size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
        BUG();
        return -EFAULT;
 }
 
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
        BUG();
        return 0;
 }
 
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
        BUG();
 }
 
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
        BUG();
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
        BUG();
 }
@@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
        return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }
 
-static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
-               size_t size)
-{
-       memcpy(dst, (void __force *) src, size);
-       return 0;
-}
-
 /*
  * memcpy_from_pmem - read from persistent memory with error handling
  * @dst: destination buffer
@@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
  *
  * Returns 0 on success, negative error code on failure.
  */
-static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
-               size_t size)
+static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
 {
        if (arch_has_pmem_api())
                return arch_memcpy_from_pmem(dst, src, size);
        else
-               return default_memcpy_from_pmem(dst, src, size);
-}
-
-/**
- * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
- *
- * For a given cpu implementation within an architecture it is possible
- * that wmb_pmem() resolves to a nop.  In the case this returns
- * false, pmem api users are unable to ensure durability and may want to
- * fall back to a different data consistency model, or otherwise notify
- * the user.
- */
-static inline bool arch_has_wmb_pmem(void)
-{
-       return arch_has_pmem_api() && __arch_has_wmb_pmem();
-}
-
-/*
- * These defaults seek to offer decent performance and minimize the
- * window between i/o completion and writes being durable on media.
- * However, it is undefined / architecture specific whether
- * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
- * making data durable relative to i/o completion.
- */
-static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t size)
-{
-       memcpy((void __force *) dst, src, size);
-}
-
-static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
-               size_t bytes, struct iov_iter *i)
-{
-       return copy_from_iter_nocache((void __force *)addr, bytes, i);
-}
-
-static inline void default_clear_pmem(void __pmem *addr, size_t size)
-{
-       if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
-               clear_page((void __force *)addr);
-       else
-               memset((void __force *)addr, 0, size);
+               memcpy(dst, src, size);
+       return 0;
 }
 
 /**
@@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
  * being effectively evicted from, or never written to, the processor
  * cache hierarchy after the copy completes.  After memcpy_to_pmem()
  * data may still reside in cpu or platform buffers, so this operation
- * must be followed by a wmb_pmem().
+ * must be followed by a blkdev_issue_flush() on the pmem block device.
  */
-static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
        if (arch_has_pmem_api())
                arch_memcpy_to_pmem(dst, src, n);
        else
-               default_memcpy_to_pmem(dst, src, n);
-}
-
-/**
- * wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of memcpy_to_pmem() operations this drains data from
- * cpu write buffers and any platform (memory controller) buffers to
- * ensure that written data is durable on persistent memory media.
- */
-static inline void wmb_pmem(void)
-{
-       if (arch_has_wmb_pmem())
-               arch_wmb_pmem();
-       else
-               wmb();
+               memcpy(dst, src, n);
 }
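
The blkdev_issue_flush() ordering point the comment refers to, sketched
from a block-layer context (bdev is whichever pmem block device backs
the mapping); the flush bio reaches nvdimm_flush() via the pmem driver:

	memcpy_to_pmem(dst, src, n);
	rc = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
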
 
 /**
@@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
  * @i:         iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
        if (arch_has_pmem_api())
                return arch_copy_from_iter_pmem(addr, bytes, i);
-       return default_copy_from_iter_pmem(addr, bytes, i);
+       return copy_from_iter_nocache(addr, bytes, i);
 }
 
 /**
@@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size:      number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline void clear_pmem(void __pmem *addr, size_t size)
+static inline void clear_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_clear_pmem(addr, size);
        else
-               default_clear_pmem(addr, size);
+               memset(addr, 0, size);
 }
 
 /**
@@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
  * For platforms that support clearing poison, this flushes any poisoned
  * ranges out of the cache.
  */
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_invalidate_pmem(addr, size);
@@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
  * @size:      number of bytes to write back
  *
  * Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_wb_cache_pmem(addr, size);
index 309915f74492406595782af8e8c4e20a5a7ef934..ba5a8c79652a469f048d1507bea9e489ed698bef 100644 (file)
@@ -298,6 +298,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_INTEL 0
 #define NVDIMM_FAMILY_HPE1 1
 #define NVDIMM_FAMILY_HPE2 2
+#define NVDIMM_FAMILY_MSFT 3
 
 #define ND_IOCTL_CALL                  _IOWR(ND_IOCTL, ND_CMD_CALL,\
                                        struct nd_cmd_pkg)
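
A userspace sketch of reaching the new family through the passthrough
ioctl; the function number and payload size are illustrative, not taken
from the Microsoft DSM specification:

	struct {
		struct nd_cmd_pkg pkg;
		unsigned char buf[128];
	} cmd = {
		.pkg = {
			.nd_family = NVDIMM_FAMILY_MSFT,
			.nd_command = 1,	/* family-specific function */
			.nd_size_out = sizeof(cmd.buf),
		},
	};

	rc = ioctl(dimm_fd, ND_IOCTL_CALL, &cmd);
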
index ddb3247a872a60b3ec5e716ac503f8bd13254d97..251d16b4cb41e67111ff2f1f783bf39cdea13822 100644 (file)
@@ -169,12 +169,6 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
-       return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-EXPORT_SYMBOL(phys_to_pfn_t);
-
 #ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
index 4904ced676d40289356aa3358f894fa7efa4b5c0..24a08363995adeeda264cacd801bebf9cc48b5c6 100755 (executable)
@@ -313,7 +313,6 @@ our $Sparse = qr{
                        __kernel|
                        __force|
                        __iomem|
-                       __pmem|
                        __must_check|
                        __init_refok|
                        __kprobes|
index d388de72eacaa353ba14e3340a30ef64b63bcd97..28632ee683772bf26e9c7a5a740340b507a9e5d9 100644 (file)
@@ -947,7 +947,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index 3918dd52e903c9c07db1c027d2680d743f145977..0f196eec9f48ab7b126ace056b8f28bc8b367f71 100644 (file)
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
index 9c8c61e06d5a49b8e075e02971395579544f0919..af25bc8240d0de5dab4fc34fb86f8339cf7aa9e4 100644 (file)
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
index 76e0ec379c8bb357ea886242ae33e244e8bed0bb..979487dae8d4e0a28120561820c567735133035d 100644 (file)
@@ -2655,10 +2655,6 @@ int main(void)
 
 #endif /* #ifndef __x86_64__ */
 
-       /* pcommit */
-
-       asm volatile("pcommit");
-
        /* Following line is a marker for the awk script - do not change */
        asm volatile("rdtsc"); /* Stop here */
 
index ec378cd7b71ee4e067d0a4a9beb59413def3296c..767be7c760340bd33b7e4a18b9a8f3a71d9db33e 100644 (file)
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index 7859856771599531d156c322ac4ceb94596af27a..ad6dd05430192ffdba8d163f9855942fa75b4099 100644 (file)
@@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
-ldflags-y += --wrap=phys_to_pfn_t
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
 
 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
-ACPI_SRC := $(DRIVERS)/acpi
+ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o
 obj-$(CONFIG_DEV_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
-nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
 nfit-y += config_check.o
 
 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
 nd_pmem-y += config_check.o
 
 nd_btt-y := $(NVDIMM_SRC)/btt.o
index adf18bfeca0068cedf19f9dd8b823822bbffac6c..878daf3429e8f822593aff99c490550e5d76ef93 100644 (file)
@@ -10,6 +10,7 @@ void check(void)
        BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+       BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644 (file)
index 0000000..c9b8c48
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+               void **kaddr, pfn_t *pfn, long size)
+{
+       struct pmem_device *pmem = bdev->bd_queue->queuedata;
+       resource_size_t offset = sector * 512 + pmem->data_offset;
+
+       if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+               return -EIO;
+
+       /*
+        * Limit dax to a single page at a time given the vmalloc()-backed
+        * buffers in the nfit_test case.
+        */
+       if (get_nfit_res(pmem->phys_addr + offset)) {
+               struct page *page;
+
+               *kaddr = pmem->virt_addr + offset;
+               page = vmalloc_to_page(pmem->virt_addr + offset);
+               *pfn = page_to_pfn_t(page);
+               dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
+                               "%s: sector: %#llx pfn: %#lx\n", __func__,
+                               (unsigned long long) sector, page_to_pfn(page));
+
+               return PAGE_SIZE;
+       }
+
+       *kaddr = pmem->virt_addr + offset;
+       *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+       /*
+        * If badblocks are present, limit known good range to the
+        * requested range.
+        */
+       if (unlikely(pmem->bb.count))
+               return size;
+       return pmem->size - pmem->pfn_pad - offset;
+}
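
The single-page limit above reflects that vmalloc()-backed test buffers
are generally not physically contiguous, unlike real pmem.  Sketch of
the contrast (addresses made up):

	/*
	 * vmalloc()-backed:  virt 0x...0000 -> pfn 0x1234
	 *                    virt 0x...1000 -> pfn 0x8765  (discontiguous)
	 * real pmem:         virt + offset maps 1:1 onto phys_addr +
	 *                    offset, so the remaining capacity is valid
	 */
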
index 9241064970fe72be96c300f544c03345df42ac81..d32f25bba42a35854364fa7eca200971bf4b02a1 100644 (file)
@@ -1,5 +1,5 @@
 ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
 
 obj-m += nfit_test.o
 obj-m += nfit_test_iomap.o
index c842095f2801b2183b734c66ae99fa45a6048945..c29f8dca9e67c1f95da2861078ffee61b258ed69 100644 (file)
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/memremap.h>
 #include <linux/rculist.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pfn_t.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include "nfit_test.h"
@@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
        return NULL;
 }
 
-static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 {
        struct nfit_test_resource *res;
 
@@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 
        return res;
 }
+EXPORT_SYMBOL(get_nfit_res);
 
 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
                void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
@@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-#ifdef __HAVE_ARCH_PTE_DEVMAP
-#include <linux/memremap.h>
-#include <linux/pfn_t.h>
-
 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
@@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
         return phys_to_pfn_t(addr, flags);
 }
 EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-#else
-/* to be removed post 4.5-rc1 */
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
-{
-       resource_size_t offset = res->start;
-       struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
-       if (nfit_res)
-               return nfit_res->buf + offset - nfit_res->res->start;
-       return devm_memremap_pages(dev, res);
-}
-EXPORT_SYMBOL(__wrap_devm_memremap_pages);
-#endif
 
 void *__wrap_memremap(resource_size_t offset, size_t size,
                unsigned long flags)
@@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent,
 }
 EXPORT_SYMBOL(__wrap___request_region);
 
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+       if (get_nfit_res(res->start))
+               return 0;
+       return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+       if (get_nfit_res(res->start))
+               return 0;
+       return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
 struct resource *__wrap___devm_request_region(struct device *dev,
                struct resource *parent, resource_size_t start,
                resource_size_t n, const char *name)
index c919866853a045fdeb1d7b2c0aae699cddd68200..5404efa578a3fcea18ce5bbab2a991e0c3d98b73 100644 (file)
 enum {
        NUM_PM  = 3,
        NUM_DCR = 5,
+       NUM_HINTS = 8,
        NUM_BDW = NUM_DCR,
        NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
        NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
        DIMM_SIZE = SZ_32M,
        LABEL_SIZE = SZ_128K,
+       SPA_VCD_SIZE = SZ_4M,
        SPA0_SIZE = DIMM_SIZE,
        SPA1_SIZE = DIMM_SIZE*2,
        SPA2_SIZE = DIMM_SIZE,
@@ -470,11 +472,7 @@ static void release_nfit_res(void *data)
        list_del(&nfit_res->list);
        spin_unlock(&nfit_test_lock);
 
-       if (is_vmalloc_addr(nfit_res->buf))
-               vfree(nfit_res->buf);
-       else
-               dma_free_coherent(nfit_res->dev, resource_size(res),
-                               nfit_res->buf, res->start);
+       vfree(nfit_res->buf);
        kfree(res);
        kfree(nfit_res);
 }
@@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
        return nfit_res->buf;
  err:
-       if (buf && !is_vmalloc_addr(buf))
-               dma_free_coherent(dev, size, buf, *dma);
-       else if (buf)
+       if (buf)
                vfree(buf);
        kfree(res);
        kfree(nfit_res);
@@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
        return __test_alloc(t, size, dma, buf);
 }
 
-static void *test_alloc_coherent(struct nfit_test *t, size_t size,
-               dma_addr_t *dma)
-{
-       struct device *dev = &t->pdev.dev;
-       void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
-
-       return __test_alloc(t, size, dma, buf);
-}
-
 static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
 {
        int i;
@@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        + offsetof(struct acpi_nfit_control_region,
                                        window_size) * NUM_DCR
                        + sizeof(struct acpi_nfit_data_region) * NUM_BDW
-                       + sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+                       + (sizeof(struct acpi_nfit_flush_address)
+                                       + sizeof(u64) * NUM_HINTS) * NUM_DCR;
        int i;
 
        t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t)
                return -ENOMEM;
        t->nfit_size = nfit_size;
 
-       t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+       t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
        if (!t->spa_set[0])
                return -ENOMEM;
 
-       t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+       t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
        if (!t->spa_set[1])
                return -ENOMEM;
 
-       t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
+       t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
        if (!t->spa_set[2])
                return -ENOMEM;
 
@@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        return -ENOMEM;
                sprintf(t->label[i], "label%d", i);
 
-               t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+               t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+                               &t->flush_dma[i]);
                if (!t->flush[i])
                        return -ENOMEM;
        }
@@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
 
 static int nfit_test1_alloc(struct nfit_test *t)
 {
-       size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+       size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
                + sizeof(struct acpi_nfit_memory_map)
                + offsetof(struct acpi_nfit_control_region, window_size);
 
@@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t)
                return -ENOMEM;
        t->nfit_size = nfit_size;
 
-       t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+       t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
        if (!t->spa_set[0])
                return -ENOMEM;
 
+       t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+       if (!t->spa_set[1])
+               return -ENOMEM;
+
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+       dcr->vendor_id = 0xabcd;
+       dcr->device_id = 0;
+       dcr->revision_id = 1;
+       dcr->valid_fields = 1;
+       dcr->manufacturing_location = 0xa;
+       dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
 static void nfit_test0_setup(struct nfit_test *t)
 {
+       const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+               + (sizeof(u64) * NUM_HINTS);
        struct acpi_nfit_desc *acpi_desc;
        struct acpi_nfit_memory_map *memdev;
        void *nfit_buf = t->nfit_buf;
@@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        struct acpi_nfit_control_region *dcr;
        struct acpi_nfit_data_region *bdw;
        struct acpi_nfit_flush_address *flush;
-       unsigned int offset;
+       unsigned int offset, i;
 
        /*
         * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 0+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[0];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 1+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[1];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 2+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[2];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 3+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[3];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 4+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[0];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 5+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[1];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 6+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[2];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 7+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[3];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t)
        /* flush0 (dimm0) */
        flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[0];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[0];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
 
        /* flush1 (dimm1) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+       flush = nfit_buf + offset + flush_hint_size * 1;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[1];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[1];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
 
        /* flush2 (dimm2) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+       flush = nfit_buf + offset + flush_hint_size * 2;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[2];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[2];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
 
        /* flush3 (dimm3) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+       flush = nfit_buf + offset + flush_hint_size * 3;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[3];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[3];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
 
        if (t->setup_hotplug) {
-               offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
+               offset = offset + flush_hint_size * 4;
                /* dcr-descriptor4: blk */
                dcr = nfit_buf + offset;
                dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
                dcr->header.length = sizeof(struct acpi_nfit_control_region);
                dcr->region_index = 8+1;
-               dcr->vendor_id = 0xabcd;
-               dcr->device_id = 0;
-               dcr->revision_id = 1;
+               dcr_common_init(dcr);
                dcr->serial_number = ~handle[4];
                dcr->code = NFIT_FIC_BLK;
                dcr->windows = 1;
@@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t)
                dcr->header.length = offsetof(struct acpi_nfit_control_region,
                                window_size);
                dcr->region_index = 9+1;
-               dcr->vendor_id = 0xabcd;
-               dcr->device_id = 0;
-               dcr->revision_id = 1;
+               dcr_common_init(dcr);
                dcr->serial_number = ~handle[4];
                dcr->code = NFIT_FIC_BYTEN;
                dcr->windows = 0;
@@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t)
                /* flush3 (dimm4) */
                flush = nfit_buf + offset;
                flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-               flush->header.length = sizeof(struct acpi_nfit_flush_address);
+               flush->header.length = flush_hint_size;
                flush->device_handle = handle[4];
-               flush->hint_count = 1;
-               flush->hint_address[0] = t->flush_dma[4];
+               flush->hint_count = NUM_HINTS;
+               for (i = 0; i < NUM_HINTS; i++)
+                       flush->hint_address[i] = t->flush_dma[4]
+                               + i * sizeof(u64);
        }
 
        post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
@@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t)
        spa->address = t->spa_set_dma[0];
        spa->length = SPA2_SIZE;
 
-       offset += sizeof(*spa);
+       /* virtual cd region */
+       spa = nfit_buf + sizeof(*spa);
+       spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+       spa->header.length = sizeof(*spa);
+       memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+       spa->range_index = 0;
+       spa->address = t->spa_set_dma[1];
+       spa->length = SPA_VCD_SIZE;
+
+       offset += sizeof(*spa) * 2;
        /* mem-region0 (spa0, dimm0) */
        memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 0+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~0;
        dcr->code = NFIT_FIC_BYTE;
        dcr->windows = 0;
@@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev)
        nfit_test->setup(nfit_test);
        acpi_desc = &nfit_test->acpi_desc;
        acpi_nfit_desc_init(acpi_desc, &pdev->dev);
-       acpi_desc->nfit = nfit_test->nfit_buf;
        acpi_desc->blk_do_io = nfit_test_blk_do_io;
        nd_desc = &acpi_desc->nd_desc;
        nd_desc->provider_name = NULL;
+       nd_desc->module = THIS_MODULE;
        nd_desc->ndctl = nfit_test_ctl;
-       acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
-       if (!acpi_desc->nvdimm_bus)
-               return -ENXIO;
 
-       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+                       nfit_test->nfit_size);
+       if (rc)
                return rc;
-       }
 
        if (nfit_test->setup != nfit_test0_setup)
                return 0;
@@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev)
        nfit_test->setup_hotplug = 1;
        nfit_test->setup(nfit_test);
 
-       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+                       nfit_test->nfit_size);
+       if (rc)
                return rc;
-       }
 
        return 0;
 }
 
 static int nfit_test_remove(struct platform_device *pdev)
 {
-       struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
-       struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
-
-       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-
        return 0;
 }
 
@@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = {
        .id_table = nfit_test_id,
 };
 
-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
 static __init int nfit_test_init(void)
 {
        int rc, i;
@@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void)
        for (i = 0; i < NUM_NFITS; i++) {
                struct nfit_test *nfit_test;
                struct platform_device *pdev;
-               static int once;
 
                nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
                if (!nfit_test) {
@@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void)
                        goto err_register;
 
                instances[i] = nfit_test;
-
-               if (!once++) {
-                       dma_addr_t dma;
-                       void *buf;
-
-                       buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
-                                       GFP_KERNEL);
-                       if (!buf) {
-                               rc = -ENOMEM;
-                               dev_warn(&pdev->dev, "need 128M of free cma\n");
-                               goto err_register;
-                       }
-                       dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
-               }
        }
 
        rc = platform_driver_register(&nfit_test_driver);
index 96c5e16d7db9a6580419b572f2912b330bec6161..9f18e2a4a862d543a4275301d6629679a133d663 100644 (file)
@@ -12,6 +12,7 @@
  */
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
+#include <linux/list.h>
 
 struct nfit_test_resource {
        struct list_head list;
@@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 void __wrap_iounmap(volatile void __iomem *addr);
 void nfit_test_setup(nfit_test_lookup_fn lookup);
 void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
 #endif