Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 00:22:07 +0000 (17:22 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 00:38:16 +0000 (17:38 -0700)
Pull libnvdimm updates from Dan Williams:

 - Replace pcommit with ADR / directed-flushing.

   The pcommit instruction, which has not shipped on any product, is
   deprecated.  Instead, the requirement is that platforms either
   implement ADR, or provide one or more flush addresses per nvdimm.

   ADR (Asynchronous DRAM Refresh) flushes data pending in the memory
   controller's posted-write buffers to media on a power-fail event.

   Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
   Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
   A flush hint is an mmio address that, when written and fenced,
   assures that all previous posted writes targeting a given dimm have
   been flushed to media.  A rough sketch of the sequence follows this
   list.

 - On-demand ARS (address range scrub).

   Linux uses the results of the ACPI ARS commands to track bad blocks
   in pmem devices.  When latent errors are detected we re-scrub the
   media to refresh the bad block list; userspace can also request a
   re-scrub at any time (see the example after this list).

 - Support for the Microsoft DSM (device specific method) command
   format (a pass-through sketch follows this list).

 - Support for EDK2/OVMF virtual disk device memory ranges.

 - Various fixes and cleanups across the subsystem.
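
As a rough illustration of the directed-flushing model above: a pmem
driver makes writes durable with a cache-bypassing copy, a fence, a
write of any value to the dimm's flush hint, and a final fence.  This
is a sketch only, not the in-tree implementation (the kernel's
nvdimm_flush() iterates the per-dimm hint registers discovered from
the NFIT); 'flush_addr' here is a hypothetical ioremapped flush hint:

    #include <linux/pmem.h>
    #include <linux/io.h>

    /* sketch: flush_addr is a hypothetical mapped flush hint address */
    static void flush_write_to_dimm(void *dst, const void *src, size_t n,
                    void __iomem *flush_addr)
    {
            memcpy_to_pmem(dst, src, n);  /* cache-bypassing copy to pmem */
            wmb();                        /* order the copy before the hint write */
            writeq(1, flush_addr);        /* the write itself triggers the flush */
            wmb();                        /* fence the hint write */
    }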

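As an example of requesting a re-scrub from userspace, the series adds
a 'scrub' sysfs attribute on the nvdimm bus (see the "allow an ARS
scrub to be triggered on demand" commit below).  Writing it kicks off
a new ARS pass; reading it reports the number of completed scrubs.
The path below is illustrative and platform dependent:

    echo 1 > /sys/devices/.../ACPI0012:00/ndbus0/nfit/scrub
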
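The Microsoft DSM support rides on the existing ND_CMD_CALL
pass-through envelope (struct nd_cmd_pkg in <linux/ndctl.h>).  A
minimal userspace sketch, assuming 'fd' is an open /dev/nmemX dimm
device, 'func' is a function index from the Microsoft specification,
and NVDIMM_FAMILY_MSFT is the family constant this update defines:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/ndctl.h>

    /* sketch: wrap an input payload in an nd_cmd_pkg envelope */
    static int call_msft_dsm(int fd, unsigned int func, const void *in,
                    unsigned int in_len, unsigned int out_len)
    {
            struct nd_cmd_pkg *pkg;
            int rc;

            pkg = calloc(1, sizeof(*pkg) + in_len + out_len);
            if (!pkg)
                    return -1;
            pkg->nd_family = NVDIMM_FAMILY_MSFT;  /* family added by this update */
            pkg->nd_command = func;               /* function within the family */
            pkg->nd_size_in = in_len;
            pkg->nd_size_out = out_len;
            if (in_len)
                    memcpy(pkg->nd_payload, in, in_len);
            rc = ioctl(fd, ND_IOCTL_CALL, pkg);   /* kernel relays to the _DSM */
            free(pkg);
            return rc;
    }
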
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
  libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
  nfit: do an ARS scrub on hitting a latent media error
  nfit: move to nfit/ sub-directory
  nfit, libnvdimm: allow an ARS scrub to be triggered on demand
  libnvdimm: register nvdimm_bus devices with an nd_bus driver
  pmem: clarify a debug print in pmem_clear_poison
  x86/insn: remove pcommit
  Revert "KVM: x86: add pcommit support"
  nfit, tools/testing/nvdimm/: unify shutdown paths
  libnvdimm: move ->module to struct nvdimm_bus_descriptor
  nfit: cleanup acpi_nfit_init calling convention
  nfit: fix _FIT evaluation memory leak + use after free
  tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
  tools/testing/nvdimm: add virtual ramdisk range
  acpi, nfit: treat virtual ramdisk SPA as pmem region
  pmem: kill __pmem address space
  pmem: kill wmb_pmem()
  libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
  fs/dax: remove wmb_pmem()
  libnvdimm, pmem: flush posted-write queues on shutdown
  ...

67 files changed:
Documentation/filesystems/Locking
Documentation/nvdimm/btt.txt
arch/powerpc/sysdev/axonram.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/pmem.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/vmx.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/vmx.c
arch/x86/lib/x86-opcode-map.txt
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/nfit.c [deleted file]
drivers/acpi/nfit.h [deleted file]
drivers/acpi/nfit/Kconfig [new file with mode: 0644]
drivers/acpi/nfit/Makefile [new file with mode: 0644]
drivers/acpi/nfit/core.c [new file with mode: 0644]
drivers/acpi/nfit/mce.c [new file with mode: 0644]
drivers/acpi/nfit/nfit.h [new file with mode: 0644]
drivers/block/brd.c
drivers/dax/dax.c
drivers/dax/pmem.c
drivers/md/dm-linear.c
drivers/md/dm-snap.c
drivers/md/dm-stripe.c
drivers/md/dm-target.c
drivers/md/dm.c
drivers/nvdimm/Kconfig
drivers/nvdimm/blk.c
drivers/nvdimm/btt_devs.c
drivers/nvdimm/bus.c
drivers/nvdimm/claim.c
drivers/nvdimm/core.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/e820.c
drivers/nvdimm/nd-core.h
drivers/nvdimm/nd.h
drivers/nvdimm/pmem.c
drivers/nvdimm/pmem.h [new file with mode: 0644]
drivers/nvdimm/region.c
drivers/nvdimm/region_devs.c
drivers/s390/block/dcssblk.c
fs/dax.c
include/linux/blkdev.h
include/linux/compiler.h
include/linux/device-mapper.h
include/linux/libnvdimm.h
include/linux/nd.h
include/linux/pfn_t.h
include/linux/pmem.h
include/uapi/linux/ndctl.h
kernel/memremap.c
scripts/checkpatch.pl
tools/objtool/arch/x86/insn/x86-opcode-map.txt
tools/perf/arch/x86/tests/insn-x86-dat-32.c
tools/perf/arch/x86/tests/insn-x86-dat-64.c
tools/perf/arch/x86/tests/insn-x86-dat-src.c
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/config_check.c
tools/testing/nvdimm/pmem-dax.c [new file with mode: 0644]
tools/testing/nvdimm/test/Kbuild
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h

index ef46d3ac5774445169098e250980fe4a3f21305b..1b3c39a7de627f572ece87d2a1c6ddd2cff27144 100644 (file)
@@ -395,7 +395,7 @@ prototypes:
        int (*release) (struct gendisk *, fmode_t);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+       int (*direct_access) (struct block_device *, sector_t, void **,
                                unsigned long *);
        int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
index b91443f577dcff68aa8961a73d77141ba8a73cd4..e293fb664924faa3494588a27b645ffdf09fdecf 100644 (file)
@@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
 only state using a flag in the info block.
 
 
-5. In-kernel usage
-==================
+5. Usage
+========
 
-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1].
 
-       int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to set up a BTT with a 4k sector size is:
 
-It may register with the BTT after it adds its own gendisk, using btt_init:
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
 
-       struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
-                       u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.
 
-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
-       void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl
 
index f9af6461521ab899396ce101d54e5a61af5a9df0..9144204442eb68438544a2e51af57ae13850af44 100644 (file)
@@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-                      void __pmem **kaddr, pfn_t *pfn, long size)
+                      void **kaddr, pfn_t *pfn, long size)
 {
        struct axon_ram_bank *bank = device->bd_disk->private_data;
        loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
-       *kaddr = (void __pmem __force *) bank->io_addr + offset;
+       *kaddr = (void *) bank->io_addr + offset;
        *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
        return bank->size - offset;
 }
index c64b1e9c5d1a30d916be2d944a3e94134b240fb0..d683993248c8cf485cdc953325acb36a298b73c3 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
index fbc5e92e1ecc43bbf29e07de629d801a37d95ee5..643eba42d6206aa0fbcb57baa150606269577523 100644 (file)
  * @n: length of the copy in bytes
  *
  * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
  */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
-       int unwritten;
+       int rem;
 
        /*
         * We are copying between two kernel buffers, if
@@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
         * fault) we would have already reported a general protection fault
         * before the WARN+BUG.
         */
-       unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-                       (void __user *) src, n);
-       if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-                               __func__, dst, src, unwritten))
+       rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+       if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+                               __func__, dst, src, rem))
                BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-               size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
        if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-               return memcpy_mcsafe(dst, (void __force *) src, n);
-       memcpy(dst, (void __force *) src, n);
+               return memcpy_mcsafe(dst, src, n);
+       memcpy(dst, src, n);
        return 0;
 }
 
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-       /*
-        * wmb() to 'sfence' all previous writes such that they are
-        * architecturally visible to 'pcommit'.  Note, that we've
-        * already arranged for pmem writes to avoid the cache via
-        * arch_memcpy_to_pmem().
-        */
-       wmb();
-       pcommit_sfence();
-}
-
 /**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:     virtual start address
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
  */
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
        u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
        unsigned long clflush_mask = x86_clflush_size - 1;
-       void *vaddr = (void __force *)addr;
-       void *vend = vaddr + size;
+       void *vend = addr + size;
        void *p;
 
-       for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+       for (p = (void *)((unsigned long)addr & ~clflush_mask);
             p < vend; p += x86_clflush_size)
                clwb(p);
 }
@@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
  * @i:         iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
-       void *vaddr = (void __force *)addr;
        size_t len;
 
        /* TODO: skip the write-back by always using non-temporal stores */
-       len = copy_from_iter_nocache(vaddr, bytes, i);
+       len = copy_from_iter_nocache(addr, bytes, i);
 
        if (__iter_needs_pmem_wb(i))
                arch_wb_cache_pmem(addr, bytes);
@@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size:      number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
-       void *vaddr = (void __force *)addr;
-
-       memset(vaddr, 0, size);
+       memset(addr, 0, size);
        arch_wb_cache_pmem(addr, size);
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
-       clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-       /*
-        * We require that wmb() be an 'sfence', that is only guaranteed on
-        * 64-bit builds
-        */
-       return static_cpu_has(X86_FEATURE_PCOMMIT);
+       clflush_cache_range(addr, size);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
index d96d0437776569f5c9c0e6f28d125dbc5671d037..587d7914ea4b56a539d9877b1526139285165419 100644 (file)
@@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
                : [pax] "a" (p));
 }
 
-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM.  The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- *         unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- *         void *vend = vaddr + size;
- *         void *p;
- *
- *         for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- *              p < vend; p += boot_cpu_data.x86_clflush_size)
- *                 clwb(p);
- *
- *         // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- *         // MFENCE via mb() also works
- *         wmb();
- *
- *         // PCOMMIT and the required SFENCE for ordering
- *         pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
-       alternative(ASM_NOP7,
-                   ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
-                   "sfence",
-                   X86_FEATURE_PCOMMIT);
-}
-
 #define nop() asm volatile ("nop")
 
 
index 14c63c7e8337a3d2bdfc98a5b4c14d2892de5580..a002b07a7099c1f5585e1d64989177fe8509a45f 100644 (file)
@@ -72,7 +72,6 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES                  0x00100000
-#define SECONDARY_EXEC_PCOMMIT                 0x00200000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
index 5b15d94a33f818d04ee7ae2a0f5685125bd89a40..37fee272618f1de348a7d5961f1792debba72991 100644 (file)
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
        { EXIT_REASON_INVPCID,               "INVPCID" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-       { EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
index 7597b42a8a883c668ddbcf28ba03cde86cb30924..64356536449782e05dd1fd790bdb44deb8eafd6d 100644 (file)
@@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
 
        /* cpuid 0xD.1.eax */
        const u32 kvm_cpuid_D_1_eax_x86_features =
index e17a74b1d8525708a051c18f93854af5667805be..35058c2c0eeabe0fd9dedd45999478d5cb61fabb 100644 (file)
@@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
        return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
 
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *best;
-
-       best = kvm_find_cpuid_entry(vcpu, 7, 0);
-       return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
index 7758680db20b78abbf35f1b263c5f9d8da33b5e1..df07a0a4611ffa81b059229aaa08d04a2981bc56 100644 (file)
@@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING |
-               SECONDARY_EXEC_XSAVES |
-               SECONDARY_EXEC_PCOMMIT;
+               SECONDARY_EXEC_XSAVES;
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
@@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_SHADOW_VMCS |
                        SECONDARY_EXEC_XSAVES |
                        SECONDARY_EXEC_ENABLE_PML |
-                       SECONDARY_EXEC_PCOMMIT |
                        SECONDARY_EXEC_TSC_SCALING;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml)
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       /* Currently, we allow L1 guest to directly run pcommit instruction. */
-       exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
        return exec_control;
 }
 
@@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
-       if (cpu_has_secondary_exec_ctrls())
+       if (cpu_has_secondary_exec_ctrls()) {
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
                                vmx_secondary_exec_control(vmx));
+       }
 
        if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
                vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -7564,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
-       /* we never catch pcommit instruct for L1 guest. */
-       WARN_ON(1);
-       return 1;
-}
-
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7621,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_XSAVES]                  = handle_xsaves,
        [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
-       [EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7930,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * the XSS exit bitmap in vmcs12.
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
-       case EXIT_REASON_PCOMMIT:
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
        default:
                return true;
        }
@@ -9094,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
        if (cpu_has_secondary_exec_ctrls())
                vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
-       if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
-               if (guest_cpuid_has_pcommit(vcpu))
-                       vmx->nested.nested_vmx_secondary_ctls_high |=
-                               SECONDARY_EXEC_PCOMMIT;
-               else
-                       vmx->nested.nested_vmx_secondary_ctls_high &=
-                               ~SECONDARY_EXEC_PCOMMIT;
-       }
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9715,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                                 SECONDARY_EXEC_PCOMMIT);
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
index ec378cd7b71ee4e067d0a4a9beb59413def3296c..767be7c760340bd33b7e4a18b9a8f3a71d9db33e 100644 (file)
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index acad70a0bb0dcccdc6d263b5767535554b668c7c..aebd944bdaa125e6c0cd52dc475fe366c0feb4ba 100644 (file)
@@ -454,32 +454,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
 
          If you are unsure what to do, do not enable this option.
 
-config ACPI_NFIT
-       tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
-       depends on PHYS_ADDR_T_64BIT
-       depends on BLK_DEV
-       depends on ARCH_HAS_MMIO_FLUSH
-       select LIBNVDIMM
-       help
-         Infrastructure to probe ACPI 6 compliant platforms for
-         NVDIMMs (NFIT) and register a libnvdimm device tree.  In
-         addition to storage devices this also enables libnvdimm to pass
-         ACPI._DSM messages for platform/dimm configuration.
-
-         To compile this driver as a module, choose M here:
-         the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-       bool "NFIT DSM debug"
-       depends on ACPI_NFIT
-       depends on DYNAMIC_DEBUG
-       default n
-       help
-         Enabling this option causes the nfit driver to dump the
-         input and output buffers of _DSM operations on the ACPI0012
-         device and its children.  This can be very verbose, so leave
-         it disabled unless you are debugging a hardware / firmware
-         issue.
+source "drivers/acpi/nfit/Kconfig"
 
 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
index 88f54f03e3d228a0bfabb674cc1a72b9a5177151..35a6ccbe302580ecf713d5ec623168dc7ab48a0d 100644 (file)
@@ -69,7 +69,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT)   += pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)   += processor.o
 obj-$(CONFIG_ACPI)             += container.o
 obj-$(CONFIG_ACPI_THERMAL)     += thermal.o
-obj-$(CONFIG_ACPI_NFIT)                += nfit.o
+obj-$(CONFIG_ACPI_NFIT)                += nfit/
 obj-$(CONFIG_ACPI)             += acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)     += battery.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
deleted file mode 100644 (file)
index 1f0e060..0000000
+++ /dev/null
@@ -1,2713 +0,0 @@
-/*
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/list_sort.h>
-#include <linux/libnvdimm.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/ndctl.h>
-#include <linux/delay.h>
-#include <linux/list.h>
-#include <linux/acpi.h>
-#include <linux/sort.h>
-#include <linux/pmem.h>
-#include <linux/io.h>
-#include <linux/nd.h>
-#include <asm/cacheflush.h>
-#include "nfit.h"
-
-/*
- * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
- * irrelevant.
- */
-#include <linux/io-64-nonatomic-hi-lo.h>
-
-static bool force_enable_dimms;
-module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
-
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-               "Number of times we overflow ARS results before abort");
-
-static bool disable_vendor_specific;
-module_param(disable_vendor_specific, bool, S_IRUGO);
-MODULE_PARM_DESC(disable_vendor_specific,
-               "Limit commands to the publicly specified set\n");
-
-static struct workqueue_struct *nfit_wq;
-
-struct nfit_table_prev {
-       struct list_head spas;
-       struct list_head memdevs;
-       struct list_head dcrs;
-       struct list_head bdws;
-       struct list_head idts;
-       struct list_head flushes;
-};
-
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
-
-const u8 *to_nfit_uuid(enum nfit_uuids id)
-{
-       return nfit_uuid[id];
-}
-EXPORT_SYMBOL(to_nfit_uuid);
-
-static struct acpi_nfit_desc *to_acpi_nfit_desc(
-               struct nvdimm_bus_descriptor *nd_desc)
-{
-       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
-       /*
-        * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
-        * acpi_device.
-        */
-       if (!nd_desc->provider_name
-                       || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
-               return NULL;
-
-       return to_acpi_device(acpi_desc->dev);
-}
-
-static int xlat_status(void *buf, unsigned int cmd)
-{
-       struct nd_cmd_clear_error *clear_err;
-       struct nd_cmd_ars_status *ars_status;
-       struct nd_cmd_ars_start *ars_start;
-       struct nd_cmd_ars_cap *ars_cap;
-       u16 flags;
-
-       switch (cmd) {
-       case ND_CMD_ARS_CAP:
-               ars_cap = buf;
-               if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
-                       return -ENOTTY;
-
-               /* Command failed */
-               if (ars_cap->status & 0xffff)
-                       return -EIO;
-
-               /* No supported scan types for this range */
-               flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
-               if ((ars_cap->status >> 16 & flags) == 0)
-                       return -ENOTTY;
-               break;
-       case ND_CMD_ARS_START:
-               ars_start = buf;
-               /* ARS is in progress */
-               if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
-                       return -EBUSY;
-
-               /* Command failed */
-               if (ars_start->status & 0xffff)
-                       return -EIO;
-               break;
-       case ND_CMD_ARS_STATUS:
-               ars_status = buf;
-               /* Command failed */
-               if (ars_status->status & 0xffff)
-                       return -EIO;
-               /* Check extended status (Upper two bytes) */
-               if (ars_status->status == NFIT_ARS_STATUS_DONE)
-                       return 0;
-
-               /* ARS is in progress */
-               if (ars_status->status == NFIT_ARS_STATUS_BUSY)
-                       return -EBUSY;
-
-               /* No ARS performed for the current boot */
-               if (ars_status->status == NFIT_ARS_STATUS_NONE)
-                       return -EAGAIN;
-
-               /*
-                * ARS interrupted, either we overflowed or some other
-                * agent wants the scan to stop.  If we didn't overflow
-                * then just continue with the returned results.
-                */
-               if (ars_status->status == NFIT_ARS_STATUS_INTR) {
-                       if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
-                               return -ENOSPC;
-                       return 0;
-               }
-
-               /* Unknown status */
-               if (ars_status->status >> 16)
-                       return -EIO;
-               break;
-       case ND_CMD_CLEAR_ERROR:
-               clear_err = buf;
-               if (clear_err->status & 0xffff)
-                       return -EIO;
-               if (!clear_err->cleared)
-                       return -EIO;
-               if (clear_err->length > clear_err->cleared)
-                       return clear_err->cleared;
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, unsigned int cmd, void *buf,
-               unsigned int buf_len, int *cmd_rc)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-       union acpi_object in_obj, in_buf, *out_obj;
-       const struct nd_cmd_desc *desc = NULL;
-       struct device *dev = acpi_desc->dev;
-       struct nd_cmd_pkg *call_pkg = NULL;
-       const char *cmd_name, *dimm_name;
-       unsigned long cmd_mask, dsm_mask;
-       acpi_handle handle;
-       unsigned int func;
-       const u8 *uuid;
-       u32 offset;
-       int rc, i;
-
-       func = cmd;
-       if (cmd == ND_CMD_CALL) {
-               call_pkg = buf;
-               func = call_pkg->nd_command;
-       }
-
-       if (nvdimm) {
-               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-               struct acpi_device *adev = nfit_mem->adev;
-
-               if (!adev)
-                       return -ENOTTY;
-               if (call_pkg && nfit_mem->family != call_pkg->nd_family)
-                       return -ENOTTY;
-
-               dimm_name = nvdimm_name(nvdimm);
-               cmd_name = nvdimm_cmd_name(cmd);
-               cmd_mask = nvdimm_cmd_mask(nvdimm);
-               dsm_mask = nfit_mem->dsm_mask;
-               desc = nd_cmd_dimm_desc(cmd);
-               uuid = to_nfit_uuid(nfit_mem->family);
-               handle = adev->handle;
-       } else {
-               struct acpi_device *adev = to_acpi_dev(acpi_desc);
-
-               cmd_name = nvdimm_bus_cmd_name(cmd);
-               cmd_mask = nd_desc->cmd_mask;
-               dsm_mask = cmd_mask;
-               desc = nd_cmd_bus_desc(cmd);
-               uuid = to_nfit_uuid(NFIT_DEV_BUS);
-               handle = adev->handle;
-               dimm_name = "bus";
-       }
-
-       if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
-               return -ENOTTY;
-
-       if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
-               return -ENOTTY;
-
-       in_obj.type = ACPI_TYPE_PACKAGE;
-       in_obj.package.count = 1;
-       in_obj.package.elements = &in_buf;
-       in_buf.type = ACPI_TYPE_BUFFER;
-       in_buf.buffer.pointer = buf;
-       in_buf.buffer.length = 0;
-
-       /* libnvdimm has already validated the input envelope */
-       for (i = 0; i < desc->in_num; i++)
-               in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
-                               i, buf);
-
-       if (call_pkg) {
-               /* skip over package wrapper */
-               in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
-               in_buf.buffer.length = call_pkg->nd_size_in;
-       }
-
-       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-               dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-                               __func__, dimm_name, cmd, func,
-                               in_buf.buffer.length);
-               print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
-                       in_buf.buffer.pointer,
-                       min_t(u32, 256, in_buf.buffer.length), true);
-       }
-
-       out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
-       if (!out_obj) {
-               dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-                               cmd_name);
-               return -EINVAL;
-       }
-
-       if (call_pkg) {
-               call_pkg->nd_fw_size = out_obj->buffer.length;
-               memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
-                       out_obj->buffer.pointer,
-                       min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
-
-               ACPI_FREE(out_obj);
-               /*
-                * Need to support FW function w/o known size in advance.
-                * Caller can determine required size based upon nd_fw_size.
-                * If we return an error (like elsewhere) then caller wouldn't
-                * be able to rely upon data returned to make calculation.
-                */
-               return 0;
-       }
-
-       if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-               dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-                               __func__, dimm_name, cmd_name, out_obj->type);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-               dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
-                               dimm_name, cmd_name, out_obj->buffer.length);
-               print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
-                               4, out_obj->buffer.pointer, min_t(u32, 128,
-                                       out_obj->buffer.length), true);
-       }
-
-       for (i = 0, offset = 0; i < desc->out_num; i++) {
-               u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
-                               (u32 *) out_obj->buffer.pointer);
-
-               if (offset + out_size > out_obj->buffer.length) {
-                       dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
-                       break;
-               }
-
-               if (in_buf.buffer.length + offset + out_size > buf_len) {
-                       dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
-                       rc = -ENXIO;
-                       goto out;
-               }
-               memcpy(buf + in_buf.buffer.length + offset,
-                               out_obj->buffer.pointer + offset, out_size);
-               offset += out_size;
-       }
-       if (offset + in_buf.buffer.length < buf_len) {
-               if (i >= 1) {
-                       /*
-                        * status valid, return the number of bytes left
-                        * unfilled in the output buffer
-                        */
-                       rc = buf_len - offset - in_buf.buffer.length;
-                       if (cmd_rc)
-                               *cmd_rc = xlat_status(buf, cmd);
-               } else {
-                       dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
-                                       __func__, dimm_name, cmd_name, buf_len,
-                                       offset);
-                       rc = -ENXIO;
-               }
-       } else {
-               rc = 0;
-               if (cmd_rc)
-                       *cmd_rc = xlat_status(buf, cmd);
-       }
-
- out:
-       ACPI_FREE(out_obj);
-
-       return rc;
-}
-
-static const char *spa_type_name(u16 type)
-{
-       static const char *to_name[] = {
-               [NFIT_SPA_VOLATILE] = "volatile",
-               [NFIT_SPA_PM] = "pmem",
-               [NFIT_SPA_DCR] = "dimm-control-region",
-               [NFIT_SPA_BDW] = "block-data-window",
-               [NFIT_SPA_VDISK] = "volatile-disk",
-               [NFIT_SPA_VCD] = "volatile-cd",
-               [NFIT_SPA_PDISK] = "persistent-disk",
-               [NFIT_SPA_PCD] = "persistent-cd",
-
-       };
-
-       if (type > NFIT_SPA_PCD)
-               return "unknown";
-
-       return to_name[type];
-}
-
-static int nfit_spa_type(struct acpi_nfit_system_address *spa)
-{
-       int i;
-
-       for (i = 0; i < NFIT_UUID_MAX; i++)
-               if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
-                       return i;
-       return -1;
-}
-
-static bool add_spa(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_system_address *spa)
-{
-       size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_spa *nfit_spa;
-
-       list_for_each_entry(nfit_spa, &prev->spas, list) {
-               if (memcmp(nfit_spa->spa, spa, length) == 0) {
-                       list_move_tail(&nfit_spa->list, &acpi_desc->spas);
-                       return true;
-               }
-       }
-
-       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
-       if (!nfit_spa)
-               return false;
-       INIT_LIST_HEAD(&nfit_spa->list);
-       nfit_spa->spa = spa;
-       list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-       dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
-                       spa->range_index,
-                       spa_type_name(nfit_spa_type(spa)));
-       return true;
-}
-
-static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_memory_map *memdev)
-{
-       size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_memdev *nfit_memdev;
-
-       list_for_each_entry(nfit_memdev, &prev->memdevs, list)
-               if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
-                       list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-                       return true;
-               }
-
-       nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
-       if (!nfit_memdev)
-               return false;
-       INIT_LIST_HEAD(&nfit_memdev->list);
-       nfit_memdev->memdev = memdev;
-       list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-       dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
-                       __func__, memdev->device_handle, memdev->range_index,
-                       memdev->region_index);
-       return true;
-}
-
-static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_control_region *dcr)
-{
-       size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_dcr *nfit_dcr;
-
-       list_for_each_entry(nfit_dcr, &prev->dcrs, list)
-               if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
-                       list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-                       return true;
-               }
-
-       nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
-       if (!nfit_dcr)
-               return false;
-       INIT_LIST_HEAD(&nfit_dcr->list);
-       nfit_dcr->dcr = dcr;
-       list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-       dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
-                       dcr->region_index, dcr->windows);
-       return true;
-}
-
-static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_data_region *bdw)
-{
-       size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_bdw *nfit_bdw;
-
-       list_for_each_entry(nfit_bdw, &prev->bdws, list)
-               if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
-                       list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
-                       return true;
-               }
-
-       nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
-       if (!nfit_bdw)
-               return false;
-       INIT_LIST_HEAD(&nfit_bdw->list);
-       nfit_bdw->bdw = bdw;
-       list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-       dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
-                       bdw->region_index, bdw->windows);
-       return true;
-}
-
-static bool add_idt(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_interleave *idt)
-{
-       size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_idt *nfit_idt;
-
-       list_for_each_entry(nfit_idt, &prev->idts, list)
-               if (memcmp(nfit_idt->idt, idt, length) == 0) {
-                       list_move_tail(&nfit_idt->list, &acpi_desc->idts);
-                       return true;
-               }
-
-       nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
-       if (!nfit_idt)
-               return false;
-       INIT_LIST_HEAD(&nfit_idt->list);
-       nfit_idt->idt = idt;
-       list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-       dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
-                       idt->interleave_index, idt->line_count);
-       return true;
-}
-
-static bool add_flush(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev,
-               struct acpi_nfit_flush_address *flush)
-{
-       size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
-       struct device *dev = acpi_desc->dev;
-       struct nfit_flush *nfit_flush;
-
-       list_for_each_entry(nfit_flush, &prev->flushes, list)
-               if (memcmp(nfit_flush->flush, flush, length) == 0) {
-                       list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
-                       return true;
-               }
-
-       nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
-       if (!nfit_flush)
-               return false;
-       INIT_LIST_HEAD(&nfit_flush->list);
-       nfit_flush->flush = flush;
-       list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-       dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
-                       flush->device_handle, flush->hint_count);
-       return true;
-}
-
-static void *add_table(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev, void *table, const void *end)
-{
-       struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_header *hdr;
-       void *err = ERR_PTR(-ENOMEM);
-
-       if (table >= end)
-               return NULL;
-
-       hdr = table;
-       if (!hdr->length) {
-               dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
-                       hdr->type);
-               return NULL;
-       }
-
-       switch (hdr->type) {
-       case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
-               if (!add_spa(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_MEMORY_MAP:
-               if (!add_memdev(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_CONTROL_REGION:
-               if (!add_dcr(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_DATA_REGION:
-               if (!add_bdw(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_INTERLEAVE:
-               if (!add_idt(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
-               if (!add_flush(acpi_desc, prev, table))
-                       return err;
-               break;
-       case ACPI_NFIT_TYPE_SMBIOS:
-               dev_dbg(dev, "%s: smbios\n", __func__);
-               break;
-       default:
-               dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
-               break;
-       }
-
-       return table + hdr->length;
-}
-
-static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem)
-{
-       u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
-       u16 dcr = nfit_mem->dcr->region_index;
-       struct nfit_spa *nfit_spa;
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               u16 range_index = nfit_spa->spa->range_index;
-               int type = nfit_spa_type(nfit_spa->spa);
-               struct nfit_memdev *nfit_memdev;
-
-               if (type != NFIT_SPA_BDW)
-                       continue;
-
-               list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-                       if (nfit_memdev->memdev->range_index != range_index)
-                               continue;
-                       if (nfit_memdev->memdev->device_handle != device_handle)
-                               continue;
-                       if (nfit_memdev->memdev->region_index != dcr)
-                               continue;
-
-                       nfit_mem->spa_bdw = nfit_spa->spa;
-                       return;
-               }
-       }
-
-       dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
-                       nfit_mem->spa_dcr->range_index);
-       nfit_mem->bdw = NULL;
-}
-
-static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
-{
-       u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
-       struct nfit_memdev *nfit_memdev;
-       struct nfit_flush *nfit_flush;
-       struct nfit_bdw *nfit_bdw;
-       struct nfit_idt *nfit_idt;
-       u16 idt_idx, range_index;
-
-       list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
-               if (nfit_bdw->bdw->region_index != dcr)
-                       continue;
-               nfit_mem->bdw = nfit_bdw->bdw;
-               break;
-       }
-
-       if (!nfit_mem->bdw)
-               return;
-
-       nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
-
-       if (!nfit_mem->spa_bdw)
-               return;
-
-       range_index = nfit_mem->spa_bdw->range_index;
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               if (nfit_memdev->memdev->range_index != range_index ||
-                               nfit_memdev->memdev->region_index != dcr)
-                       continue;
-               nfit_mem->memdev_bdw = nfit_memdev->memdev;
-               idt_idx = nfit_memdev->memdev->interleave_index;
-               list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
-                       if (nfit_idt->idt->interleave_index != idt_idx)
-                               continue;
-                       nfit_mem->idt_bdw = nfit_idt->idt;
-                       break;
-               }
-
-               list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
-                       if (nfit_flush->flush->device_handle !=
-                                       nfit_memdev->memdev->device_handle)
-                               continue;
-                       nfit_mem->nfit_flush = nfit_flush;
-                       break;
-               }
-               break;
-       }
-}
-
-static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_mem *nfit_mem, *found;
-       struct nfit_memdev *nfit_memdev;
-       int type = nfit_spa_type(spa);
-
-       switch (type) {
-       case NFIT_SPA_DCR:
-       case NFIT_SPA_PM:
-               break;
-       default:
-               return 0;
-       }
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct nfit_dcr *nfit_dcr;
-               u32 device_handle;
-               u16 dcr;
-
-               if (nfit_memdev->memdev->range_index != spa->range_index)
-                       continue;
-               found = NULL;
-               dcr = nfit_memdev->memdev->region_index;
-               device_handle = nfit_memdev->memdev->device_handle;
-               list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-                       if (__to_nfit_memdev(nfit_mem)->device_handle
-                                       == device_handle) {
-                               found = nfit_mem;
-                               break;
-                       }
-
-               if (found)
-                       nfit_mem = found;
-               else {
-                       nfit_mem = devm_kzalloc(acpi_desc->dev,
-                                       sizeof(*nfit_mem), GFP_KERNEL);
-                       if (!nfit_mem)
-                               return -ENOMEM;
-                       INIT_LIST_HEAD(&nfit_mem->list);
-                       nfit_mem->acpi_desc = acpi_desc;
-                       list_add(&nfit_mem->list, &acpi_desc->dimms);
-               }
-
-               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-                       if (nfit_dcr->dcr->region_index != dcr)
-                               continue;
-                       /*
-                        * Record the control region for the dimm.  For
-                        * the ACPI 6.1 case, where there are separate
-                        * control regions for the pmem vs blk
-                        * interfaces, be sure to record the extended
-                        * blk details.
-                        */
-                       if (!nfit_mem->dcr)
-                               nfit_mem->dcr = nfit_dcr->dcr;
-                       else if (nfit_mem->dcr->windows == 0
-                                       && nfit_dcr->dcr->windows)
-                               nfit_mem->dcr = nfit_dcr->dcr;
-                       break;
-               }
-
-               if (dcr && !nfit_mem->dcr) {
-                       dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
-                                       spa->range_index, dcr);
-                       return -ENODEV;
-               }
-
-               if (type == NFIT_SPA_DCR) {
-                       struct nfit_idt *nfit_idt;
-                       u16 idt_idx;
-
-                       /* multiple dimms may share a SPA when interleaved */
-                       nfit_mem->spa_dcr = spa;
-                       nfit_mem->memdev_dcr = nfit_memdev->memdev;
-                       idt_idx = nfit_memdev->memdev->interleave_index;
-                       list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
-                               if (nfit_idt->idt->interleave_index != idt_idx)
-                                       continue;
-                               nfit_mem->idt_dcr = nfit_idt->idt;
-                               break;
-                       }
-                       nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
-               } else {
-                       /*
-                        * A single dimm may belong to multiple SPA-PM
-                        * ranges, record at least one in addition to
-                        * any SPA-DCR range.
-                        */
-                       nfit_mem->memdev_pmem = nfit_memdev->memdev;
-               }
-       }
-
-       return 0;
-}
-
-static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
-{
-       struct nfit_mem *a = container_of(_a, typeof(*a), list);
-       struct nfit_mem *b = container_of(_b, typeof(*b), list);
-       u32 handleA, handleB;
-
-       handleA = __to_nfit_memdev(a)->device_handle;
-       handleB = __to_nfit_memdev(b)->device_handle;
-       if (handleA < handleB)
-               return -1;
-       else if (handleA > handleB)
-               return 1;
-       return 0;
-}
-
-static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_spa *nfit_spa;
-
-       /*
-        * For each SPA-DCR or SPA-PMEM address range find its
-        * corresponding MEMDEV(s).  From each MEMDEV find the
-        * corresponding DCR.  Then, if we're operating on a SPA-DCR,
-        * try to find a SPA-BDW and a corresponding BDW that references
-        * the DCR.  Throw it all into an nfit_mem object.  Note, that
-        * BDWs are optional.
-        */
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               int rc;
-
-               rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
-               if (rc)
-                       return rc;
-       }
-
-       list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
-
-       return 0;
-}
-
-static ssize_t revision_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-
-       return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
-}
-static DEVICE_ATTR_RO(revision);
-
-static struct attribute *acpi_nfit_attributes[] = {
-       &dev_attr_revision.attr,
-       NULL,
-};
-
-static struct attribute_group acpi_nfit_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_attribute_groups[] = {
-       &nvdimm_bus_attribute_group,
-       &acpi_nfit_attribute_group,
-       NULL,
-};
-
-static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       return __to_nfit_memdev(nfit_mem);
-}
-
-static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       return nfit_mem->dcr;
-}
-
-static ssize_t handle_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
-       return sprintf(buf, "%#x\n", memdev->device_handle);
-}
-static DEVICE_ATTR_RO(handle);
-
-static ssize_t phys_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
-       return sprintf(buf, "%#x\n", memdev->physical_id);
-}
-static DEVICE_ATTR_RO(phys_id);
-
-static ssize_t vendor_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t rev_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
-}
-static DEVICE_ATTR_RO(rev_id);
-
-static ssize_t device_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t subsystem_vendor_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
-}
-static DEVICE_ATTR_RO(subsystem_vendor);
-
-static ssize_t subsystem_rev_id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n",
-                       be16_to_cpu(dcr->subsystem_revision_id));
-}
-static DEVICE_ATTR_RO(subsystem_rev_id);
-
-static ssize_t subsystem_device_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
-}
-static DEVICE_ATTR_RO(subsystem_device);
-
-static int num_nvdimm_formats(struct nvdimm *nvdimm)
-{
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-       int formats = 0;
-
-       if (nfit_mem->memdev_pmem)
-               formats++;
-       if (nfit_mem->memdev_bdw)
-               formats++;
-       return formats;
-}
-
-static ssize_t format_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
-}
-static DEVICE_ATTR_RO(format);
-
-static ssize_t format1_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       u32 handle;
-       ssize_t rc = -ENXIO;
-       struct nfit_mem *nfit_mem;
-       struct nfit_memdev *nfit_memdev;
-       struct acpi_nfit_desc *acpi_desc;
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       nfit_mem = nvdimm_provider_data(nvdimm);
-       acpi_desc = nfit_mem->acpi_desc;
-       handle = to_nfit_memdev(dev)->device_handle;
-
-       /* assumes DIMMs have at most 2 published interface codes */
-       mutex_lock(&acpi_desc->init_mutex);
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
-               struct nfit_dcr *nfit_dcr;
-
-               if (memdev->device_handle != handle)
-                       continue;
-
-               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-                       if (nfit_dcr->dcr->region_index != memdev->region_index)
-                               continue;
-                       if (nfit_dcr->dcr->code == dcr->code)
-                               continue;
-                       rc = sprintf(buf, "0x%04x\n",
-                                       le16_to_cpu(nfit_dcr->dcr->code));
-                       break;
-               }
-               if (rc != -ENXIO)
-                       break;
-       }
-       mutex_unlock(&acpi_desc->init_mutex);
-       return rc;
-}
-static DEVICE_ATTR_RO(format1);
-
-static ssize_t formats_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-
-       return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
-}
-static DEVICE_ATTR_RO(formats);
-
-static ssize_t serial_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(serial);
-
-static ssize_t family_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       if (nfit_mem->family < 0)
-               return -ENXIO;
-       return sprintf(buf, "%d\n", nfit_mem->family);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t dsm_mask_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
-       if (nfit_mem->family < 0)
-               return -ENXIO;
-       return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
-}
-static DEVICE_ATTR_RO(dsm_mask);
-
-static ssize_t flags_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       u16 flags = to_nfit_memdev(dev)->flags;
-
-       return sprintf(buf, "%s%s%s%s%s\n",
-               flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
-               flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
-               flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
-               flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
-               flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
-}
-static DEVICE_ATTR_RO(flags);
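-
-/*
- * A worked decode of the 'flags' format above, using a hypothetical
- * memdev flags value of 0x09 (ACPI_NFIT_MEM_SAVE_FAILED |
- * ACPI_NFIT_MEM_NOT_ARMED): the attribute reads "save_fail not_armed ".
- */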
-
-static ssize_t id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
-       if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
-               return sprintf(buf, "%04x-%02x-%04x-%08x\n",
-                               be16_to_cpu(dcr->vendor_id),
-                               dcr->manufacturing_location,
-                               be16_to_cpu(dcr->manufacturing_date),
-                               be32_to_cpu(dcr->serial_number));
-       else
-               return sprintf(buf, "%04x-%08x\n",
-                               be16_to_cpu(dcr->vendor_id),
-                               be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(id);
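-
-/*
- * Example renderings of the 'id' format above, with made-up field values:
- * when manufacturing info is valid (vendor 0xabcd, location 0x01, date
- * 0x2016, serial 0x12345678) the attribute reads "abcd-01-2016-12345678";
- * otherwise just "abcd-12345678".
- */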
-
-static struct attribute *acpi_nfit_dimm_attributes[] = {
-       &dev_attr_handle.attr,
-       &dev_attr_phys_id.attr,
-       &dev_attr_vendor.attr,
-       &dev_attr_device.attr,
-       &dev_attr_rev_id.attr,
-       &dev_attr_subsystem_vendor.attr,
-       &dev_attr_subsystem_device.attr,
-       &dev_attr_subsystem_rev_id.attr,
-       &dev_attr_format.attr,
-       &dev_attr_formats.attr,
-       &dev_attr_format1.attr,
-       &dev_attr_serial.attr,
-       &dev_attr_flags.attr,
-       &dev_attr_id.attr,
-       &dev_attr_family.attr,
-       &dev_attr_dsm_mask.attr,
-       NULL,
-};
-
-static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
-               struct attribute *a, int n)
-{
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct nvdimm *nvdimm = to_nvdimm(dev);
-
-       if (!to_nfit_dcr(dev))
-               return 0;
-       if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
-               return 0;
-       return a->mode;
-}
-
-static struct attribute_group acpi_nfit_dimm_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_dimm_attributes,
-       .is_visible = acpi_nfit_dimm_attr_visible,
-};
-
-static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
-       &nvdimm_attribute_group,
-       &nd_device_attribute_group,
-       &acpi_nfit_dimm_attribute_group,
-       NULL,
-};
-
-static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
-               u32 device_handle)
-{
-       struct nfit_mem *nfit_mem;
-
-       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-               if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
-                       return nfit_mem->nvdimm;
-
-       return NULL;
-}
-
-static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_mem *nfit_mem, u32 device_handle)
-{
-       struct acpi_device *adev, *adev_dimm;
-       struct device *dev = acpi_desc->dev;
-       unsigned long dsm_mask;
-       const u8 *uuid;
-       int i;
-
-       /* nfit test assumes 1:1 relationship between commands and dsms */
-       nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
-       nfit_mem->family = NVDIMM_FAMILY_INTEL;
-       adev = to_acpi_dev(acpi_desc);
-       if (!adev)
-               return 0;
-
-       adev_dimm = acpi_find_child_device(adev, device_handle, false);
-       nfit_mem->adev = adev_dimm;
-       if (!adev_dimm) {
-               dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
-                               device_handle);
-               return force_enable_dimms ? 0 : -ENODEV;
-       }
-
-       /*
-        * Until standardization materializes we need to consider up to 3
-        * different command sets.  Note that checking for function0 (bit0)
-        * tells us if any commands are reachable through this uuid.
-        */
-       for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-               if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
-                       break;
-
-       /* limit the supported commands to those that are publicly documented */
-       nfit_mem->family = i;
-       if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
-               dsm_mask = 0x3fe;
-               if (disable_vendor_specific)
-                       dsm_mask &= ~(1 << ND_CMD_VENDOR);
-       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
-               dsm_mask = 0x1c3c76;
-       else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
-               dsm_mask = 0x1fe;
-               if (disable_vendor_specific)
-                       dsm_mask &= ~(1 << 8);
-       } else {
-               dev_dbg(dev, "unknown dimm command family\n");
-               nfit_mem->family = -1;
-               /* DSMs are optional, continue loading the driver... */
-               return 0;
-       }
-
-       uuid = to_nfit_uuid(nfit_mem->family);
-       for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-               if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
-                       set_bit(i, &nfit_mem->dsm_mask);
-
-       return 0;
-}
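-
-/*
- * A quick expansion of the dsm_mask constants above: 0x3fe enables DSM
- * functions 1-9 for NVDIMM_FAMILY_INTEL (bit 9 == ND_CMD_VENDOR, hence
- * the disable_vendor_specific clear), and 0x1fe enables functions 1-8
- * for NVDIMM_FAMILY_HPE2, with function 8 as its vendor-specific
- * passthrough.
- */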
-
-static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_mem *nfit_mem;
-       int dimm_count = 0;
-
-       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
-               unsigned long flags = 0, cmd_mask;
-               struct nvdimm *nvdimm;
-               u32 device_handle;
-               u16 mem_flags;
-               int rc;
-
-               device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
-               nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
-               if (nvdimm) {
-                       dimm_count++;
-                       continue;
-               }
-
-               if (nfit_mem->bdw && nfit_mem->memdev_pmem)
-                       flags |= NDD_ALIASING;
-
-               mem_flags = __to_nfit_memdev(nfit_mem)->flags;
-               if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
-                       flags |= NDD_UNARMED;
-
-               rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
-               if (rc)
-                       continue;
-
-               /*
-                * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
-                * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
-                * userspace interface.
-                */
-               cmd_mask = 1UL << ND_CMD_CALL;
-               if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
-                       cmd_mask |= nfit_mem->dsm_mask;
-
-               nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
-                               acpi_nfit_dimm_attribute_groups,
-                               flags, cmd_mask);
-               if (!nvdimm)
-                       return -ENOMEM;
-
-               nfit_mem->nvdimm = nvdimm;
-               dimm_count++;
-
-               if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
-                       continue;
-
-               dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
-                               nvdimm_name(nvdimm),
-                 mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
-                 mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
-                 mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
-                 mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
-
-       }
-
-       return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
-}
-
-static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
-       struct acpi_device *adev;
-       int i;
-
-       nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
-       adev = to_acpi_dev(acpi_desc);
-       if (!adev)
-               return;
-
-       for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-               if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
-                       set_bit(i, &nd_desc->cmd_mask);
-}
-
-static ssize_t range_index_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct nd_region *nd_region = to_nd_region(dev);
-       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
-
-       return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
-}
-static DEVICE_ATTR_RO(range_index);
-
-static struct attribute *acpi_nfit_region_attributes[] = {
-       &dev_attr_range_index.attr,
-       NULL,
-};
-
-static struct attribute_group acpi_nfit_region_attribute_group = {
-       .name = "nfit",
-       .attrs = acpi_nfit_region_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
-       &nd_region_attribute_group,
-       &nd_mapping_attribute_group,
-       &nd_device_attribute_group,
-       &nd_numa_attribute_group,
-       &acpi_nfit_region_attribute_group,
-       NULL,
-};
-
-/* enough info to uniquely specify an interleave set */
-struct nfit_set_info {
-       struct nfit_set_info_map {
-               u64 region_offset;
-               u32 serial_number;
-               u32 pad;
-       } mapping[0];
-};
-
-static size_t sizeof_nfit_set_info(int num_mappings)
-{
-       return sizeof(struct nfit_set_info)
-               + num_mappings * sizeof(struct nfit_set_info_map);
-}
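-
-/*
- * For example (assuming the usual packing), sizeof(struct nfit_set_info)
- * is 0 -- mapping[0] contributes no storage -- and each nfit_set_info_map
- * is 16 bytes (u64 + u32 + u32), so a two-dimm interleave set allocates
- * 32 bytes.
- */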
-
-static int cmp_map(const void *m0, const void *m1)
-{
-       const struct nfit_set_info_map *map0 = m0;
-       const struct nfit_set_info_map *map1 = m1;
-
-       return memcmp(&map0->region_offset, &map1->region_offset,
-                       sizeof(u64));
-}
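-
-/*
- * Note: the memcmp() above yields a byte-wise (i.e. endian-dependent)
- * ordering of region_offset rather than a numeric one.  It is
- * deterministic on a given host, which is what the interleave-set
- * cookie calculation below relies on.
- */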
-
-/* Retrieve the nth entry referencing this spa */
-static struct acpi_nfit_memory_map *memdev_from_spa(
-               struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
-{
-       struct nfit_memdev *nfit_memdev;
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
-               if (nfit_memdev->memdev->range_index == range_index)
-                       if (n-- == 0)
-                               return nfit_memdev->memdev;
-       return NULL;
-}
-
-static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
-               struct nd_region_desc *ndr_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       int i, spa_type = nfit_spa_type(spa);
-       struct device *dev = acpi_desc->dev;
-       struct nd_interleave_set *nd_set;
-       u16 nr = ndr_desc->num_mappings;
-       struct nfit_set_info *info;
-
-       if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
-               /* pass */;
-       else
-               return 0;
-
-       nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
-       if (!nd_set)
-               return -ENOMEM;
-
-       info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
-       for (i = 0; i < nr; i++) {
-               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
-               struct nfit_set_info_map *map = &info->mapping[i];
-               struct nvdimm *nvdimm = nd_mapping->nvdimm;
-               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-               struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
-                               spa->range_index, i);
-
-               if (!memdev || !nfit_mem->dcr) {
-                       dev_err(dev, "%s: failed to find DCR\n", __func__);
-                       return -ENODEV;
-               }
-
-               map->region_offset = memdev->region_offset;
-               map->serial_number = nfit_mem->dcr->serial_number;
-       }
-
-       sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
-                       cmp_map, NULL);
-       nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
-       ndr_desc->nd_set = nd_set;
-       devm_kfree(dev, info);
-
-       return 0;
-}
-
-static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
-{
-       struct acpi_nfit_interleave *idt = mmio->idt;
-       u32 sub_line_offset, line_index, line_offset;
-       u64 line_no, table_skip_count, table_offset;
-
-       line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
-       table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
-       line_offset = idt->line_offset[line_index]
-               * mmio->line_size;
-       table_offset = table_skip_count * mmio->table_size;
-
-       return mmio->base_offset + line_offset + table_offset + sub_line_offset;
-}
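-
-/*
- * A worked example of the translation above, with made-up interleave
- * parameters: line_size = 256, num_lines = 2, interleave_ways = 2 (so
- * table_size = 2 * 2 * 256 = 1024) and idt->line_offset = { 0, 2 }.
- * For offset = 5000:
- *
- *   line_no = 5000 / 256 = 19, sub_line_offset = 136
- *   table_skip_count = 19 / 2 = 9, line_index = 1
- *   line_offset = 2 * 256 = 512, table_offset = 9 * 1024 = 9216
- *
- * yielding base_offset + 512 + 9216 + 136 = base_offset + 9864.
- */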
-
-static void wmb_blk(struct nfit_blk *nfit_blk)
-{
-       if (nfit_blk->nvdimm_flush) {
-               /*
-                * The first wmb() is needed to 'sfence' all previous writes
-                * such that they are architecturally visible for the platform
-                * buffer flush.  Note that we've already arranged for pmem
-                * writes to avoid the cache via arch_memcpy_to_pmem().  The
-                * final wmb() ensures ordering for the NVDIMM flush write.
-                */
-               wmb();
-               writeq(1, nfit_blk->nvdimm_flush);
-               wmb();
-       } else
-               wmb_pmem();
-}
-
-static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
-{
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-       u64 offset = nfit_blk->stat_offset + mmio->size * bw;
-
-       if (mmio->num_lines)
-               offset = to_interleave_offset(offset, mmio);
-
-       return readl(mmio->addr.base + offset);
-}
-
-static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
-               resource_size_t dpa, unsigned int len, unsigned int write)
-{
-       u64 cmd, offset;
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-
-       enum {
-               BCW_OFFSET_MASK = (1ULL << 48) - 1,
-               BCW_LEN_SHIFT = 48,
-               BCW_LEN_MASK = (1ULL << 8) - 1,
-               BCW_CMD_SHIFT = 56,
-       };
-
-       cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
-       len = len >> L1_CACHE_SHIFT;
-       cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
-       cmd |= ((u64) write) << BCW_CMD_SHIFT;
-
-       offset = nfit_blk->cmd_offset + mmio->size * bw;
-       if (mmio->num_lines)
-               offset = to_interleave_offset(offset, mmio);
-
-       writeq(cmd, mmio->addr.base + offset);
-       wmb_blk(nfit_blk);
-
-       if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
-               readq(mmio->addr.base + offset);
-}
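-
-/*
- * Encoding example for the block-control-window command above, assuming
- * x86's L1_CACHE_SHIFT of 6: dpa = 0x2040 and len = 128 give a cache-line
- * offset of 0x81 and a length of 2 lines, so a write command encodes as
- * (1ULL << 56) | (2ULL << 48) | 0x81 == 0x0102000000000081.
- */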
-
-static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
-               resource_size_t dpa, void *iobuf, size_t len, int rw,
-               unsigned int lane)
-{
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-       unsigned int copied = 0;
-       u64 base_offset;
-       int rc;
-
-       base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
-               + lane * mmio->size;
-       write_blk_ctl(nfit_blk, lane, dpa, len, rw);
-       while (len) {
-               unsigned int c;
-               u64 offset;
-
-               if (mmio->num_lines) {
-                       u32 line_offset;
-
-                       offset = to_interleave_offset(base_offset + copied,
-                                       mmio);
-                       div_u64_rem(offset, mmio->line_size, &line_offset);
-                       c = min_t(size_t, len, mmio->line_size - line_offset);
-               } else {
-                       offset = base_offset + nfit_blk->bdw_offset;
-                       c = len;
-               }
-
-               if (rw)
-                       memcpy_to_pmem(mmio->addr.aperture + offset,
-                                       iobuf + copied, c);
-               else {
-                       if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
-                               mmio_flush_range((void __force *)
-                                       mmio->addr.aperture + offset, c);
-
-                       memcpy_from_pmem(iobuf + copied,
-                                       mmio->addr.aperture + offset, c);
-               }
-
-               copied += c;
-               len -= c;
-       }
-
-       if (rw)
-               wmb_blk(nfit_blk);
-
-       rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
-       return rc;
-}
-
-static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
-               resource_size_t dpa, void *iobuf, u64 len, int rw)
-{
-       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-       struct nd_region *nd_region = nfit_blk->nd_region;
-       unsigned int lane, copied = 0;
-       int rc = 0;
-
-       lane = nd_region_acquire_lane(nd_region);
-       while (len) {
-               u64 c = min(len, mmio->size);
-
-               rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
-                               iobuf + copied, c, rw, lane);
-               if (rc)
-                       break;
-
-               copied += c;
-               len -= c;
-       }
-       nd_region_release_lane(nd_region, lane);
-
-       return rc;
-}
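-
-/*
- * I/O larger than one block-data-window aperture is simply split: e.g.,
- * with a hypothetical mmio->size of 8K, a 20K request becomes three
- * acpi_nfit_blk_single_io() calls (8K + 8K + 4K) on the same lane.
- */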
-
-static void nfit_spa_mapping_release(struct kref *kref)
-{
-       struct nfit_spa_mapping *spa_map = to_spa_map(kref);
-       struct acpi_nfit_system_address *spa = spa_map->spa;
-       struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-       dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-       if (spa_map->type == SPA_MAP_APERTURE)
-               memunmap((void __force *)spa_map->addr.aperture);
-       else
-               iounmap(spa_map->addr.base);
-       release_mem_region(spa->address, spa->length);
-       list_del(&spa_map->list);
-       kfree(spa_map);
-}
-
-static struct nfit_spa_mapping *find_spa_mapping(
-               struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_spa_mapping *spa_map;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-       list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
-               if (spa_map->spa == spa)
-                       return spa_map;
-
-       return NULL;
-}
-
-static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa)
-{
-       struct nfit_spa_mapping *spa_map;
-
-       mutex_lock(&acpi_desc->spa_map_mutex);
-       spa_map = find_spa_mapping(acpi_desc, spa);
-
-       if (spa_map)
-               kref_put(&spa_map->kref, nfit_spa_mapping_release);
-       mutex_unlock(&acpi_desc->spa_map_mutex);
-}
-
-static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-       resource_size_t start = spa->address;
-       resource_size_t n = spa->length;
-       struct nfit_spa_mapping *spa_map;
-       struct resource *res;
-
-       WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-
-       spa_map = find_spa_mapping(acpi_desc, spa);
-       if (spa_map) {
-               kref_get(&spa_map->kref);
-               return spa_map->addr.base;
-       }
-
-       spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
-       if (!spa_map)
-               return NULL;
-
-       INIT_LIST_HEAD(&spa_map->list);
-       spa_map->spa = spa;
-       kref_init(&spa_map->kref);
-       spa_map->acpi_desc = acpi_desc;
-
-       res = request_mem_region(start, n, dev_name(acpi_desc->dev));
-       if (!res)
-               goto err_mem;
-
-       spa_map->type = type;
-       if (type == SPA_MAP_APERTURE)
-               spa_map->addr.aperture = (void __pmem *)memremap(start, n,
-                                                       ARCH_MEMREMAP_PMEM);
-       else
-               spa_map->addr.base = ioremap_nocache(start, n);
-
-       if (!spa_map->addr.base)
-               goto err_map;
-
-       list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-       return spa_map->addr.base;
-
- err_map:
-       release_mem_region(start, n);
- err_mem:
-       kfree(spa_map);
-       return NULL;
-}
-
-/**
- * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
- * @acpi_desc: nfit driver context that provided the spa table entry
- * @spa: spa table entry to map
- * @type: aperture or control region
- *
- * In the case where block-data-window apertures and
- * dimm-control-regions are interleaved they will end up sharing a
- * single request_mem_region() + ioremap() for the address range.  In
- * the style of devm, nfit_spa_map() mappings are automatically dropped
- * when all region devices referencing the same mapping are disabled /
- * unbound.
- */
-static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-               struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-       void __iomem *iomem;
-
-       mutex_lock(&acpi_desc->spa_map_mutex);
-       iomem = __nfit_spa_map(acpi_desc, spa, type);
-       mutex_unlock(&acpi_desc->spa_map_mutex);
-
-       return iomem;
-}
-
-static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
-               struct acpi_nfit_interleave *idt, u16 interleave_ways)
-{
-       if (idt) {
-               mmio->num_lines = idt->line_count;
-               mmio->line_size = idt->line_size;
-               if (interleave_ways == 0)
-                       return -ENXIO;
-               mmio->table_size = mmio->num_lines * interleave_ways
-                       * mmio->line_size;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
-{
-       struct nd_cmd_dimm_flags flags;
-       int rc;
-
-       memset(&flags, 0, sizeof(flags));
-       rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
-                       sizeof(flags), NULL);
-
-       if (rc >= 0 && flags.status == 0)
-               nfit_blk->dimm_flags = flags.flags;
-       else if (rc == -ENOTTY) {
-               /* fall back to a conservative default */
-               nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
-               rc = 0;
-       } else
-               rc = -ENXIO;
-
-       return rc;
-}
-
-static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
-               struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-       struct nfit_flush *nfit_flush;
-       struct nfit_blk_mmio *mmio;
-       struct nfit_blk *nfit_blk;
-       struct nfit_mem *nfit_mem;
-       struct nvdimm *nvdimm;
-       int rc;
-
-       nvdimm = nd_blk_region_to_dimm(ndbr);
-       nfit_mem = nvdimm_provider_data(nvdimm);
-       if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-               dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
-                               nfit_mem ? "" : " nfit_mem",
-                               (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
-                               (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
-               return -ENXIO;
-       }
-
-       nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
-       if (!nfit_blk)
-               return -ENOMEM;
-       nd_blk_region_set_provider_data(ndbr, nfit_blk);
-       nfit_blk->nd_region = to_nd_region(dev);
-
-       /* map block aperture memory */
-       nfit_blk->bdw_offset = nfit_mem->bdw->offset;
-       mmio = &nfit_blk->mmio[BDW];
-       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
-                       SPA_MAP_APERTURE);
-       if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
-                               nvdimm_name(nvdimm));
-               return -ENOMEM;
-       }
-       mmio->size = nfit_mem->bdw->size;
-       mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
-       mmio->idt = nfit_mem->idt_bdw;
-       mmio->spa = nfit_mem->spa_bdw;
-       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
-                       nfit_mem->memdev_bdw->interleave_ways);
-       if (rc) {
-               dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       /* map block control memory */
-       nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
-       nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
-       mmio = &nfit_blk->mmio[DCR];
-       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
-                       SPA_MAP_CONTROL);
-       if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
-                               nvdimm_name(nvdimm));
-               return -ENOMEM;
-       }
-       mmio->size = nfit_mem->dcr->window_size;
-       mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
-       mmio->idt = nfit_mem->idt_dcr;
-       mmio->spa = nfit_mem->spa_dcr;
-       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
-                       nfit_mem->memdev_dcr->interleave_ways);
-       if (rc) {
-               dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
-       if (rc < 0) {
-               dev_dbg(dev, "%s: %s failed to get DIMM flags\n",
-                               __func__, nvdimm_name(nvdimm));
-               return rc;
-       }
-
-       nfit_flush = nfit_mem->nfit_flush;
-       if (nfit_flush && nfit_flush->flush->hint_count != 0) {
-               nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
-                               nfit_flush->flush->hint_address[0], 8);
-               if (!nfit_blk->nvdimm_flush)
-                       return -ENOMEM;
-       }
-
-       if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
-               dev_warn(dev, "unable to guarantee persistence of writes\n");
-
-       if (mmio->line_size == 0)
-               return 0;
-
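-       /*
-        * The 8-byte control and status registers must not straddle an
-        * interleave line: e.g., with a line_size of 256, an offset of
-        * 252 within the line fails (252 + 8 > 256) because the register
-        * would span two non-contiguous system addresses.
-        */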
-       if ((u32) nfit_blk->cmd_offset % mmio->line_size
-                       + 8 > mmio->line_size) {
-               dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
-               return -ENXIO;
-       } else if ((u32) nfit_blk->stat_offset % mmio->line_size
-                       + 8 > mmio->line_size) {
-               dev_dbg(dev, "stat_offset crosses interleave boundary\n");
-               return -ENXIO;
-       }
-
-       return 0;
-}
-
-static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
-               struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-       int i;
-
-       if (!nfit_blk)
-               return; /* never enabled */
-
-       /* auto-free BLK spa mappings */
-       for (i = 0; i < 2; i++) {
-               struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
-
-               if (mmio->addr.base)
-                       nfit_spa_unmap(acpi_desc, mmio->spa);
-       }
-       nd_blk_region_set_provider_data(ndbr, NULL);
-       /* devm will free nfit_blk */
-}
-
-static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
-               struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       int cmd_rc, rc;
-
-       cmd->address = spa->address;
-       cmd->length = spa->length;
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
-                       sizeof(*cmd), &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
-{
-       int rc;
-       int cmd_rc;
-       struct nd_cmd_ars_start ars_start;
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
-       memset(&ars_start, 0, sizeof(ars_start));
-       ars_start.address = spa->address;
-       ars_start.length = spa->length;
-       if (nfit_spa_type(spa) == NFIT_SPA_PM)
-               ars_start.type = ND_ARS_PERSISTENT;
-       else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
-               ars_start.type = ND_ARS_VOLATILE;
-       else
-               return -ENOTTY;
-
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
-                       sizeof(ars_start), &cmd_rc);
-
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_continue(struct acpi_nfit_desc *acpi_desc)
-{
-       int rc, cmd_rc;
-       struct nd_cmd_ars_start ars_start;
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
-
-       memset(&ars_start, 0, sizeof(ars_start));
-       ars_start.address = ars_status->restart_address;
-       ars_start.length = ars_status->restart_length;
-       ars_start.type = ars_status->type;
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
-                       sizeof(ars_start), &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
-       int rc, cmd_rc;
-
-       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-                       acpi_desc->ars_status_size, &cmd_rc);
-       if (rc < 0)
-               return rc;
-       return cmd_rc;
-}
-
-static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
-               struct nd_cmd_ars_status *ars_status)
-{
-       int rc;
-       u32 i;
-
-       for (i = 0; i < ars_status->num_records; i++) {
-               rc = nvdimm_bus_add_poison(nvdimm_bus,
-                               ars_status->records[i].err_address,
-                               ars_status->records[i].length);
-               if (rc)
-                       return rc;
-       }
-
-       return 0;
-}
-
-static void acpi_nfit_remove_resource(void *data)
-{
-       struct resource *res = data;
-
-       remove_resource(res);
-}
-
-static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
-               struct nd_region_desc *ndr_desc)
-{
-       struct resource *res, *nd_res = ndr_desc->res;
-       int is_pmem, ret;
-
-       /* No operation if the region is already registered as PMEM */
-       is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
-                               IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
-       if (is_pmem == REGION_INTERSECTS)
-               return 0;
-
-       res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
-       if (!res)
-               return -ENOMEM;
-
-       res->name = "Persistent Memory";
-       res->start = nd_res->start;
-       res->end = nd_res->end;
-       res->flags = IORESOURCE_MEM;
-       res->desc = IORES_DESC_PERSISTENT_MEMORY;
-
-       ret = insert_resource(&iomem_resource, res);
-       if (ret)
-               return ret;
-
-       ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
-       if (ret) {
-               remove_resource(res);
-               return ret;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
-               struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
-               struct acpi_nfit_memory_map *memdev,
-               struct nfit_spa *nfit_spa)
-{
-       struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
-                       memdev->device_handle);
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nd_blk_region_desc *ndbr_desc;
-       struct nfit_mem *nfit_mem;
-       int blk_valid = 0;
-
-       if (!nvdimm) {
-               dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
-                               spa->range_index, memdev->device_handle);
-               return -ENODEV;
-       }
-
-       nd_mapping->nvdimm = nvdimm;
-       switch (nfit_spa_type(spa)) {
-       case NFIT_SPA_PM:
-       case NFIT_SPA_VOLATILE:
-               nd_mapping->start = memdev->address;
-               nd_mapping->size = memdev->region_size;
-               break;
-       case NFIT_SPA_DCR:
-               nfit_mem = nvdimm_provider_data(nvdimm);
-               if (!nfit_mem || !nfit_mem->bdw) {
-                       dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
-                                       spa->range_index, nvdimm_name(nvdimm));
-               } else {
-                       nd_mapping->size = nfit_mem->bdw->capacity;
-                       nd_mapping->start = nfit_mem->bdw->start_address;
-                       ndr_desc->num_lanes = nfit_mem->bdw->windows;
-                       blk_valid = 1;
-               }
-
-               ndr_desc->nd_mapping = nd_mapping;
-               ndr_desc->num_mappings = blk_valid;
-               ndbr_desc = to_blk_region_desc(ndr_desc);
-               ndbr_desc->enable = acpi_nfit_blk_region_enable;
-               ndbr_desc->disable = acpi_nfit_blk_region_disable;
-               ndbr_desc->do_io = acpi_desc->blk_do_io;
-               nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       return -ENOMEM;
-               break;
-       }
-
-       return 0;
-}
-
-static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       struct nd_blk_region_desc ndbr_desc;
-       struct nd_region_desc *ndr_desc;
-       struct nfit_memdev *nfit_memdev;
-       struct nvdimm_bus *nvdimm_bus;
-       struct resource res;
-       int count = 0, rc;
-
-       if (nfit_spa->nd_region)
-               return 0;
-
-       if (spa->range_index == 0) {
-               dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-                               __func__);
-               return 0;
-       }
-
-       memset(&res, 0, sizeof(res));
-       memset(&nd_mappings, 0, sizeof(nd_mappings));
-       memset(&ndbr_desc, 0, sizeof(ndbr_desc));
-       res.start = spa->address;
-       res.end = res.start + spa->length - 1;
-       ndr_desc = &ndbr_desc.ndr_desc;
-       ndr_desc->res = &res;
-       ndr_desc->provider_data = nfit_spa;
-       ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
-       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
-               ndr_desc->numa_node = acpi_map_pxm_to_online_node(
-                                               spa->proximity_domain);
-       else
-               ndr_desc->numa_node = NUMA_NO_NODE;
-
-       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
-               struct nd_mapping *nd_mapping;
-
-               if (memdev->range_index != spa->range_index)
-                       continue;
-               if (count >= ND_MAX_MAPPINGS) {
-                       dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
-                                       spa->range_index, ND_MAX_MAPPINGS);
-                       return -ENXIO;
-               }
-               nd_mapping = &nd_mappings[count++];
-               rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
-                               memdev, nfit_spa);
-               if (rc)
-                       goto out;
-       }
-
-       ndr_desc->nd_mapping = nd_mappings;
-       ndr_desc->num_mappings = count;
-       rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
-       if (rc)
-               goto out;
-
-       nvdimm_bus = acpi_desc->nvdimm_bus;
-       if (nfit_spa_type(spa) == NFIT_SPA_PM) {
-               rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
-               if (rc) {
-                       dev_warn(acpi_desc->dev,
-                               "failed to insert pmem resource to iomem: %d\n",
-                               rc);
-                       goto out;
-               }
-
-               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       rc = -ENOMEM;
-       } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
-               nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
-                               ndr_desc);
-               if (!nfit_spa->nd_region)
-                       rc = -ENOMEM;
-       }
-
- out:
-       if (rc)
-               dev_err(acpi_desc->dev, "failed to register spa range %d\n",
-                               nfit_spa->spa->range_index);
-       return rc;
-}
-
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-               u32 max_ars)
-{
-       struct device *dev = acpi_desc->dev;
-       struct nd_cmd_ars_status *ars_status;
-
-       if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-               memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
-               return 0;
-       }
-
-       if (acpi_desc->ars_status)
-               devm_kfree(dev, acpi_desc->ars_status);
-       acpi_desc->ars_status = NULL;
-       ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
-       if (!ars_status)
-               return -ENOMEM;
-       acpi_desc->ars_status = ars_status;
-       acpi_desc->ars_status_size = max_ars;
-       return 0;
-}
-
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       int rc;
-
-       if (!nfit_spa->max_ars) {
-               struct nd_cmd_ars_cap ars_cap;
-
-               memset(&ars_cap, 0, sizeof(ars_cap));
-               rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-               if (rc < 0)
-                       return rc;
-               nfit_spa->max_ars = ars_cap.max_ars_out;
-               nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-               /* check that the supported scrub types match the spa type */
-               if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-                               ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-                       return -ENOTTY;
-               else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-                               ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-                       return -ENOTTY;
-       }
-
-       if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
-               return -ENOMEM;
-
-       rc = ars_get_status(acpi_desc);
-       if (rc < 0 && rc != -ENOSPC)
-               return rc;
-
-       if (ars_status_process_records(acpi_desc->nvdimm_bus,
-                               acpi_desc->ars_status))
-               return -ENOMEM;
-
-       return 0;
-}
-
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
-{
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       unsigned int overflow_retry = scrub_overflow_abort;
-       u64 init_ars_start = 0, init_ars_len = 0;
-       struct device *dev = acpi_desc->dev;
-       unsigned int tmo = scrub_timeout;
-       int rc;
-
-       if (nfit_spa->ars_done || !nfit_spa->nd_region)
-               return;
-
-       rc = ars_start(acpi_desc, nfit_spa);
-       /*
-        * If we timed out the initial scan we'll still be busy here,
-        * and will wait another timeout before giving up permanently.
-        */
-       if (rc < 0 && rc != -EBUSY)
-               return;
-
-       do {
-               u64 ars_start, ars_len;
-
-               if (acpi_desc->cancel)
-                       break;
-               rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-               if (rc == -ENOTTY)
-                       break;
-               if (rc == -EBUSY && !tmo) {
-                       dev_warn(dev, "range %d ars timeout, aborting\n",
-                                       spa->range_index);
-                       break;
-               }
-
-               if (rc == -EBUSY) {
-                       /*
-                        * Note, entries may be appended to the list
-                        * while the lock is dropped, but the workqueue
-                        * being active prevents entries being deleted /
-                        * freed.
-                        */
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       mutex_lock(&acpi_desc->init_mutex);
-                       continue;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       if (!init_ars_len) {
-                               init_ars_len = acpi_desc->ars_status->length;
-                               init_ars_start = acpi_desc->ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-               }
-
-               if (rc < 0) {
-                       dev_warn(dev, "range %d ars continuation failed\n",
-                                       spa->range_index);
-                       break;
-               }
-
-               if (init_ars_len) {
-                       ars_start = init_ars_start;
-                       ars_len = init_ars_len;
-               } else {
-                       ars_start = acpi_desc->ars_status->address;
-                       ars_len = acpi_desc->ars_status->length;
-               }
-               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-                               spa->range_index, ars_start, ars_len);
-               /* notify the region about new poison entries */
-               nvdimm_region_notify(nfit_spa->nd_region,
-                               NVDIMM_REVALIDATE_POISON);
-               break;
-       } while (1);
-}
-
-static void acpi_nfit_scrub(struct work_struct *work)
-{
-       struct device *dev;
-       u64 init_scrub_length = 0;
-       struct nfit_spa *nfit_spa;
-       u64 init_scrub_address = 0;
-       bool init_ars_done = false;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned int tmo = scrub_timeout;
-       unsigned int overflow_retry = scrub_overflow_abort;
-
-       acpi_desc = container_of(work, typeof(*acpi_desc), work);
-       dev = acpi_desc->dev;
-
-       /*
-        * We scrub in 2 phases.  The first phase waits for any platform
-        * firmware initiated scrubs to complete and then we go search for the
-        * affected spa regions to mark them scanned.  In the second phase we
-        * initiate a directed scrub for every range that was not scrubbed in
-        * phase 1.
-        */
-
-       /* process platform firmware initiated scrubs */
- retry:
-       mutex_lock(&acpi_desc->init_mutex);
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct nd_cmd_ars_status *ars_status;
-               struct acpi_nfit_system_address *spa;
-               u64 ars_start, ars_len;
-               int rc;
-
-               if (acpi_desc->cancel)
-                       break;
-
-               if (nfit_spa->nd_region)
-                       continue;
-
-               if (init_ars_done) {
-                       /*
-                        * No need to re-query, we're now just
-                        * reconciling all the ranges covered by the
-                        * initial scrub
-                        */
-                       rc = 0;
-               } else
-                       rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-               if (rc == -ENOTTY) {
-                       /* no ars capability, just register spa and move on */
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-                       continue;
-               }
-
-               if (rc == -EBUSY && !tmo) {
-                       /* fallthrough to directed scrub in phase 2 */
-                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-                       break;
-               } else if (rc == -EBUSY) {
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       goto retry;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       ars_status = acpi_desc->ars_status;
-                       /*
-                        * Record the original scrub range, so that we
-                        * can recall all the ranges impacted by the
-                        * initial scrub.
-                        */
-                       if (!init_scrub_length) {
-                               init_scrub_length = ars_status->length;
-                               init_scrub_address = ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-                       if (rc == 0) {
-                               mutex_unlock(&acpi_desc->init_mutex);
-                               goto retry;
-                       }
-               }
-
-               if (rc < 0) {
-                       /*
-                        * Initial scrub failed, we'll give it one more
-                        * try below...
-                        */
-                       break;
-               }
-
-               /* We got some final results, record completed ranges */
-               ars_status = acpi_desc->ars_status;
-               if (init_scrub_length) {
-                       ars_start = init_scrub_address;
-                       ars_len = init_scrub_length;
-               } else {
-                       ars_start = ars_status->address;
-                       ars_len = ars_status->length;
-               }
-               spa = nfit_spa->spa;
-
-               if (!init_ars_done) {
-                       init_ars_done = true;
-                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-                                       ars_start, ars_len);
-               }
-               if (ars_start <= spa->address && ars_start + ars_len
-                               >= spa->address + spa->length)
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-       }
-
-       /*
-        * For all the ranges not covered by an initial scrub we still
-        * want to see if there are errors, but it's ok to discover them
-        * asynchronously.
-        */
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               /*
-                * Flag all the ranges that still need scrubbing, but
-                * register them now to make data available.
-                */
-               if (nfit_spa->nd_region)
-                       nfit_spa->ars_done = 1;
-               else
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-       }
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-       mutex_unlock(&acpi_desc->init_mutex);
-}
-
-static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
-{
-       struct nfit_spa *nfit_spa;
-       int rc;
-
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-                       /* BLK regions don't need to wait for ars results */
-                       rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-                       if (rc)
-                               return rc;
-               }
-
-       queue_work(nfit_wq, &acpi_desc->work);
-       return 0;
-}
-
-static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_table_prev *prev)
-{
-       struct device *dev = acpi_desc->dev;
-
-       if (!list_empty(&prev->spas) ||
-                       !list_empty(&prev->memdevs) ||
-                       !list_empty(&prev->dcrs) ||
-                       !list_empty(&prev->bdws) ||
-                       !list_empty(&prev->idts) ||
-                       !list_empty(&prev->flushes)) {
-               dev_err(dev, "new nfit deletes entries (unsupported)\n");
-               return -ENXIO;
-       }
-       return 0;
-}
-
-int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
-{
-       struct device *dev = acpi_desc->dev;
-       struct nfit_table_prev prev;
-       const void *end;
-       u8 *data;
-       int rc;
-
-       mutex_lock(&acpi_desc->init_mutex);
-
-       INIT_LIST_HEAD(&prev.spas);
-       INIT_LIST_HEAD(&prev.memdevs);
-       INIT_LIST_HEAD(&prev.dcrs);
-       INIT_LIST_HEAD(&prev.bdws);
-       INIT_LIST_HEAD(&prev.idts);
-       INIT_LIST_HEAD(&prev.flushes);
-
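-       /*
-        * Move all previously parsed tables to @prev; add_table() below
-        * re-links any entry that reappears in the new NFIT, so anything
-        * left on @prev afterwards is a deletion.
-        */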
-       list_cut_position(&prev.spas, &acpi_desc->spas,
-                               acpi_desc->spas.prev);
-       list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
-                               acpi_desc->memdevs.prev);
-       list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
-                               acpi_desc->dcrs.prev);
-       list_cut_position(&prev.bdws, &acpi_desc->bdws,
-                               acpi_desc->bdws.prev);
-       list_cut_position(&prev.idts, &acpi_desc->idts,
-                               acpi_desc->idts.prev);
-       list_cut_position(&prev.flushes, &acpi_desc->flushes,
-                               acpi_desc->flushes.prev);
-
-       data = (u8 *) acpi_desc->nfit;
-       end = data + sz;
-       while (!IS_ERR_OR_NULL(data))
-               data = add_table(acpi_desc, &prev, data, end);
-
-       if (IS_ERR(data)) {
-               dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-                               PTR_ERR(data));
-               rc = PTR_ERR(data);
-               goto out_unlock;
-       }
-
-       rc = acpi_nfit_check_deletions(acpi_desc, &prev);
-       if (rc)
-               goto out_unlock;
-
-       if (nfit_mem_init(acpi_desc) != 0) {
-               rc = -ENOMEM;
-               goto out_unlock;
-       }
-
-       acpi_nfit_init_dsms(acpi_desc);
-
-       rc = acpi_nfit_register_dimms(acpi_desc);
-       if (rc)
-               goto out_unlock;
-
-       rc = acpi_nfit_register_regions(acpi_desc);
-
- out_unlock:
-       mutex_unlock(&acpi_desc->init_mutex);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_init);
-
-struct acpi_nfit_flush_work {
-       struct work_struct work;
-       struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-       struct acpi_nfit_flush_work *flush;
-
-       flush = container_of(work, typeof(*flush), work);
-       complete(&flush->cmp);
-}
-
-static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-       struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_flush_work flush;
-
-       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
-       device_lock(dev);
-       device_unlock(dev);
-
-       /*
-        * Scrub work could take 10s of seconds, userspace may give up so we
-        * need to be interruptible while waiting.
-        */
-       INIT_WORK_ONSTACK(&flush.work, flush_probe);
-       init_completion(&flush.cmp);
-       queue_work(nfit_wq, &flush.work);
-       return wait_for_completion_interruptible(&flush.cmp);
-}
-
-static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
-               struct nvdimm *nvdimm, unsigned int cmd)
-{
-       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-
-       if (nvdimm)
-               return 0;
-       if (cmd != ND_CMD_ARS_START)
-               return 0;
-
-       /*
-        * The kernel and userspace may race to initiate a scrub, but
-        * the scrub thread is prepared to lose that initial race.  It
-        * just needs a guarantee that any ARS it initiates is not
-        * interrupted by an intervening start request from userspace.
-        */
-       if (work_busy(&acpi_desc->work))
-               return -EBUSY;
-
-       return 0;
-}
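From userspace, this gate surfaces as -EBUSY on an ARS start request. A
hypothetical sketch of that caller side, assuming a /dev/ndctl0 bus node and
placeholder range values (none of this is code from the patch):

    /* hypothetical userspace sketch: request an ARS, tolerate the race */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/ndctl.h>

    static int start_ars(unsigned long long address, unsigned long long length)
    {
            struct nd_cmd_ars_start ars = {
                    .address = address,
                    .length = length,
                    .type = ND_ARS_PERSISTENT,
            };
            int fd = open("/dev/ndctl0", O_RDWR);

            if (fd < 0)
                    return -errno;
            if (ioctl(fd, ND_IOCTL_ARS_START, &ars) < 0 && errno == EBUSY)
                    fprintf(stderr, "scrub in flight, retry later\n");
            close(fd);
            return 0;
    }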
-
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
-{
-       struct nvdimm_bus_descriptor *nd_desc;
-
-       dev_set_drvdata(dev, acpi_desc);
-       acpi_desc->dev = dev;
-       acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
-       nd_desc = &acpi_desc->nd_desc;
-       nd_desc->provider_name = "ACPI.NFIT";
-       nd_desc->ndctl = acpi_nfit_ctl;
-       nd_desc->flush_probe = acpi_nfit_flush_probe;
-       nd_desc->clear_to_send = acpi_nfit_clear_to_send;
-       nd_desc->attr_groups = acpi_nfit_attribute_groups;
-
-       INIT_LIST_HEAD(&acpi_desc->spa_maps);
-       INIT_LIST_HEAD(&acpi_desc->spas);
-       INIT_LIST_HEAD(&acpi_desc->dcrs);
-       INIT_LIST_HEAD(&acpi_desc->bdws);
-       INIT_LIST_HEAD(&acpi_desc->idts);
-       INIT_LIST_HEAD(&acpi_desc->flushes);
-       INIT_LIST_HEAD(&acpi_desc->memdevs);
-       INIT_LIST_HEAD(&acpi_desc->dimms);
-       mutex_init(&acpi_desc->spa_map_mutex);
-       mutex_init(&acpi_desc->init_mutex);
-       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
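Together with acpi_nfit_init(), this export is the whole embedding contract;
acpi_nfit_add() below follows it, and any second consumer looks much the same.
A minimal sketch, assuming a caller-supplied payload (my_nfit_probe and
nfit_blob are illustrative names):

    /* minimal embedder sketch (hypothetical caller) */
    static int my_nfit_probe(struct device *dev, void *nfit_blob, acpi_size sz)
    {
            struct acpi_nfit_desc *acpi_desc;
            int rc;

            acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
            if (!acpi_desc)
                    return -ENOMEM;
            acpi_nfit_desc_init(acpi_desc, dev);
            acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
            if (!acpi_desc->nvdimm_bus)
                    return -ENOMEM;
            acpi_desc->nfit = nfit_blob;    /* payload, minus the ACPI header */
            rc = acpi_nfit_init(acpi_desc, sz);
            if (rc)
                    nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
            return rc;
    }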
-
-static int acpi_nfit_add(struct acpi_device *adev)
-{
-       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-       struct acpi_nfit_desc *acpi_desc;
-       struct device *dev = &adev->dev;
-       struct acpi_table_header *tbl;
-       acpi_status status = AE_OK;
-       acpi_size sz;
-       int rc;
-
-       status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
-       if (ACPI_FAILURE(status)) {
-               /* This is ok, we could have an nvdimm hotplugged later */
-               dev_dbg(dev, "failed to find NFIT at startup\n");
-               return 0;
-       }
-
-       acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
-       if (!acpi_desc)
-               return -ENOMEM;
-       acpi_nfit_desc_init(acpi_desc, &adev->dev);
-       acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-       if (!acpi_desc->nvdimm_bus)
-               return -ENOMEM;
-
-       /*
-        * Save the acpi header for later and then skip it,
-        * making nfit point to the first nfit table header.
-        */
-       acpi_desc->acpi_header = *tbl;
-       acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
-       sz -= sizeof(struct acpi_table_nfit);
-
-       /* Evaluate _FIT and override with that if present */
-       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
-       if (ACPI_SUCCESS(status) && buf.length > 0) {
-               union acpi_object *obj;
-               /*
-                * Adjust for the acpi_object header of the _FIT
-                */
-               obj = buf.pointer;
-               if (obj->type == ACPI_TYPE_BUFFER) {
-                       acpi_desc->nfit =
-                               (struct acpi_nfit_header *)obj->buffer.pointer;
-                       sz = obj->buffer.length;
-               } else
-                       dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-                                __func__, (int) obj->type);
-       }
-
-       rc = acpi_nfit_init(acpi_desc, sz);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-               return rc;
-       }
-       return 0;
-}
-
-static int acpi_nfit_remove(struct acpi_device *adev)
-{
-       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-
-       acpi_desc->cancel = 1;
-       flush_workqueue(nfit_wq);
-       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-       return 0;
-}
-
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
-{
-       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-       struct acpi_nfit_header *nfit_saved;
-       union acpi_object *obj;
-       struct device *dev = &adev->dev;
-       acpi_status status;
-       int ret;
-
-       dev_dbg(dev, "%s: event: %d\n", __func__, event);
-
-       device_lock(dev);
-       if (!dev->driver) {
-               /* dev->driver may be null if we're being removed */
-               dev_dbg(dev, "%s: no driver found for dev\n", __func__);
-               goto out_unlock;
-       }
-
-       if (!acpi_desc) {
-               acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
-               if (!acpi_desc)
-                       goto out_unlock;
-               acpi_nfit_desc_init(acpi_desc, &adev->dev);
-               acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-               if (!acpi_desc->nvdimm_bus)
-                       goto out_unlock;
-       } else {
-               /*
-                * Finish previous registration before considering new
-                * regions.
-                */
-               flush_workqueue(nfit_wq);
-       }
-
-       /* Evaluate _FIT */
-       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
-       if (ACPI_FAILURE(status)) {
-               dev_err(dev, "failed to evaluate _FIT\n");
-               goto out_unlock;
-       }
-
-       nfit_saved = acpi_desc->nfit;
-       obj = buf.pointer;
-       if (obj->type == ACPI_TYPE_BUFFER) {
-               acpi_desc->nfit =
-                       (struct acpi_nfit_header *)obj->buffer.pointer;
-               ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
-               if (ret) {
-                       /* Merge failed, restore old nfit, and exit */
-                       acpi_desc->nfit = nfit_saved;
-                       dev_err(dev, "failed to merge updated NFIT\n");
-               }
-       } else {
-               /* Bad _FIT, restore old nfit */
-               dev_err(dev, "Invalid _FIT\n");
-       }
-       kfree(buf.pointer);
-
- out_unlock:
-       device_unlock(dev);
-}
-
-static const struct acpi_device_id acpi_nfit_ids[] = {
-       { "ACPI0012", 0 },
-       { "", 0 },
-};
-MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
-
-static struct acpi_driver acpi_nfit_driver = {
-       .name = KBUILD_MODNAME,
-       .ids = acpi_nfit_ids,
-       .ops = {
-               .add = acpi_nfit_add,
-               .remove = acpi_nfit_remove,
-               .notify = acpi_nfit_notify,
-       },
-};
-
-static __init int nfit_init(void)
-{
-       BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
-       BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
-
-       acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-       acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-       acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-       acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-       acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-
-       nfit_wq = create_singlethread_workqueue("nfit");
-       if (!nfit_wq)
-               return -ENOMEM;
-
-       return acpi_bus_register_driver(&acpi_nfit_driver);
-}
-
-static __exit void nfit_exit(void)
-{
-       acpi_bus_unregister_driver(&acpi_nfit_driver);
-       destroy_workqueue(nfit_wq);
-}
-
-module_init(nfit_init);
-module_exit(nfit_exit);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
deleted file mode 100644 (file)
index 02b9ea1..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * NVDIMM Firmware Interface Table - NFIT
- *
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __NFIT_H__
-#define __NFIT_H__
-#include <linux/workqueue.h>
-#include <linux/libnvdimm.h>
-#include <linux/types.h>
-#include <linux/uuid.h>
-#include <linux/acpi.h>
-#include <acpi/acuuid.h>
-
-/* ACPI 6.1 */
-#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
-
-/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
-#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
-
-/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
-#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
-#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
-
-#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
-               | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
-               | ACPI_NFIT_MEM_NOT_ARMED)
-
-enum nfit_uuids {
-       /* for simplicity alias the uuid index with the family id */
-       NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
-       NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
-       NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
-       NFIT_SPA_VOLATILE,
-       NFIT_SPA_PM,
-       NFIT_SPA_DCR,
-       NFIT_SPA_BDW,
-       NFIT_SPA_VDISK,
-       NFIT_SPA_VCD,
-       NFIT_SPA_PDISK,
-       NFIT_SPA_PCD,
-       NFIT_DEV_BUS,
-       NFIT_UUID_MAX,
-};
-
-/*
- * Region format interface codes are stored with the interface as the
- * LSB and the function as the MSB.
- */
-#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
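A short decode sketch of that packing (local names are illustrative; the
driver reads the code via le16_to_cpu(), as in format_show()):

    /* sketch: split a format interface code into its two bytes */
    u16 fic = le16_to_cpu(dcr->code);
    u8 interface = fic & 0xff;      /* interface lives in the LSB */
    u8 function = fic >> 8;         /* function in the MSB: 0x1, 0x2, 0x3 above */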
-
-enum {
-       NFIT_BLK_READ_FLUSH = 1,
-       NFIT_BLK_DCR_LATCH = 2,
-       NFIT_ARS_STATUS_DONE = 0,
-       NFIT_ARS_STATUS_BUSY = 1 << 16,
-       NFIT_ARS_STATUS_NONE = 2 << 16,
-       NFIT_ARS_STATUS_INTR = 3 << 16,
-       NFIT_ARS_START_BUSY = 6,
-       NFIT_ARS_CAP_NONE = 1,
-       NFIT_ARS_F_OVERFLOW = 1,
-       NFIT_ARS_TIMEOUT = 90,
-};
-
-struct nfit_spa {
-       struct acpi_nfit_system_address *spa;
-       struct list_head list;
-       struct nd_region *nd_region;
-       unsigned int ars_done:1;
-       u32 clear_err_unit;
-       u32 max_ars;
-};
-
-struct nfit_dcr {
-       struct acpi_nfit_control_region *dcr;
-       struct list_head list;
-};
-
-struct nfit_bdw {
-       struct acpi_nfit_data_region *bdw;
-       struct list_head list;
-};
-
-struct nfit_idt {
-       struct acpi_nfit_interleave *idt;
-       struct list_head list;
-};
-
-struct nfit_flush {
-       struct acpi_nfit_flush_address *flush;
-       struct list_head list;
-};
-
-struct nfit_memdev {
-       struct acpi_nfit_memory_map *memdev;
-       struct list_head list;
-};
-
-/* assembled tables for a given dimm/memory-device */
-struct nfit_mem {
-       struct nvdimm *nvdimm;
-       struct acpi_nfit_memory_map *memdev_dcr;
-       struct acpi_nfit_memory_map *memdev_pmem;
-       struct acpi_nfit_memory_map *memdev_bdw;
-       struct acpi_nfit_control_region *dcr;
-       struct acpi_nfit_data_region *bdw;
-       struct acpi_nfit_system_address *spa_dcr;
-       struct acpi_nfit_system_address *spa_bdw;
-       struct acpi_nfit_interleave *idt_dcr;
-       struct acpi_nfit_interleave *idt_bdw;
-       struct nfit_flush *nfit_flush;
-       struct list_head list;
-       struct acpi_device *adev;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned long dsm_mask;
-       int family;
-};
-
-struct acpi_nfit_desc {
-       struct nvdimm_bus_descriptor nd_desc;
-       struct acpi_table_header acpi_header;
-       struct acpi_nfit_header *nfit;
-       struct mutex spa_map_mutex;
-       struct mutex init_mutex;
-       struct list_head spa_maps;
-       struct list_head memdevs;
-       struct list_head flushes;
-       struct list_head dimms;
-       struct list_head spas;
-       struct list_head dcrs;
-       struct list_head bdws;
-       struct list_head idts;
-       struct nvdimm_bus *nvdimm_bus;
-       struct device *dev;
-       struct nd_cmd_ars_status *ars_status;
-       size_t ars_status_size;
-       struct work_struct work;
-       unsigned int cancel:1;
-       unsigned long dimm_cmd_force_en;
-       unsigned long bus_cmd_force_en;
-       int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
-                       void *iobuf, u64 len, int rw);
-};
-
-enum nd_blk_mmio_selector {
-       BDW,
-       DCR,
-};
-
-struct nd_blk_addr {
-       union {
-               void __iomem *base;
-               void __pmem  *aperture;
-       };
-};
-
-struct nfit_blk {
-       struct nfit_blk_mmio {
-               struct nd_blk_addr addr;
-               u64 size;
-               u64 base_offset;
-               u32 line_size;
-               u32 num_lines;
-               u32 table_size;
-               struct acpi_nfit_interleave *idt;
-               struct acpi_nfit_system_address *spa;
-       } mmio[2];
-       struct nd_region *nd_region;
-       u64 bdw_offset; /* post interleave offset */
-       u64 stat_offset;
-       u64 cmd_offset;
-       void __iomem *nvdimm_flush;
-       u32 dimm_flags;
-};
-
-enum spa_map_type {
-       SPA_MAP_CONTROL,
-       SPA_MAP_APERTURE,
-};
-
-struct nfit_spa_mapping {
-       struct acpi_nfit_desc *acpi_desc;
-       struct acpi_nfit_system_address *spa;
-       struct list_head list;
-       struct kref kref;
-       enum spa_map_type type;
-       struct nd_blk_addr addr;
-};
-
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
-{
-       return container_of(kref, struct nfit_spa_mapping, kref);
-}
-
-static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
-               struct nfit_mem *nfit_mem)
-{
-       if (nfit_mem->memdev_dcr)
-               return nfit_mem->memdev_dcr;
-       return nfit_mem->memdev_pmem;
-}
-
-static inline struct acpi_nfit_desc *to_acpi_desc(
-               struct nvdimm_bus_descriptor *nd_desc)
-{
-       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
-#endif /* __NFIT_H__ */
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
new file mode 100644 (file)
index 0000000..dd0d53c
--- /dev/null
@@ -0,0 +1,26 @@
+config ACPI_NFIT
+       tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+       depends on PHYS_ADDR_T_64BIT
+       depends on BLK_DEV
+       depends on ARCH_HAS_MMIO_FLUSH
+       select LIBNVDIMM
+       help
+         Infrastructure to probe ACPI 6 compliant platforms for
+         NVDIMMs (NFIT) and register a libnvdimm device tree.  In
+         addition to storage devices, this also enables libnvdimm to pass
+         ACPI._DSM messages for platform/dimm configuration.
+
+         To compile this driver as a module, choose M here:
+         the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+       bool "NFIT DSM debug"
+       depends on ACPI_NFIT
+       depends on DYNAMIC_DEBUG
+       default n
+       help
+         Enabling this option causes the nfit driver to dump the
+         input and output buffers of _DSM operations on the ACPI0012
+         device and its children.  This can be very verbose, so leave
+         it disabled unless you are debugging a hardware / firmware
+         issue.
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
new file mode 100644 (file)
index 0000000..a407e76
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
new file mode 100644 (file)
index 0000000..8c234dd
--- /dev/null
@@ -0,0 +1,2784 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/list_sort.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sysfs.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <linux/sort.h>
+#include <linux/pmem.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include <asm/cacheflush.h>
+#include "nfit.h"
+
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+static bool force_enable_dimms;
+module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+
+static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
+module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
+
+/* after three payloads of overflow, it's dead, Jim */
+static unsigned int scrub_overflow_abort = 3;
+module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_overflow_abort,
+               "Number of times we overflow ARS results before abort");
+
+static bool disable_vendor_specific;
+module_param(disable_vendor_specific, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_vendor_specific,
+               "Limit commands to the publicly specified set\n");
+
+LIST_HEAD(acpi_descs);
+DEFINE_MUTEX(acpi_desc_lock);
+
+static struct workqueue_struct *nfit_wq;
+
+struct nfit_table_prev {
+       struct list_head spas;
+       struct list_head memdevs;
+       struct list_head dcrs;
+       struct list_head bdws;
+       struct list_head idts;
+       struct list_head flushes;
+};
+
+static u8 nfit_uuid[NFIT_UUID_MAX][16];
+
+const u8 *to_nfit_uuid(enum nfit_uuids id)
+{
+       return nfit_uuid[id];
+}
+EXPORT_SYMBOL(to_nfit_uuid);
+
+static struct acpi_nfit_desc *to_acpi_nfit_desc(
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+       /*
+        * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
+        * acpi_device.
+        */
+       if (!nd_desc->provider_name
+                       || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
+               return NULL;
+
+       return to_acpi_device(acpi_desc->dev);
+}
+
+static int xlat_status(void *buf, unsigned int cmd)
+{
+       struct nd_cmd_clear_error *clear_err;
+       struct nd_cmd_ars_status *ars_status;
+       struct nd_cmd_ars_start *ars_start;
+       struct nd_cmd_ars_cap *ars_cap;
+       u16 flags;
+
+       switch (cmd) {
+       case ND_CMD_ARS_CAP:
+               ars_cap = buf;
+               if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+                       return -ENOTTY;
+
+               /* Command failed */
+               if (ars_cap->status & 0xffff)
+                       return -EIO;
+
+               /* No supported scan types for this range */
+               flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
+               if ((ars_cap->status >> 16 & flags) == 0)
+                       return -ENOTTY;
+               break;
+       case ND_CMD_ARS_START:
+               ars_start = buf;
+               /* ARS is in progress */
+               if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+                       return -EBUSY;
+
+               /* Command failed */
+               if (ars_start->status & 0xffff)
+                       return -EIO;
+               break;
+       case ND_CMD_ARS_STATUS:
+               ars_status = buf;
+               /* Command failed */
+               if (ars_status->status & 0xffff)
+                       return -EIO;
+               /* Check the extended status (upper two bytes) */
+               if (ars_status->status == NFIT_ARS_STATUS_DONE)
+                       return 0;
+
+               /* ARS is in progress */
+               if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+                       return -EBUSY;
+
+               /* No ARS performed for the current boot */
+               if (ars_status->status == NFIT_ARS_STATUS_NONE)
+                       return -EAGAIN;
+
+               /*
+                * ARS interrupted, either we overflowed or some other
+                * agent wants the scan to stop.  If we didn't overflow
+                * then just continue with the returned results.
+                */
+               if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+                       if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
+                               return -ENOSPC;
+                       return 0;
+               }
+
+               /* Unknown status */
+               if (ars_status->status >> 16)
+                       return -EIO;
+               break;
+       case ND_CMD_CLEAR_ERROR:
+               clear_err = buf;
+               if (clear_err->status & 0xffff)
+                       return -EIO;
+               if (!clear_err->cleared)
+                       return -EIO;
+               if (clear_err->length > clear_err->cleared)
+                       return clear_err->cleared;
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
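Every branch above tests the same packing of the ACPI status dword; a minimal
sketch of the two fields (local names are illustrative):

    /* sketch: the status dword that xlat_status() decodes */
    u32 status = ars_status->status;
    u16 cmd_status = status & 0xffff; /* 0 == command success */
    u16 ext_status = status >> 16;    /* command-specific: busy/none/intr */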
+
+static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+               unsigned int buf_len, int *cmd_rc)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+       union acpi_object in_obj, in_buf, *out_obj;
+       const struct nd_cmd_desc *desc = NULL;
+       struct device *dev = acpi_desc->dev;
+       struct nd_cmd_pkg *call_pkg = NULL;
+       const char *cmd_name, *dimm_name;
+       unsigned long cmd_mask, dsm_mask;
+       acpi_handle handle;
+       unsigned int func;
+       const u8 *uuid;
+       u32 offset;
+       int rc, i;
+
+       func = cmd;
+       if (cmd == ND_CMD_CALL) {
+               call_pkg = buf;
+               func = call_pkg->nd_command;
+       }
+
+       if (nvdimm) {
+               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+               struct acpi_device *adev = nfit_mem->adev;
+
+               if (!adev)
+                       return -ENOTTY;
+               if (call_pkg && nfit_mem->family != call_pkg->nd_family)
+                       return -ENOTTY;
+
+               dimm_name = nvdimm_name(nvdimm);
+               cmd_name = nvdimm_cmd_name(cmd);
+               cmd_mask = nvdimm_cmd_mask(nvdimm);
+               dsm_mask = nfit_mem->dsm_mask;
+               desc = nd_cmd_dimm_desc(cmd);
+               uuid = to_nfit_uuid(nfit_mem->family);
+               handle = adev->handle;
+       } else {
+               struct acpi_device *adev = to_acpi_dev(acpi_desc);
+
+               cmd_name = nvdimm_bus_cmd_name(cmd);
+               cmd_mask = nd_desc->cmd_mask;
+               dsm_mask = cmd_mask;
+               desc = nd_cmd_bus_desc(cmd);
+               uuid = to_nfit_uuid(NFIT_DEV_BUS);
+               handle = adev->handle;
+               dimm_name = "bus";
+       }
+
+       if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
+               return -ENOTTY;
+
+       if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
+               return -ENOTTY;
+
+       in_obj.type = ACPI_TYPE_PACKAGE;
+       in_obj.package.count = 1;
+       in_obj.package.elements = &in_buf;
+       in_buf.type = ACPI_TYPE_BUFFER;
+       in_buf.buffer.pointer = buf;
+       in_buf.buffer.length = 0;
+
+       /* libnvdimm has already validated the input envelope */
+       for (i = 0; i < desc->in_num; i++)
+               in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
+                               i, buf);
+
+       if (call_pkg) {
+               /* skip over package wrapper */
+               in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
+               in_buf.buffer.length = call_pkg->nd_size_in;
+       }
+
+       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+               dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
+                               __func__, dimm_name, cmd, func,
+                               in_buf.buffer.length);
+               print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
+                       in_buf.buffer.pointer,
+                       min_t(u32, 256, in_buf.buffer.length), true);
+       }
+
+       out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+       if (!out_obj) {
+               dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
+                               cmd_name);
+               return -EINVAL;
+       }
+
+       if (call_pkg) {
+               call_pkg->nd_fw_size = out_obj->buffer.length;
+               memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
+                       out_obj->buffer.pointer,
+                       min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
+
+               ACPI_FREE(out_obj);
+               /*
+                * We need to support firmware functions whose output size
+                * is not known in advance; the caller determines the
+                * required size from nd_fw_size.  If we returned an error
+                * here (as we do elsewhere), the caller could not rely on
+                * the returned data to make that calculation.
+                */
+               return 0;
+       }
+
+       if (out_obj->type != ACPI_TYPE_BUFFER) {
+               dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
+                               __func__, dimm_name, cmd_name, out_obj->type);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+               dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
+                               dimm_name, cmd_name, out_obj->buffer.length);
+               print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+                               4, out_obj->buffer.pointer, min_t(u32, 128,
+                                       out_obj->buffer.length), true);
+       }
+
+       for (i = 0, offset = 0; i < desc->out_num; i++) {
+               u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
+                               (u32 *) out_obj->buffer.pointer);
+
+               if (offset + out_size > out_obj->buffer.length) {
+                       dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
+                                       __func__, dimm_name, cmd_name, i);
+                       break;
+               }
+
+               if (in_buf.buffer.length + offset + out_size > buf_len) {
+                       dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
+                                       __func__, dimm_name, cmd_name, i);
+                       rc = -ENXIO;
+                       goto out;
+               }
+               memcpy(buf + in_buf.buffer.length + offset,
+                               out_obj->buffer.pointer + offset, out_size);
+               offset += out_size;
+       }
+       if (offset + in_buf.buffer.length < buf_len) {
+               if (i >= 1) {
+                       /*
+                        * status valid, return the number of bytes left
+                        * unfilled in the output buffer
+                        */
+                       rc = buf_len - offset - in_buf.buffer.length;
+                       if (cmd_rc)
+                               *cmd_rc = xlat_status(buf, cmd);
+               } else {
+                       dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
+                                       __func__, dimm_name, cmd_name, buf_len,
+                                       offset);
+                       rc = -ENXIO;
+               }
+       } else {
+               rc = 0;
+               if (cmd_rc)
+                       *cmd_rc = xlat_status(buf, cmd);
+       }
+
+ out:
+       ACPI_FREE(out_obj);
+
+       return rc;
+}
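acpi_nfit_ctl() is installed below as nd_desc->ndctl, so a bus-scope caller
reduces to a sketch like the following (not code from the patch):

    /* sketch: bus-scope ARS capability query through the ->ndctl hook */
    static int query_ars_cap(struct nvdimm_bus_descriptor *nd_desc,
                    struct acpi_nfit_system_address *spa)
    {
            struct nd_cmd_ars_cap cap = {
                    .address = spa->address,
                    .length = spa->length,
            };
            int cmd_rc, rc;

            rc = nd_desc->ndctl(nd_desc, NULL /* bus scope */, ND_CMD_ARS_CAP,
                            &cap, sizeof(cap), &cmd_rc);
            if (rc < 0)
                    return rc;      /* envelope / transport failure */
            return cmd_rc;          /* firmware status via xlat_status() */
    }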
+
+static const char *spa_type_name(u16 type)
+{
+       static const char *to_name[] = {
+               [NFIT_SPA_VOLATILE] = "volatile",
+               [NFIT_SPA_PM] = "pmem",
+               [NFIT_SPA_DCR] = "dimm-control-region",
+               [NFIT_SPA_BDW] = "block-data-window",
+               [NFIT_SPA_VDISK] = "volatile-disk",
+               [NFIT_SPA_VCD] = "volatile-cd",
+               [NFIT_SPA_PDISK] = "persistent-disk",
+               [NFIT_SPA_PCD] = "persistent-cd",
+       };
+
+       if (type > NFIT_SPA_PCD)
+               return "unknown";
+
+       return to_name[type];
+}
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa)
+{
+       int i;
+
+       for (i = 0; i < NFIT_UUID_MAX; i++)
+               if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+                       return i;
+       return -1;
+}
+
+static bool add_spa(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_system_address *spa)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
+
+       if (spa->header.length != sizeof(*spa))
+               return false;
+
+       list_for_each_entry(nfit_spa, &prev->spas, list) {
+               if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+                       list_move_tail(&nfit_spa->list, &acpi_desc->spas);
+                       return true;
+               }
+       }
+
+       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+                       GFP_KERNEL);
+       if (!nfit_spa)
+               return false;
+       INIT_LIST_HEAD(&nfit_spa->list);
+       memcpy(nfit_spa->spa, spa, sizeof(*spa));
+       list_add_tail(&nfit_spa->list, &acpi_desc->spas);
+       dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+                       spa->range_index,
+                       spa_type_name(nfit_spa_type(spa)));
+       return true;
+}
+
+static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_memory_map *memdev)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_memdev *nfit_memdev;
+
+       if (memdev->header.length != sizeof(*memdev))
+               return false;
+
+       list_for_each_entry(nfit_memdev, &prev->memdevs, list)
+               if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
+                       list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+                       return true;
+               }
+
+       nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
+                       GFP_KERNEL);
+       if (!nfit_memdev)
+               return false;
+       INIT_LIST_HEAD(&nfit_memdev->list);
+       memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
+       list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+       dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
+                       __func__, memdev->device_handle, memdev->range_index,
+                       memdev->region_index);
+       return true;
+}
+
+/*
+ * An implementation may provide a truncated control region if no block windows
+ * are defined.
+ */
+static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
+{
+       if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
+                               window_size))
+               return 0;
+       if (dcr->windows)
+               return sizeof(*dcr);
+       return offsetof(struct acpi_nfit_control_region, window_size);
+}
+
+static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_control_region *dcr)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_dcr *nfit_dcr;
+
+       if (!sizeof_dcr(dcr))
+               return false;
+
+       list_for_each_entry(nfit_dcr, &prev->dcrs, list)
+               if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
+                       list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+                       return true;
+               }
+
+       nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
+                       GFP_KERNEL);
+       if (!nfit_dcr)
+               return false;
+       INIT_LIST_HEAD(&nfit_dcr->list);
+       memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
+       list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+       dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+                       dcr->region_index, dcr->windows);
+       return true;
+}
+
+static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_data_region *bdw)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_bdw *nfit_bdw;
+
+       if (bdw->header.length != sizeof(*bdw))
+               return false;
+       list_for_each_entry(nfit_bdw, &prev->bdws, list)
+               if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
+                       list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
+                       return true;
+               }
+
+       nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
+                       GFP_KERNEL);
+       if (!nfit_bdw)
+               return false;
+       INIT_LIST_HEAD(&nfit_bdw->list);
+       memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
+       list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
+       dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+                       bdw->region_index, bdw->windows);
+       return true;
+}
+
+static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+{
+       if (idt->header.length < sizeof(*idt))
+               return 0;
+       return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
+}
+
+static bool add_idt(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_interleave *idt)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_idt *nfit_idt;
+
+       if (!sizeof_idt(idt))
+               return false;
+
+       list_for_each_entry(nfit_idt, &prev->idts, list) {
+               if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
+                       continue;
+
+               if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
+                       list_move_tail(&nfit_idt->list, &acpi_desc->idts);
+                       return true;
+               }
+       }
+
+       nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
+                       GFP_KERNEL);
+       if (!nfit_idt)
+               return false;
+       INIT_LIST_HEAD(&nfit_idt->list);
+       memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
+       list_add_tail(&nfit_idt->list, &acpi_desc->idts);
+       dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+                       idt->interleave_index, idt->line_count);
+       return true;
+}
+
+static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
+{
+       if (flush->header.length < sizeof(*flush))
+               return 0;
+       return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
+}
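sizeof_idt() and sizeof_flush() both size ACPICA structures whose trailing
variable-length array declares one element, hence the count - 1 term; for
example, assuming hint_count == 2:

    /* two hints: sizeof(*flush) already covers hint_address[0] */
    size_t sz = sizeof(struct acpi_nfit_flush_address) + sizeof(u64) * (2 - 1);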
+
+static bool add_flush(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev,
+               struct acpi_nfit_flush_address *flush)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_flush *nfit_flush;
+
+       if (!sizeof_flush(flush))
+               return false;
+
+       list_for_each_entry(nfit_flush, &prev->flushes, list) {
+               if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
+                       continue;
+
+               if (memcmp(nfit_flush->flush, flush,
+                                       sizeof_flush(flush)) == 0) {
+                       list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
+                       return true;
+               }
+       }
+
+       nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
+                       + sizeof_flush(flush), GFP_KERNEL);
+       if (!nfit_flush)
+               return false;
+       INIT_LIST_HEAD(&nfit_flush->list);
+       memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
+       list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
+       dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+                       flush->device_handle, flush->hint_count);
+       return true;
+}
+
+static void *add_table(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev, void *table, const void *end)
+{
+       struct device *dev = acpi_desc->dev;
+       struct acpi_nfit_header *hdr;
+       void *err = ERR_PTR(-ENOMEM);
+
+       if (table >= end)
+               return NULL;
+
+       hdr = table;
+       if (!hdr->length) {
+               dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
+                       hdr->type);
+               return NULL;
+       }
+
+       switch (hdr->type) {
+       case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
+               if (!add_spa(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_MEMORY_MAP:
+               if (!add_memdev(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_CONTROL_REGION:
+               if (!add_dcr(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_DATA_REGION:
+               if (!add_bdw(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_INTERLEAVE:
+               if (!add_idt(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
+               if (!add_flush(acpi_desc, prev, table))
+                       return err;
+               break;
+       case ACPI_NFIT_TYPE_SMBIOS:
+               dev_dbg(dev, "%s: smbios\n", __func__);
+               break;
+       default:
+               dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
+               break;
+       }
+
+       return table + hdr->length;
+}
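add_table() consumes one sub-table per call and returns the next header, NULL
at the end of the payload, or an ERR_PTR on allocation failure, so the
caller's walk reduces to this sketch:

    /* sketch of the parse loop over the NFIT payload */
    u8 *data = (u8 *) acpi_desc->nfit;
    const void *end = data + sz;

    while (!IS_ERR_OR_NULL(data))
            data = add_table(acpi_desc, &prev, data, end);
    if (IS_ERR(data))
            return PTR_ERR(data);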
+
+static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem)
+{
+       u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+       u16 dcr = nfit_mem->dcr->region_index;
+       struct nfit_spa *nfit_spa;
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               u16 range_index = nfit_spa->spa->range_index;
+               int type = nfit_spa_type(nfit_spa->spa);
+               struct nfit_memdev *nfit_memdev;
+
+               if (type != NFIT_SPA_BDW)
+                       continue;
+
+               list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+                       if (nfit_memdev->memdev->range_index != range_index)
+                               continue;
+                       if (nfit_memdev->memdev->device_handle != device_handle)
+                               continue;
+                       if (nfit_memdev->memdev->region_index != dcr)
+                               continue;
+
+                       nfit_mem->spa_bdw = nfit_spa->spa;
+                       return;
+               }
+       }
+
+       dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
+                       nfit_mem->spa_dcr->range_index);
+       nfit_mem->bdw = NULL;
+}
+
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
+{
+       u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
+       struct nfit_memdev *nfit_memdev;
+       struct nfit_bdw *nfit_bdw;
+       struct nfit_idt *nfit_idt;
+       u16 idt_idx, range_index;
+
+       list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
+               if (nfit_bdw->bdw->region_index != dcr)
+                       continue;
+               nfit_mem->bdw = nfit_bdw->bdw;
+               break;
+       }
+
+       if (!nfit_mem->bdw)
+               return;
+
+       nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
+
+       if (!nfit_mem->spa_bdw)
+               return;
+
+       range_index = nfit_mem->spa_bdw->range_index;
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               if (nfit_memdev->memdev->range_index != range_index ||
+                               nfit_memdev->memdev->region_index != dcr)
+                       continue;
+               nfit_mem->memdev_bdw = nfit_memdev->memdev;
+               idt_idx = nfit_memdev->memdev->interleave_index;
+               list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+                       if (nfit_idt->idt->interleave_index != idt_idx)
+                               continue;
+                       nfit_mem->idt_bdw = nfit_idt->idt;
+                       break;
+               }
+               break;
+       }
+}
+
+static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
+               struct acpi_nfit_system_address *spa)
+{
+       struct nfit_mem *nfit_mem, *found;
+       struct nfit_memdev *nfit_memdev;
+       int type = nfit_spa_type(spa);
+
+       switch (type) {
+       case NFIT_SPA_DCR:
+       case NFIT_SPA_PM:
+               break;
+       default:
+               return 0;
+       }
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct nfit_flush *nfit_flush;
+               struct nfit_dcr *nfit_dcr;
+               u32 device_handle;
+               u16 dcr;
+
+               if (nfit_memdev->memdev->range_index != spa->range_index)
+                       continue;
+               found = NULL;
+               dcr = nfit_memdev->memdev->region_index;
+               device_handle = nfit_memdev->memdev->device_handle;
+               list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+                       if (__to_nfit_memdev(nfit_mem)->device_handle
+                                       == device_handle) {
+                               found = nfit_mem;
+                               break;
+                       }
+
+               if (found)
+                       nfit_mem = found;
+               else {
+                       nfit_mem = devm_kzalloc(acpi_desc->dev,
+                                       sizeof(*nfit_mem), GFP_KERNEL);
+                       if (!nfit_mem)
+                               return -ENOMEM;
+                       INIT_LIST_HEAD(&nfit_mem->list);
+                       nfit_mem->acpi_desc = acpi_desc;
+                       list_add(&nfit_mem->list, &acpi_desc->dimms);
+               }
+
+               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+                       if (nfit_dcr->dcr->region_index != dcr)
+                               continue;
+                       /*
+                        * Record the control region for the dimm.  For
+                        * the ACPI 6.1 case, where there are separate
+                        * control regions for the pmem vs blk
+                        * interfaces, be sure to record the extended
+                        * blk details.
+                        */
+                       if (!nfit_mem->dcr)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       else if (nfit_mem->dcr->windows == 0
+                                       && nfit_dcr->dcr->windows)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       break;
+               }
+
+               list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+                       struct acpi_nfit_flush_address *flush;
+                       u16 i;
+
+                       if (nfit_flush->flush->device_handle != device_handle)
+                               continue;
+                       nfit_mem->nfit_flush = nfit_flush;
+                       flush = nfit_flush->flush;
+                       nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
+                                       flush->hint_count
+                                       * sizeof(struct resource), GFP_KERNEL);
+                       if (!nfit_mem->flush_wpq)
+                               return -ENOMEM;
+                       for (i = 0; i < flush->hint_count; i++) {
+                               struct resource *res = &nfit_mem->flush_wpq[i];
+
+                               res->start = flush->hint_address[i];
+                               res->end = res->start + 8 - 1;
+                       }
+                       break;
+               }
+
+               if (dcr && !nfit_mem->dcr) {
+                       dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+                                       spa->range_index, dcr);
+                       return -ENODEV;
+               }
+
+               if (type == NFIT_SPA_DCR) {
+                       struct nfit_idt *nfit_idt;
+                       u16 idt_idx;
+
+                       /* multiple dimms may share a SPA when interleaved */
+                       nfit_mem->spa_dcr = spa;
+                       nfit_mem->memdev_dcr = nfit_memdev->memdev;
+                       idt_idx = nfit_memdev->memdev->interleave_index;
+                       list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+                               if (nfit_idt->idt->interleave_index != idt_idx)
+                                       continue;
+                               nfit_mem->idt_dcr = nfit_idt->idt;
+                               break;
+                       }
+                       nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
+               } else {
+                       /*
+                        * A single dimm may belong to multiple SPA-PM
+                        * ranges, record at least one in addition to
+                        * any SPA-DCR range.
+                        */
+                       nfit_mem->memdev_pmem = nfit_memdev->memdev;
+               }
+       }
+
+       return 0;
+}
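Each flush_wpq resource recorded above describes one 8-byte flush-hint
register. Conceptually a consumer uses it as in this sketch (the mapping is
shown only for illustration; this is not the driver's literal code):

    /* conceptual sketch: trigger a posted-write-queue flush via a hint */
    void __iomem *hint = devm_ioremap(dev, res->start, resource_size(res));

    wmb();                  /* order prior posted writes toward the dimm */
    writeq(1, hint);        /* the mmio write itself requests the flush */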
+
+static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
+{
+       struct nfit_mem *a = container_of(_a, typeof(*a), list);
+       struct nfit_mem *b = container_of(_b, typeof(*b), list);
+       u32 handleA, handleB;
+
+       handleA = __to_nfit_memdev(a)->device_handle;
+       handleB = __to_nfit_memdev(b)->device_handle;
+       if (handleA < handleB)
+               return -1;
+       else if (handleA > handleB)
+               return 1;
+       return 0;
+}
+
+static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_spa *nfit_spa;
+
+       /*
+        * For each SPA-DCR or SPA-PMEM address range find its
+        * corresponding MEMDEV(s).  From each MEMDEV find the
+        * corresponding DCR.  Then, if we're operating on a SPA-DCR,
+        * try to find a SPA-BDW and a corresponding BDW that references
+        * the DCR.  Throw it all into an nfit_mem object.  Note that
+        * BDWs are optional.
+        */
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               int rc;
+
+               rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
+               if (rc)
+                       return rc;
+       }
+
+       list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
+
+       return 0;
+}
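For reference, the relationships assembled by nfit_mem_dcr_init() and
nfit_mem_init_bdw(), summarized:

    /*
     *   SPA (range_index) <-- MEMDEV --> DCR (region_index)
     *                           |
     *                           +-- device_handle ----> FLUSH hint table
     *                           +-- interleave_index -> IDT
     *
     *   and, for BLK-capable dimms: DCR --> BDW --> SPA-BDW
     */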
+
+static ssize_t revision_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+       return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
+}
+static DEVICE_ATTR_RO(revision);
+
+/*
+ * This shows the number of full Address Range Scrubs that have been
+ * completed since driver load time. Userspace can wait on this using
+ * select/poll etc. A '+' at the end indicates an ARS is in progress.
+ */
+static ssize_t scrub_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+       ssize_t rc = -ENXIO;
+
+       device_lock(dev);
+       nd_desc = dev_get_drvdata(dev);
+       if (nd_desc) {
+               struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+               rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
+                               (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+       }
+       device_unlock(dev);
+       return rc;
+}
+
+static ssize_t scrub_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t size)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+       ssize_t rc;
+       long val;
+
+       rc = kstrtol(buf, 0, &val);
+       if (rc)
+               return rc;
+       if (val != 1)
+               return -EINVAL;
+
+       device_lock(dev);
+       nd_desc = dev_get_drvdata(dev);
+       if (nd_desc) {
+               struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+               rc = acpi_nfit_ars_rescan(acpi_desc);
+       }
+       device_unlock(dev);
+       if (rc)
+               return rc;
+       return size;
+}
+static DEVICE_ATTR_RW(scrub);
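With the attribute group below named "nfit", this file lands under the bus
device. A hypothetical userspace sketch (the sysfs path is assumed, not taken
from the patch):

    /* hypothetical userspace sketch: trigger a rescan, read the count */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static int rescan(void)
    {
            char count[16];
            ssize_t n;
            int fd = open("/sys/bus/nd/devices/ndbus0/nfit/scrub", O_RDWR);

            if (fd < 0)
                    return -1;
            if (write(fd, "1\n", 2) < 0)    /* only the value 1 is accepted */
                    perror("scrub");
            n = pread(fd, count, sizeof(count) - 1, 0);
            if (n > 0) {
                    count[n] = '\0';
                    printf("completed scrubs: %s", count); /* '+' == busy */
            }
            close(fd);
            return 0;
    }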
+
+static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
+{
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
+               | 1 << ND_CMD_ARS_STATUS;
+
+       return (nd_desc->cmd_mask & mask) == mask;
+}
+
+static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+       if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
+               return 0;
+       return a->mode;
+}
+
+static struct attribute *acpi_nfit_attributes[] = {
+       &dev_attr_revision.attr,
+       &dev_attr_scrub.attr,
+       NULL,
+};
+
+static struct attribute_group acpi_nfit_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_attributes,
+       .is_visible = nfit_visible,
+};
+
+static const struct attribute_group *acpi_nfit_attribute_groups[] = {
+       &nvdimm_bus_attribute_group,
+       &acpi_nfit_attribute_group,
+       NULL,
+};
+
+static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       return __to_nfit_memdev(nfit_mem);
+}
+
+static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       return nfit_mem->dcr;
+}
+
+static ssize_t handle_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+       return sprintf(buf, "%#x\n", memdev->device_handle);
+}
+static DEVICE_ATTR_RO(handle);
+
+static ssize_t phys_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+       return sprintf(buf, "%#x\n", memdev->physical_id);
+}
+static DEVICE_ATTR_RO(phys_id);
+
+static ssize_t vendor_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t rev_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
+}
+static DEVICE_ATTR_RO(rev_id);
+
+static ssize_t device_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t subsystem_vendor_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
+}
+static DEVICE_ATTR_RO(subsystem_vendor);
+
+static ssize_t subsystem_rev_id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n",
+                       be16_to_cpu(dcr->subsystem_revision_id));
+}
+static DEVICE_ATTR_RO(subsystem_rev_id);
+
+static ssize_t subsystem_device_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
+}
+static DEVICE_ATTR_RO(subsystem_device);
+
+static int num_nvdimm_formats(struct nvdimm *nvdimm)
+{
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+       int formats = 0;
+
+       if (nfit_mem->memdev_pmem)
+               formats++;
+       if (nfit_mem->memdev_bdw)
+               formats++;
+       return formats;
+}
+
+static ssize_t format_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
+}
+static DEVICE_ATTR_RO(format);
+
+static ssize_t format1_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u32 handle;
+       ssize_t rc = -ENXIO;
+       struct nfit_mem *nfit_mem;
+       struct nfit_memdev *nfit_memdev;
+       struct acpi_nfit_desc *acpi_desc;
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       nfit_mem = nvdimm_provider_data(nvdimm);
+       acpi_desc = nfit_mem->acpi_desc;
+       handle = to_nfit_memdev(dev)->device_handle;
+
+       /* assumes DIMMs have at most 2 published interface codes */
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+               struct nfit_dcr *nfit_dcr;
+
+               if (memdev->device_handle != handle)
+                       continue;
+
+               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+                       if (nfit_dcr->dcr->region_index != memdev->region_index)
+                               continue;
+                       if (nfit_dcr->dcr->code == dcr->code)
+                               continue;
+                       rc = sprintf(buf, "0x%04x\n",
+                                       le16_to_cpu(nfit_dcr->dcr->code));
+                       break;
+               }
+               if (rc != -ENXIO)
+                       break;
+       }
+       mutex_unlock(&acpi_desc->init_mutex);
+       return rc;
+}
+static DEVICE_ATTR_RO(format1);
+
+static ssize_t formats_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+
+       return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
+}
+static DEVICE_ATTR_RO(formats);
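+
+/*
+ * Example (interface codes hypothetical): a DIMM publishing both a
+ * pmem and a blk window format would report formats == 2, with
+ * "format" showing one code and "format1" the other, e.g. 0x0201 and
+ * 0x0301.
+ */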
+
+static ssize_t serial_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t family_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       if (nfit_mem->family < 0)
+               return -ENXIO;
+       return sprintf(buf, "%d\n", nfit_mem->family);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t dsm_mask_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+       if (nfit_mem->family < 0)
+               return -ENXIO;
+       return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
+static ssize_t flags_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u16 flags = to_nfit_memdev(dev)->flags;
+
+       return sprintf(buf, "%s%s%s%s%s\n",
+               flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
+               flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
+               flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
+               flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
+               flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
+}
+static DEVICE_ATTR_RO(flags);
+
+static ssize_t id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+       if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
+               return sprintf(buf, "%04x-%02x-%04x-%08x\n",
+                               be16_to_cpu(dcr->vendor_id),
+                               dcr->manufacturing_location,
+                               be16_to_cpu(dcr->manufacturing_date),
+                               be32_to_cpu(dcr->serial_number));
+       else
+               return sprintf(buf, "%04x-%08x\n",
+                               be16_to_cpu(dcr->vendor_id),
+                               be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(id);
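+
+/*
+ * Sample "id" output (values made up): "8089-a2-1540-00abcdef" when
+ * the manufacturing fields are valid, "8089-00abcdef" otherwise.
+ */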
+
+static struct attribute *acpi_nfit_dimm_attributes[] = {
+       &dev_attr_handle.attr,
+       &dev_attr_phys_id.attr,
+       &dev_attr_vendor.attr,
+       &dev_attr_device.attr,
+       &dev_attr_rev_id.attr,
+       &dev_attr_subsystem_vendor.attr,
+       &dev_attr_subsystem_device.attr,
+       &dev_attr_subsystem_rev_id.attr,
+       &dev_attr_format.attr,
+       &dev_attr_formats.attr,
+       &dev_attr_format1.attr,
+       &dev_attr_serial.attr,
+       &dev_attr_flags.attr,
+       &dev_attr_id.attr,
+       &dev_attr_family.attr,
+       &dev_attr_dsm_mask.attr,
+       NULL,
+};
+
+static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
+               struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+
+       if (!to_nfit_dcr(dev))
+               return 0;
+       if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
+               return 0;
+       return a->mode;
+}
+
+static struct attribute_group acpi_nfit_dimm_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_dimm_attributes,
+       .is_visible = acpi_nfit_dimm_attr_visible,
+};
+
+static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
+       &nvdimm_attribute_group,
+       &nd_device_attribute_group,
+       &acpi_nfit_dimm_attribute_group,
+       NULL,
+};
+
+static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
+               u32 device_handle)
+{
+       struct nfit_mem *nfit_mem;
+
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+               if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
+                       return nfit_mem->nvdimm;
+
+       return NULL;
+}
+
+static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_mem *nfit_mem, u32 device_handle)
+{
+       struct acpi_device *adev, *adev_dimm;
+       struct device *dev = acpi_desc->dev;
+       unsigned long dsm_mask;
+       const u8 *uuid;
+       int i;
+
+       /* nfit test assumes 1:1 relationship between commands and dsms */
+       nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
+       nfit_mem->family = NVDIMM_FAMILY_INTEL;
+       adev = to_acpi_dev(acpi_desc);
+       if (!adev)
+               return 0;
+
+       adev_dimm = acpi_find_child_device(adev, device_handle, false);
+       nfit_mem->adev = adev_dimm;
+       if (!adev_dimm) {
+               dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
+                               device_handle);
+               return force_enable_dimms ? 0 : -ENODEV;
+       }
+
+       /*
+        * Until standardization materializes we need to consider 4
+        * different command sets.  Note that checking for function0 (bit0)
+        * tells us if any commands are reachable through this uuid.
+        */
+       for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
+               if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+                       break;
+
+       /* limit the supported commands to those that are publicly documented */
+       nfit_mem->family = i;
+       if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
+               dsm_mask = 0x3fe;
+               if (disable_vendor_specific)
+                       dsm_mask &= ~(1 << ND_CMD_VENDOR);
+       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
+               dsm_mask = 0x1c3c76;
+       } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+               dsm_mask = 0x1fe;
+               if (disable_vendor_specific)
+                       dsm_mask &= ~(1 << 8);
+       } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
+               dsm_mask = 0xffffffff;
+       } else {
+               dev_dbg(dev, "unknown dimm command family\n");
+               nfit_mem->family = -1;
+               /* DSMs are optional, continue loading the driver... */
+               return 0;
+       }
+
+       uuid = to_nfit_uuid(nfit_mem->family);
+       for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
+               if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+                       set_bit(i, &nfit_mem->dsm_mask);
+
+       return 0;
+}
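+
+/*
+ * For reference, the NVDIMM_FAMILY_INTEL mask 0x3fe covers functions 1
+ * through 9 (ND_CMD_SMART through ND_CMD_VENDOR); bit 0 is the _DSM
+ * discovery function and is never included.  disable_vendor_specific
+ * clears only bit ND_CMD_VENDOR from that set.
+ */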
+
+static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_mem *nfit_mem;
+       int dimm_count = 0;
+
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+               struct acpi_nfit_flush_address *flush;
+               unsigned long flags = 0, cmd_mask;
+               struct nvdimm *nvdimm;
+               u32 device_handle;
+               u16 mem_flags;
+               int rc;
+
+               device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+               nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
+               if (nvdimm) {
+                       dimm_count++;
+                       continue;
+               }
+
+               if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+                       flags |= NDD_ALIASING;
+
+               mem_flags = __to_nfit_memdev(nfit_mem)->flags;
+               if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
+                       flags |= NDD_UNARMED;
+
+               rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
+               if (rc)
+                       continue;
+
+               /*
+                * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
+                * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
+                * userspace interface.
+                */
+               cmd_mask = 1UL << ND_CMD_CALL;
+               if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+                       cmd_mask |= nfit_mem->dsm_mask;
+
+               flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
+                       : NULL;
+               nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
+                               acpi_nfit_dimm_attribute_groups,
+                               flags, cmd_mask, flush ? flush->hint_count : 0,
+                               nfit_mem->flush_wpq);
+               if (!nvdimm)
+                       return -ENOMEM;
+
+               nfit_mem->nvdimm = nvdimm;
+               dimm_count++;
+
+               if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
+                       continue;
+
+               dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
+                               nvdimm_name(nvdimm),
+                 mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
+                 mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
+                 mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
+                 mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
+
+       }
+
+       return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+}
+
+static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+       struct acpi_device *adev;
+       int i;
+
+       nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
+       adev = to_acpi_dev(acpi_desc);
+       if (!adev)
+               return;
+
+       for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
+               if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+                       set_bit(i, &nd_desc->cmd_mask);
+}
+
+static ssize_t range_index_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+
+       return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
+}
+static DEVICE_ATTR_RO(range_index);
+
+static struct attribute *acpi_nfit_region_attributes[] = {
+       &dev_attr_range_index.attr,
+       NULL,
+};
+
+static struct attribute_group acpi_nfit_region_attribute_group = {
+       .name = "nfit",
+       .attrs = acpi_nfit_region_attributes,
+};
+
+static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
+       &nd_region_attribute_group,
+       &nd_mapping_attribute_group,
+       &nd_device_attribute_group,
+       &nd_numa_attribute_group,
+       &acpi_nfit_region_attribute_group,
+       NULL,
+};
+
+/* enough info to uniquely specify an interleave set */
+struct nfit_set_info {
+       struct nfit_set_info_map {
+               u64 region_offset;
+               u32 serial_number;
+               u32 pad;
+       } mapping[0];
+};
+
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+       return sizeof(struct nfit_set_info)
+               + num_mappings * sizeof(struct nfit_set_info_map);
+}
+
+static int cmp_map(const void *m0, const void *m1)
+{
+       const struct nfit_set_info_map *map0 = m0;
+       const struct nfit_set_info_map *map1 = m1;
+
+       return memcmp(&map0->region_offset, &map1->region_offset,
+                       sizeof(u64));
+}
+
+/* Retrieve the nth entry referencing this spa */
+static struct acpi_nfit_memory_map *memdev_from_spa(
+               struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+{
+       struct nfit_memdev *nfit_memdev;
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+               if (nfit_memdev->memdev->range_index == range_index)
+                       if (n-- == 0)
+                               return nfit_memdev->memdev;
+       return NULL;
+}
+
+static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+               struct nd_region_desc *ndr_desc,
+               struct acpi_nfit_system_address *spa)
+{
+       int i, spa_type = nfit_spa_type(spa);
+       struct device *dev = acpi_desc->dev;
+       struct nd_interleave_set *nd_set;
+       u16 nr = ndr_desc->num_mappings;
+       struct nfit_set_info *info;
+
+       if (spa_type != NFIT_SPA_PM && spa_type != NFIT_SPA_VOLATILE)
+               return 0;
+
+       nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+       if (!nd_set)
+               return -ENOMEM;
+
+       info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+       for (i = 0; i < nr; i++) {
+               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+               struct nfit_set_info_map *map = &info->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+               struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
+                               spa->range_index, i);
+
+               if (!memdev || !nfit_mem->dcr) {
+                       dev_err(dev, "%s: failed to find DCR\n", __func__);
+                       return -ENODEV;
+               }
+
+               map->region_offset = memdev->region_offset;
+               map->serial_number = nfit_mem->dcr->serial_number;
+       }
+
+       sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+                       cmp_map, NULL);
+       nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+       ndr_desc->nd_set = nd_set;
+       devm_kfree(dev, info);
+
+       return 0;
+}
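+
+/*
+ * Sorting the mappings by region_offset before the fletcher64 pass
+ * keeps the interleave-set cookie stable across boots regardless of
+ * the order in which memdev entries were enumerated.
+ */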
+
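+/*
+ * Worked example (geometry hypothetical): with line_size = 256,
+ * num_lines = 4 and interleave_ways = 2 (so table_size = 4 * 2 * 256 =
+ * 2048), and idt->line_offset[] = { 0, 2, 4, 6 }, an offset of 1300
+ * decomposes as:
+ *
+ *   line_no = 5, sub_line_offset = 20
+ *   table_skip_count = 1, line_index = 1
+ *   line_offset = 2 * 256 = 512, table_offset = 1 * 2048 = 2048
+ *
+ * yielding base_offset + 2048 + 512 + 20.
+ */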
+static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
+{
+       struct acpi_nfit_interleave *idt = mmio->idt;
+       u32 sub_line_offset, line_index, line_offset;
+       u64 line_no, table_skip_count, table_offset;
+
+       line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
+       table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
+       line_offset = idt->line_offset[line_index]
+               * mmio->line_size;
+       table_offset = table_skip_count * mmio->table_size;
+
+       return mmio->base_offset + line_offset + table_offset + sub_line_offset;
+}
+
+static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+{
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+       u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+
+       if (mmio->num_lines)
+               offset = to_interleave_offset(offset, mmio);
+
+       return readl(mmio->addr.base + offset);
+}
+
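+/*
+ * Illustrative encoding (assuming a 64-byte cache line): a 512-byte
+ * write at dpa 0x1000 becomes cmd = 0x40 | (8ULL << 48) | (1ULL << 56),
+ * i.e. cache-line offset 0x40, length 8 lines, write bit set, before
+ * any interleave translation of the control register offset.
+ */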
+static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
+               resource_size_t dpa, unsigned int len, unsigned int write)
+{
+       u64 cmd, offset;
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+
+       enum {
+               BCW_OFFSET_MASK = (1ULL << 48)-1,
+               BCW_LEN_SHIFT = 48,
+               BCW_LEN_MASK = (1ULL << 8) - 1,
+               BCW_CMD_SHIFT = 56,
+       };
+
+       cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
+       len = len >> L1_CACHE_SHIFT;
+       cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
+       cmd |= ((u64) write) << BCW_CMD_SHIFT;
+
+       offset = nfit_blk->cmd_offset + mmio->size * bw;
+       if (mmio->num_lines)
+               offset = to_interleave_offset(offset, mmio);
+
+       writeq(cmd, mmio->addr.base + offset);
+       nvdimm_flush(nfit_blk->nd_region);
+
+       if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
+               readq(mmio->addr.base + offset);
+}
+
+static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
+               resource_size_t dpa, void *iobuf, size_t len, int rw,
+               unsigned int lane)
+{
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+       unsigned int copied = 0;
+       u64 base_offset;
+       int rc;
+
+       base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
+               + lane * mmio->size;
+       write_blk_ctl(nfit_blk, lane, dpa, len, rw);
+       while (len) {
+               unsigned int c;
+               u64 offset;
+
+               if (mmio->num_lines) {
+                       u32 line_offset;
+
+                       offset = to_interleave_offset(base_offset + copied,
+                                       mmio);
+                       div_u64_rem(offset, mmio->line_size, &line_offset);
+                       c = min_t(size_t, len, mmio->line_size - line_offset);
+               } else {
+                       offset = base_offset + nfit_blk->bdw_offset;
+                       c = len;
+               }
+
+               if (rw)
+                       memcpy_to_pmem(mmio->addr.aperture + offset,
+                                       iobuf + copied, c);
+               else {
+                       if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
+                               mmio_flush_range((void __force *)
+                                       mmio->addr.aperture + offset, c);
+
+                       memcpy_from_pmem(iobuf + copied,
+                                       mmio->addr.aperture + offset, c);
+               }
+
+               copied += c;
+               len -= c;
+       }
+
+       if (rw)
+               nvdimm_flush(nfit_blk->nd_region);
+
+       rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
+       return rc;
+}
+
+static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
+               resource_size_t dpa, void *iobuf, u64 len, int rw)
+{
+       struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
+       struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+       struct nd_region *nd_region = nfit_blk->nd_region;
+       unsigned int lane, copied = 0;
+       int rc = 0;
+
+       lane = nd_region_acquire_lane(nd_region);
+       while (len) {
+               u64 c = min(len, mmio->size);
+
+               rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
+                               iobuf + copied, c, rw, lane);
+               if (rc)
+                       break;
+
+               copied += c;
+               len -= c;
+       }
+       nd_region_release_lane(nd_region, lane);
+
+       return rc;
+}
+
+static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
+               struct acpi_nfit_interleave *idt, u16 interleave_ways)
+{
+       if (idt) {
+               mmio->num_lines = idt->line_count;
+               mmio->line_size = idt->line_size;
+               if (interleave_ways == 0)
+                       return -ENXIO;
+               mmio->table_size = mmio->num_lines * interleave_ways
+                       * mmio->line_size;
+       }
+
+       return 0;
+}
+
+static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
+{
+       struct nd_cmd_dimm_flags flags;
+       int rc;
+
+       memset(&flags, 0, sizeof(flags));
+       rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
+                       sizeof(flags), NULL);
+
+       if (rc >= 0 && flags.status == 0)
+               nfit_blk->dimm_flags = flags.flags;
+       else if (rc == -ENOTTY) {
+               /* fall back to a conservative default */
+               nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
+               rc = 0;
+       } else
+               rc = -ENXIO;
+
+       return rc;
+}
+
+static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
+               struct device *dev)
+{
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+       struct nfit_blk_mmio *mmio;
+       struct nfit_blk *nfit_blk;
+       struct nfit_mem *nfit_mem;
+       struct nvdimm *nvdimm;
+       int rc;
+
+       nvdimm = nd_blk_region_to_dimm(ndbr);
+       nfit_mem = nvdimm_provider_data(nvdimm);
+       if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
+               dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+                               nfit_mem ? "" : " nfit_mem",
+                               (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
+                               (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
+               return -ENXIO;
+       }
+
+       nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
+       if (!nfit_blk)
+               return -ENOMEM;
+       nd_blk_region_set_provider_data(ndbr, nfit_blk);
+       nfit_blk->nd_region = to_nd_region(dev);
+
+       /* map block aperture memory */
+       nfit_blk->bdw_offset = nfit_mem->bdw->offset;
+       mmio = &nfit_blk->mmio[BDW];
+       mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
+                        nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
+       if (!mmio->addr.base) {
+               dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+                               nvdimm_name(nvdimm));
+               return -ENOMEM;
+       }
+       mmio->size = nfit_mem->bdw->size;
+       mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
+       mmio->idt = nfit_mem->idt_bdw;
+       mmio->spa = nfit_mem->spa_bdw;
+       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
+                       nfit_mem->memdev_bdw->interleave_ways);
+       if (rc) {
+               dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       /* map block control memory */
+       nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
+       nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
+       mmio = &nfit_blk->mmio[DCR];
+       mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
+                       nfit_mem->spa_dcr->length);
+       if (!mmio->addr.base) {
+               dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+                               nvdimm_name(nvdimm));
+               return -ENOMEM;
+       }
+       mmio->size = nfit_mem->dcr->window_size;
+       mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
+       mmio->idt = nfit_mem->idt_dcr;
+       mmio->spa = nfit_mem->spa_dcr;
+       rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
+                       nfit_mem->memdev_dcr->interleave_ways);
+       if (rc) {
+               dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
+       if (rc < 0) {
+               dev_dbg(dev, "%s: %s failed to get DIMM flags\n",
+                               __func__, nvdimm_name(nvdimm));
+               return rc;
+       }
+
+       if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
+               dev_warn(dev, "unable to guarantee persistence of writes\n");
+
+       if (mmio->line_size == 0)
+               return 0;
+
+       if ((u32) nfit_blk->cmd_offset % mmio->line_size
+                       + 8 > mmio->line_size) {
+               dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
+               return -ENXIO;
+       } else if ((u32) nfit_blk->stat_offset % mmio->line_size
+                       + 8 > mmio->line_size) {
+               dev_dbg(dev, "stat_offset crosses interleave boundary\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
+               struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       int cmd_rc, rc;
+
+       cmd->address = spa->address;
+       cmd->length = spa->length;
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
+                       sizeof(*cmd), &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
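+
+/*
+ * Note the two error domains in these ars_* helpers: a negative return
+ * from ->ndctl() is a transport/validation failure, while cmd_rc
+ * carries the translated firmware status of the command itself.
+ */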
+
+static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
+{
+       int rc;
+       int cmd_rc;
+       struct nd_cmd_ars_start ars_start;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+       memset(&ars_start, 0, sizeof(ars_start));
+       ars_start.address = spa->address;
+       ars_start.length = spa->length;
+       if (nfit_spa_type(spa) == NFIT_SPA_PM)
+               ars_start.type = ND_ARS_PERSISTENT;
+       else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
+               ars_start.type = ND_ARS_VOLATILE;
+       else
+               return -ENOTTY;
+
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+                       sizeof(ars_start), &cmd_rc);
+
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_continue(struct acpi_nfit_desc *acpi_desc)
+{
+       int rc, cmd_rc;
+       struct nd_cmd_ars_start ars_start;
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+
+       memset(&ars_start, 0, sizeof(ars_start));
+       ars_start.address = ars_status->restart_address;
+       ars_start.length = ars_status->restart_length;
+       ars_start.type = ars_status->type;
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+                       sizeof(ars_start), &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+       int rc, cmd_rc;
+
+       rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
+                       acpi_desc->ars_status_size, &cmd_rc);
+       if (rc < 0)
+               return rc;
+       return cmd_rc;
+}
+
+static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
+               struct nd_cmd_ars_status *ars_status)
+{
+       int rc;
+       u32 i;
+
+       for (i = 0; i < ars_status->num_records; i++) {
+               rc = nvdimm_bus_add_poison(nvdimm_bus,
+                               ars_status->records[i].err_address,
+                               ars_status->records[i].length);
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
+
+static void acpi_nfit_remove_resource(void *data)
+{
+       struct resource *res = data;
+
+       remove_resource(res);
+}
+
+static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
+               struct nd_region_desc *ndr_desc)
+{
+       struct resource *res, *nd_res = ndr_desc->res;
+       int is_pmem, ret;
+
+       /* No operation if the region is already registered as PMEM */
+       is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
+                               IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
+       if (is_pmem == REGION_INTERSECTS)
+               return 0;
+
+       res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
+       if (!res)
+               return -ENOMEM;
+
+       res->name = "Persistent Memory";
+       res->start = nd_res->start;
+       res->end = nd_res->end;
+       res->flags = IORESOURCE_MEM;
+       res->desc = IORES_DESC_PERSISTENT_MEMORY;
+
+       ret = insert_resource(&iomem_resource, res);
+       if (ret)
+               return ret;
+
+       ret = devm_add_action_or_reset(acpi_desc->dev,
+                                       acpi_nfit_remove_resource,
+                                       res);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
+               struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+               struct acpi_nfit_memory_map *memdev,
+               struct nfit_spa *nfit_spa)
+{
+       struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
+                       memdev->device_handle);
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_blk_region_desc *ndbr_desc;
+       struct nfit_mem *nfit_mem;
+       int blk_valid = 0;
+
+       if (!nvdimm) {
+               dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
+                               spa->range_index, memdev->device_handle);
+               return -ENODEV;
+       }
+
+       nd_mapping->nvdimm = nvdimm;
+       switch (nfit_spa_type(spa)) {
+       case NFIT_SPA_PM:
+       case NFIT_SPA_VOLATILE:
+               nd_mapping->start = memdev->address;
+               nd_mapping->size = memdev->region_size;
+               break;
+       case NFIT_SPA_DCR:
+               nfit_mem = nvdimm_provider_data(nvdimm);
+               if (!nfit_mem || !nfit_mem->bdw) {
+                       dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
+                                       spa->range_index, nvdimm_name(nvdimm));
+               } else {
+                       nd_mapping->size = nfit_mem->bdw->capacity;
+                       nd_mapping->start = nfit_mem->bdw->start_address;
+                       ndr_desc->num_lanes = nfit_mem->bdw->windows;
+                       blk_valid = 1;
+               }
+
+               ndr_desc->nd_mapping = nd_mapping;
+               ndr_desc->num_mappings = blk_valid;
+               ndbr_desc = to_blk_region_desc(ndr_desc);
+               ndbr_desc->enable = acpi_nfit_blk_region_enable;
+               ndbr_desc->do_io = acpi_desc->blk_do_io;
+               nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       return -ENOMEM;
+               break;
+       }
+
+       return 0;
+}
+
+static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
+{
+       return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
+               nfit_spa_type(spa) == NFIT_SPA_VCD   ||
+               nfit_spa_type(spa) == NFIT_SPA_PDISK ||
+               nfit_spa_type(spa) == NFIT_SPA_PCD);
+}
+
+static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_blk_region_desc ndbr_desc;
+       struct nd_region_desc *ndr_desc;
+       struct nfit_memdev *nfit_memdev;
+       struct nvdimm_bus *nvdimm_bus;
+       struct resource res;
+       int count = 0, rc;
+
+       if (nfit_spa->nd_region)
+               return 0;
+
+       if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
+               dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
+                               __func__);
+               return 0;
+       }
+
+       memset(&res, 0, sizeof(res));
+       memset(&nd_mappings, 0, sizeof(nd_mappings));
+       memset(&ndbr_desc, 0, sizeof(ndbr_desc));
+       res.start = spa->address;
+       res.end = res.start + spa->length - 1;
+       ndr_desc = &ndbr_desc.ndr_desc;
+       ndr_desc->res = &res;
+       ndr_desc->provider_data = nfit_spa;
+       ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
+       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+               ndr_desc->numa_node = acpi_map_pxm_to_online_node(
+                                               spa->proximity_domain);
+       else
+               ndr_desc->numa_node = NUMA_NO_NODE;
+
+       list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+               struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+               struct nd_mapping *nd_mapping;
+
+               if (memdev->range_index != spa->range_index)
+                       continue;
+               if (count >= ND_MAX_MAPPINGS) {
+                       dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
+                                       spa->range_index, ND_MAX_MAPPINGS);
+                       return -ENXIO;
+               }
+               nd_mapping = &nd_mappings[count++];
+               rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+                               memdev, nfit_spa);
+               if (rc)
+                       goto out;
+       }
+
+       ndr_desc->nd_mapping = nd_mappings;
+       ndr_desc->num_mappings = count;
+       rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
+       if (rc)
+               goto out;
+
+       nvdimm_bus = acpi_desc->nvdimm_bus;
+       if (nfit_spa_type(spa) == NFIT_SPA_PM) {
+               rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
+               if (rc) {
+                       dev_warn(acpi_desc->dev,
+                               "failed to insert pmem resource to iomem: %d\n",
+                               rc);
+                       goto out;
+               }
+
+               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
+               nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       } else if (nfit_spa_is_virtual(spa)) {
+               nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+                               ndr_desc);
+               if (!nfit_spa->nd_region)
+                       rc = -ENOMEM;
+       }
+
+ out:
+       if (rc)
+               dev_err(acpi_desc->dev, "failed to register spa range %d\n",
+                               nfit_spa->spa->range_index);
+       return rc;
+}
+
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
+               u32 max_ars)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nd_cmd_ars_status *ars_status;
+
+       if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
+               memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+               return 0;
+       }
+
+       if (acpi_desc->ars_status)
+               devm_kfree(dev, acpi_desc->ars_status);
+       acpi_desc->ars_status = NULL;
+       ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+       if (!ars_status)
+               return -ENOMEM;
+       acpi_desc->ars_status = ars_status;
+       acpi_desc->ars_status_size = max_ars;
+       return 0;
+}
+
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       int rc;
+
+       if (!nfit_spa->max_ars) {
+               struct nd_cmd_ars_cap ars_cap;
+
+               memset(&ars_cap, 0, sizeof(ars_cap));
+               rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+               if (rc < 0)
+                       return rc;
+               nfit_spa->max_ars = ars_cap.max_ars_out;
+               nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+               /* check that the supported scrub types match the spa type */
+               if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
+                               ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
+                       return -ENOTTY;
+               else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
+                               ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
+                       return -ENOTTY;
+       }
+
+       if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+               return -ENOMEM;
+
+       rc = ars_get_status(acpi_desc);
+       if (rc < 0 && rc != -ENOSPC)
+               return rc;
+
+       if (ars_status_process_records(acpi_desc->nvdimm_bus,
+                               acpi_desc->ars_status))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       unsigned int overflow_retry = scrub_overflow_abort;
+       u64 init_ars_start = 0, init_ars_len = 0;
+       struct device *dev = acpi_desc->dev;
+       unsigned int tmo = scrub_timeout;
+       int rc;
+
+       if (!nfit_spa->ars_required || !nfit_spa->nd_region)
+               return;
+
+       rc = ars_start(acpi_desc, nfit_spa);
+       /*
+        * If we timed out the initial scan, we'll still be busy here
+        * and will wait another timeout before giving up permanently.
+        */
+       if (rc < 0 && rc != -EBUSY)
+               return;
+
+       do {
+               u64 ars_start, ars_len;
+
+               if (acpi_desc->cancel)
+                       break;
+               rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+               if (rc == -ENOTTY)
+                       break;
+               if (rc == -EBUSY && !tmo) {
+                       dev_warn(dev, "range %d ars timeout, aborting\n",
+                                       spa->range_index);
+                       break;
+               }
+
+               if (rc == -EBUSY) {
+                       /*
+                        * Note, entries may be appended to the list
+                        * while the lock is dropped, but the workqueue
+                        * being active prevents entries being deleted /
+                        * freed.
+                        */
+                       mutex_unlock(&acpi_desc->init_mutex);
+                       ssleep(1);
+                       tmo--;
+                       mutex_lock(&acpi_desc->init_mutex);
+                       continue;
+               }
+
+               /* we got some results, but there are more pending... */
+               if (rc == -ENOSPC && overflow_retry--) {
+                       if (!init_ars_len) {
+                               init_ars_len = acpi_desc->ars_status->length;
+                               init_ars_start = acpi_desc->ars_status->address;
+                       }
+                       rc = ars_continue(acpi_desc);
+               }
+
+               if (rc < 0) {
+                       dev_warn(dev, "range %d ars continuation failed\n",
+                                       spa->range_index);
+                       break;
+               }
+
+               if (init_ars_len) {
+                       ars_start = init_ars_start;
+                       ars_len = init_ars_len;
+               } else {
+                       ars_start = acpi_desc->ars_status->address;
+                       ars_len = acpi_desc->ars_status->length;
+               }
+               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
+                               spa->range_index, ars_start, ars_len);
+               /* notify the region about new poison entries */
+               nvdimm_region_notify(nfit_spa->nd_region,
+                               NVDIMM_REVALIDATE_POISON);
+               break;
+       } while (1);
+}
+
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+       struct device *dev;
+       u64 init_scrub_length = 0;
+       struct nfit_spa *nfit_spa;
+       u64 init_scrub_address = 0;
+       bool init_ars_done = false;
+       struct acpi_nfit_desc *acpi_desc;
+       unsigned int tmo = scrub_timeout;
+       unsigned int overflow_retry = scrub_overflow_abort;
+
+       acpi_desc = container_of(work, typeof(*acpi_desc), work);
+       dev = acpi_desc->dev;
+
+       /*
+        * We scrub in 2 phases.  The first phase waits for any platform
+        * firmware initiated scrubs to complete, then searches for the
+        * affected spa regions and marks them scanned.  In the second phase we
+        * initiate a directed scrub for every range that was not scrubbed in
+        * phase 1. If we're called for a 'rescan', we harmlessly pass through
+        * the first phase, but really only care about running phase 2, where
+        * regions can be notified of new poison.
+        */
+
+       /* process platform firmware initiated scrubs */
+ retry:
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               struct nd_cmd_ars_status *ars_status;
+               struct acpi_nfit_system_address *spa;
+               u64 ars_start, ars_len;
+               int rc;
+
+               if (acpi_desc->cancel)
+                       break;
+
+               if (nfit_spa->nd_region)
+                       continue;
+
+               if (init_ars_done) {
+                       /*
+                        * No need to re-query, we're now just
+                        * reconciling all the ranges covered by the
+                        * initial scrub
+                        */
+                       rc = 0;
+               } else
+                       rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+
+               if (rc == -ENOTTY) {
+                       /* no ars capability, just register spa and move on */
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+                       continue;
+               }
+
+               if (rc == -EBUSY && !tmo) {
+                       /* fallthrough to directed scrub in phase 2 */
+                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
+                       break;
+               } else if (rc == -EBUSY) {
+                       mutex_unlock(&acpi_desc->init_mutex);
+                       ssleep(1);
+                       tmo--;
+                       goto retry;
+               }
+
+               /* we got some results, but there are more pending... */
+               if (rc == -ENOSPC && overflow_retry--) {
+                       ars_status = acpi_desc->ars_status;
+                       /*
+                        * Record the original scrub range, so that we
+                        * can recall all the ranges impacted by the
+                        * initial scrub.
+                        */
+                       if (!init_scrub_length) {
+                               init_scrub_length = ars_status->length;
+                               init_scrub_address = ars_status->address;
+                       }
+                       rc = ars_continue(acpi_desc);
+                       if (rc == 0) {
+                               mutex_unlock(&acpi_desc->init_mutex);
+                               goto retry;
+                       }
+               }
+
+               if (rc < 0) {
+                       /*
+                        * Initial scrub failed, we'll give it one more
+                        * try below...
+                        */
+                       break;
+               }
+
+               /* We got some final results, record completed ranges */
+               ars_status = acpi_desc->ars_status;
+               if (init_scrub_length) {
+                       ars_start = init_scrub_address;
+                       ars_len = init_scrub_length;
+               } else {
+                       ars_start = ars_status->address;
+                       ars_len = ars_status->length;
+               }
+               spa = nfit_spa->spa;
+
+               if (!init_ars_done) {
+                       init_ars_done = true;
+                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
+                                       ars_start, ars_len);
+               }
+               if (ars_start <= spa->address && ars_start + ars_len
+                               >= spa->address + spa->length)
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+       }
+
+       /*
+        * For all the ranges not covered by an initial scrub we still
+        * want to see if there are errors, but it's ok to discover them
+        * asynchronously.
+        */
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               /*
+                * Flag all the ranges that still need scrubbing, but
+                * register them now to make data available.
+                */
+               if (!nfit_spa->nd_region) {
+                       nfit_spa->ars_required = 1;
+                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               }
+       }
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+       acpi_desc->scrub_count++;
+       if (acpi_desc->scrub_count_state)
+               sysfs_notify_dirent(acpi_desc->scrub_count_state);
+       mutex_unlock(&acpi_desc->init_mutex);
+}
+
+static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+{
+       struct nfit_spa *nfit_spa;
+       int rc;
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
+                       /* BLK regions don't need to wait for ars results */
+                       rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+                       if (rc)
+                               return rc;
+               }
+
+       queue_work(nfit_wq, &acpi_desc->work);
+       return 0;
+}
+
+static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_table_prev *prev)
+{
+       struct device *dev = acpi_desc->dev;
+
+       if (!list_empty(&prev->spas) ||
+                       !list_empty(&prev->memdevs) ||
+                       !list_empty(&prev->dcrs) ||
+                       !list_empty(&prev->bdws) ||
+                       !list_empty(&prev->idts) ||
+                       !list_empty(&prev->flushes)) {
+               dev_err(dev, "new nfit deletes entries (unsupported)\n");
+               return -ENXIO;
+       }
+       return 0;
+}
+
+static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
+{
+       struct device *dev = acpi_desc->dev;
+       struct kernfs_node *nfit;
+       struct device *bus_dev;
+
+       if (!ars_supported(acpi_desc->nvdimm_bus))
+               return 0;
+
+       bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+       nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
+       if (!nfit) {
+               dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
+               return -ENODEV;
+       }
+       acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
+       sysfs_put(nfit);
+       if (!acpi_desc->scrub_count_state) {
+               dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static void acpi_nfit_destruct(void *data)
+{
+       struct acpi_nfit_desc *acpi_desc = data;
+       struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+
+       /*
+        * Destruct under acpi_desc_lock so that nfit_handle_mce does not
+        * race teardown
+        */
+       mutex_lock(&acpi_desc_lock);
+       acpi_desc->cancel = 1;
+       /*
+        * Bounce the nvdimm bus lock to make sure any in-flight
+        * acpi_nfit_ars_rescan() submissions have had a chance to
+        * either submit or see ->cancel set.
+        */
+       device_lock(bus_dev);
+       device_unlock(bus_dev);
+
+       flush_workqueue(nfit_wq);
+       if (acpi_desc->scrub_count_state)
+               sysfs_put(acpi_desc->scrub_count_state);
+       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       acpi_desc->nvdimm_bus = NULL;
+       list_del(&acpi_desc->list);
+       mutex_unlock(&acpi_desc_lock);
+}
+
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_table_prev prev;
+       const void *end;
+       int rc;
+
+       if (!acpi_desc->nvdimm_bus) {
+               acpi_nfit_init_dsms(acpi_desc);
+
+               acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
+                               &acpi_desc->nd_desc);
+               if (!acpi_desc->nvdimm_bus)
+                       return -ENOMEM;
+
+               rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
+                               acpi_desc);
+               if (rc)
+                       return rc;
+
+               rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
+               if (rc)
+                       return rc;
+
+               /* register this acpi_desc for mce notifications */
+               mutex_lock(&acpi_desc_lock);
+               list_add_tail(&acpi_desc->list, &acpi_descs);
+               mutex_unlock(&acpi_desc_lock);
+       }
+
+       mutex_lock(&acpi_desc->init_mutex);
+
+       INIT_LIST_HEAD(&prev.spas);
+       INIT_LIST_HEAD(&prev.memdevs);
+       INIT_LIST_HEAD(&prev.dcrs);
+       INIT_LIST_HEAD(&prev.bdws);
+       INIT_LIST_HEAD(&prev.idts);
+       INIT_LIST_HEAD(&prev.flushes);
+
+       list_cut_position(&prev.spas, &acpi_desc->spas,
+                               acpi_desc->spas.prev);
+       list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
+                               acpi_desc->memdevs.prev);
+       list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
+                               acpi_desc->dcrs.prev);
+       list_cut_position(&prev.bdws, &acpi_desc->bdws,
+                               acpi_desc->bdws.prev);
+       list_cut_position(&prev.idts, &acpi_desc->idts,
+                               acpi_desc->idts.prev);
+       list_cut_position(&prev.flushes, &acpi_desc->flushes,
+                               acpi_desc->flushes.prev);
+
+       end = data + sz;
+       while (!IS_ERR_OR_NULL(data))
+               data = add_table(acpi_desc, &prev, data, end);
+
+       if (IS_ERR(data)) {
+               dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
+                               PTR_ERR(data));
+               rc = PTR_ERR(data);
+               goto out_unlock;
+       }
+
+       rc = acpi_nfit_check_deletions(acpi_desc, &prev);
+       if (rc)
+               goto out_unlock;
+
+       rc = nfit_mem_init(acpi_desc);
+       if (rc)
+               goto out_unlock;
+
+       rc = acpi_nfit_register_dimms(acpi_desc);
+       if (rc)
+               goto out_unlock;
+
+       rc = acpi_nfit_register_regions(acpi_desc);
+
+ out_unlock:
+       mutex_unlock(&acpi_desc->init_mutex);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_init);
+
+struct acpi_nfit_flush_work {
+       struct work_struct work;
+       struct completion cmp;
+};
+
+static void flush_probe(struct work_struct *work)
+{
+       struct acpi_nfit_flush_work *flush;
+
+       flush = container_of(work, typeof(*flush), work);
+       complete(&flush->cmp);
+}
+
+static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+       struct device *dev = acpi_desc->dev;
+       struct acpi_nfit_flush_work flush;
+
+       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+       device_lock(dev);
+       device_unlock(dev);
+
+       /*
+        * Scrub work could take tens of seconds, and userspace may give up,
+        * so we need to be interruptible while waiting.
+        */
+       INIT_WORK_ONSTACK(&flush.work, flush_probe);
+       init_completion(&flush.cmp);
+       queue_work(nfit_wq, &flush.work);
+       return wait_for_completion_interruptible(&flush.cmp);
+}
+
+static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+               struct nvdimm *nvdimm, unsigned int cmd)
+{
+       struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+
+       if (nvdimm)
+               return 0;
+       if (cmd != ND_CMD_ARS_START)
+               return 0;
+
+       /*
+        * The kernel and userspace may race to initiate a scrub, but
+        * the scrub thread is prepared to lose that initial race.  It
+        * just needs a guarantee that any ars it initiates is not
+        * interrupted by any intervening start requests from userspace.
+        */
+       if (work_busy(&acpi_desc->work))
+               return -EBUSY;
+
+       return 0;
+}
+
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
+{
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
+
+       if (work_busy(&acpi_desc->work))
+               return -EBUSY;
+
+       if (acpi_desc->cancel)
+               return 0;
+
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+               if (nfit_spa_type(spa) != NFIT_SPA_PM)
+                       continue;
+
+               nfit_spa->ars_required = 1;
+       }
+       queue_work(nfit_wq, &acpi_desc->work);
+       dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
+       mutex_unlock(&acpi_desc->init_mutex);
+
+       return 0;
+}
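+
+/*
+ * An on-demand rescan (wired up to the bus-level nfit/scrub attribute)
+ * only flags the PM ranges as ars_required and re-queues the scrub
+ * work, so it follows the same phase-2 path as the initial directed
+ * scrub.
+ */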
+
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+
+       dev_set_drvdata(dev, acpi_desc);
+       acpi_desc->dev = dev;
+       acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
+       nd_desc = &acpi_desc->nd_desc;
+       nd_desc->provider_name = "ACPI.NFIT";
+       nd_desc->module = THIS_MODULE;
+       nd_desc->ndctl = acpi_nfit_ctl;
+       nd_desc->flush_probe = acpi_nfit_flush_probe;
+       nd_desc->clear_to_send = acpi_nfit_clear_to_send;
+       nd_desc->attr_groups = acpi_nfit_attribute_groups;
+
+       INIT_LIST_HEAD(&acpi_desc->spas);
+       INIT_LIST_HEAD(&acpi_desc->dcrs);
+       INIT_LIST_HEAD(&acpi_desc->bdws);
+       INIT_LIST_HEAD(&acpi_desc->idts);
+       INIT_LIST_HEAD(&acpi_desc->flushes);
+       INIT_LIST_HEAD(&acpi_desc->memdevs);
+       INIT_LIST_HEAD(&acpi_desc->dimms);
+       INIT_LIST_HEAD(&acpi_desc->list);
+       mutex_init(&acpi_desc->init_mutex);
+       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
+
+static int acpi_nfit_add(struct acpi_device *adev)
+{
+       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct acpi_nfit_desc *acpi_desc;
+       struct device *dev = &adev->dev;
+       struct acpi_table_header *tbl;
+       acpi_status status = AE_OK;
+       acpi_size sz;
+       int rc = 0;
+
+       status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
+       if (ACPI_FAILURE(status)) {
+               /* This is ok; an NVDIMM may be hotplugged later */
+               dev_dbg(dev, "failed to find NFIT at startup\n");
+               return 0;
+       }
+
+       acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+       if (!acpi_desc)
+               return -ENOMEM;
+       acpi_nfit_desc_init(acpi_desc, &adev->dev);
+
+       /* Save the acpi header for exporting the revision via sysfs */
+       acpi_desc->acpi_header = *tbl;
+
+       /* Evaluate _FIT and override with that if present */
+       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+       if (ACPI_SUCCESS(status) && buf.length > 0) {
+               union acpi_object *obj = buf.pointer;
+
+               if (obj->type == ACPI_TYPE_BUFFER)
+                       rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+                                       obj->buffer.length);
+               else
+                       dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
+                                __func__, (int) obj->type);
+               kfree(buf.pointer);
+       } else
+               /* skip over the lead-in header table */
+               rc = acpi_nfit_init(acpi_desc, (void *) tbl
+                               + sizeof(struct acpi_table_nfit),
+                               sz - sizeof(struct acpi_table_nfit));
+       return rc;
+}
+
+static int acpi_nfit_remove(struct acpi_device *adev)
+{
+       /* see acpi_nfit_destruct */
+       return 0;
+}
+
+static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+{
+       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct device *dev = &adev->dev;
+       union acpi_object *obj;
+       acpi_status status;
+       int ret;
+
+       dev_dbg(dev, "%s: event: %d\n", __func__, event);
+
+       device_lock(dev);
+       if (!dev->driver) {
+               /* dev->driver may be null if we're being removed */
+               dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+               goto out_unlock;
+       }
+
+       if (!acpi_desc) {
+               acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+               if (!acpi_desc)
+                       goto out_unlock;
+               acpi_nfit_desc_init(acpi_desc, &adev->dev);
+       } else {
+               /*
+                * Finish previous registration before considering new
+                * regions.
+                */
+               flush_workqueue(nfit_wq);
+       }
+
+       /* Evaluate _FIT */
+       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+       if (ACPI_FAILURE(status)) {
+               dev_err(dev, "failed to evaluate _FIT\n");
+               goto out_unlock;
+       }
+
+       obj = buf.pointer;
+       if (obj->type == ACPI_TYPE_BUFFER) {
+               ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+                               obj->buffer.length);
+               if (ret)
+                       dev_err(dev, "failed to merge updated NFIT\n");
+       } else
+               dev_err(dev, "Invalid _FIT\n");
+       kfree(buf.pointer);
+
+ out_unlock:
+       device_unlock(dev);
+}
+
+static const struct acpi_device_id acpi_nfit_ids[] = {
+       { "ACPI0012", 0 },
+       { "", 0 },
+};
+MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
+
+static struct acpi_driver acpi_nfit_driver = {
+       .name = KBUILD_MODNAME,
+       .ids = acpi_nfit_ids,
+       .ops = {
+               .add = acpi_nfit_add,
+               .remove = acpi_nfit_remove,
+               .notify = acpi_nfit_notify,
+       },
+};
+
+static __init int nfit_init(void)
+{
+       BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
+       BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
+
+       acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
+       acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
+       acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
+       acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
+       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
+       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
+       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
+       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
+       acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+       acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+
+       nfit_wq = create_singlethread_workqueue("nfit");
+       if (!nfit_wq)
+               return -ENOMEM;
+
+       nfit_mce_register();
+
+       return acpi_bus_register_driver(&acpi_nfit_driver);
+}
+
+static __exit void nfit_exit(void)
+{
+       nfit_mce_unregister();
+       acpi_bus_unregister_driver(&acpi_nfit_driver);
+       destroy_workqueue(nfit_wq);
+       WARN_ON(!list_empty(&acpi_descs));
+}
+
+module_init(nfit_init);
+module_exit(nfit_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
new file mode 100644 (file)
index 0000000..4c745bf
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * NFIT - Machine Check Handler
+ *
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/notifier.h>
+#include <linux/acpi.h>
+#include <asm/mce.h>
+#include "nfit.h"
+
+static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
+                       void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct acpi_nfit_desc *acpi_desc;
+       struct nfit_spa *nfit_spa;
+
+       /* We only care about memory errors */
+       if (!(mce->status & MCACOD))
+               return NOTIFY_DONE;
+
+       /*
+        * mce->addr contains the physical address whose access caused
+        * the machine check. Walk the list of NFITs and start a scrub
+        * only if one of them covers that address.
+        */
+       mutex_lock(&acpi_desc_lock);
+       list_for_each_entry(acpi_desc, &acpi_descs, list) {
+               struct device *dev = acpi_desc->dev;
+               int found_match = 0;
+
+               mutex_lock(&acpi_desc->init_mutex);
+               list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+                       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+                       if (nfit_spa_type(spa) != NFIT_SPA_PM)
+                               continue;
+                       /* find the spa that covers the mce addr */
+                       if (spa->address > mce->addr)
+                               continue;
+                       if ((spa->address + spa->length - 1) < mce->addr)
+                               continue;
+                       found_match = 1;
+                       dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
+                               __func__, spa->range_index, spa->address,
+                               spa->length);
+                       /*
+                        * We can break at the first match because we're going
+                        * to rescan all the SPA ranges. There shouldn't be any
+                        * aliasing anyway.
+                        */
+                       break;
+               }
+               mutex_unlock(&acpi_desc->init_mutex);
+
+               /*
+                * We can ignore an -EBUSY here; if an ARS is already in
+                * progress, just let that one be the authoritative scrub.
+                */
+               if (found_match)
+                       acpi_nfit_ars_rescan(acpi_desc);
+       }
+
+       mutex_unlock(&acpi_desc_lock);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block nfit_mce_dec = {
+       .notifier_call  = nfit_handle_mce,
+};
+
+void nfit_mce_register(void)
+{
+       mce_register_decode_chain(&nfit_mce_dec);
+}
+
+void nfit_mce_unregister(void)
+{
+       mce_unregister_decode_chain(&nfit_mce_dec);
+}
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
new file mode 100644 (file)
index 0000000..e894ded
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * NVDIMM Firmware Interface Table - NFIT
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_H__
+#define __NFIT_H__
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/acpi.h>
+#include <acpi/acuuid.h>
+
+/* ACPI 6.1 */
+#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
+
+/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
+#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
+
+/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
+#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
+#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
+
+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
+#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
+               | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
+               | ACPI_NFIT_MEM_NOT_ARMED)
+
+enum nfit_uuids {
+       /* for simplicity alias the uuid index with the family id */
+       NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
+       NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
+       NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+       NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
+       NFIT_SPA_VOLATILE,
+       NFIT_SPA_PM,
+       NFIT_SPA_DCR,
+       NFIT_SPA_BDW,
+       NFIT_SPA_VDISK,
+       NFIT_SPA_VCD,
+       NFIT_SPA_PDISK,
+       NFIT_SPA_PCD,
+       NFIT_DEV_BUS,
+       NFIT_UUID_MAX,
+};
+
+/*
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
+ */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
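+
+/*
+ * Decode sketch (illustrative only): per the encoding above,
+ * NFIT_FIC_BYTE (0x101) splits back out as:
+ *
+ *	interface = le16_to_cpu(fic) & 0xff;	-> 0x01
+ *	function  = le16_to_cpu(fic) >> 8;	-> 0x01
+ */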
+
+enum {
+       NFIT_BLK_READ_FLUSH = 1,
+       NFIT_BLK_DCR_LATCH = 2,
+       NFIT_ARS_STATUS_DONE = 0,
+       NFIT_ARS_STATUS_BUSY = 1 << 16,
+       NFIT_ARS_STATUS_NONE = 2 << 16,
+       NFIT_ARS_STATUS_INTR = 3 << 16,
+       NFIT_ARS_START_BUSY = 6,
+       NFIT_ARS_CAP_NONE = 1,
+       NFIT_ARS_F_OVERFLOW = 1,
+       NFIT_ARS_TIMEOUT = 90,
+};
+
+struct nfit_spa {
+       struct list_head list;
+       struct nd_region *nd_region;
+       unsigned int ars_required:1;
+       u32 clear_err_unit;
+       u32 max_ars;
+       struct acpi_nfit_system_address spa[0];
+};
+
+struct nfit_dcr {
+       struct list_head list;
+       struct acpi_nfit_control_region dcr[0];
+};
+
+struct nfit_bdw {
+       struct list_head list;
+       struct acpi_nfit_data_region bdw[0];
+};
+
+struct nfit_idt {
+       struct list_head list;
+       struct acpi_nfit_interleave idt[0];
+};
+
+struct nfit_flush {
+       struct list_head list;
+       struct acpi_nfit_flush_address flush[0];
+};
+
+struct nfit_memdev {
+       struct list_head list;
+       struct acpi_nfit_memory_map memdev[0];
+};
+
+/* assembled tables for a given dimm/memory-device */
+struct nfit_mem {
+       struct nvdimm *nvdimm;
+       struct acpi_nfit_memory_map *memdev_dcr;
+       struct acpi_nfit_memory_map *memdev_pmem;
+       struct acpi_nfit_memory_map *memdev_bdw;
+       struct acpi_nfit_control_region *dcr;
+       struct acpi_nfit_data_region *bdw;
+       struct acpi_nfit_system_address *spa_dcr;
+       struct acpi_nfit_system_address *spa_bdw;
+       struct acpi_nfit_interleave *idt_dcr;
+       struct acpi_nfit_interleave *idt_bdw;
+       struct nfit_flush *nfit_flush;
+       struct list_head list;
+       struct acpi_device *adev;
+       struct acpi_nfit_desc *acpi_desc;
+       struct resource *flush_wpq;
+       unsigned long dsm_mask;
+       int family;
+};
+
+struct acpi_nfit_desc {
+       struct nvdimm_bus_descriptor nd_desc;
+       struct acpi_table_header acpi_header;
+       struct mutex init_mutex;
+       struct list_head memdevs;
+       struct list_head flushes;
+       struct list_head dimms;
+       struct list_head spas;
+       struct list_head dcrs;
+       struct list_head bdws;
+       struct list_head idts;
+       struct nvdimm_bus *nvdimm_bus;
+       struct device *dev;
+       struct nd_cmd_ars_status *ars_status;
+       size_t ars_status_size;
+       struct work_struct work;
+       struct list_head list;
+       struct kernfs_node *scrub_count_state;
+       unsigned int scrub_count;
+       unsigned int cancel:1;
+       unsigned long dimm_cmd_force_en;
+       unsigned long bus_cmd_force_en;
+       int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+                       void *iobuf, u64 len, int rw);
+};
+
+enum nd_blk_mmio_selector {
+       BDW,
+       DCR,
+};
+
+struct nd_blk_addr {
+       union {
+               void __iomem *base;
+               void *aperture;
+       };
+};
+
+struct nfit_blk {
+       struct nfit_blk_mmio {
+               struct nd_blk_addr addr;
+               u64 size;
+               u64 base_offset;
+               u32 line_size;
+               u32 num_lines;
+               u32 table_size;
+               struct acpi_nfit_interleave *idt;
+               struct acpi_nfit_system_address *spa;
+       } mmio[2];
+       struct nd_region *nd_region;
+       u64 bdw_offset; /* post interleave offset */
+       u64 stat_offset;
+       u64 cmd_offset;
+       u32 dimm_flags;
+};
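+
+/*
+ * Access sketch (illustrative): the two apertures in mmio[] are
+ * indexed by enum nd_blk_mmio_selector, e.g.:
+ *
+ *	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ */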
+
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
+
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
+{
+}
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);
+
+static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
+               struct nfit_mem *nfit_mem)
+{
+       if (nfit_mem->memdev_dcr)
+               return nfit_mem->memdev_dcr;
+       return nfit_mem->memdev_pmem;
+}
+
+static inline struct acpi_nfit_desc *to_acpi_desc(
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+const u8 *to_nfit_uuid(enum nfit_uuids id);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
+#endif /* __NFIT_H__ */
index ba5145d384d8013df86b1f36047303df26708518..3022dad240719138d0a0aca0f7b6a7d2486f862a 100644 (file)
@@ -379,7 +379,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-                       void __pmem **kaddr, pfn_t *pfn, long size)
+                       void **kaddr, pfn_t *pfn, long size)
 {
        struct brd_device *brd = bdev->bd_disk->private_data;
        struct page *page;
@@ -389,7 +389,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
        page = brd_insert_page(brd, sector);
        if (!page)
                return -ENOSPC;
-       *kaddr = (void __pmem *)page_address(page);
+       *kaddr = page_address(page);
        *pfn = page_to_pfn_t(page);
 
        return PAGE_SIZE;
index b891a129b275d56985a436bb30bf5486a5b81ec7..803f3953b341a42aa47adcc4f8405f5dfa501a06 100644 (file)
@@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
        }
        dax_dev->dev = dev;
 
-       rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
-       if (rc) {
-               unregister_dax_dev(dev);
+       rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+       if (rc)
                return rc;
-       }
 
        return 0;
 
index 55d510e36cd1bd2d9236dd6f00d34db640dbb171..dfb168568af1a6d2ba163a7010d611ec0ad3a3f0 100644 (file)
@@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
-       rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
-       if (rc) {
-               dax_pmem_percpu_exit(&dax_pmem->ref);
+       rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+                                                       &dax_pmem->ref);
+       if (rc)
                return rc;
-       }
 
        addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
 
-       rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
-       if (rc) {
-               dax_pmem_percpu_kill(&dax_pmem->ref);
+       rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+                                                       &dax_pmem->ref);
+       if (rc)
                return rc;
-       }
 
        nd_region = to_nd_region(dev->parent);
        dax_region = alloc_dax_region(dev, nd_region->id, &res,
index 6d35dd4e9efbe45384dca916bf447396a30fe02e..4788b0b989a9bac661f07a8deb2c7a86a96c8677 100644 (file)
@@ -142,7 +142,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 }
 
 static long linear_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct linear_c *lc = ti->private;
        struct block_device *bdev = lc->dev->bdev;
index 731e1f5bd89574deb711419ebe8eaf7a544e605d..ce2a910709f722ce065e365fd53a9ee326b6477e 100644 (file)
@@ -2303,7 +2303,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long origin_direct_access(struct dm_target *ti, sector_t sector,
-               void __pmem **kaddr, pfn_t *pfn, long size)
+               void **kaddr, pfn_t *pfn, long size)
 {
        DMWARN("device does not support dax.");
        return -EIO;
index 01bb9cf2a8c2318e1b5cf704728637acf95665c9..83f1d46671953323bd7390d57a6eef4099b61d5d 100644 (file)
@@ -309,7 +309,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long stripe_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct stripe_c *sc = ti->private;
        uint32_t stripe;
index 6eecd6b36f768fb0ea4e056b5742afce02089e28..710ae28fd618256ea0b1da6fc34c40ae6e473066 100644 (file)
@@ -149,7 +149,7 @@ static void io_err_release_clone_rq(struct request *clone)
 }
 
 static long io_err_direct_access(struct dm_target *ti, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        return -EIO;
 }
index ceb69fc0b10b32773bd41d3c0b8beadf442571f7..25d1d97154a8b68c0f847acdfdc74cca4aa711f1 100644 (file)
@@ -906,7 +906,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-                                void __pmem **kaddr, pfn_t *pfn, long size)
+                                void **kaddr, pfn_t *pfn, long size)
 {
        struct mapped_device *md = bdev->bd_disk->private_data;
        struct dm_table *map;
index 7c8a3bf078846ac0bb1410f93e5a91ff3ca4c985..124c2432ac9cb3d6e0a696023507f5131774c282 100644 (file)
@@ -1,6 +1,7 @@
 menuconfig LIBNVDIMM
        tristate "NVDIMM (Non-Volatile Memory Device) Support"
        depends on PHYS_ADDR_T_64BIT
+       depends on HAS_IOMEM
        depends on BLK_DEV
        help
          Generic support for non-volatile memory devices including
@@ -19,7 +20,6 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
        tristate "PMEM: Persistent memory block device support"
        default LIBNVDIMM
-       depends on HAS_IOMEM
        select ND_BTT if BTT
        select ND_PFN if NVDIMM_PFN
        help
index 7e262ef06ede793ebb378770977fd12fefaf79a6..9faaa9694d8741adb64b5aae8754f44d24a01e13 100644 (file)
@@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        q = blk_alloc_queue(GFP_KERNEL);
        if (!q)
                return -ENOMEM;
-       if (devm_add_action(dev, nd_blk_release_queue, q)) {
-               blk_cleanup_queue(q);
+       if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
                return -ENOMEM;
-       }
 
        blk_queue_make_request(q, nd_blk_make_request);
        blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        disk = alloc_disk(0);
        if (!disk)
                return -ENOMEM;
-       if (devm_add_action(dev, nd_blk_release_disk, disk)) {
-               put_disk(disk);
-               return -ENOMEM;
-       }
 
        disk->first_minor       = 0;
        disk->fops              = &nd_blk_fops;
@@ -295,6 +289,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        set_capacity(disk, 0);
        device_add_disk(dev, disk);
 
+       if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+               return -ENOMEM;
+
        if (nsblk_meta_size(nsblk)) {
                int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
 
index 816d0dae63983c8ccf2c4f288e2d841dc74d843d..3fa7919f94a8785860afd3487d803f5b3010acd9 100644 (file)
@@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 {
        struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
 
-       if (dev)
-               __nd_device_register(dev);
+       __nd_device_register(dev);
        return dev;
 }
 
index 5e4e5c772ea54ff9f5696a1f7d2816d0ebff4d5c..458daf9273362a19cc26d6a4f2c9113764f493b8 100644 (file)
@@ -31,6 +31,7 @@
 int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
+static DEFINE_IDA(nd_ida);
 
 static int to_nd_device_type(struct device *dev)
 {
@@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
                        to_nd_device_type(dev));
 }
 
-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
-       struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
-       return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
 static struct module *to_bus_provider(struct device *dev)
 {
        /* pin bus providers while regions are enabled */
        if (is_nd_pmem(dev) || is_nd_blk(dev)) {
                struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 
-               return nvdimm_bus->module;
+               return nvdimm_bus->nd_desc->module;
        }
        return NULL;
 }
@@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
        return rc;
 }
 
+static void nvdimm_bus_shutdown(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nd_device_driver *nd_drv = NULL;
+
+       if (dev->driver)
+               nd_drv = to_nd_device_driver(dev->driver);
+
+       if (nd_drv && nd_drv->shutdown) {
+               nd_drv->shutdown(dev);
+               dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+                               dev->driver->name, dev_name(dev));
+       }
+}
+
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
        device_lock(dev);
@@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
 static struct bus_type nvdimm_bus_type = {
        .name = "nd",
        .uevent = nvdimm_bus_uevent,
        .match = nvdimm_bus_match,
        .probe = nvdimm_bus_probe,
        .remove = nvdimm_bus_remove,
+       .shutdown = nvdimm_bus_shutdown,
+};
+
+static void nvdimm_bus_release(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus;
+
+       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+       ida_simple_remove(&nd_ida, nvdimm_bus->id);
+       kfree(nvdimm_bus);
+}
+
+static bool is_nvdimm_bus(struct device *dev)
+{
+       return dev->release == nvdimm_bus_release;
+}
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+       struct device *dev;
+
+       for (dev = nd_dev; dev; dev = dev->parent)
+               if (is_nvdimm_bus(dev))
+                       break;
+       dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+       if (dev)
+               return to_nvdimm_bus(dev);
+       return NULL;
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus;
+
+       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+       WARN_ON(!is_nvdimm_bus(dev));
+       return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+               struct nvdimm_bus_descriptor *nd_desc)
+{
+       struct nvdimm_bus *nvdimm_bus;
+       int rc;
+
+       nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+       if (!nvdimm_bus)
+               return NULL;
+       INIT_LIST_HEAD(&nvdimm_bus->list);
+       INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
+       INIT_LIST_HEAD(&nvdimm_bus->poison_list);
+       init_waitqueue_head(&nvdimm_bus->probe_wait);
+       nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+       mutex_init(&nvdimm_bus->reconfig_mutex);
+       if (nvdimm_bus->id < 0) {
+               kfree(nvdimm_bus);
+               return NULL;
+       }
+       nvdimm_bus->nd_desc = nd_desc;
+       nvdimm_bus->dev.parent = parent;
+       nvdimm_bus->dev.release = nvdimm_bus_release;
+       nvdimm_bus->dev.groups = nd_desc->attr_groups;
+       nvdimm_bus->dev.bus = &nvdimm_bus_type;
+       dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+       rc = device_register(&nvdimm_bus->dev);
+       if (rc) {
+               dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+               goto err;
+       }
+
+       return nvdimm_bus;
+ err:
+       put_device(&nvdimm_bus->dev);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+       if (!nvdimm_bus)
+               return;
+       device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+static int child_unregister(struct device *dev, void *data)
+{
+       /*
+        * the singular ndctl class device per bus needs to be
+        * "device_destroy"ed, so skip it here
+        *
+        * i.e. remove classless children
+        */
+       if (dev->class)
+               /* pass */;
+       else
+               nd_device_unregister(dev, ND_SYNC);
+       return 0;
+}
+
+static void free_poison_list(struct list_head *poison_list)
+{
+       struct nd_poison *pl, *next;
+
+       list_for_each_entry_safe(pl, next, poison_list, list) {
+               list_del(&pl->list);
+               kfree(pl);
+       }
+       list_del_init(poison_list);
+}
+
+static int nd_bus_remove(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+       mutex_lock(&nvdimm_bus_list_mutex);
+       list_del_init(&nvdimm_bus->list);
+       mutex_unlock(&nvdimm_bus_list_mutex);
+
+       nd_synchronize();
+       device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+       nvdimm_bus_lock(&nvdimm_bus->dev);
+       free_poison_list(&nvdimm_bus->poison_list);
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+       nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+       return 0;
+}
+
+static int nd_bus_probe(struct device *dev)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+       int rc;
+
+       rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+       if (rc)
+               return rc;
+
+       mutex_lock(&nvdimm_bus_list_mutex);
+       list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+       mutex_unlock(&nvdimm_bus_list_mutex);
+
+       /* enable bus provider attributes to look up their local context */
+       dev_set_drvdata(dev, nvdimm_bus->nd_desc);
+
+       return 0;
+}
+
+static struct nd_device_driver nd_bus_driver = {
+       .probe = nd_bus_probe,
+       .remove = nd_bus_remove,
+       .drv = {
+               .name = "nd_bus",
+               .suppress_bind_attrs = true,
+               .bus = &nvdimm_bus_type,
+               .owner = THIS_MODULE,
+               .mod_name = KBUILD_MODNAME,
+       },
 };
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+       struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+       if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+               return true;
+
+       return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
 
 void nd_synchronize(void)
@@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
        dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
                        "ndctl%d", nvdimm_bus->id);
 
-       if (IS_ERR(dev)) {
+       if (IS_ERR(dev))
                dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
                                nvdimm_bus->id, PTR_ERR(dev));
-               return PTR_ERR(dev);
-       }
-       return 0;
+       return PTR_ERR_OR_ZERO(dev);
 }
 
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
                goto err_class;
        }
 
+       rc = driver_register(&nd_bus_driver.drv);
+       if (rc)
+               goto err_nd_bus;
+
        return 0;
 
+ err_nd_bus:
+       class_destroy(nd_class);
  err_class:
        unregister_chrdev(nvdimm_major, "dimmctl");
  err_dimm_chrdev:
@@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)
 
 void nvdimm_bus_exit(void)
 {
+       driver_unregister(&nd_bus_driver.drv);
        class_destroy(nd_class);
        unregister_chrdev(nvdimm_bus_major, "ndctl");
        unregister_chrdev(nvdimm_major, "dimmctl");
        bus_unregister(&nvdimm_bus_type);
+       ida_destroy(&nd_ida);
 }
index 8b2e3c4fb0add718d2dbf167b499516a94612c80..d5dc80c48b4cb36a55c54a2383ce9812ae068260 100644 (file)
@@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
                return memcpy_from_pmem(buf, nsio->addr + offset, size);
        } else {
                memcpy_to_pmem(nsio->addr + offset, buf, size);
-               wmb_pmem();
+               nvdimm_flush(to_nd_region(ndns->dev.parent));
        }
 
        return 0;
@@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
 
        nsio->addr = devm_memremap(dev, res->start, resource_size(res),
                        ARCH_MEMREMAP_PMEM);
-       if (IS_ERR(nsio->addr))
-               return PTR_ERR(nsio->addr);
-       return 0;
+
+       return PTR_ERR_OR_ZERO(nsio->addr);
 }
 EXPORT_SYMBOL_GPL(devm_nsio_enable);
 
index be89764315c2b54d9bbe9548fc3f6183d6032ea4..715583f69d28ae2f413768c81c141e1dd3019d75 100644 (file)
 #include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include "nd-core.h"
 #include "nd.h"
 
 LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);
 
 void nvdimm_bus_lock(struct device *dev)
 {
@@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);
 
+struct nvdimm_map {
+       struct nvdimm_bus *nvdimm_bus;
+       struct list_head list;
+       resource_size_t offset;
+       unsigned long flags;
+       size_t size;
+       union {
+               void *mem;
+               void __iomem *iomem;
+       };
+       struct kref kref;
+};
+
+static struct nvdimm_map *find_nvdimm_map(struct device *dev,
+               resource_size_t offset)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nvdimm_map *nvdimm_map;
+
+       list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
+               if (nvdimm_map->offset == offset)
+                       return nvdimm_map;
+       return NULL;
+}
+
+static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
+               resource_size_t offset, size_t size, unsigned long flags)
+{
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
+       if (!nvdimm_map)
+               return NULL;
+
+       INIT_LIST_HEAD(&nvdimm_map->list);
+       nvdimm_map->nvdimm_bus = nvdimm_bus;
+       nvdimm_map->offset = offset;
+       nvdimm_map->flags = flags;
+       nvdimm_map->size = size;
+       kref_init(&nvdimm_map->kref);
+
+       if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+               goto err_request_region;
+
+       if (flags)
+               nvdimm_map->mem = memremap(offset, size, flags);
+       else
+               nvdimm_map->iomem = ioremap(offset, size);
+
+       if (!nvdimm_map->mem)
+               goto err_map;
+
+       dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
+                       __func__);
+       list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
+
+       return nvdimm_map;
+
+ err_map:
+       release_mem_region(offset, size);
+ err_request_region:
+       kfree(nvdimm_map);
+       return NULL;
+}
+
+static void nvdimm_map_release(struct kref *kref)
+{
+       struct nvdimm_bus *nvdimm_bus;
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_map = container_of(kref, struct nvdimm_map, kref);
+       nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+       dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+       list_del(&nvdimm_map->list);
+       if (nvdimm_map->flags)
+               memunmap(nvdimm_map->mem);
+       else
+               iounmap(nvdimm_map->iomem);
+       release_mem_region(nvdimm_map->offset, nvdimm_map->size);
+       kfree(nvdimm_map);
+}
+
+static void nvdimm_map_put(void *data)
+{
+       struct nvdimm_map *nvdimm_map = data;
+       struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+       nvdimm_bus_lock(&nvdimm_bus->dev);
+       kref_put(&nvdimm_map->kref, nvdimm_map_release);
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+/**
+ * devm_nvdimm_memremap - map a resource that is shared across regions
+ * @dev: device that will own a reference to the shared mapping
+ * @offset: physical base address of the mapping
+ * @size: mapping size
+ * @flags: memremap flags; if zero, perform an ioremap instead
+ *
+ * Returns the mapped address, or NULL on failure.
+ */
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags)
+{
+       struct nvdimm_map *nvdimm_map;
+
+       nvdimm_bus_lock(dev);
+       nvdimm_map = find_nvdimm_map(dev, offset);
+       if (!nvdimm_map)
+               nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
+       else
+               kref_get(&nvdimm_map->kref);
+       nvdimm_bus_unlock(dev);
+
+       if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
+               return NULL;
+
+       return nvdimm_map->mem;
+}
+EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
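+
+/*
+ * Usage sketch (illustrative): a cacheable mapping of a shared
+ * control region vs an mmio mapping of a flush hint page; "res" is a
+ * hypothetical caller-owned resource:
+ *
+ *	void *mem = devm_nvdimm_memremap(dev, res->start,
+ *			resource_size(res), MEMREMAP_WB);
+ *	void __iomem *mmio = devm_nvdimm_ioremap(dev, res->start,
+ *			resource_size(res));
+ */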
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
        u32 *buf = addr;
@@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
 }
 EXPORT_SYMBOL_GPL(nd_fletcher64);
 
-static void nvdimm_bus_release(struct device *dev)
-{
-       struct nvdimm_bus *nvdimm_bus;
-
-       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-       ida_simple_remove(&nd_ida, nvdimm_bus->id);
-       kfree(nvdimm_bus);
-}
-
-struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
-{
-       struct nvdimm_bus *nvdimm_bus;
-
-       nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-       WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
-       return nvdimm_bus;
-}
-EXPORT_SYMBOL_GPL(to_nvdimm_bus);
-
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 {
        /* struct nvdimm_bus definition is private to libnvdimm */
@@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 }
 EXPORT_SYMBOL_GPL(to_nd_desc);
 
-struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
 {
-       struct device *dev;
-
-       for (dev = nd_dev; dev; dev = dev->parent)
-               if (dev->release == nvdimm_bus_release)
-                       break;
-       dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
-       if (dev)
-               return to_nvdimm_bus(dev);
-       return NULL;
+       /* struct nvdimm_bus definition is private to libnvdimm */
+       return &nvdimm_bus->dev;
 }
+EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
 
 static bool is_uuid_sep(char sep)
 {
@@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
 
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-               struct nvdimm_bus_descriptor *nd_desc, struct module *module)
-{
-       struct nvdimm_bus *nvdimm_bus;
-       int rc;
-
-       nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
-       if (!nvdimm_bus)
-               return NULL;
-       INIT_LIST_HEAD(&nvdimm_bus->list);
-       INIT_LIST_HEAD(&nvdimm_bus->poison_list);
-       init_waitqueue_head(&nvdimm_bus->probe_wait);
-       nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
-       mutex_init(&nvdimm_bus->reconfig_mutex);
-       if (nvdimm_bus->id < 0) {
-               kfree(nvdimm_bus);
-               return NULL;
-       }
-       nvdimm_bus->nd_desc = nd_desc;
-       nvdimm_bus->module = module;
-       nvdimm_bus->dev.parent = parent;
-       nvdimm_bus->dev.release = nvdimm_bus_release;
-       nvdimm_bus->dev.groups = nd_desc->attr_groups;
-       dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
-       rc = device_register(&nvdimm_bus->dev);
-       if (rc) {
-               dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
-               goto err;
-       }
-
-       rc = nvdimm_bus_create_ndctl(nvdimm_bus);
-       if (rc)
-               goto err;
-
-       mutex_lock(&nvdimm_bus_list_mutex);
-       list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
-       mutex_unlock(&nvdimm_bus_list_mutex);
-
-       return nvdimm_bus;
- err:
-       put_device(&nvdimm_bus->dev);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
-
 static void set_badblock(struct badblocks *bb, sector_t s, int num)
 {
        dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
 
-static void free_poison_list(struct list_head *poison_list)
-{
-       struct nd_poison *pl, *next;
-
-       list_for_each_entry_safe(pl, next, poison_list, list) {
-               list_del(&pl->list);
-               kfree(pl);
-       }
-       list_del_init(poison_list);
-}
-
-static int child_unregister(struct device *dev, void *data)
-{
-       /*
-        * the singular ndctl class device per bus needs to be
-        * "device_destroy"ed, so skip it here
-        *
-        * i.e. remove classless children
-        */
-       if (dev->class)
-               /* pass */;
-       else
-               nd_device_unregister(dev, ND_SYNC);
-       return 0;
-}
-
-void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
-{
-       if (!nvdimm_bus)
-               return;
-
-       mutex_lock(&nvdimm_bus_list_mutex);
-       list_del_init(&nvdimm_bus->list);
-       mutex_unlock(&nvdimm_bus_list_mutex);
-
-       nd_synchronize();
-       device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
-
-       nvdimm_bus_lock(&nvdimm_bus->dev);
-       free_poison_list(&nvdimm_bus->poison_list);
-       nvdimm_bus_unlock(&nvdimm_bus->dev);
-
-       nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
-       device_unregister(&nvdimm_bus->dev);
-}
-EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
@@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
        if (meta_size == 0)
                return 0;
 
-       bi.profile = NULL;
+       memset(&bi, 0, sizeof(bi));
+
        bi.tuple_size = meta_size;
        bi.tag_size = meta_size;
 
@@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
        nvdimm_bus_exit();
        nd_region_devs_exit();
        nvdimm_devs_exit();
-       ida_destroy(&nd_ida);
 }
 
 MODULE_LICENSE("GPL v2");
index bbde28d3dec5cd4bbbb283847d2f736bc81148c9..d9bba5edd8dcf0646cad13a0160534648c52ce8d 100644 (file)
@@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
 
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
                const struct attribute_group **groups, unsigned long flags,
-               unsigned long cmd_mask)
+               unsigned long cmd_mask, int num_flush,
+               struct resource *flush_wpq)
 {
        struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
        struct device *dev;
@@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
        nvdimm->provider_data = provider_data;
        nvdimm->flags = flags;
        nvdimm->cmd_mask = cmd_mask;
+       nvdimm->num_flush = num_flush;
+       nvdimm->flush_wpq = flush_wpq;
        atomic_set(&nvdimm->busy, 0);
        dev = &nvdimm->dev;
        dev_set_name(dev, "nmem%d", nvdimm->id);
index 95825b38559addb8a0dc5cca22bc305e228e65f6..11ea90120542dcbec8410147efca7a502eea9a77 100644 (file)
@@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
 
        nd_desc.attr_groups = e820_pmem_attribute_groups;
        nd_desc.provider_name = "e820";
+       nd_desc.module = THIS_MODULE;
        nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
        if (!nvdimm_bus)
                goto err;
index 284cdaa268cfd8132bc7fcd7299d66eaa4256217..38ce6bbbc170bcdc40982d5f49473bab181e2071 100644 (file)
@@ -26,11 +26,11 @@ extern int nvdimm_major;
 struct nvdimm_bus {
        struct nvdimm_bus_descriptor *nd_desc;
        wait_queue_head_t probe_wait;
-       struct module *module;
        struct list_head list;
        struct device dev;
        int id, probe_active;
        struct list_head poison_list;
+       struct list_head mapping_list;
        struct mutex reconfig_mutex;
 };
 
@@ -40,7 +40,8 @@ struct nvdimm {
        unsigned long cmd_mask;
        struct device dev;
        atomic_t busy;
-       int id;
+       int id, num_flush;
+       struct resource *flush_wpq;
 };
 
 bool is_nvdimm(struct device *dev);
index d0ac93c31dda6adcd01a4be288b3963d5f09f084..40476399d22793aece0438da0f5a0976cef063ab 100644 (file)
@@ -49,9 +49,11 @@ struct nvdimm_drvdata {
        struct kref kref;
 };
 
-struct nd_region_namespaces {
-       int count;
-       int active;
+struct nd_region_data {
+       int ns_count;
+       int ns_active;
+       unsigned int flush_mask;
+       void __iomem *flush_wpq[0][0];
 };
 
 static inline struct nd_namespace_index *to_namespace_index(
@@ -119,7 +121,6 @@ struct nd_region {
 
 struct nd_blk_region {
        int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-       void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
        int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
        void *blk_provider_data;
@@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
 }
 #endif
 int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 {
index 36cb39047d5b77b17eb3d04980b82b36980e7ce4..b511099457db5cbb155e5b6270bc9bb8a3677cc6 100644 (file)
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pmem.h"
 #include "pfn.h"
 #include "nd.h"
 
-struct pmem_device {
-       /* One contiguous memory region per device */
-       phys_addr_t             phys_addr;
-       /* when non-zero this device is hosting a 'pfn' instance */
-       phys_addr_t             data_offset;
-       u64                     pfn_flags;
-       void __pmem             *virt_addr;
-       /* immutable base size of the namespace */
-       size_t                  size;
-       /* trim size when namespace capacity has been section aligned */
-       u32                     pfn_pad;
-       struct badblocks        bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+       /*
+        * nvdimm bus services need a 'dev' parameter, and we record the device
+        * at init in bb.dev.
+        */
+       return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+       return to_nd_region(to_dev(pmem)->parent);
+}
 
 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
                unsigned int len)
 {
-       struct device *dev = pmem->bb.dev;
+       struct device *dev = to_dev(pmem);
        sector_t sector;
        long cleared;
 
@@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
        cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 
        if (cleared > 0 && cleared / 512) {
-               dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+               dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
                                __func__, (unsigned long long) sector,
                                cleared / 512, cleared / 512 > 1 ? "s" : "");
                badblocks_clear(&pmem->bb, sector, cleared / 512);
@@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        bool bad_pmem = false;
        void *mem = kmap_atomic(page);
        phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
-       void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+       void *pmem_addr = pmem->virt_addr + pmem_off;
 
        if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
                bad_pmem = true;
@@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        return rc;
 }
 
+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
        int rc = 0;
@@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
        struct bio_vec bvec;
        struct bvec_iter iter;
        struct pmem_device *pmem = q->queuedata;
+       struct nd_region *nd_region = to_region(pmem);
+
+       if (bio->bi_rw & REQ_FLUSH)
+               nvdimm_flush(nd_region);
 
        do_acct = nd_iostat_start(bio, &start);
        bio_for_each_segment(bvec, bio, iter) {
@@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
        if (do_acct)
                nd_iostat_end(bio, start);
 
-       if (bio_data_dir(bio))
-               wmb_pmem();
+       if (bio->bi_rw & REQ_FUA)
+               nvdimm_flush(nd_region);
 
        bio_endio(bio);
        return BLK_QC_T_NONE;
@@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        int rc;
 
        rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
-       if (rw & WRITE)
-               wmb_pmem();
 
        /*
         * The ->rw_page interface is subtle and tricky.  The core
@@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        return rc;
 }
 
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-                     void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+                     void **kaddr, pfn_t *pfn, long size)
 {
        struct pmem_device *pmem = bdev->bd_queue->queuedata;
        resource_size_t offset = sector * 512 + pmem->data_offset;
@@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
        blk_cleanup_queue(q);
 }
 
-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
 {
        del_gendisk(disk);
        put_disk(disk);
@@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
                struct nd_namespace_common *ndns)
 {
        struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+       struct nd_region *nd_region = to_nd_region(dev->parent);
        struct vmem_altmap __altmap, *altmap = NULL;
        struct resource *res = &nsio->res;
        struct nd_pfn *nd_pfn = NULL;
@@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
        dev_set_drvdata(dev, pmem);
        pmem->phys_addr = res->start;
        pmem->size = resource_size(res);
-       if (!arch_has_wmb_pmem())
+       if (nvdimm_has_flush(nd_region) < 0)
                dev_warn(dev, "unable to guarantee persistence of writes\n");
 
        if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
         * At release time the queue must be dead before
         * devm_memremap_pages is unwound
         */
-       if (devm_add_action(dev, pmem_release_queue, q)) {
-               blk_cleanup_queue(q);
+       if (devm_add_action_or_reset(dev, pmem_release_queue, q))
                return -ENOMEM;
-       }
 
        if (IS_ERR(addr))
                return PTR_ERR(addr);
-       pmem->virt_addr = (void __pmem *) addr;
+       pmem->virt_addr = addr;
 
+       blk_queue_write_cache(q, true, true);
        blk_queue_make_request(q, pmem_make_request);
        blk_queue_physical_block_size(q, PAGE_SIZE);
        blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev,
        disk = alloc_disk_node(0, nid);
        if (!disk)
                return -ENOMEM;
-       if (devm_add_action(dev, pmem_release_disk, disk)) {
-               put_disk(disk);
-               return -ENOMEM;
-       }
 
        disk->fops              = &pmem_fops;
        disk->queue             = q;
@@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
                        / 512);
        if (devm_init_badblocks(dev, &pmem->bb))
                return -ENOMEM;
-       nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+       nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
        disk->bb = &pmem->bb;
        device_add_disk(dev, disk);
+
+       if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+               return -ENOMEM;
+
        revalidate_disk(disk);
 
        return 0;
@@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
 {
        if (is_nd_btt(dev))
                nvdimm_namespace_detach_btt(to_nd_btt(dev));
+       nvdimm_flush(to_nd_region(dev->parent));
+
        return 0;
 }
 
+static void nd_pmem_shutdown(struct device *dev)
+{
+       nvdimm_flush(to_nd_region(dev->parent));
+}
+
 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-       struct nd_region *nd_region = to_nd_region(dev->parent);
        struct pmem_device *pmem = dev_get_drvdata(dev);
+       struct nd_region *nd_region = to_region(pmem);
        resource_size_t offset = 0, end_trunc = 0;
        struct nd_namespace_common *ndns;
        struct nd_namespace_io *nsio;
@@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
        .probe = nd_pmem_probe,
        .remove = nd_pmem_remove,
        .notify = nd_pmem_notify,
+       .shutdown = nd_pmem_shutdown,
        .drv = {
                .name = "nd_pmem",
        },
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
new file mode 100644 (file)
index 0000000..b4ee4f7
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+                     void **kaddr, pfn_t *pfn, long size);
+/* this definition is in its own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+       /* One contiguous memory region per device */
+       phys_addr_t             phys_addr;
+       /* when non-zero this device is hosting a 'pfn' instance */
+       phys_addr_t             data_offset;
+       u64                     pfn_flags;
+       void                    *virt_addr;
+       /* immutable base size of the namespace */
+       size_t                  size;
+       /* trim size when namespace capacity has been section aligned */
+       u32                     pfn_pad;
+       struct badblocks        bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
index 05a91235993969752a8e1edad056a036847923e5..8f241772ec0b24d0a2d297a7549ceac9e072872a 100644 (file)
@@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
 {
        int err, rc;
        static unsigned long once;
-       struct nd_region_namespaces *num_ns;
+       struct nd_region_data *ndrd;
        struct nd_region *nd_region = to_nd_region(dev);
 
        if (nd_region->num_lanes > num_online_cpus()
@@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
                                nd_region->num_lanes);
        }
 
+       rc = nd_region_activate(nd_region);
+       if (rc)
+               return rc;
+
        rc = nd_blk_region_init(nd_region);
        if (rc)
                return rc;
 
        rc = nd_region_register_namespaces(nd_region, &err);
-       num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
-       if (!num_ns)
-               return -ENOMEM;
-
        if (rc < 0)
                return rc;
 
-       num_ns->active = rc;
-       num_ns->count = rc + err;
-       dev_set_drvdata(dev, num_ns);
+       ndrd = dev_get_drvdata(dev);
+       ndrd->ns_active = rc;
+       ndrd->ns_count = rc + err;
 
        if (rc && err && rc == err)
                return -ENODEV;
@@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
 {
        struct nd_region *nd_region = to_nd_region(dev);
 
+       device_for_each_child(dev, NULL, child_unregister);
+
        /* flush attribute readers and disable */
        nvdimm_bus_lock(dev);
        nd_region->ns_seed = NULL;
@@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
        dev_set_drvdata(dev, NULL);
        nvdimm_bus_unlock(dev);
 
-       device_for_each_child(dev, NULL, child_unregister);
        return 0;
 }
 
index 40fcfea26fbbc387a3ecbf235b1d4b011022f1aa..e8d5ba7b29af98f647b119640e79e581996cfdc0 100644 (file)
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
 
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
 static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+               struct nd_region_data *ndrd)
+{
+       int i, j;
+
+       dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+                       nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+       for (i = 0; i < nvdimm->num_flush; i++) {
+               struct resource *res = &nvdimm->flush_wpq[i];
+               unsigned long pfn = PHYS_PFN(res->start);
+               void __iomem *flush_page;
+
+               /* check if flush hints share a page */
+               for (j = 0; j < i; j++) {
+                       struct resource *res_j = &nvdimm->flush_wpq[j];
+                       unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+                       if (pfn == pfn_j)
+                               break;
+               }
+
+               if (j < i)
+                       flush_page = (void __iomem *) ((unsigned long)
+                                       ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+               else
+                       flush_page = devm_nvdimm_ioremap(dev,
+                                       PFN_PHYS(pfn), PAGE_SIZE);
+               if (!flush_page)
+                       return -ENXIO;
+               ndrd->flush_wpq[dimm][i] = flush_page
+                       + (res->start & ~PAGE_MASK);
+       }
+
+       return 0;
+}
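
The inner loop above exists because several flush hints may live in the
same 4K page; in that case only one devm_nvdimm_ioremap() is performed
and later hints reuse it at a different offset.  An illustration with
made-up addresses:

	/*
	 * hint[0] = 0x18000408 and hint[1] = 0x180004c0 both have pfn
	 * 0x18000, so after mapping hint[0] once:
	 *
	 *   flush_wpq[dimm][0] = base + 0x408
	 *   flush_wpq[dimm][1] = base + 0x4c0   (same ioremap'd page)
	 */
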
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+       int i, num_flush = 0;
+       struct nd_region_data *ndrd;
+       struct device *dev = &nd_region->dev;
+       size_t flush_data_size = sizeof(void *);
+
+       nvdimm_bus_lock(&nd_region->dev);
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+               /* at least one null hint slot per-dimm for the "no-hint" case */
+               flush_data_size += sizeof(void *);
+               num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+               if (!nvdimm->num_flush)
+                       continue;
+               flush_data_size += nvdimm->num_flush * sizeof(void *);
+       }
+       nvdimm_bus_unlock(&nd_region->dev);
+
+       ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+       if (!ndrd)
+               return -ENOMEM;
+       dev_set_drvdata(dev, ndrd);
+
+       ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
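
The devm_kzalloc() above sizes struct nd_region_data (defined in
nd-core.h, not in this hunk) plus a trailing per-dimm table of hint
pointers.  Because the buffer is zeroed, a dimm without hints leaves
its first slot NULL, which is exactly what nvdimm_flush() and
nvdimm_has_flush() test for the "no hints" case.  Layout sketch for a
two-dimm region:

	/*
	 *   struct nd_region_data { ... }   header
	 *   flush_wpq[0][0..num_flush]      dimm0 hint mappings + NULL slot
	 *   flush_wpq[1][0..num_flush]      dimm1 hint mappings + NULL slot
	 */
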
 
 static void nd_region_release(struct device *dev)
 {
@@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
 static ssize_t init_namespaces_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
-       struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+       struct nd_region_data *ndrd = dev_get_drvdata(dev);
        ssize_t rc;
 
        nvdimm_bus_lock(dev);
-       if (num_ns)
-               rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+       if (ndrd)
+               rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
        else
                rc = -ENXIO;
        nvdimm_bus_unlock(dev);
@@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
 
                if (is_nd_pmem(dev))
                        return;
-
-               to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
        }
        if (dev->parent && is_nd_blk(dev->parent) && probe) {
                nd_region = to_nd_region(dev->parent);
@@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                if (ndbr) {
                        nd_region = &ndbr->nd_region;
                        ndbr->enable = ndbr_desc->enable;
-                       ndbr->disable = ndbr_desc->disable;
                        ndbr->do_io = ndbr_desc->do_io;
                }
                region_buf = ndbr;
@@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
 
+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+       struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+       int i, idx;
+
+       /*
+        * Try to encourage some diversity in flush hint addresses
+        * across cpus assuming a limited number of flush hints.
+        */
+       idx = this_cpu_read(flush_idx);
+       idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+       /*
+        * The first wmb() is needed to 'sfence' all previous writes
+        * such that they are architecturally visible for the platform
+        * buffer flush.  Note that we've already arranged for pmem
+        * writes to avoid the cache via arch_memcpy_to_pmem().  The
+        * final wmb() ensures ordering for the NVDIMM flush write.
+        */
+       wmb();
+       for (i = 0; i < nd_region->ndr_mappings; i++)
+               if (ndrd->flush_wpq[i][0])
+                       writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+       wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
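
With wmb_pmem() gone, a write path pairs the cache-bypassing copy with
a region flush.  A minimal sketch (names are illustrative, not from the
patch):

	static void example_write_persist(struct nd_region *nd_region,
			void *dst, const void *src, size_t len)
	{
		memcpy_to_pmem(dst, src, len);	/* bypass the cpu cache */
		nvdimm_flush(nd_region);	/* drain posted-write queues */
	}
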
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability cannot be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+       struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+       int i;
+
+       /* no nvdimm == flushing capability unknown */
+       if (nd_region->ndr_mappings == 0)
+               return -ENXIO;
+
+       for (i = 0; i < nd_region->ndr_mappings; i++)
+               /* flush hints present, flushing required */
+               if (ndrd->flush_wpq[i][0])
+                       return 1;
+
+       /*
+       /*
+        * The platform defines dimm devices without hints; assume a
+        * platform persistence mechanism like ADR.
+        */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
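
Callers can consume the tri-state result at attach time; a hedged
sketch of the expected pattern:

	/* illustrative caller, not part of the patch */
	if (nvdimm_has_flush(nd_region) < 0)
		dev_warn(dev, "unable to guarantee persistence of writes\n");
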
+
 void __exit nd_region_devs_exit(void)
 {
        ida_destroy(&region_ida);
index fac1b51ea0dee9a55738d030efed1aaf1a43e5bc..9d66b4fb174b80231a98737bf3f9fafeb4849c66 100644 (file)
@@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static blk_qc_t dcssblk_make_request(struct request_queue *q,
                                                struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-                        void __pmem **kaddr, pfn_t *pfn, long size);
+                        void **kaddr, pfn_t *pfn, long size);
 
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
@@ -884,7 +884,7 @@ fail:
 
 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-                       void __pmem **kaddr, pfn_t *pfn, long size)
+                       void **kaddr, pfn_t *pfn, long size)
 {
        struct dcssblk_dev_info *dev_info;
        unsigned long offset, dev_sz;
@@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
                return -ENODEV;
        dev_sz = dev_info->end - dev_info->start;
        offset = secnum * 512;
-       *kaddr = (void __pmem *) (dev_info->start + offset);
+       *kaddr = (void *) dev_info->start + offset;
        *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
 
        return dev_sz - offset;
index 432b9e6dd63b90b7a98a4820f1c45e257d007517..993dc6fe0416e17e8a0ca5c8a432b8daf574df86 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -75,13 +75,13 @@ static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
        struct request_queue *q = bdev->bd_queue;
        long rc = -EIO;
 
-       dax->addr = (void __pmem *) ERR_PTR(-EIO);
+       dax->addr = ERR_PTR(-EIO);
        if (blk_queue_enter(q, true) != 0)
                return rc;
 
        rc = bdev_direct_access(bdev, dax);
        if (rc < 0) {
-               dax->addr = (void __pmem *) ERR_PTR(rc);
+               dax->addr = ERR_PTR(rc);
                blk_queue_exit(q);
                return rc;
        }
@@ -147,12 +147,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                      struct buffer_head *bh)
 {
        loff_t pos = start, max = start, bh_max = start;
-       bool hole = false, need_wmb = false;
+       bool hole = false;
        struct block_device *bdev = NULL;
        int rw = iov_iter_rw(iter), rc;
        long map_len = 0;
        struct blk_dax_ctl dax = {
-               .addr = (void __pmem *) ERR_PTR(-EIO),
+               .addr = ERR_PTR(-EIO),
        };
        unsigned blkbits = inode->i_blkbits;
        sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
@@ -218,7 +218,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 
                if (iov_iter_rw(iter) == WRITE) {
                        len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-                       need_wmb = true;
                } else if (!hole)
                        len = copy_to_iter((void __force *) dax.addr, max - pos,
                                        iter);
@@ -235,8 +234,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                        dax.addr += len;
        }
 
-       if (need_wmb)
-               wmb_pmem();
        dax_unmap_atomic(bdev, &dax);
 
        return (pos == start) ? rc : pos - start;
@@ -788,7 +785,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
                                return ret;
                }
        }
-       wmb_pmem();
        return 0;
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@@ -1187,7 +1183,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
                if (dax_map_atomic(bdev, &dax) < 0)
                        return PTR_ERR(dax.addr);
                clear_pmem(dax.addr + offset, length);
-               wmb_pmem();
                dax_unmap_atomic(bdev, &dax);
        }
        return 0;
index c96db9c22d1031496d01c0622a65ea0df8e2965a..adf33079771e740a93630305d229ef7d5578b814 100644 (file)
@@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
  */
 struct blk_dax_ctl {
        sector_t sector;
-       void __pmem *addr;
+       void *addr;
        long size;
        pfn_t pfn;
 };
@@ -1676,8 +1676,8 @@ struct block_device_operations {
        int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       long (*direct_access)(struct block_device *, sector_t, void __pmem **,
-                       pfn_t *, long);
+       long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+                       long);
        unsigned int (*check_events) (struct gendisk *disk,
                                      unsigned int clearing);
        /* ->media_changed() is DEPRECATED, use ->check_events() instead */
index 2e853b679a5da9c4061053b714801d7434728154..1bb95484272501bbc8d0603da489f56b4f87714a 100644 (file)
@@ -17,7 +17,6 @@
 # define __release(x)  __context__(x,-1)
 # define __cond_lock(x,c)      ((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu      __attribute__((noderef, address_space(3)))
-# define __pmem                __attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu         __attribute__((noderef, address_space(4)))
 #else /* CONFIG_SPARSE_RCU_POINTER */
@@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
-# define __pmem
 # define __private
 # define ACCESS_PRIVATE(p, member) ((p)->member)
 #endif /* __CHECKER__ */
index b0db857f334b95630a4a05ff5b10a6c8383f0c6c..91acfce74a220010549536edde925ef9e3ec81c0 100644 (file)
@@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  * >= 0 : the number of bytes accessible at the address
  */
 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
-                                    void __pmem **kaddr, pfn_t *pfn, long size);
+                                    void **kaddr, pfn_t *pfn, long size);
 
 void dm_error(const char *message);
 
index 0c3c30cbbea54431cff38dc01f21ba3d831674d4..b519e137b9b7d98ab44aa1409510ad35e841c10f 100644 (file)
@@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 
 struct nd_namespace_label;
 struct nvdimm_drvdata;
+
 struct nd_mapping {
        struct nvdimm *nvdimm;
        struct nd_namespace_label **labels;
@@ -69,6 +70,7 @@ struct nd_mapping {
 struct nvdimm_bus_descriptor {
        const struct attribute_group **attr_groups;
        unsigned long cmd_mask;
+       struct module *module;
        char *provider_name;
        ndctl_fn ndctl;
        int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@@ -99,13 +101,21 @@ struct nd_region_desc {
        unsigned long flags;
 };
 
+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+               resource_size_t offset, size_t size)
+{
+       return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
 struct nvdimm_bus;
 struct module;
 struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
        int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-       void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
        int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
        struct nd_region_desc ndr_desc;
@@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-               struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
-       __nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+               struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
                const struct attribute_group **groups, unsigned long flags,
-               unsigned long cmd_mask);
+               unsigned long cmd_mask, int num_flush,
+               struct resource *flush_wpq);
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
 #endif /* __LIBNVDIMM_H__ */
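
With ->module moved into the descriptor, provider registration reduces
to a plain function call.  A sketch with hypothetical names (only the
fields touched by this series are shown; a real descriptor also fills
in attr_groups, cmd_mask, etc.):

	static struct nvdimm_bus_descriptor example_nd_desc = {
		.provider_name = "example",
		.module = THIS_MODULE,	/* formerly implied by the
					 * nvdimm_bus_register() macro */
		.ndctl = example_ndctl,	/* an ndctl_fn, assumed defined */
	};

	bus = nvdimm_bus_register(parent, &example_nd_desc);
	if (!bus)
		return -ENXIO;
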
index aee2761d294cbc6a06ae32de2c66dd4533cade3d..f1ea426d6a5e9ca742b7ae02ec03548ccaccd34e 100644 (file)
@@ -26,6 +26,7 @@ struct nd_device_driver {
        unsigned long type;
        int (*probe)(struct device *dev);
        int (*remove)(struct device *dev);
+       void (*shutdown)(struct device *dev);
        void (*notify)(struct device *dev, enum nvdimm_event event);
 };
 
@@ -67,7 +68,7 @@ struct nd_namespace_io {
        struct nd_namespace_common common;
        struct resource res;
        resource_size_t size;
-       void __pmem *addr;
+       void *addr;
        struct badblocks bb;
 };
 
index 94994810c7c086e8410f3333d919ef5256595ad5..a3d90b9da18d444f0d53437d1dd5c4bd01c9e845 100644 (file)
@@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
        return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+       return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
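
Being a static inline, phys_to_pfn_t() no longer needs an export; a
typical conversion (field names illustrative) looks like:

	pfn_t pfn = phys_to_pfn_t(pmem->phys_addr + offset,
			PFN_DEV | PFN_MAP);	/* dax-capable device pfn */
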
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
index 57d146fe44dd84e926199d81a8ca0a74a2d012fc..e856c2cb0fe86da91d55e2766b687b66ad02cc1e 100644 (file)
  * calling these symbols with arch_has_pmem_api() and redirect to the
  * implementation in asm/pmem.h.
  */
-static inline bool __arch_has_wmb_pmem(void)
-{
-       return false;
-}
-
-static inline void arch_wmb_pmem(void)
-{
-       BUG();
-}
-
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
        BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-               size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
        BUG();
        return -EFAULT;
 }
 
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
        BUG();
        return 0;
 }
 
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
        BUG();
 }
 
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
        BUG();
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
        BUG();
 }
@@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
        return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }
 
-static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
-               size_t size)
-{
-       memcpy(dst, (void __force *) src, size);
-       return 0;
-}
-
 /*
  * memcpy_from_pmem - read from persistent memory with error handling
  * @dst: destination buffer
@@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
  *
  * Returns 0 on success, negative error code on failure.
  */
-static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
-               size_t size)
+static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
 {
        if (arch_has_pmem_api())
                return arch_memcpy_from_pmem(dst, src, size);
        else
-               return default_memcpy_from_pmem(dst, src, size);
-}
-
-/**
- * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
- *
- * For a given cpu implementation within an architecture it is possible
- * that wmb_pmem() resolves to a nop.  In the case this returns
- * false, pmem api users are unable to ensure durability and may want to
- * fall back to a different data consistency model, or otherwise notify
- * the user.
- */
-static inline bool arch_has_wmb_pmem(void)
-{
-       return arch_has_pmem_api() && __arch_has_wmb_pmem();
-}
-
-/*
- * These defaults seek to offer decent performance and minimize the
- * window between i/o completion and writes being durable on media.
- * However, it is undefined / architecture specific whether
- * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
- * making data durable relative to i/o completion.
- */
-static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t size)
-{
-       memcpy((void __force *) dst, src, size);
-}
-
-static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
-               size_t bytes, struct iov_iter *i)
-{
-       return copy_from_iter_nocache((void __force *)addr, bytes, i);
-}
-
-static inline void default_clear_pmem(void __pmem *addr, size_t size)
-{
-       if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
-               clear_page((void __force *)addr);
-       else
-               memset((void __force *)addr, 0, size);
+               memcpy(dst, src, size);
+       return 0;
 }
 
 /**
@@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
  * being effectively evicted from, or never written to, the processor
  * cache hierarchy after the copy completes.  After memcpy_to_pmem()
  * data may still reside in cpu or platform buffers, so this operation
- * must be followed by a wmb_pmem().
+ * must be followed by a blkdev_issue_flush() on the pmem block device.
  */
-static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
        if (arch_has_pmem_api())
                arch_memcpy_to_pmem(dst, src, n);
        else
-               default_memcpy_to_pmem(dst, src, n);
-}
-
-/**
- * wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of memcpy_to_pmem() operations this drains data from
- * cpu write buffers and any platform (memory controller) buffers to
- * ensure that written data is durable on persistent memory media.
- */
-static inline void wmb_pmem(void)
-{
-       if (arch_has_wmb_pmem())
-               arch_wmb_pmem();
-       else
-               wmb();
+               memcpy(dst, src, n);
 }
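
The blkdev_issue_flush() ordering point the comment refers to, sketched
from a block-layer context (bdev is whichever pmem block device backs
the mapping); the flush bio reaches nvdimm_flush() via the pmem driver:

	memcpy_to_pmem(dst, src, n);
	rc = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
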
 
 /**
@@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
  * @i:         iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
                struct iov_iter *i)
 {
        if (arch_has_pmem_api())
                return arch_copy_from_iter_pmem(addr, bytes, i);
-       return default_copy_from_iter_pmem(addr, bytes, i);
+       return copy_from_iter_nocache(addr, bytes, i);
 }
 
 /**
@@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size:      number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline void clear_pmem(void __pmem *addr, size_t size)
+static inline void clear_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_clear_pmem(addr, size);
        else
-               default_clear_pmem(addr, size);
+               memset(addr, 0, size);
 }
 
 /**
@@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
  * For platforms that support clearing poison, this flushes any poisoned
  * ranges out of the cache.
  */
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_invalidate_pmem(addr, size);
@@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
  * @size:      number of bytes to write back
  *
  * Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
 {
        if (arch_has_pmem_api())
                arch_wb_cache_pmem(addr, size);
index 309915f74492406595782af8e8c4e20a5a7ef934..ba5a8c79652a469f048d1507bea9e489ed698bef 100644 (file)
@@ -298,6 +298,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_INTEL 0
 #define NVDIMM_FAMILY_HPE1 1
 #define NVDIMM_FAMILY_HPE2 2
+#define NVDIMM_FAMILY_MSFT 3
 
 #define ND_IOCTL_CALL                  _IOWR(ND_IOCTL, ND_CMD_CALL,\
                                        struct nd_cmd_pkg)
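
A userspace sketch of reaching the new family through the passthrough
ioctl; the function number and payload size are illustrative, not taken
from the Microsoft DSM specification:

	struct {
		struct nd_cmd_pkg pkg;
		unsigned char buf[128];
	} cmd = {
		.pkg = {
			.nd_family = NVDIMM_FAMILY_MSFT,
			.nd_command = 1,	/* family-specific function */
			.nd_size_out = sizeof(cmd.buf),
		},
	};

	rc = ioctl(dimm_fd, ND_IOCTL_CALL, &cmd);
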
index ddb3247a872a60b3ec5e716ac503f8bd13254d97..251d16b4cb41e67111ff2f1f783bf39cdea13822 100644 (file)
@@ -169,12 +169,6 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
-       return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-EXPORT_SYMBOL(phys_to_pfn_t);
-
 #ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
index 4904ced676d40289356aa3358f894fa7efa4b5c0..24a08363995adeeda264cacd801bebf9cc48b5c6 100755 (executable)
@@ -313,7 +313,6 @@ our $Sparse = qr{
                        __kernel|
                        __force|
                        __iomem|
-                       __pmem|
                        __must_check|
                        __init_refok|
                        __kprobes|
index d388de72eacaa353ba14e3340a30ef64b63bcd97..28632ee683772bf26e9c7a5a740340b507a9e5d9 100644 (file)
@@ -947,7 +947,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index 3918dd52e903c9c07db1c027d2680d743f145977..0f196eec9f48ab7b126ace056b8f28bc8b367f71 100644 (file)
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
index 9c8c61e06d5a49b8e075e02971395579544f0919..af25bc8240d0de5dab4fc34fb86f8339cf7aa9e4 100644 (file)
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
index 76e0ec379c8bb357ea886242ae33e244e8bed0bb..979487dae8d4e0a28120561820c567735133035d 100644 (file)
@@ -2655,10 +2655,6 @@ int main(void)
 
 #endif /* #ifndef __x86_64__ */
 
-       /* pcommit */
-
-       asm volatile("pcommit");
-
        /* Following line is a marker for the awk script - do not change */
        asm volatile("rdtsc"); /* Stop here */
 
index ec378cd7b71ee4e067d0a4a9beb59413def3296c..767be7c760340bd33b7e4a18b9a8f3a71d9db33e 100644 (file)
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
index 7859856771599531d156c322ac4ceb94596af27a..ad6dd05430192ffdba8d163f9855942fa75b4099 100644 (file)
@@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
-ldflags-y += --wrap=phys_to_pfn_t
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
 
 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
-ACPI_SRC := $(DRIVERS)/acpi
+ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o
 obj-$(CONFIG_DEV_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
-nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
 nfit-y += config_check.o
 
 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
 nd_pmem-y += config_check.o
 
 nd_btt-y := $(NVDIMM_SRC)/btt.o
index adf18bfeca0068cedf19f9dd8b823822bbffac6c..878daf3429e8f822593aff99c490550e5d76ef93 100644 (file)
@@ -10,6 +10,7 @@ void check(void)
        BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+       BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
        BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644 (file)
index 0000000..c9b8c48
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+               void **kaddr, pfn_t *pfn, long size)
+{
+       struct pmem_device *pmem = bdev->bd_queue->queuedata;
+       resource_size_t offset = sector * 512 + pmem->data_offset;
+
+       if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+               return -EIO;
+
+       /*
+        * Limit dax to a single page at a time given the vmalloc()-backed
+        * buffers in the nfit_test case.
+        */
+       if (get_nfit_res(pmem->phys_addr + offset)) {
+               struct page *page;
+
+               *kaddr = pmem->virt_addr + offset;
+               page = vmalloc_to_page(pmem->virt_addr + offset);
+               *pfn = page_to_pfn_t(page);
+               dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
+                               "%s: sector: %#llx pfn: %#lx\n", __func__,
+                               (unsigned long long) sector, page_to_pfn(page));
+
+               return PAGE_SIZE;
+       }
+
+       *kaddr = pmem->virt_addr + offset;
+       *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+       /*
+        * If badblocks are present, limit known good range to the
+        * requested range.
+        */
+       if (unlikely(pmem->bb.count))
+               return size;
+       return pmem->size - pmem->pfn_pad - offset;
+}
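
The single-page limit above reflects that vmalloc()-backed test buffers
are generally not physically contiguous, unlike real pmem.  Sketch of
the contrast (addresses made up):

	/*
	 * vmalloc()-backed:  virt 0x...0000 -> pfn 0x1234
	 *                    virt 0x...1000 -> pfn 0x8765  (discontiguous)
	 * real pmem:         virt + offset maps 1:1 onto phys_addr +
	 *                    offset, so the remaining capacity is valid
	 */
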
index 9241064970fe72be96c300f544c03345df42ac81..d32f25bba42a35854364fa7eca200971bf4b02a1 100644 (file)
@@ -1,5 +1,5 @@
 ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
 
 obj-m += nfit_test.o
 obj-m += nfit_test_iomap.o
index c842095f2801b2183b734c66ae99fa45a6048945..c29f8dca9e67c1f95da2861078ffee61b258ed69 100644 (file)
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/memremap.h>
 #include <linux/rculist.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pfn_t.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include "nfit_test.h"
@@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
        return NULL;
 }
 
-static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 {
        struct nfit_test_resource *res;
 
@@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 
        return res;
 }
+EXPORT_SYMBOL(get_nfit_res);
 
 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
                void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
@@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-#ifdef __HAVE_ARCH_PTE_DEVMAP
-#include <linux/memremap.h>
-#include <linux/pfn_t.h>
-
 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
@@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
         return phys_to_pfn_t(addr, flags);
 }
 EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-#else
-/* to be removed post 4.5-rc1 */
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
-{
-       resource_size_t offset = res->start;
-       struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
-       if (nfit_res)
-               return nfit_res->buf + offset - nfit_res->res->start;
-       return devm_memremap_pages(dev, res);
-}
-EXPORT_SYMBOL(__wrap_devm_memremap_pages);
-#endif
 
 void *__wrap_memremap(resource_size_t offset, size_t size,
                unsigned long flags)
@@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent,
 }
 EXPORT_SYMBOL(__wrap___request_region);
 
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+       if (get_nfit_res(res->start))
+               return 0;
+       return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+       if (get_nfit_res(res->start))
+               return 0;
+       return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
 struct resource *__wrap___devm_request_region(struct device *dev,
                struct resource *parent, resource_size_t start,
                resource_size_t n, const char *name)
index c919866853a045fdeb1d7b2c0aae699cddd68200..5404efa578a3fcea18ce5bbab2a991e0c3d98b73 100644 (file)
 enum {
        NUM_PM  = 3,
        NUM_DCR = 5,
+       NUM_HINTS = 8,
        NUM_BDW = NUM_DCR,
        NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
        NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
        DIMM_SIZE = SZ_32M,
        LABEL_SIZE = SZ_128K,
+       SPA_VCD_SIZE = SZ_4M,
        SPA0_SIZE = DIMM_SIZE,
        SPA1_SIZE = DIMM_SIZE*2,
        SPA2_SIZE = DIMM_SIZE,
@@ -470,11 +472,7 @@ static void release_nfit_res(void *data)
        list_del(&nfit_res->list);
        spin_unlock(&nfit_test_lock);
 
-       if (is_vmalloc_addr(nfit_res->buf))
-               vfree(nfit_res->buf);
-       else
-               dma_free_coherent(nfit_res->dev, resource_size(res),
-                               nfit_res->buf, res->start);
+       vfree(nfit_res->buf);
        kfree(res);
        kfree(nfit_res);
 }
@@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
        return nfit_res->buf;
  err:
-       if (buf && !is_vmalloc_addr(buf))
-               dma_free_coherent(dev, size, buf, *dma);
-       else if (buf)
+       if (buf)
                vfree(buf);
        kfree(res);
        kfree(nfit_res);
@@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
        return __test_alloc(t, size, dma, buf);
 }
 
-static void *test_alloc_coherent(struct nfit_test *t, size_t size,
-               dma_addr_t *dma)
-{
-       struct device *dev = &t->pdev.dev;
-       void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
-
-       return __test_alloc(t, size, dma, buf);
-}
-
 static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
 {
        int i;
@@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        + offsetof(struct acpi_nfit_control_region,
                                        window_size) * NUM_DCR
                        + sizeof(struct acpi_nfit_data_region) * NUM_BDW
-                       + sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+                       + (sizeof(struct acpi_nfit_flush_address)
+                                       + sizeof(u64) * NUM_HINTS) * NUM_DCR;
        int i;
 
        t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t)
                return -ENOMEM;
        t->nfit_size = nfit_size;
 
-       t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+       t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
        if (!t->spa_set[0])
                return -ENOMEM;
 
-       t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+       t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
        if (!t->spa_set[1])
                return -ENOMEM;
 
-       t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
+       t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
        if (!t->spa_set[2])
                return -ENOMEM;
 
@@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        return -ENOMEM;
                sprintf(t->label[i], "label%d", i);
 
-               t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+               t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+                               &t->flush_dma[i]);
                if (!t->flush[i])
                        return -ENOMEM;
        }
@@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
 
 static int nfit_test1_alloc(struct nfit_test *t)
 {
-       size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+       size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
                + sizeof(struct acpi_nfit_memory_map)
                + offsetof(struct acpi_nfit_control_region, window_size);
 
@@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t)
                return -ENOMEM;
        t->nfit_size = nfit_size;
 
-       t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+       t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
        if (!t->spa_set[0])
                return -ENOMEM;
 
+       t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+       if (!t->spa_set[1])
+               return -ENOMEM;
+
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+       dcr->vendor_id = 0xabcd;
+       dcr->device_id = 0;
+       dcr->revision_id = 1;
+       dcr->valid_fields = 1;
+       dcr->manufacturing_location = 0xa;
+       dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
 static void nfit_test0_setup(struct nfit_test *t)
 {
+       const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+               + (sizeof(u64) * NUM_HINTS);
        struct acpi_nfit_desc *acpi_desc;
        struct acpi_nfit_memory_map *memdev;
        void *nfit_buf = t->nfit_buf;
@@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        struct acpi_nfit_control_region *dcr;
        struct acpi_nfit_data_region *bdw;
        struct acpi_nfit_flush_address *flush;
-       unsigned int offset;
+       unsigned int offset, i;
 
        /*
         * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 0+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[0];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 1+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[1];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 2+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[2];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = sizeof(struct acpi_nfit_control_region);
        dcr->region_index = 3+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[3];
        dcr->code = NFIT_FIC_BLK;
        dcr->windows = 1;
@@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 4+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[0];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 5+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[1];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 6+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[2];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 7+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~handle[3];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
@@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t)
        /* flush0 (dimm0) */
        flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[0];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[0];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
 
        /* flush1 (dimm1) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+       flush = nfit_buf + offset + flush_hint_size * 1;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[1];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[1];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
 
        /* flush2 (dimm2) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+       flush = nfit_buf + offset + flush_hint_size * 2;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[2];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[2];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
 
        /* flush3 (dimm3) */
-       flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+       flush = nfit_buf + offset + flush_hint_size * 3;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-       flush->header.length = sizeof(struct acpi_nfit_flush_address);
+       flush->header.length = flush_hint_size;
        flush->device_handle = handle[3];
-       flush->hint_count = 1;
-       flush->hint_address[0] = t->flush_dma[3];
+       flush->hint_count = NUM_HINTS;
+       for (i = 0; i < NUM_HINTS; i++)
+               flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
 
        if (t->setup_hotplug) {
-               offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
+               offset = offset + flush_hint_size * 4;
                /* dcr-descriptor4: blk */
                dcr = nfit_buf + offset;
                dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
                dcr->header.length = sizeof(struct acpi_nfit_control_region);
                dcr->region_index = 8+1;
-               dcr->vendor_id = 0xabcd;
-               dcr->device_id = 0;
-               dcr->revision_id = 1;
+               dcr_common_init(dcr);
                dcr->serial_number = ~handle[4];
                dcr->code = NFIT_FIC_BLK;
                dcr->windows = 1;
@@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t)
                dcr->header.length = offsetof(struct acpi_nfit_control_region,
                                window_size);
                dcr->region_index = 9+1;
-               dcr->vendor_id = 0xabcd;
-               dcr->device_id = 0;
-               dcr->revision_id = 1;
+               dcr_common_init(dcr);
                dcr->serial_number = ~handle[4];
                dcr->code = NFIT_FIC_BYTEN;
                dcr->windows = 0;
@@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t)
                /* flush3 (dimm4) */
                flush = nfit_buf + offset;
                flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-               flush->header.length = sizeof(struct acpi_nfit_flush_address);
+               flush->header.length = flush_hint_size;
                flush->device_handle = handle[4];
-               flush->hint_count = 1;
-               flush->hint_address[0] = t->flush_dma[4];
+               flush->hint_count = NUM_HINTS;
+               for (i = 0; i < NUM_HINTS; i++)
+                       flush->hint_address[i] = t->flush_dma[4]
+                               + i * sizeof(u64);
        }
 
        post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
@@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t)
        spa->address = t->spa_set_dma[0];
        spa->length = SPA2_SIZE;
 
-       offset += sizeof(*spa);
+       /* virtual cd region */
+       spa = nfit_buf + sizeof(*spa);
+       spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+       spa->header.length = sizeof(*spa);
+       memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+       spa->range_index = 0;
+       spa->address = t->spa_set_dma[1];
+       spa->length = SPA_VCD_SIZE;
+
+       offset += sizeof(*spa) * 2;
        /* mem-region0 (spa0, dimm0) */
        memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t)
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
        dcr->region_index = 0+1;
-       dcr->vendor_id = 0xabcd;
-       dcr->device_id = 0;
-       dcr->revision_id = 1;
+       dcr_common_init(dcr);
        dcr->serial_number = ~0;
        dcr->code = NFIT_FIC_BYTE;
        dcr->windows = 0;
@@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev)
        nfit_test->setup(nfit_test);
        acpi_desc = &nfit_test->acpi_desc;
        acpi_nfit_desc_init(acpi_desc, &pdev->dev);
-       acpi_desc->nfit = nfit_test->nfit_buf;
        acpi_desc->blk_do_io = nfit_test_blk_do_io;
        nd_desc = &acpi_desc->nd_desc;
        nd_desc->provider_name = NULL;
+       nd_desc->module = THIS_MODULE;
        nd_desc->ndctl = nfit_test_ctl;
-       acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
-       if (!acpi_desc->nvdimm_bus)
-               return -ENXIO;
 
-       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+                       nfit_test->nfit_size);
+       if (rc)
                return rc;
-       }
 
        if (nfit_test->setup != nfit_test0_setup)
                return 0;
@@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev)
        nfit_test->setup_hotplug = 1;
        nfit_test->setup(nfit_test);
 
-       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-       if (rc) {
-               nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+                       nfit_test->nfit_size);
+       if (rc)
                return rc;
-       }
 
        return 0;
 }
 
 static int nfit_test_remove(struct platform_device *pdev)
 {
-       struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
-       struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
-
-       nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-
        return 0;
 }
 
@@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = {
        .id_table = nfit_test_id,
 };
 
-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
 static __init int nfit_test_init(void)
 {
        int rc, i;
@@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void)
        for (i = 0; i < NUM_NFITS; i++) {
                struct nfit_test *nfit_test;
                struct platform_device *pdev;
-               static int once;
 
                nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
                if (!nfit_test) {
@@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void)
                        goto err_register;
 
                instances[i] = nfit_test;
-
-               if (!once++) {
-                       dma_addr_t dma;
-                       void *buf;
-
-                       buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
-                                       GFP_KERNEL);
-                       if (!buf) {
-                               rc = -ENOMEM;
-                               dev_warn(&pdev->dev, "need 128M of free cma\n");
-                               goto err_register;
-                       }
-                       dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
-               }
        }
 
        rc = platform_driver_register(&nfit_test_driver);
index 96c5e16d7db9a6580419b572f2912b330bec6161..9f18e2a4a862d543a4275301d6629679a133d663 100644 (file)
@@ -12,6 +12,7 @@
  */
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
+#include <linux/list.h>
 
 struct nfit_test_resource {
        struct list_head list;
@@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 void __wrap_iounmap(volatile void __iomem *addr);
 void nfit_test_setup(nfit_test_lookup_fn lookup);
 void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
 #endif