int (*release) (struct gendisk *, fmode_t);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+ int (*direct_access) (struct block_device *, sector_t, void **,
unsigned long *);
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
only state using a flag in the info block.
-5. In-kernel usage
-==================
+5. Usage
+========
-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:
- int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to setup a btt with a 4k sector size is:
-It may register with the BTT after it adds its own gendisk, using btt_init:
+ ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
- struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
- u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.
-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
- void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl
*/
static long
axon_ram_direct_access(struct block_device *device, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
- *kaddr = (void __pmem __force *) bank->io_addr + offset;
+ *kaddr = (void *) bank->io_addr + offset;
*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
}
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
* @n: length of the copy in bytes
*
* Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
*/
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
- int unwritten;
+ int rem;
/*
* We are copying between two kernel buffers, if
* fault) we would have already reported a general protection fault
* before the WARN+BUG.
*/
- unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
- (void __user *) src, n);
- if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
- __func__, dst, src, unwritten))
+ rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+ if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+ __func__, dst, src, rem))
BUG();
}
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
- size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
- return memcpy_mcsafe(dst, (void __force *) src, n);
- memcpy(dst, (void __force *) src, n);
+ return memcpy_mcsafe(dst, src, n);
+ memcpy(dst, src, n);
return 0;
}
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
- /*
- * wmb() to 'sfence' all previous writes such that they are
- * architecturally visible to 'pcommit'. Note, that we've
- * already arranged for pmem writes to avoid the cache via
- * arch_memcpy_to_pmem().
- */
- wmb();
- pcommit_sfence();
-}
-
/**
* arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
- * instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
*/
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
- void *vaddr = (void __force *)addr;
- void *vend = vaddr + size;
+ void *vend = addr + size;
void *p;
- for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+ for (p = (void *)((unsigned long)addr & ~clflush_mask);
p < vend; p += x86_clflush_size)
clwb(p);
}
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
*/
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
- void *vaddr = (void __force *)addr;
size_t len;
/* TODO: skip the write-back by always using non-temporal stores */
- len = copy_from_iter_nocache(vaddr, bytes, i);
+ len = copy_from_iter_nocache(addr, bytes, i);
if (__iter_needs_pmem_wb(i))
arch_wb_cache_pmem(addr, bytes);
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
*/
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
{
- void *vaddr = (void __force *)addr;
-
- memset(vaddr, 0, size);
+ memset(addr, 0, size);
arch_wb_cache_pmem(addr, size);
}
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
{
- clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
- /*
- * We require that wmb() be an 'sfence', that is only guaranteed on
- * 64-bit builds
- */
- return static_cpu_has(X86_FEATURE_PCOMMIT);
+ clflush_cache_range(addr, size);
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */
#endif /* __ASM_X86_PMEM_H__ */
: [pax] "a" (p));
}
-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM. The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- * void *vend = vaddr + size;
- * void *p;
- *
- * for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- * p < vend; p += boot_cpu_data.x86_clflush_size)
- * clwb(p);
- *
- * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- * // MFENCE via mb() also works
- * wmb();
- *
- * // PCOMMIT and the required SFENCE for ordering
- * pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
- alternative(ASM_NOP7,
- ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
- "sfence",
- X86_FEATURE_PCOMMIT);
-}
-
#define nop() asm volatile ("nop")
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
-#define SECONDARY_EXEC_PCOMMIT 0x00200000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
-#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }, \
- { EXIT_REASON_PCOMMIT, "PCOMMIT" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_PCOMMIT;
+ SECONDARY_EXEC_XSAVES;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_PCOMMIT |
SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- /* Currently, we allow L1 guest to directly run pcommit instruction. */
- exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
return exec_control;
}
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
- if (cpu_has_secondary_exec_ctrls())
+ if (cpu_has_secondary_exec_ctrls()) {
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
+ }
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
return 1;
}
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
- /* we never catch pcommit instruct for L1 guest. */
- WARN_ON(1);
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
- [EXIT_REASON_PCOMMIT] = handle_pcommit,
};
static const int kvm_vmx_max_exit_handlers =
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
- case EXIT_REASON_PCOMMIT:
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
default:
return true;
}
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
- if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
- if (guest_cpuid_has_pcommit(vcpu))
- vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_PCOMMIT;
- else
- vmx->nested.nested_vmx_secondary_ctls_high &=
- ~SECONDARY_EXEC_PCOMMIT;
- }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_PCOMMIT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16
If you are unsure what to do, do not enable this option.
-config ACPI_NFIT
- tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
- depends on PHYS_ADDR_T_64BIT
- depends on BLK_DEV
- depends on ARCH_HAS_MMIO_FLUSH
- select LIBNVDIMM
- help
- Infrastructure to probe ACPI 6 compliant platforms for
- NVDIMMs (NFIT) and register a libnvdimm device tree. In
- addition to storage devices this also enables libnvdimm to pass
- ACPI._DSM messages for platform/dimm configuration.
-
- To compile this driver as a module, choose M here:
- the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
- bool "NFIT DSM debug"
- depends on ACPI_NFIT
- depends on DYNAMIC_DEBUG
- default n
- help
- Enabling this option causes the nfit driver to dump the
- input and output buffers of _DSM operations on the ACPI0012
- device and its children. This can be very verbose, so leave
- it disabled unless you are debugging a hardware / firmware
- issue.
+source "drivers/acpi/nfit/Kconfig"
source "drivers/acpi/apei/Kconfig"
source "drivers/acpi/dptf/Kconfig"
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
-obj-$(CONFIG_ACPI_NFIT) += nfit.o
+obj-$(CONFIG_ACPI_NFIT) += nfit/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o
+++ /dev/null
-/*
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <linux/list_sort.h>
-#include <linux/libnvdimm.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/ndctl.h>
-#include <linux/delay.h>
-#include <linux/list.h>
-#include <linux/acpi.h>
-#include <linux/sort.h>
-#include <linux/pmem.h>
-#include <linux/io.h>
-#include <linux/nd.h>
-#include <asm/cacheflush.h>
-#include "nfit.h"
-
-/*
- * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
- * irrelevant.
- */
-#include <linux/io-64-nonatomic-hi-lo.h>
-
-static bool force_enable_dimms;
-module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
-
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
- "Number of times we overflow ARS results before abort");
-
-static bool disable_vendor_specific;
-module_param(disable_vendor_specific, bool, S_IRUGO);
-MODULE_PARM_DESC(disable_vendor_specific,
- "Limit commands to the publicly specified set\n");
-
-static struct workqueue_struct *nfit_wq;
-
-struct nfit_table_prev {
- struct list_head spas;
- struct list_head memdevs;
- struct list_head dcrs;
- struct list_head bdws;
- struct list_head idts;
- struct list_head flushes;
-};
-
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
-
-const u8 *to_nfit_uuid(enum nfit_uuids id)
-{
- return nfit_uuid[id];
-}
-EXPORT_SYMBOL(to_nfit_uuid);
-
-static struct acpi_nfit_desc *to_acpi_nfit_desc(
- struct nvdimm_bus_descriptor *nd_desc)
-{
- return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
-{
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
- /*
- * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
- * acpi_device.
- */
- if (!nd_desc->provider_name
- || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
- return NULL;
-
- return to_acpi_device(acpi_desc->dev);
-}
-
-static int xlat_status(void *buf, unsigned int cmd)
-{
- struct nd_cmd_clear_error *clear_err;
- struct nd_cmd_ars_status *ars_status;
- struct nd_cmd_ars_start *ars_start;
- struct nd_cmd_ars_cap *ars_cap;
- u16 flags;
-
- switch (cmd) {
- case ND_CMD_ARS_CAP:
- ars_cap = buf;
- if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
- return -ENOTTY;
-
- /* Command failed */
- if (ars_cap->status & 0xffff)
- return -EIO;
-
- /* No supported scan types for this range */
- flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
- if ((ars_cap->status >> 16 & flags) == 0)
- return -ENOTTY;
- break;
- case ND_CMD_ARS_START:
- ars_start = buf;
- /* ARS is in progress */
- if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
- return -EBUSY;
-
- /* Command failed */
- if (ars_start->status & 0xffff)
- return -EIO;
- break;
- case ND_CMD_ARS_STATUS:
- ars_status = buf;
- /* Command failed */
- if (ars_status->status & 0xffff)
- return -EIO;
- /* Check extended status (Upper two bytes) */
- if (ars_status->status == NFIT_ARS_STATUS_DONE)
- return 0;
-
- /* ARS is in progress */
- if (ars_status->status == NFIT_ARS_STATUS_BUSY)
- return -EBUSY;
-
- /* No ARS performed for the current boot */
- if (ars_status->status == NFIT_ARS_STATUS_NONE)
- return -EAGAIN;
-
- /*
- * ARS interrupted, either we overflowed or some other
- * agent wants the scan to stop. If we didn't overflow
- * then just continue with the returned results.
- */
- if (ars_status->status == NFIT_ARS_STATUS_INTR) {
- if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
- return -ENOSPC;
- return 0;
- }
-
- /* Unknown status */
- if (ars_status->status >> 16)
- return -EIO;
- break;
- case ND_CMD_CLEAR_ERROR:
- clear_err = buf;
- if (clear_err->status & 0xffff)
- return -EIO;
- if (!clear_err->cleared)
- return -EIO;
- if (clear_err->length > clear_err->cleared)
- return clear_err->cleared;
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
- struct nvdimm *nvdimm, unsigned int cmd, void *buf,
- unsigned int buf_len, int *cmd_rc)
-{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
- union acpi_object in_obj, in_buf, *out_obj;
- const struct nd_cmd_desc *desc = NULL;
- struct device *dev = acpi_desc->dev;
- struct nd_cmd_pkg *call_pkg = NULL;
- const char *cmd_name, *dimm_name;
- unsigned long cmd_mask, dsm_mask;
- acpi_handle handle;
- unsigned int func;
- const u8 *uuid;
- u32 offset;
- int rc, i;
-
- func = cmd;
- if (cmd == ND_CMD_CALL) {
- call_pkg = buf;
- func = call_pkg->nd_command;
- }
-
- if (nvdimm) {
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
- struct acpi_device *adev = nfit_mem->adev;
-
- if (!adev)
- return -ENOTTY;
- if (call_pkg && nfit_mem->family != call_pkg->nd_family)
- return -ENOTTY;
-
- dimm_name = nvdimm_name(nvdimm);
- cmd_name = nvdimm_cmd_name(cmd);
- cmd_mask = nvdimm_cmd_mask(nvdimm);
- dsm_mask = nfit_mem->dsm_mask;
- desc = nd_cmd_dimm_desc(cmd);
- uuid = to_nfit_uuid(nfit_mem->family);
- handle = adev->handle;
- } else {
- struct acpi_device *adev = to_acpi_dev(acpi_desc);
-
- cmd_name = nvdimm_bus_cmd_name(cmd);
- cmd_mask = nd_desc->cmd_mask;
- dsm_mask = cmd_mask;
- desc = nd_cmd_bus_desc(cmd);
- uuid = to_nfit_uuid(NFIT_DEV_BUS);
- handle = adev->handle;
- dimm_name = "bus";
- }
-
- if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
- return -ENOTTY;
-
- if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
- return -ENOTTY;
-
- in_obj.type = ACPI_TYPE_PACKAGE;
- in_obj.package.count = 1;
- in_obj.package.elements = &in_buf;
- in_buf.type = ACPI_TYPE_BUFFER;
- in_buf.buffer.pointer = buf;
- in_buf.buffer.length = 0;
-
- /* libnvdimm has already validated the input envelope */
- for (i = 0; i < desc->in_num; i++)
- in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
- i, buf);
-
- if (call_pkg) {
- /* skip over package wrapper */
- in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
- in_buf.buffer.length = call_pkg->nd_size_in;
- }
-
- if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
- dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
- __func__, dimm_name, cmd, func,
- in_buf.buffer.length);
- print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
- in_buf.buffer.pointer,
- min_t(u32, 256, in_buf.buffer.length), true);
- }
-
- out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
- if (!out_obj) {
- dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
- cmd_name);
- return -EINVAL;
- }
-
- if (call_pkg) {
- call_pkg->nd_fw_size = out_obj->buffer.length;
- memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
- out_obj->buffer.pointer,
- min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
-
- ACPI_FREE(out_obj);
- /*
- * Need to support FW function w/o known size in advance.
- * Caller can determine required size based upon nd_fw_size.
- * If we return an error (like elsewhere) then caller wouldn't
- * be able to rely upon data returned to make calculation.
- */
- return 0;
- }
-
- if (out_obj->package.type != ACPI_TYPE_BUFFER) {
- dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
- __func__, dimm_name, cmd_name, out_obj->type);
- rc = -EINVAL;
- goto out;
- }
-
- if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
- dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
- dimm_name, cmd_name, out_obj->buffer.length);
- print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
- 4, out_obj->buffer.pointer, min_t(u32, 128,
- out_obj->buffer.length), true);
- }
-
- for (i = 0, offset = 0; i < desc->out_num; i++) {
- u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
- (u32 *) out_obj->buffer.pointer);
-
- if (offset + out_size > out_obj->buffer.length) {
- dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
- __func__, dimm_name, cmd_name, i);
- break;
- }
-
- if (in_buf.buffer.length + offset + out_size > buf_len) {
- dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
- __func__, dimm_name, cmd_name, i);
- rc = -ENXIO;
- goto out;
- }
- memcpy(buf + in_buf.buffer.length + offset,
- out_obj->buffer.pointer + offset, out_size);
- offset += out_size;
- }
- if (offset + in_buf.buffer.length < buf_len) {
- if (i >= 1) {
- /*
- * status valid, return the number of bytes left
- * unfilled in the output buffer
- */
- rc = buf_len - offset - in_buf.buffer.length;
- if (cmd_rc)
- *cmd_rc = xlat_status(buf, cmd);
- } else {
- dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
- __func__, dimm_name, cmd_name, buf_len,
- offset);
- rc = -ENXIO;
- }
- } else {
- rc = 0;
- if (cmd_rc)
- *cmd_rc = xlat_status(buf, cmd);
- }
-
- out:
- ACPI_FREE(out_obj);
-
- return rc;
-}
-
-static const char *spa_type_name(u16 type)
-{
- static const char *to_name[] = {
- [NFIT_SPA_VOLATILE] = "volatile",
- [NFIT_SPA_PM] = "pmem",
- [NFIT_SPA_DCR] = "dimm-control-region",
- [NFIT_SPA_BDW] = "block-data-window",
- [NFIT_SPA_VDISK] = "volatile-disk",
- [NFIT_SPA_VCD] = "volatile-cd",
- [NFIT_SPA_PDISK] = "persistent-disk",
- [NFIT_SPA_PCD] = "persistent-cd",
-
- };
-
- if (type > NFIT_SPA_PCD)
- return "unknown";
-
- return to_name[type];
-}
-
-static int nfit_spa_type(struct acpi_nfit_system_address *spa)
-{
- int i;
-
- for (i = 0; i < NFIT_UUID_MAX; i++)
- if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
- return i;
- return -1;
-}
-
-static bool add_spa(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_system_address *spa)
-{
- size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_spa *nfit_spa;
-
- list_for_each_entry(nfit_spa, &prev->spas, list) {
- if (memcmp(nfit_spa->spa, spa, length) == 0) {
- list_move_tail(&nfit_spa->list, &acpi_desc->spas);
- return true;
- }
- }
-
- nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
- if (!nfit_spa)
- return false;
- INIT_LIST_HEAD(&nfit_spa->list);
- nfit_spa->spa = spa;
- list_add_tail(&nfit_spa->list, &acpi_desc->spas);
- dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
- spa->range_index,
- spa_type_name(nfit_spa_type(spa)));
- return true;
-}
-
-static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_memory_map *memdev)
-{
- size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_memdev *nfit_memdev;
-
- list_for_each_entry(nfit_memdev, &prev->memdevs, list)
- if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
- list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
- return true;
- }
-
- nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
- if (!nfit_memdev)
- return false;
- INIT_LIST_HEAD(&nfit_memdev->list);
- nfit_memdev->memdev = memdev;
- list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
- dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
- __func__, memdev->device_handle, memdev->range_index,
- memdev->region_index);
- return true;
-}
-
-static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_control_region *dcr)
-{
- size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_dcr *nfit_dcr;
-
- list_for_each_entry(nfit_dcr, &prev->dcrs, list)
- if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
- list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
- return true;
- }
-
- nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
- if (!nfit_dcr)
- return false;
- INIT_LIST_HEAD(&nfit_dcr->list);
- nfit_dcr->dcr = dcr;
- list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
- dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
- dcr->region_index, dcr->windows);
- return true;
-}
-
-static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_data_region *bdw)
-{
- size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_bdw *nfit_bdw;
-
- list_for_each_entry(nfit_bdw, &prev->bdws, list)
- if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
- list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
- return true;
- }
-
- nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
- if (!nfit_bdw)
- return false;
- INIT_LIST_HEAD(&nfit_bdw->list);
- nfit_bdw->bdw = bdw;
- list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
- dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
- bdw->region_index, bdw->windows);
- return true;
-}
-
-static bool add_idt(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_interleave *idt)
-{
- size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_idt *nfit_idt;
-
- list_for_each_entry(nfit_idt, &prev->idts, list)
- if (memcmp(nfit_idt->idt, idt, length) == 0) {
- list_move_tail(&nfit_idt->list, &acpi_desc->idts);
- return true;
- }
-
- nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
- if (!nfit_idt)
- return false;
- INIT_LIST_HEAD(&nfit_idt->list);
- nfit_idt->idt = idt;
- list_add_tail(&nfit_idt->list, &acpi_desc->idts);
- dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
- idt->interleave_index, idt->line_count);
- return true;
-}
-
-static bool add_flush(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev,
- struct acpi_nfit_flush_address *flush)
-{
- size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
- struct device *dev = acpi_desc->dev;
- struct nfit_flush *nfit_flush;
-
- list_for_each_entry(nfit_flush, &prev->flushes, list)
- if (memcmp(nfit_flush->flush, flush, length) == 0) {
- list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
- return true;
- }
-
- nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
- if (!nfit_flush)
- return false;
- INIT_LIST_HEAD(&nfit_flush->list);
- nfit_flush->flush = flush;
- list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
- dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
- flush->device_handle, flush->hint_count);
- return true;
-}
-
-static void *add_table(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev, void *table, const void *end)
-{
- struct device *dev = acpi_desc->dev;
- struct acpi_nfit_header *hdr;
- void *err = ERR_PTR(-ENOMEM);
-
- if (table >= end)
- return NULL;
-
- hdr = table;
- if (!hdr->length) {
- dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
- hdr->type);
- return NULL;
- }
-
- switch (hdr->type) {
- case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
- if (!add_spa(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_MEMORY_MAP:
- if (!add_memdev(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_CONTROL_REGION:
- if (!add_dcr(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_DATA_REGION:
- if (!add_bdw(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_INTERLEAVE:
- if (!add_idt(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
- if (!add_flush(acpi_desc, prev, table))
- return err;
- break;
- case ACPI_NFIT_TYPE_SMBIOS:
- dev_dbg(dev, "%s: smbios\n", __func__);
- break;
- default:
- dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
- break;
- }
-
- return table + hdr->length;
-}
-
-static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
- struct nfit_mem *nfit_mem)
-{
- u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
- u16 dcr = nfit_mem->dcr->region_index;
- struct nfit_spa *nfit_spa;
-
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- u16 range_index = nfit_spa->spa->range_index;
- int type = nfit_spa_type(nfit_spa->spa);
- struct nfit_memdev *nfit_memdev;
-
- if (type != NFIT_SPA_BDW)
- continue;
-
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- if (nfit_memdev->memdev->range_index != range_index)
- continue;
- if (nfit_memdev->memdev->device_handle != device_handle)
- continue;
- if (nfit_memdev->memdev->region_index != dcr)
- continue;
-
- nfit_mem->spa_bdw = nfit_spa->spa;
- return;
- }
- }
-
- dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
- nfit_mem->spa_dcr->range_index);
- nfit_mem->bdw = NULL;
-}
-
-static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
- struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
-{
- u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
- struct nfit_memdev *nfit_memdev;
- struct nfit_flush *nfit_flush;
- struct nfit_bdw *nfit_bdw;
- struct nfit_idt *nfit_idt;
- u16 idt_idx, range_index;
-
- list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
- if (nfit_bdw->bdw->region_index != dcr)
- continue;
- nfit_mem->bdw = nfit_bdw->bdw;
- break;
- }
-
- if (!nfit_mem->bdw)
- return;
-
- nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
-
- if (!nfit_mem->spa_bdw)
- return;
-
- range_index = nfit_mem->spa_bdw->range_index;
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- if (nfit_memdev->memdev->range_index != range_index ||
- nfit_memdev->memdev->region_index != dcr)
- continue;
- nfit_mem->memdev_bdw = nfit_memdev->memdev;
- idt_idx = nfit_memdev->memdev->interleave_index;
- list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
- if (nfit_idt->idt->interleave_index != idt_idx)
- continue;
- nfit_mem->idt_bdw = nfit_idt->idt;
- break;
- }
-
- list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
- if (nfit_flush->flush->device_handle !=
- nfit_memdev->memdev->device_handle)
- continue;
- nfit_mem->nfit_flush = nfit_flush;
- break;
- }
- break;
- }
-}
-
-static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa)
-{
- struct nfit_mem *nfit_mem, *found;
- struct nfit_memdev *nfit_memdev;
- int type = nfit_spa_type(spa);
-
- switch (type) {
- case NFIT_SPA_DCR:
- case NFIT_SPA_PM:
- break;
- default:
- return 0;
- }
-
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- struct nfit_dcr *nfit_dcr;
- u32 device_handle;
- u16 dcr;
-
- if (nfit_memdev->memdev->range_index != spa->range_index)
- continue;
- found = NULL;
- dcr = nfit_memdev->memdev->region_index;
- device_handle = nfit_memdev->memdev->device_handle;
- list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
- if (__to_nfit_memdev(nfit_mem)->device_handle
- == device_handle) {
- found = nfit_mem;
- break;
- }
-
- if (found)
- nfit_mem = found;
- else {
- nfit_mem = devm_kzalloc(acpi_desc->dev,
- sizeof(*nfit_mem), GFP_KERNEL);
- if (!nfit_mem)
- return -ENOMEM;
- INIT_LIST_HEAD(&nfit_mem->list);
- nfit_mem->acpi_desc = acpi_desc;
- list_add(&nfit_mem->list, &acpi_desc->dimms);
- }
-
- list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
- if (nfit_dcr->dcr->region_index != dcr)
- continue;
- /*
- * Record the control region for the dimm. For
- * the ACPI 6.1 case, where there are separate
- * control regions for the pmem vs blk
- * interfaces, be sure to record the extended
- * blk details.
- */
- if (!nfit_mem->dcr)
- nfit_mem->dcr = nfit_dcr->dcr;
- else if (nfit_mem->dcr->windows == 0
- && nfit_dcr->dcr->windows)
- nfit_mem->dcr = nfit_dcr->dcr;
- break;
- }
-
- if (dcr && !nfit_mem->dcr) {
- dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
- spa->range_index, dcr);
- return -ENODEV;
- }
-
- if (type == NFIT_SPA_DCR) {
- struct nfit_idt *nfit_idt;
- u16 idt_idx;
-
- /* multiple dimms may share a SPA when interleaved */
- nfit_mem->spa_dcr = spa;
- nfit_mem->memdev_dcr = nfit_memdev->memdev;
- idt_idx = nfit_memdev->memdev->interleave_index;
- list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
- if (nfit_idt->idt->interleave_index != idt_idx)
- continue;
- nfit_mem->idt_dcr = nfit_idt->idt;
- break;
- }
- nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
- } else {
- /*
- * A single dimm may belong to multiple SPA-PM
- * ranges, record at least one in addition to
- * any SPA-DCR range.
- */
- nfit_mem->memdev_pmem = nfit_memdev->memdev;
- }
- }
-
- return 0;
-}
-
-static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
-{
- struct nfit_mem *a = container_of(_a, typeof(*a), list);
- struct nfit_mem *b = container_of(_b, typeof(*b), list);
- u32 handleA, handleB;
-
- handleA = __to_nfit_memdev(a)->device_handle;
- handleB = __to_nfit_memdev(b)->device_handle;
- if (handleA < handleB)
- return -1;
- else if (handleA > handleB)
- return 1;
- return 0;
-}
-
-static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
-{
- struct nfit_spa *nfit_spa;
-
- /*
- * For each SPA-DCR or SPA-PMEM address range find its
- * corresponding MEMDEV(s). From each MEMDEV find the
- * corresponding DCR. Then, if we're operating on a SPA-DCR,
- * try to find a SPA-BDW and a corresponding BDW that references
- * the DCR. Throw it all into an nfit_mem object. Note, that
- * BDWs are optional.
- */
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- int rc;
-
- rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
- if (rc)
- return rc;
- }
-
- list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
-
- return 0;
-}
-
-static ssize_t revision_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
- struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-
- return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
-}
-static DEVICE_ATTR_RO(revision);
-
-static struct attribute *acpi_nfit_attributes[] = {
- &dev_attr_revision.attr,
- NULL,
-};
-
-static struct attribute_group acpi_nfit_attribute_group = {
- .name = "nfit",
- .attrs = acpi_nfit_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_attribute_groups[] = {
- &nvdimm_bus_attribute_group,
- &acpi_nfit_attribute_group,
- NULL,
-};
-
-static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
- return __to_nfit_memdev(nfit_mem);
-}
-
-static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
- return nfit_mem->dcr;
-}
-
-static ssize_t handle_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
- return sprintf(buf, "%#x\n", memdev->device_handle);
-}
-static DEVICE_ATTR_RO(handle);
-
-static ssize_t phys_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
-
- return sprintf(buf, "%#x\n", memdev->physical_id);
-}
-static DEVICE_ATTR_RO(phys_id);
-
-static ssize_t vendor_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t rev_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
-}
-static DEVICE_ATTR_RO(rev_id);
-
-static ssize_t device_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t subsystem_vendor_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
-}
-static DEVICE_ATTR_RO(subsystem_vendor);
-
-static ssize_t subsystem_rev_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n",
- be16_to_cpu(dcr->subsystem_revision_id));
-}
-static DEVICE_ATTR_RO(subsystem_rev_id);
-
-static ssize_t subsystem_device_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
-}
-static DEVICE_ATTR_RO(subsystem_device);
-
-static int num_nvdimm_formats(struct nvdimm *nvdimm)
-{
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
- int formats = 0;
-
- if (nfit_mem->memdev_pmem)
- formats++;
- if (nfit_mem->memdev_bdw)
- formats++;
- return formats;
-}
-
-static ssize_t format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
-}
-static DEVICE_ATTR_RO(format);
-
-static ssize_t format1_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u32 handle;
- ssize_t rc = -ENXIO;
- struct nfit_mem *nfit_mem;
- struct nfit_memdev *nfit_memdev;
- struct acpi_nfit_desc *acpi_desc;
- struct nvdimm *nvdimm = to_nvdimm(dev);
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- nfit_mem = nvdimm_provider_data(nvdimm);
- acpi_desc = nfit_mem->acpi_desc;
- handle = to_nfit_memdev(dev)->device_handle;
-
- /* assumes DIMMs have at most 2 published interface codes */
- mutex_lock(&acpi_desc->init_mutex);
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
- struct nfit_dcr *nfit_dcr;
-
- if (memdev->device_handle != handle)
- continue;
-
- list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
- if (nfit_dcr->dcr->region_index != memdev->region_index)
- continue;
- if (nfit_dcr->dcr->code == dcr->code)
- continue;
- rc = sprintf(buf, "0x%04x\n",
- le16_to_cpu(nfit_dcr->dcr->code));
- break;
- }
- if (rc != ENXIO)
- break;
- }
- mutex_unlock(&acpi_desc->init_mutex);
- return rc;
-}
-static DEVICE_ATTR_RO(format1);
-
-static ssize_t formats_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
-
- return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
-}
-static DEVICE_ATTR_RO(formats);
-
-static ssize_t serial_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(serial);
-
-static ssize_t family_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
- if (nfit_mem->family < 0)
- return -ENXIO;
- return sprintf(buf, "%d\n", nfit_mem->family);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t dsm_mask_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-
- if (nfit_mem->family < 0)
- return -ENXIO;
- return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
-}
-static DEVICE_ATTR_RO(dsm_mask);
-
-static ssize_t flags_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u16 flags = to_nfit_memdev(dev)->flags;
-
- return sprintf(buf, "%s%s%s%s%s\n",
- flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
- flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
- flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
- flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
- flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
-}
-static DEVICE_ATTR_RO(flags);
-
-static ssize_t id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
-
- if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
- return sprintf(buf, "%04x-%02x-%04x-%08x\n",
- be16_to_cpu(dcr->vendor_id),
- dcr->manufacturing_location,
- be16_to_cpu(dcr->manufacturing_date),
- be32_to_cpu(dcr->serial_number));
- else
- return sprintf(buf, "%04x-%08x\n",
- be16_to_cpu(dcr->vendor_id),
- be32_to_cpu(dcr->serial_number));
-}
-static DEVICE_ATTR_RO(id);
-
-static struct attribute *acpi_nfit_dimm_attributes[] = {
- &dev_attr_handle.attr,
- &dev_attr_phys_id.attr,
- &dev_attr_vendor.attr,
- &dev_attr_device.attr,
- &dev_attr_rev_id.attr,
- &dev_attr_subsystem_vendor.attr,
- &dev_attr_subsystem_device.attr,
- &dev_attr_subsystem_rev_id.attr,
- &dev_attr_format.attr,
- &dev_attr_formats.attr,
- &dev_attr_format1.attr,
- &dev_attr_serial.attr,
- &dev_attr_flags.attr,
- &dev_attr_id.attr,
- &dev_attr_family.attr,
- &dev_attr_dsm_mask.attr,
- NULL,
-};
-
-static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
- struct attribute *a, int n)
-{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct nvdimm *nvdimm = to_nvdimm(dev);
-
- if (!to_nfit_dcr(dev))
- return 0;
- if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
- return 0;
- return a->mode;
-}
-
-static struct attribute_group acpi_nfit_dimm_attribute_group = {
- .name = "nfit",
- .attrs = acpi_nfit_dimm_attributes,
- .is_visible = acpi_nfit_dimm_attr_visible,
-};
-
-static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
- &nvdimm_attribute_group,
- &nd_device_attribute_group,
- &acpi_nfit_dimm_attribute_group,
- NULL,
-};
-
-static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
- u32 device_handle)
-{
- struct nfit_mem *nfit_mem;
-
- list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
- if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
- return nfit_mem->nvdimm;
-
- return NULL;
-}
-
-static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
- struct nfit_mem *nfit_mem, u32 device_handle)
-{
- struct acpi_device *adev, *adev_dimm;
- struct device *dev = acpi_desc->dev;
- unsigned long dsm_mask;
- const u8 *uuid;
- int i;
-
- /* nfit test assumes 1:1 relationship between commands and dsms */
- nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
- nfit_mem->family = NVDIMM_FAMILY_INTEL;
- adev = to_acpi_dev(acpi_desc);
- if (!adev)
- return 0;
-
- adev_dimm = acpi_find_child_device(adev, device_handle, false);
- nfit_mem->adev = adev_dimm;
- if (!adev_dimm) {
- dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
- device_handle);
- return force_enable_dimms ? 0 : -ENODEV;
- }
-
- /*
- * Until standardization materializes we need to consider up to 3
- * different command sets. Note, that checking for function0 (bit0)
- * tells us if any commands are reachable through this uuid.
- */
- for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
- if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
- break;
-
- /* limit the supported commands to those that are publicly documented */
- nfit_mem->family = i;
- if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
- dsm_mask = 0x3fe;
- if (disable_vendor_specific)
- dsm_mask &= ~(1 << ND_CMD_VENDOR);
- } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
- dsm_mask = 0x1c3c76;
- else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
- dsm_mask = 0x1fe;
- if (disable_vendor_specific)
- dsm_mask &= ~(1 << 8);
- } else {
- dev_dbg(dev, "unknown dimm command family\n");
- nfit_mem->family = -1;
- /* DSMs are optional, continue loading the driver... */
- return 0;
- }
-
- uuid = to_nfit_uuid(nfit_mem->family);
- for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
- if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
- set_bit(i, &nfit_mem->dsm_mask);
-
- return 0;
-}
-
-static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
-{
- struct nfit_mem *nfit_mem;
- int dimm_count = 0;
-
- list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
- unsigned long flags = 0, cmd_mask;
- struct nvdimm *nvdimm;
- u32 device_handle;
- u16 mem_flags;
- int rc;
-
- device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
- nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
- if (nvdimm) {
- dimm_count++;
- continue;
- }
-
- if (nfit_mem->bdw && nfit_mem->memdev_pmem)
- flags |= NDD_ALIASING;
-
- mem_flags = __to_nfit_memdev(nfit_mem)->flags;
- if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
- flags |= NDD_UNARMED;
-
- rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
- if (rc)
- continue;
-
- /*
- * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
- * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
- * userspace interface.
- */
- cmd_mask = 1UL << ND_CMD_CALL;
- if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
- cmd_mask |= nfit_mem->dsm_mask;
-
- nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
- acpi_nfit_dimm_attribute_groups,
- flags, cmd_mask);
- if (!nvdimm)
- return -ENOMEM;
-
- nfit_mem->nvdimm = nvdimm;
- dimm_count++;
-
- if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
- continue;
-
- dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
- nvdimm_name(nvdimm),
- mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
- mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
- mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
- mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
-
- }
-
- return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
-}
-
-static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
-{
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
- struct acpi_device *adev;
- int i;
-
- nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
- adev = to_acpi_dev(acpi_desc);
- if (!adev)
- return;
-
- for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
- if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
- set_bit(i, &nd_desc->cmd_mask);
-}
-
-static ssize_t range_index_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nd_region *nd_region = to_nd_region(dev);
- struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
-
- return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
-}
-static DEVICE_ATTR_RO(range_index);
-
-static struct attribute *acpi_nfit_region_attributes[] = {
- &dev_attr_range_index.attr,
- NULL,
-};
-
-static struct attribute_group acpi_nfit_region_attribute_group = {
- .name = "nfit",
- .attrs = acpi_nfit_region_attributes,
-};
-
-static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
- &nd_region_attribute_group,
- &nd_mapping_attribute_group,
- &nd_device_attribute_group,
- &nd_numa_attribute_group,
- &acpi_nfit_region_attribute_group,
- NULL,
-};
-
-/* enough info to uniquely specify an interleave set */
-struct nfit_set_info {
- struct nfit_set_info_map {
- u64 region_offset;
- u32 serial_number;
- u32 pad;
- } mapping[0];
-};
-
-static size_t sizeof_nfit_set_info(int num_mappings)
-{
- return sizeof(struct nfit_set_info)
- + num_mappings * sizeof(struct nfit_set_info_map);
-}
-
-static int cmp_map(const void *m0, const void *m1)
-{
- const struct nfit_set_info_map *map0 = m0;
- const struct nfit_set_info_map *map1 = m1;
-
- return memcmp(&map0->region_offset, &map1->region_offset,
- sizeof(u64));
-}
-
-/* Retrieve the nth entry referencing this spa */
-static struct acpi_nfit_memory_map *memdev_from_spa(
- struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
-{
- struct nfit_memdev *nfit_memdev;
-
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
- if (nfit_memdev->memdev->range_index == range_index)
- if (n-- == 0)
- return nfit_memdev->memdev;
- return NULL;
-}
-
-static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
- struct nd_region_desc *ndr_desc,
- struct acpi_nfit_system_address *spa)
-{
- int i, spa_type = nfit_spa_type(spa);
- struct device *dev = acpi_desc->dev;
- struct nd_interleave_set *nd_set;
- u16 nr = ndr_desc->num_mappings;
- struct nfit_set_info *info;
-
- if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
- /* pass */;
- else
- return 0;
-
- nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
- if (!nd_set)
- return -ENOMEM;
-
- info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
- for (i = 0; i < nr; i++) {
- struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
- struct nfit_set_info_map *map = &info->mapping[i];
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
- struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
- struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
- spa->range_index, i);
-
- if (!memdev || !nfit_mem->dcr) {
- dev_err(dev, "%s: failed to find DCR\n", __func__);
- return -ENODEV;
- }
-
- map->region_offset = memdev->region_offset;
- map->serial_number = nfit_mem->dcr->serial_number;
- }
-
- sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
- cmp_map, NULL);
- nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
- ndr_desc->nd_set = nd_set;
- devm_kfree(dev, info);
-
- return 0;
-}
-
-static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
-{
- struct acpi_nfit_interleave *idt = mmio->idt;
- u32 sub_line_offset, line_index, line_offset;
- u64 line_no, table_skip_count, table_offset;
-
- line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
- table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
- line_offset = idt->line_offset[line_index]
- * mmio->line_size;
- table_offset = table_skip_count * mmio->table_size;
-
- return mmio->base_offset + line_offset + table_offset + sub_line_offset;
-}
-
-static void wmb_blk(struct nfit_blk *nfit_blk)
-{
-
- if (nfit_blk->nvdimm_flush) {
- /*
- * The first wmb() is needed to 'sfence' all previous writes
- * such that they are architecturally visible for the platform
- * buffer flush. Note that we've already arranged for pmem
- * writes to avoid the cache via arch_memcpy_to_pmem(). The
- * final wmb() ensures ordering for the NVDIMM flush write.
- */
- wmb();
- writeq(1, nfit_blk->nvdimm_flush);
- wmb();
- } else
- wmb_pmem();
-}
-
-static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
-{
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
- u64 offset = nfit_blk->stat_offset + mmio->size * bw;
-
- if (mmio->num_lines)
- offset = to_interleave_offset(offset, mmio);
-
- return readl(mmio->addr.base + offset);
-}
-
-static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
- resource_size_t dpa, unsigned int len, unsigned int write)
-{
- u64 cmd, offset;
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-
- enum {
- BCW_OFFSET_MASK = (1ULL << 48)-1,
- BCW_LEN_SHIFT = 48,
- BCW_LEN_MASK = (1ULL << 8) - 1,
- BCW_CMD_SHIFT = 56,
- };
-
- cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
- len = len >> L1_CACHE_SHIFT;
- cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
- cmd |= ((u64) write) << BCW_CMD_SHIFT;
-
- offset = nfit_blk->cmd_offset + mmio->size * bw;
- if (mmio->num_lines)
- offset = to_interleave_offset(offset, mmio);
-
- writeq(cmd, mmio->addr.base + offset);
- wmb_blk(nfit_blk);
-
- if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
- readq(mmio->addr.base + offset);
-}
-
-static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
- resource_size_t dpa, void *iobuf, size_t len, int rw,
- unsigned int lane)
-{
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
- unsigned int copied = 0;
- u64 base_offset;
- int rc;
-
- base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
- + lane * mmio->size;
- write_blk_ctl(nfit_blk, lane, dpa, len, rw);
- while (len) {
- unsigned int c;
- u64 offset;
-
- if (mmio->num_lines) {
- u32 line_offset;
-
- offset = to_interleave_offset(base_offset + copied,
- mmio);
- div_u64_rem(offset, mmio->line_size, &line_offset);
- c = min_t(size_t, len, mmio->line_size - line_offset);
- } else {
- offset = base_offset + nfit_blk->bdw_offset;
- c = len;
- }
-
- if (rw)
- memcpy_to_pmem(mmio->addr.aperture + offset,
- iobuf + copied, c);
- else {
- if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
- mmio_flush_range((void __force *)
- mmio->addr.aperture + offset, c);
-
- memcpy_from_pmem(iobuf + copied,
- mmio->addr.aperture + offset, c);
- }
-
- copied += c;
- len -= c;
- }
-
- if (rw)
- wmb_blk(nfit_blk);
-
- rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
- return rc;
-}
-
-static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
- resource_size_t dpa, void *iobuf, u64 len, int rw)
-{
- struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
- struct nd_region *nd_region = nfit_blk->nd_region;
- unsigned int lane, copied = 0;
- int rc = 0;
-
- lane = nd_region_acquire_lane(nd_region);
- while (len) {
- u64 c = min(len, mmio->size);
-
- rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
- iobuf + copied, c, rw, lane);
- if (rc)
- break;
-
- copied += c;
- len -= c;
- }
- nd_region_release_lane(nd_region, lane);
-
- return rc;
-}
-
-static void nfit_spa_mapping_release(struct kref *kref)
-{
- struct nfit_spa_mapping *spa_map = to_spa_map(kref);
- struct acpi_nfit_system_address *spa = spa_map->spa;
- struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
- dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
- if (spa_map->type == SPA_MAP_APERTURE)
- memunmap((void __force *)spa_map->addr.aperture);
- else
- iounmap(spa_map->addr.base);
- release_mem_region(spa->address, spa->length);
- list_del(&spa_map->list);
- kfree(spa_map);
-}
-
-static struct nfit_spa_mapping *find_spa_mapping(
- struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa)
-{
- struct nfit_spa_mapping *spa_map;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
- list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
- if (spa_map->spa == spa)
- return spa_map;
-
- return NULL;
-}
-
-static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa)
-{
- struct nfit_spa_mapping *spa_map;
-
- mutex_lock(&acpi_desc->spa_map_mutex);
- spa_map = find_spa_mapping(acpi_desc, spa);
-
- if (spa_map)
- kref_put(&spa_map->kref, nfit_spa_mapping_release);
- mutex_unlock(&acpi_desc->spa_map_mutex);
-}
-
-static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
- resource_size_t start = spa->address;
- resource_size_t n = spa->length;
- struct nfit_spa_mapping *spa_map;
- struct resource *res;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-
- spa_map = find_spa_mapping(acpi_desc, spa);
- if (spa_map) {
- kref_get(&spa_map->kref);
- return spa_map->addr.base;
- }
-
- spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
- if (!spa_map)
- return NULL;
-
- INIT_LIST_HEAD(&spa_map->list);
- spa_map->spa = spa;
- kref_init(&spa_map->kref);
- spa_map->acpi_desc = acpi_desc;
-
- res = request_mem_region(start, n, dev_name(acpi_desc->dev));
- if (!res)
- goto err_mem;
-
- spa_map->type = type;
- if (type == SPA_MAP_APERTURE)
- spa_map->addr.aperture = (void __pmem *)memremap(start, n,
- ARCH_MEMREMAP_PMEM);
- else
- spa_map->addr.base = ioremap_nocache(start, n);
-
-
- if (!spa_map->addr.base)
- goto err_map;
-
- list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
- return spa_map->addr.base;
-
- err_map:
- release_mem_region(start, n);
- err_mem:
- kfree(spa_map);
- return NULL;
-}
-
-/**
- * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
- * @nvdimm_bus: NFIT-bus that provided the spa table entry
- * @nfit_spa: spa table to map
- * @type: aperture or control region
- *
- * In the case where block-data-window apertures and
- * dimm-control-regions are interleaved they will end up sharing a
- * single request_mem_region() + ioremap() for the address range. In
- * the style of devm nfit_spa_map() mappings are automatically dropped
- * when all region devices referencing the same mapping are disabled /
- * unbound.
- */
-static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
- void __iomem *iomem;
-
- mutex_lock(&acpi_desc->spa_map_mutex);
- iomem = __nfit_spa_map(acpi_desc, spa, type);
- mutex_unlock(&acpi_desc->spa_map_mutex);
-
- return iomem;
-}
-
-static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
- struct acpi_nfit_interleave *idt, u16 interleave_ways)
-{
- if (idt) {
- mmio->num_lines = idt->line_count;
- mmio->line_size = idt->line_size;
- if (interleave_ways == 0)
- return -ENXIO;
- mmio->table_size = mmio->num_lines * interleave_ways
- * mmio->line_size;
- }
-
- return 0;
-}
-
-static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
- struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
-{
- struct nd_cmd_dimm_flags flags;
- int rc;
-
- memset(&flags, 0, sizeof(flags));
- rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
- sizeof(flags), NULL);
-
- if (rc >= 0 && flags.status == 0)
- nfit_blk->dimm_flags = flags.flags;
- else if (rc == -ENOTTY) {
- /* fall back to a conservative default */
- nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
- rc = 0;
- } else
- rc = -ENXIO;
-
- return rc;
-}
-
-static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
- struct device *dev)
-{
- struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
- struct nd_blk_region *ndbr = to_nd_blk_region(dev);
- struct nfit_flush *nfit_flush;
- struct nfit_blk_mmio *mmio;
- struct nfit_blk *nfit_blk;
- struct nfit_mem *nfit_mem;
- struct nvdimm *nvdimm;
- int rc;
-
- nvdimm = nd_blk_region_to_dimm(ndbr);
- nfit_mem = nvdimm_provider_data(nvdimm);
- if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
- dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
- nfit_mem ? "" : " nfit_mem",
- (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
- (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
- return -ENXIO;
- }
-
- nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
- if (!nfit_blk)
- return -ENOMEM;
- nd_blk_region_set_provider_data(ndbr, nfit_blk);
- nfit_blk->nd_region = to_nd_region(dev);
-
- /* map block aperture memory */
- nfit_blk->bdw_offset = nfit_mem->bdw->offset;
- mmio = &nfit_blk->mmio[BDW];
- mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
- SPA_MAP_APERTURE);
- if (!mmio->addr.base) {
- dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
- nvdimm_name(nvdimm));
- return -ENOMEM;
- }
- mmio->size = nfit_mem->bdw->size;
- mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
- mmio->idt = nfit_mem->idt_bdw;
- mmio->spa = nfit_mem->spa_bdw;
- rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
- nfit_mem->memdev_bdw->interleave_ways);
- if (rc) {
- dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
- __func__, nvdimm_name(nvdimm));
- return rc;
- }
-
- /* map block control memory */
- nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
- nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
- mmio = &nfit_blk->mmio[DCR];
- mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
- SPA_MAP_CONTROL);
- if (!mmio->addr.base) {
- dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
- nvdimm_name(nvdimm));
- return -ENOMEM;
- }
- mmio->size = nfit_mem->dcr->window_size;
- mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
- mmio->idt = nfit_mem->idt_dcr;
- mmio->spa = nfit_mem->spa_dcr;
- rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
- nfit_mem->memdev_dcr->interleave_ways);
- if (rc) {
- dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
- __func__, nvdimm_name(nvdimm));
- return rc;
- }
-
- rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
- if (rc < 0) {
- dev_dbg(dev, "%s: %s failed get DIMM flags\n",
- __func__, nvdimm_name(nvdimm));
- return rc;
- }
-
- nfit_flush = nfit_mem->nfit_flush;
- if (nfit_flush && nfit_flush->flush->hint_count != 0) {
- nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
- nfit_flush->flush->hint_address[0], 8);
- if (!nfit_blk->nvdimm_flush)
- return -ENOMEM;
- }
-
- if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
- dev_warn(dev, "unable to guarantee persistence of writes\n");
-
- if (mmio->line_size == 0)
- return 0;
-
- if ((u32) nfit_blk->cmd_offset % mmio->line_size
- + 8 > mmio->line_size) {
- dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
- return -ENXIO;
- } else if ((u32) nfit_blk->stat_offset % mmio->line_size
- + 8 > mmio->line_size) {
- dev_dbg(dev, "stat_offset crosses interleave boundary\n");
- return -ENXIO;
- }
-
- return 0;
-}
-
-static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
- struct device *dev)
-{
- struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
- struct nd_blk_region *ndbr = to_nd_blk_region(dev);
- struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
- int i;
-
- if (!nfit_blk)
- return; /* never enabled */
-
- /* auto-free BLK spa mappings */
- for (i = 0; i < 2; i++) {
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
-
- if (mmio->addr.base)
- nfit_spa_unmap(acpi_desc, mmio->spa);
- }
- nd_blk_region_set_provider_data(ndbr, NULL);
- /* devm will free nfit_blk */
-}
-
-static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
- struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
-{
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- int cmd_rc, rc;
-
- cmd->address = spa->address;
- cmd->length = spa->length;
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
- sizeof(*cmd), &cmd_rc);
- if (rc < 0)
- return rc;
- return cmd_rc;
-}
-
-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
-{
- int rc;
- int cmd_rc;
- struct nd_cmd_ars_start ars_start;
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-
- memset(&ars_start, 0, sizeof(ars_start));
- ars_start.address = spa->address;
- ars_start.length = spa->length;
- if (nfit_spa_type(spa) == NFIT_SPA_PM)
- ars_start.type = ND_ARS_PERSISTENT;
- else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
- ars_start.type = ND_ARS_VOLATILE;
- else
- return -ENOTTY;
-
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
- sizeof(ars_start), &cmd_rc);
-
- if (rc < 0)
- return rc;
- return cmd_rc;
-}
-
-static int ars_continue(struct acpi_nfit_desc *acpi_desc)
-{
- int rc, cmd_rc;
- struct nd_cmd_ars_start ars_start;
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
-
- memset(&ars_start, 0, sizeof(ars_start));
- ars_start.address = ars_status->restart_address;
- ars_start.length = ars_status->restart_length;
- ars_start.type = ars_status->type;
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
- sizeof(ars_start), &cmd_rc);
- if (rc < 0)
- return rc;
- return cmd_rc;
-}
-
-static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
-{
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
- int rc, cmd_rc;
-
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
- acpi_desc->ars_status_size, &cmd_rc);
- if (rc < 0)
- return rc;
- return cmd_rc;
-}
-
-static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
- struct nd_cmd_ars_status *ars_status)
-{
- int rc;
- u32 i;
-
- for (i = 0; i < ars_status->num_records; i++) {
- rc = nvdimm_bus_add_poison(nvdimm_bus,
- ars_status->records[i].err_address,
- ars_status->records[i].length);
- if (rc)
- return rc;
- }
-
- return 0;
-}
-
-static void acpi_nfit_remove_resource(void *data)
-{
- struct resource *res = data;
-
- remove_resource(res);
-}
-
-static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
- struct nd_region_desc *ndr_desc)
-{
- struct resource *res, *nd_res = ndr_desc->res;
- int is_pmem, ret;
-
- /* No operation if the region is already registered as PMEM */
- is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
- IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
- if (is_pmem == REGION_INTERSECTS)
- return 0;
-
- res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
- if (!res)
- return -ENOMEM;
-
- res->name = "Persistent Memory";
- res->start = nd_res->start;
- res->end = nd_res->end;
- res->flags = IORESOURCE_MEM;
- res->desc = IORES_DESC_PERSISTENT_MEMORY;
-
- ret = insert_resource(&iomem_resource, res);
- if (ret)
- return ret;
-
- ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
- if (ret) {
- remove_resource(res);
- return ret;
- }
-
- return 0;
-}
-
-static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
- struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
- struct acpi_nfit_memory_map *memdev,
- struct nfit_spa *nfit_spa)
-{
- struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
- memdev->device_handle);
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- struct nd_blk_region_desc *ndbr_desc;
- struct nfit_mem *nfit_mem;
- int blk_valid = 0;
-
- if (!nvdimm) {
- dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
- spa->range_index, memdev->device_handle);
- return -ENODEV;
- }
-
- nd_mapping->nvdimm = nvdimm;
- switch (nfit_spa_type(spa)) {
- case NFIT_SPA_PM:
- case NFIT_SPA_VOLATILE:
- nd_mapping->start = memdev->address;
- nd_mapping->size = memdev->region_size;
- break;
- case NFIT_SPA_DCR:
- nfit_mem = nvdimm_provider_data(nvdimm);
- if (!nfit_mem || !nfit_mem->bdw) {
- dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
- spa->range_index, nvdimm_name(nvdimm));
- } else {
- nd_mapping->size = nfit_mem->bdw->capacity;
- nd_mapping->start = nfit_mem->bdw->start_address;
- ndr_desc->num_lanes = nfit_mem->bdw->windows;
- blk_valid = 1;
- }
-
- ndr_desc->nd_mapping = nd_mapping;
- ndr_desc->num_mappings = blk_valid;
- ndbr_desc = to_blk_region_desc(ndr_desc);
- ndbr_desc->enable = acpi_nfit_blk_region_enable;
- ndbr_desc->disable = acpi_nfit_blk_region_disable;
- ndbr_desc->do_io = acpi_desc->blk_do_io;
- nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
- ndr_desc);
- if (!nfit_spa->nd_region)
- return -ENOMEM;
- break;
- }
-
- return 0;
-}
-
-static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
- struct nfit_spa *nfit_spa)
-{
- static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- struct nd_blk_region_desc ndbr_desc;
- struct nd_region_desc *ndr_desc;
- struct nfit_memdev *nfit_memdev;
- struct nvdimm_bus *nvdimm_bus;
- struct resource res;
- int count = 0, rc;
-
- if (nfit_spa->nd_region)
- return 0;
-
- if (spa->range_index == 0) {
- dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
- __func__);
- return 0;
- }
-
- memset(&res, 0, sizeof(res));
- memset(&nd_mappings, 0, sizeof(nd_mappings));
- memset(&ndbr_desc, 0, sizeof(ndbr_desc));
- res.start = spa->address;
- res.end = res.start + spa->length - 1;
- ndr_desc = &ndbr_desc.ndr_desc;
- ndr_desc->res = &res;
- ndr_desc->provider_data = nfit_spa;
- ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
- if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
- ndr_desc->numa_node = acpi_map_pxm_to_online_node(
- spa->proximity_domain);
- else
- ndr_desc->numa_node = NUMA_NO_NODE;
-
- list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
- struct nd_mapping *nd_mapping;
-
- if (memdev->range_index != spa->range_index)
- continue;
- if (count >= ND_MAX_MAPPINGS) {
- dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
- spa->range_index, ND_MAX_MAPPINGS);
- return -ENXIO;
- }
- nd_mapping = &nd_mappings[count++];
- rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
- memdev, nfit_spa);
- if (rc)
- goto out;
- }
-
- ndr_desc->nd_mapping = nd_mappings;
- ndr_desc->num_mappings = count;
- rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
- if (rc)
- goto out;
-
- nvdimm_bus = acpi_desc->nvdimm_bus;
- if (nfit_spa_type(spa) == NFIT_SPA_PM) {
- rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
- if (rc) {
- dev_warn(acpi_desc->dev,
- "failed to insert pmem resource to iomem: %d\n",
- rc);
- goto out;
- }
-
- nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
- ndr_desc);
- if (!nfit_spa->nd_region)
- rc = -ENOMEM;
- } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
- nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
- ndr_desc);
- if (!nfit_spa->nd_region)
- rc = -ENOMEM;
- }
-
- out:
- if (rc)
- dev_err(acpi_desc->dev, "failed to register spa range %d\n",
- nfit_spa->spa->range_index);
- return rc;
-}
-
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
- u32 max_ars)
-{
- struct device *dev = acpi_desc->dev;
- struct nd_cmd_ars_status *ars_status;
-
- if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
- memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
- return 0;
- }
-
- if (acpi_desc->ars_status)
- devm_kfree(dev, acpi_desc->ars_status);
- acpi_desc->ars_status = NULL;
- ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
- if (!ars_status)
- return -ENOMEM;
- acpi_desc->ars_status = ars_status;
- acpi_desc->ars_status_size = max_ars;
- return 0;
-}
-
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
- struct nfit_spa *nfit_spa)
-{
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- int rc;
-
- if (!nfit_spa->max_ars) {
- struct nd_cmd_ars_cap ars_cap;
-
- memset(&ars_cap, 0, sizeof(ars_cap));
- rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
- if (rc < 0)
- return rc;
- nfit_spa->max_ars = ars_cap.max_ars_out;
- nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
- /* check that the supported scrub types match the spa type */
- if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
- ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
- return -ENOTTY;
- else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
- ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
- return -ENOTTY;
- }
-
- if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
- return -ENOMEM;
-
- rc = ars_get_status(acpi_desc);
- if (rc < 0 && rc != -ENOSPC)
- return rc;
-
- if (ars_status_process_records(acpi_desc->nvdimm_bus,
- acpi_desc->ars_status))
- return -ENOMEM;
-
- return 0;
-}
-
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
- struct nfit_spa *nfit_spa)
-{
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- unsigned int overflow_retry = scrub_overflow_abort;
- u64 init_ars_start = 0, init_ars_len = 0;
- struct device *dev = acpi_desc->dev;
- unsigned int tmo = scrub_timeout;
- int rc;
-
- if (nfit_spa->ars_done || !nfit_spa->nd_region)
- return;
-
- rc = ars_start(acpi_desc, nfit_spa);
- /*
- * If we timed out the initial scan we'll still be busy here,
- * and will wait another timeout before giving up permanently.
- */
- if (rc < 0 && rc != -EBUSY)
- return;
-
- do {
- u64 ars_start, ars_len;
-
- if (acpi_desc->cancel)
- break;
- rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
- if (rc == -ENOTTY)
- break;
- if (rc == -EBUSY && !tmo) {
- dev_warn(dev, "range %d ars timeout, aborting\n",
- spa->range_index);
- break;
- }
-
- if (rc == -EBUSY) {
- /*
- * Note, entries may be appended to the list
- * while the lock is dropped, but the workqueue
- * being active prevents entries being deleted /
- * freed.
- */
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- mutex_lock(&acpi_desc->init_mutex);
- continue;
- }
-
- /* we got some results, but there are more pending... */
- if (rc == -ENOSPC && overflow_retry--) {
- if (!init_ars_len) {
- init_ars_len = acpi_desc->ars_status->length;
- init_ars_start = acpi_desc->ars_status->address;
- }
- rc = ars_continue(acpi_desc);
- }
-
- if (rc < 0) {
- dev_warn(dev, "range %d ars continuation failed\n",
- spa->range_index);
- break;
- }
-
- if (init_ars_len) {
- ars_start = init_ars_start;
- ars_len = init_ars_len;
- } else {
- ars_start = acpi_desc->ars_status->address;
- ars_len = acpi_desc->ars_status->length;
- }
- dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
- spa->range_index, ars_start, ars_len);
- /* notify the region about new poison entries */
- nvdimm_region_notify(nfit_spa->nd_region,
- NVDIMM_REVALIDATE_POISON);
- break;
- } while (1);
-}
-
-static void acpi_nfit_scrub(struct work_struct *work)
-{
- struct device *dev;
- u64 init_scrub_length = 0;
- struct nfit_spa *nfit_spa;
- u64 init_scrub_address = 0;
- bool init_ars_done = false;
- struct acpi_nfit_desc *acpi_desc;
- unsigned int tmo = scrub_timeout;
- unsigned int overflow_retry = scrub_overflow_abort;
-
- acpi_desc = container_of(work, typeof(*acpi_desc), work);
- dev = acpi_desc->dev;
-
- /*
- * We scrub in 2 phases. The first phase waits for any platform
- * firmware initiated scrubs to complete and then we go search for the
- * affected spa regions to mark them scanned. In the second phase we
- * initiate a directed scrub for every range that was not scrubbed in
- * phase 1.
- */
-
- /* process platform firmware initiated scrubs */
- retry:
- mutex_lock(&acpi_desc->init_mutex);
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- struct nd_cmd_ars_status *ars_status;
- struct acpi_nfit_system_address *spa;
- u64 ars_start, ars_len;
- int rc;
-
- if (acpi_desc->cancel)
- break;
-
- if (nfit_spa->nd_region)
- continue;
-
- if (init_ars_done) {
- /*
- * No need to re-query, we're now just
- * reconciling all the ranges covered by the
- * initial scrub
- */
- rc = 0;
- } else
- rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
- if (rc == -ENOTTY) {
- /* no ars capability, just register spa and move on */
- acpi_nfit_register_region(acpi_desc, nfit_spa);
- continue;
- }
-
- if (rc == -EBUSY && !tmo) {
- /* fallthrough to directed scrub in phase 2 */
- dev_warn(dev, "timeout awaiting ars results, continuing...\n");
- break;
- } else if (rc == -EBUSY) {
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- goto retry;
- }
-
- /* we got some results, but there are more pending... */
- if (rc == -ENOSPC && overflow_retry--) {
- ars_status = acpi_desc->ars_status;
- /*
- * Record the original scrub range, so that we
- * can recall all the ranges impacted by the
- * initial scrub.
- */
- if (!init_scrub_length) {
- init_scrub_length = ars_status->length;
- init_scrub_address = ars_status->address;
- }
- rc = ars_continue(acpi_desc);
- if (rc == 0) {
- mutex_unlock(&acpi_desc->init_mutex);
- goto retry;
- }
- }
-
- if (rc < 0) {
- /*
- * Initial scrub failed, we'll give it one more
- * try below...
- */
- break;
- }
-
- /* We got some final results, record completed ranges */
- ars_status = acpi_desc->ars_status;
- if (init_scrub_length) {
- ars_start = init_scrub_address;
- ars_len = ars_start + init_scrub_length;
- } else {
- ars_start = ars_status->address;
- ars_len = ars_status->length;
- }
- spa = nfit_spa->spa;
-
- if (!init_ars_done) {
- init_ars_done = true;
- dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
- ars_start, ars_len);
- }
- if (ars_start <= spa->address && ars_start + ars_len
- >= spa->address + spa->length)
- acpi_nfit_register_region(acpi_desc, nfit_spa);
- }
-
- /*
- * For all the ranges not covered by an initial scrub we still
- * want to see if there are errors, but it's ok to discover them
- * asynchronously.
- */
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- /*
- * Flag all the ranges that still need scrubbing, but
- * register them now to make data available.
- */
- if (nfit_spa->nd_region)
- nfit_spa->ars_done = 1;
- else
- acpi_nfit_register_region(acpi_desc, nfit_spa);
- }
-
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
- acpi_nfit_async_scrub(acpi_desc, nfit_spa);
- mutex_unlock(&acpi_desc->init_mutex);
-}
-
-static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
-{
- struct nfit_spa *nfit_spa;
- int rc;
-
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
- if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
- /* BLK regions don't need to wait for ars results */
- rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
- if (rc)
- return rc;
- }
-
- queue_work(nfit_wq, &acpi_desc->work);
- return 0;
-}
-
-static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
- struct nfit_table_prev *prev)
-{
- struct device *dev = acpi_desc->dev;
-
- if (!list_empty(&prev->spas) ||
- !list_empty(&prev->memdevs) ||
- !list_empty(&prev->dcrs) ||
- !list_empty(&prev->bdws) ||
- !list_empty(&prev->idts) ||
- !list_empty(&prev->flushes)) {
- dev_err(dev, "new nfit deletes entries (unsupported)\n");
- return -ENXIO;
- }
- return 0;
-}
-
-int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
-{
- struct device *dev = acpi_desc->dev;
- struct nfit_table_prev prev;
- const void *end;
- u8 *data;
- int rc;
-
- mutex_lock(&acpi_desc->init_mutex);
-
- INIT_LIST_HEAD(&prev.spas);
- INIT_LIST_HEAD(&prev.memdevs);
- INIT_LIST_HEAD(&prev.dcrs);
- INIT_LIST_HEAD(&prev.bdws);
- INIT_LIST_HEAD(&prev.idts);
- INIT_LIST_HEAD(&prev.flushes);
-
- list_cut_position(&prev.spas, &acpi_desc->spas,
- acpi_desc->spas.prev);
- list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
- acpi_desc->memdevs.prev);
- list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
- acpi_desc->dcrs.prev);
- list_cut_position(&prev.bdws, &acpi_desc->bdws,
- acpi_desc->bdws.prev);
- list_cut_position(&prev.idts, &acpi_desc->idts,
- acpi_desc->idts.prev);
- list_cut_position(&prev.flushes, &acpi_desc->flushes,
- acpi_desc->flushes.prev);
-
- data = (u8 *) acpi_desc->nfit;
- end = data + sz;
- while (!IS_ERR_OR_NULL(data))
- data = add_table(acpi_desc, &prev, data, end);
-
- if (IS_ERR(data)) {
- dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
- PTR_ERR(data));
- rc = PTR_ERR(data);
- goto out_unlock;
- }
-
- rc = acpi_nfit_check_deletions(acpi_desc, &prev);
- if (rc)
- goto out_unlock;
-
- if (nfit_mem_init(acpi_desc) != 0) {
- rc = -ENOMEM;
- goto out_unlock;
- }
-
- acpi_nfit_init_dsms(acpi_desc);
-
- rc = acpi_nfit_register_dimms(acpi_desc);
- if (rc)
- goto out_unlock;
-
- rc = acpi_nfit_register_regions(acpi_desc);
-
- out_unlock:
- mutex_unlock(&acpi_desc->init_mutex);
- return rc;
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_init);
-
-struct acpi_nfit_flush_work {
- struct work_struct work;
- struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
- struct acpi_nfit_flush_work *flush;
-
- flush = container_of(work, typeof(*flush), work);
- complete(&flush->cmp);
-}
-
-static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
-{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
- struct device *dev = acpi_desc->dev;
- struct acpi_nfit_flush_work flush;
-
- /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
- device_lock(dev);
- device_unlock(dev);
-
- /*
- * Scrub work could take 10s of seconds, userspace may give up so we
- * need to be interruptible while waiting.
- */
- INIT_WORK_ONSTACK(&flush.work, flush_probe);
- COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
- queue_work(nfit_wq, &flush.work);
- return wait_for_completion_interruptible(&flush.cmp);
-}
-
-static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
- struct nvdimm *nvdimm, unsigned int cmd)
-{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-
- if (nvdimm)
- return 0;
- if (cmd != ND_CMD_ARS_START)
- return 0;
-
- /*
- * The kernel and userspace may race to initiate a scrub, but
- * the scrub thread is prepared to lose that initial race. It
- * just needs guarantees that any ars it initiates are not
- * interrupted by any intervening start reqeusts from userspace.
- */
- if (work_busy(&acpi_desc->work))
- return -EBUSY;
-
- return 0;
-}
-
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
-{
- struct nvdimm_bus_descriptor *nd_desc;
-
- dev_set_drvdata(dev, acpi_desc);
- acpi_desc->dev = dev;
- acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
- nd_desc = &acpi_desc->nd_desc;
- nd_desc->provider_name = "ACPI.NFIT";
- nd_desc->ndctl = acpi_nfit_ctl;
- nd_desc->flush_probe = acpi_nfit_flush_probe;
- nd_desc->clear_to_send = acpi_nfit_clear_to_send;
- nd_desc->attr_groups = acpi_nfit_attribute_groups;
-
- INIT_LIST_HEAD(&acpi_desc->spa_maps);
- INIT_LIST_HEAD(&acpi_desc->spas);
- INIT_LIST_HEAD(&acpi_desc->dcrs);
- INIT_LIST_HEAD(&acpi_desc->bdws);
- INIT_LIST_HEAD(&acpi_desc->idts);
- INIT_LIST_HEAD(&acpi_desc->flushes);
- INIT_LIST_HEAD(&acpi_desc->memdevs);
- INIT_LIST_HEAD(&acpi_desc->dimms);
- mutex_init(&acpi_desc->spa_map_mutex);
- mutex_init(&acpi_desc->init_mutex);
- INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
-}
-EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
-
-static int acpi_nfit_add(struct acpi_device *adev)
-{
- struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_nfit_desc *acpi_desc;
- struct device *dev = &adev->dev;
- struct acpi_table_header *tbl;
- acpi_status status = AE_OK;
- acpi_size sz;
- int rc;
-
- status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
- if (ACPI_FAILURE(status)) {
- /* This is ok, we could have an nvdimm hotplugged later */
- dev_dbg(dev, "failed to find NFIT at startup\n");
- return 0;
- }
-
- acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
- if (!acpi_desc)
- return -ENOMEM;
- acpi_nfit_desc_init(acpi_desc, &adev->dev);
- acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
- if (!acpi_desc->nvdimm_bus)
- return -ENOMEM;
-
- /*
- * Save the acpi header for later and then skip it,
- * making nfit point to the first nfit table header.
- */
- acpi_desc->acpi_header = *tbl;
- acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
- sz -= sizeof(struct acpi_table_nfit);
-
- /* Evaluate _FIT and override with that if present */
- status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
- if (ACPI_SUCCESS(status) && buf.length > 0) {
- union acpi_object *obj;
- /*
- * Adjust for the acpi_object header of the _FIT
- */
- obj = buf.pointer;
- if (obj->type == ACPI_TYPE_BUFFER) {
- acpi_desc->nfit =
- (struct acpi_nfit_header *)obj->buffer.pointer;
- sz = obj->buffer.length;
- } else
- dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
- __func__, (int) obj->type);
- }
-
- rc = acpi_nfit_init(acpi_desc, sz);
- if (rc) {
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
- return rc;
- }
- return 0;
-}
-
-static int acpi_nfit_remove(struct acpi_device *adev)
-{
- struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-
- acpi_desc->cancel = 1;
- flush_workqueue(nfit_wq);
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
- return 0;
-}
-
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
-{
- struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
- struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_nfit_header *nfit_saved;
- union acpi_object *obj;
- struct device *dev = &adev->dev;
- acpi_status status;
- int ret;
-
- dev_dbg(dev, "%s: event: %d\n", __func__, event);
-
- device_lock(dev);
- if (!dev->driver) {
- /* dev->driver may be null if we're being removed */
- dev_dbg(dev, "%s: no driver found for dev\n", __func__);
- goto out_unlock;
- }
-
- if (!acpi_desc) {
- acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
- if (!acpi_desc)
- goto out_unlock;
- acpi_nfit_desc_init(acpi_desc, &adev->dev);
- acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
- if (!acpi_desc->nvdimm_bus)
- goto out_unlock;
- } else {
- /*
- * Finish previous registration before considering new
- * regions.
- */
- flush_workqueue(nfit_wq);
- }
-
- /* Evaluate _FIT */
- status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
- if (ACPI_FAILURE(status)) {
- dev_err(dev, "failed to evaluate _FIT\n");
- goto out_unlock;
- }
-
- nfit_saved = acpi_desc->nfit;
- obj = buf.pointer;
- if (obj->type == ACPI_TYPE_BUFFER) {
- acpi_desc->nfit =
- (struct acpi_nfit_header *)obj->buffer.pointer;
- ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
- if (ret) {
- /* Merge failed, restore old nfit, and exit */
- acpi_desc->nfit = nfit_saved;
- dev_err(dev, "failed to merge updated NFIT\n");
- }
- } else {
- /* Bad _FIT, restore old nfit */
- dev_err(dev, "Invalid _FIT\n");
- }
- kfree(buf.pointer);
-
- out_unlock:
- device_unlock(dev);
-}
-
-static const struct acpi_device_id acpi_nfit_ids[] = {
- { "ACPI0012", 0 },
- { "", 0 },
-};
-MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
-
-static struct acpi_driver acpi_nfit_driver = {
- .name = KBUILD_MODNAME,
- .ids = acpi_nfit_ids,
- .ops = {
- .add = acpi_nfit_add,
- .remove = acpi_nfit_remove,
- .notify = acpi_nfit_notify,
- },
-};
-
-static __init int nfit_init(void)
-{
- BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
- BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
-
- acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
- acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
- acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
- acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
- acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
- acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
- acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
- acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
- acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
- acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
- acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
- acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-
- nfit_wq = create_singlethread_workqueue("nfit");
- if (!nfit_wq)
- return -ENOMEM;
-
- return acpi_bus_register_driver(&acpi_nfit_driver);
-}
-
-static __exit void nfit_exit(void)
-{
- acpi_bus_unregister_driver(&acpi_nfit_driver);
- destroy_workqueue(nfit_wq);
-}
-
-module_init(nfit_init);
-module_exit(nfit_exit);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Intel Corporation");
+++ /dev/null
-/*
- * NVDIMM Firmware Interface Table - NFIT
- *
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#ifndef __NFIT_H__
-#define __NFIT_H__
-#include <linux/workqueue.h>
-#include <linux/libnvdimm.h>
-#include <linux/types.h>
-#include <linux/uuid.h>
-#include <linux/acpi.h>
-#include <acpi/acuuid.h>
-
-/* ACPI 6.1 */
-#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
-
-/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
-#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
-
-/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
-#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
-#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
-
-#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
- | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
- | ACPI_NFIT_MEM_NOT_ARMED)
-
-enum nfit_uuids {
- /* for simplicity alias the uuid index with the family id */
- NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
- NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
- NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
- NFIT_SPA_VOLATILE,
- NFIT_SPA_PM,
- NFIT_SPA_DCR,
- NFIT_SPA_BDW,
- NFIT_SPA_VDISK,
- NFIT_SPA_VCD,
- NFIT_SPA_PDISK,
- NFIT_SPA_PCD,
- NFIT_DEV_BUS,
- NFIT_UUID_MAX,
-};
-
-/*
- * Region format interface codes are stored with the interface as the
- * LSB and the function as the MSB.
- */
-#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
-
-enum {
- NFIT_BLK_READ_FLUSH = 1,
- NFIT_BLK_DCR_LATCH = 2,
- NFIT_ARS_STATUS_DONE = 0,
- NFIT_ARS_STATUS_BUSY = 1 << 16,
- NFIT_ARS_STATUS_NONE = 2 << 16,
- NFIT_ARS_STATUS_INTR = 3 << 16,
- NFIT_ARS_START_BUSY = 6,
- NFIT_ARS_CAP_NONE = 1,
- NFIT_ARS_F_OVERFLOW = 1,
- NFIT_ARS_TIMEOUT = 90,
-};
-
-struct nfit_spa {
- struct acpi_nfit_system_address *spa;
- struct list_head list;
- struct nd_region *nd_region;
- unsigned int ars_done:1;
- u32 clear_err_unit;
- u32 max_ars;
-};
-
-struct nfit_dcr {
- struct acpi_nfit_control_region *dcr;
- struct list_head list;
-};
-
-struct nfit_bdw {
- struct acpi_nfit_data_region *bdw;
- struct list_head list;
-};
-
-struct nfit_idt {
- struct acpi_nfit_interleave *idt;
- struct list_head list;
-};
-
-struct nfit_flush {
- struct acpi_nfit_flush_address *flush;
- struct list_head list;
-};
-
-struct nfit_memdev {
- struct acpi_nfit_memory_map *memdev;
- struct list_head list;
-};
-
-/* assembled tables for a given dimm/memory-device */
-struct nfit_mem {
- struct nvdimm *nvdimm;
- struct acpi_nfit_memory_map *memdev_dcr;
- struct acpi_nfit_memory_map *memdev_pmem;
- struct acpi_nfit_memory_map *memdev_bdw;
- struct acpi_nfit_control_region *dcr;
- struct acpi_nfit_data_region *bdw;
- struct acpi_nfit_system_address *spa_dcr;
- struct acpi_nfit_system_address *spa_bdw;
- struct acpi_nfit_interleave *idt_dcr;
- struct acpi_nfit_interleave *idt_bdw;
- struct nfit_flush *nfit_flush;
- struct list_head list;
- struct acpi_device *adev;
- struct acpi_nfit_desc *acpi_desc;
- unsigned long dsm_mask;
- int family;
-};
-
-struct acpi_nfit_desc {
- struct nvdimm_bus_descriptor nd_desc;
- struct acpi_table_header acpi_header;
- struct acpi_nfit_header *nfit;
- struct mutex spa_map_mutex;
- struct mutex init_mutex;
- struct list_head spa_maps;
- struct list_head memdevs;
- struct list_head flushes;
- struct list_head dimms;
- struct list_head spas;
- struct list_head dcrs;
- struct list_head bdws;
- struct list_head idts;
- struct nvdimm_bus *nvdimm_bus;
- struct device *dev;
- struct nd_cmd_ars_status *ars_status;
- size_t ars_status_size;
- struct work_struct work;
- unsigned int cancel:1;
- unsigned long dimm_cmd_force_en;
- unsigned long bus_cmd_force_en;
- int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
- void *iobuf, u64 len, int rw);
-};
-
-enum nd_blk_mmio_selector {
- BDW,
- DCR,
-};
-
-struct nd_blk_addr {
- union {
- void __iomem *base;
- void __pmem *aperture;
- };
-};
-
-struct nfit_blk {
- struct nfit_blk_mmio {
- struct nd_blk_addr addr;
- u64 size;
- u64 base_offset;
- u32 line_size;
- u32 num_lines;
- u32 table_size;
- struct acpi_nfit_interleave *idt;
- struct acpi_nfit_system_address *spa;
- } mmio[2];
- struct nd_region *nd_region;
- u64 bdw_offset; /* post interleave offset */
- u64 stat_offset;
- u64 cmd_offset;
- void __iomem *nvdimm_flush;
- u32 dimm_flags;
-};
-
-enum spa_map_type {
- SPA_MAP_CONTROL,
- SPA_MAP_APERTURE,
-};
-
-struct nfit_spa_mapping {
- struct acpi_nfit_desc *acpi_desc;
- struct acpi_nfit_system_address *spa;
- struct list_head list;
- struct kref kref;
- enum spa_map_type type;
- struct nd_blk_addr addr;
-};
-
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
-{
- return container_of(kref, struct nfit_spa_mapping, kref);
-}
-
-static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
- struct nfit_mem *nfit_mem)
-{
- if (nfit_mem->memdev_dcr)
- return nfit_mem->memdev_dcr;
- return nfit_mem->memdev_pmem;
-}
-
-static inline struct acpi_nfit_desc *to_acpi_desc(
- struct nvdimm_bus_descriptor *nd_desc)
-{
- return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
-const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
-void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
-#endif /* __NFIT_H__ */
--- /dev/null
+config ACPI_NFIT
+ tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+ depends on PHYS_ADDR_T_64BIT
+ depends on BLK_DEV
+ depends on ARCH_HAS_MMIO_FLUSH
+ select LIBNVDIMM
+ help
+ Infrastructure to probe ACPI 6 compliant platforms for
+ NVDIMMs (NFIT) and register a libnvdimm device tree. In
+ addition to storage devices this also enables libnvdimm to pass
+ ACPI._DSM messages for platform/dimm configuration.
+
+ To compile this driver as a module, choose M here:
+ the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+ bool "NFIT DSM debug"
+ depends on ACPI_NFIT
+ depends on DYNAMIC_DEBUG
+ default n
+ help
+ Enabling this option causes the nfit driver to dump the
+ input and output buffers of _DSM operations on the ACPI0012
+ device and its children. This can be very verbose, so leave
+ it disabled unless you are debugging a hardware / firmware
+ issue.
--- /dev/null
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
--- /dev/null
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/list_sort.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sysfs.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <linux/sort.h>
+#include <linux/pmem.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include <asm/cacheflush.h>
+#include "nfit.h"
+
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+static bool force_enable_dimms;
+module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+
+static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
+module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
+
+/* after three payloads of overflow, it's dead jim */
+static unsigned int scrub_overflow_abort = 3;
+module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_overflow_abort,
+ "Number of times we overflow ARS results before abort");
+
+static bool disable_vendor_specific;
+module_param(disable_vendor_specific, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_vendor_specific,
+ "Limit commands to the publicly specified set\n");
+
+LIST_HEAD(acpi_descs);
+DEFINE_MUTEX(acpi_desc_lock);
+
+static struct workqueue_struct *nfit_wq;
+
+struct nfit_table_prev {
+ struct list_head spas;
+ struct list_head memdevs;
+ struct list_head dcrs;
+ struct list_head bdws;
+ struct list_head idts;
+ struct list_head flushes;
+};
+
+static u8 nfit_uuid[NFIT_UUID_MAX][16];
+
+const u8 *to_nfit_uuid(enum nfit_uuids id)
+{
+ return nfit_uuid[id];
+}
+EXPORT_SYMBOL(to_nfit_uuid);
+
+static struct acpi_nfit_desc *to_acpi_nfit_desc(
+ struct nvdimm_bus_descriptor *nd_desc)
+{
+ return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+ /*
+ * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
+ * acpi_device.
+ */
+ if (!nd_desc->provider_name
+ || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
+ return NULL;
+
+ return to_acpi_device(acpi_desc->dev);
+}
+
+static int xlat_status(void *buf, unsigned int cmd)
+{
+ struct nd_cmd_clear_error *clear_err;
+ struct nd_cmd_ars_status *ars_status;
+ struct nd_cmd_ars_start *ars_start;
+ struct nd_cmd_ars_cap *ars_cap;
+ u16 flags;
+
+ switch (cmd) {
+ case ND_CMD_ARS_CAP:
+ ars_cap = buf;
+ if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+ return -ENOTTY;
+
+ /* Command failed */
+ if (ars_cap->status & 0xffff)
+ return -EIO;
+
+ /* No supported scan types for this range */
+ flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
+ if ((ars_cap->status >> 16 & flags) == 0)
+ return -ENOTTY;
+ break;
+ case ND_CMD_ARS_START:
+ ars_start = buf;
+ /* ARS is in progress */
+ if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+ return -EBUSY;
+
+ /* Command failed */
+ if (ars_start->status & 0xffff)
+ return -EIO;
+ break;
+ case ND_CMD_ARS_STATUS:
+ ars_status = buf;
+ /* Command failed */
+ if (ars_status->status & 0xffff)
+ return -EIO;
+ /* Check extended status (Upper two bytes) */
+ if (ars_status->status == NFIT_ARS_STATUS_DONE)
+ return 0;
+
+ /* ARS is in progress */
+ if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+ return -EBUSY;
+
+ /* No ARS performed for the current boot */
+ if (ars_status->status == NFIT_ARS_STATUS_NONE)
+ return -EAGAIN;
+
+ /*
+ * ARS interrupted, either we overflowed or some other
+ * agent wants the scan to stop. If we didn't overflow
+ * then just continue with the returned results.
+ */
+ if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+ if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
+ return -ENOSPC;
+ return 0;
+ }
+
+ /* Unknown status */
+ if (ars_status->status >> 16)
+ return -EIO;
+ break;
+ case ND_CMD_CLEAR_ERROR:
+ clear_err = buf;
+ if (clear_err->status & 0xffff)
+ return -EIO;
+ if (!clear_err->cleared)
+ return -EIO;
+ if (clear_err->length > clear_err->cleared)
+ return clear_err->cleared;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int buf_len, int *cmd_rc)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ union acpi_object in_obj, in_buf, *out_obj;
+ const struct nd_cmd_desc *desc = NULL;
+ struct device *dev = acpi_desc->dev;
+ struct nd_cmd_pkg *call_pkg = NULL;
+ const char *cmd_name, *dimm_name;
+ unsigned long cmd_mask, dsm_mask;
+ acpi_handle handle;
+ unsigned int func;
+ const u8 *uuid;
+ u32 offset;
+ int rc, i;
+
+ func = cmd;
+ if (cmd == ND_CMD_CALL) {
+ call_pkg = buf;
+ func = call_pkg->nd_command;
+ }
+
+ if (nvdimm) {
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct acpi_device *adev = nfit_mem->adev;
+
+ if (!adev)
+ return -ENOTTY;
+ if (call_pkg && nfit_mem->family != call_pkg->nd_family)
+ return -ENOTTY;
+
+ dimm_name = nvdimm_name(nvdimm);
+ cmd_name = nvdimm_cmd_name(cmd);
+ cmd_mask = nvdimm_cmd_mask(nvdimm);
+ dsm_mask = nfit_mem->dsm_mask;
+ desc = nd_cmd_dimm_desc(cmd);
+ uuid = to_nfit_uuid(nfit_mem->family);
+ handle = adev->handle;
+ } else {
+ struct acpi_device *adev = to_acpi_dev(acpi_desc);
+
+ cmd_name = nvdimm_bus_cmd_name(cmd);
+ cmd_mask = nd_desc->cmd_mask;
+ dsm_mask = cmd_mask;
+ desc = nd_cmd_bus_desc(cmd);
+ uuid = to_nfit_uuid(NFIT_DEV_BUS);
+ handle = adev->handle;
+ dimm_name = "bus";
+ }
+
+ if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
+ return -ENOTTY;
+
+ if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
+ return -ENOTTY;
+
+ in_obj.type = ACPI_TYPE_PACKAGE;
+ in_obj.package.count = 1;
+ in_obj.package.elements = &in_buf;
+ in_buf.type = ACPI_TYPE_BUFFER;
+ in_buf.buffer.pointer = buf;
+ in_buf.buffer.length = 0;
+
+ /* libnvdimm has already validated the input envelope */
+ for (i = 0; i < desc->in_num; i++)
+ in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
+ i, buf);
+
+ if (call_pkg) {
+ /* skip over package wrapper */
+ in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
+ in_buf.buffer.length = call_pkg->nd_size_in;
+ }
+
+ if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+ dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
+ __func__, dimm_name, cmd, func,
+ in_buf.buffer.length);
+ print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
+ in_buf.buffer.pointer,
+ min_t(u32, 256, in_buf.buffer.length), true);
+ }
+
+ out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+ if (!out_obj) {
+ dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
+ cmd_name);
+ return -EINVAL;
+ }
+
+ if (call_pkg) {
+ call_pkg->nd_fw_size = out_obj->buffer.length;
+ memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
+ out_obj->buffer.pointer,
+ min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
+
+ ACPI_FREE(out_obj);
+ /*
+ * Need to support FW function w/o known size in advance.
+ * Caller can determine required size based upon nd_fw_size.
+ * If we return an error (like elsewhere) then caller wouldn't
+ * be able to rely upon data returned to make calculation.
+ */
+ return 0;
+ }
+
+ if (out_obj->package.type != ACPI_TYPE_BUFFER) {
+ dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
+ __func__, dimm_name, cmd_name, out_obj->type);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+ dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
+ dimm_name, cmd_name, out_obj->buffer.length);
+ print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+ 4, out_obj->buffer.pointer, min_t(u32, 128,
+ out_obj->buffer.length), true);
+ }
+
+ for (i = 0, offset = 0; i < desc->out_num; i++) {
+ u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
+ (u32 *) out_obj->buffer.pointer);
+
+ if (offset + out_size > out_obj->buffer.length) {
+ dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
+ __func__, dimm_name, cmd_name, i);
+ break;
+ }
+
+ if (in_buf.buffer.length + offset + out_size > buf_len) {
+ dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
+ __func__, dimm_name, cmd_name, i);
+ rc = -ENXIO;
+ goto out;
+ }
+ memcpy(buf + in_buf.buffer.length + offset,
+ out_obj->buffer.pointer + offset, out_size);
+ offset += out_size;
+ }
+ if (offset + in_buf.buffer.length < buf_len) {
+ if (i >= 1) {
+ /*
+ * status valid, return the number of bytes left
+ * unfilled in the output buffer
+ */
+ rc = buf_len - offset - in_buf.buffer.length;
+ if (cmd_rc)
+ *cmd_rc = xlat_status(buf, cmd);
+ } else {
+ dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
+ __func__, dimm_name, cmd_name, buf_len,
+ offset);
+ rc = -ENXIO;
+ }
+ } else {
+ rc = 0;
+ if (cmd_rc)
+ *cmd_rc = xlat_status(buf, cmd);
+ }
+
+ out:
+ ACPI_FREE(out_obj);
+
+ return rc;
+}
+
+static const char *spa_type_name(u16 type)
+{
+ static const char *to_name[] = {
+ [NFIT_SPA_VOLATILE] = "volatile",
+ [NFIT_SPA_PM] = "pmem",
+ [NFIT_SPA_DCR] = "dimm-control-region",
+ [NFIT_SPA_BDW] = "block-data-window",
+ [NFIT_SPA_VDISK] = "volatile-disk",
+ [NFIT_SPA_VCD] = "volatile-cd",
+ [NFIT_SPA_PDISK] = "persistent-disk",
+ [NFIT_SPA_PCD] = "persistent-cd",
+
+ };
+
+ if (type > NFIT_SPA_PCD)
+ return "unknown";
+
+ return to_name[type];
+}
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa)
+{
+ int i;
+
+ for (i = 0; i < NFIT_UUID_MAX; i++)
+ if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+ return i;
+ return -1;
+}
+
+static bool add_spa(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_system_address *spa)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_spa *nfit_spa;
+
+ if (spa->header.length != sizeof(*spa))
+ return false;
+
+ list_for_each_entry(nfit_spa, &prev->spas, list) {
+ if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+ list_move_tail(&nfit_spa->list, &acpi_desc->spas);
+ return true;
+ }
+ }
+
+ nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+ GFP_KERNEL);
+ if (!nfit_spa)
+ return false;
+ INIT_LIST_HEAD(&nfit_spa->list);
+ memcpy(nfit_spa->spa, spa, sizeof(*spa));
+ list_add_tail(&nfit_spa->list, &acpi_desc->spas);
+ dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+ spa->range_index,
+ spa_type_name(nfit_spa_type(spa)));
+ return true;
+}
+
+static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_memory_map *memdev)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_memdev *nfit_memdev;
+
+ if (memdev->header.length != sizeof(*memdev))
+ return false;
+
+ list_for_each_entry(nfit_memdev, &prev->memdevs, list)
+ if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
+ list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+ return true;
+ }
+
+ nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
+ GFP_KERNEL);
+ if (!nfit_memdev)
+ return false;
+ INIT_LIST_HEAD(&nfit_memdev->list);
+ memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
+ list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+ dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
+ __func__, memdev->device_handle, memdev->range_index,
+ memdev->region_index);
+ return true;
+}
+
+/*
+ * An implementation may provide a truncated control region if no block windows
+ * are defined.
+ */
+static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
+{
+ if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
+ window_size))
+ return 0;
+ if (dcr->windows)
+ return sizeof(*dcr);
+ return offsetof(struct acpi_nfit_control_region, window_size);
+}
+
+static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_control_region *dcr)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_dcr *nfit_dcr;
+
+ if (!sizeof_dcr(dcr))
+ return false;
+
+ list_for_each_entry(nfit_dcr, &prev->dcrs, list)
+ if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
+ list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+ return true;
+ }
+
+ nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
+ GFP_KERNEL);
+ if (!nfit_dcr)
+ return false;
+ INIT_LIST_HEAD(&nfit_dcr->list);
+ memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
+ list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+ dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+ dcr->region_index, dcr->windows);
+ return true;
+}
+
+static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_data_region *bdw)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_bdw *nfit_bdw;
+
+ if (bdw->header.length != sizeof(*bdw))
+ return false;
+ list_for_each_entry(nfit_bdw, &prev->bdws, list)
+ if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
+ list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
+ return true;
+ }
+
+ nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
+ GFP_KERNEL);
+ if (!nfit_bdw)
+ return false;
+ INIT_LIST_HEAD(&nfit_bdw->list);
+ memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
+ list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
+ dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+ bdw->region_index, bdw->windows);
+ return true;
+}
+
+static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+{
+ if (idt->header.length < sizeof(*idt))
+ return 0;
+ return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
+}
+
+static bool add_idt(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_interleave *idt)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_idt *nfit_idt;
+
+ if (!sizeof_idt(idt))
+ return false;
+
+ list_for_each_entry(nfit_idt, &prev->idts, list) {
+ if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
+ continue;
+
+ if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
+ list_move_tail(&nfit_idt->list, &acpi_desc->idts);
+ return true;
+ }
+ }
+
+ nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
+ GFP_KERNEL);
+ if (!nfit_idt)
+ return false;
+ INIT_LIST_HEAD(&nfit_idt->list);
+ memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
+ list_add_tail(&nfit_idt->list, &acpi_desc->idts);
+ dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+ idt->interleave_index, idt->line_count);
+ return true;
+}
+
+static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
+{
+ if (flush->header.length < sizeof(*flush))
+ return 0;
+ return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
+}
+
+static bool add_flush(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev,
+ struct acpi_nfit_flush_address *flush)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_flush *nfit_flush;
+
+ if (!sizeof_flush(flush))
+ return false;
+
+ list_for_each_entry(nfit_flush, &prev->flushes, list) {
+ if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
+ continue;
+
+ if (memcmp(nfit_flush->flush, flush,
+ sizeof_flush(flush)) == 0) {
+ list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
+ return true;
+ }
+ }
+
+ nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
+ + sizeof_flush(flush), GFP_KERNEL);
+ if (!nfit_flush)
+ return false;
+ INIT_LIST_HEAD(&nfit_flush->list);
+ memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
+ list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
+ dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+ flush->device_handle, flush->hint_count);
+ return true;
+}
+
+static void *add_table(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev, void *table, const void *end)
+{
+ struct device *dev = acpi_desc->dev;
+ struct acpi_nfit_header *hdr;
+ void *err = ERR_PTR(-ENOMEM);
+
+ if (table >= end)
+ return NULL;
+
+ hdr = table;
+ if (!hdr->length) {
+ dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
+ hdr->type);
+ return NULL;
+ }
+
+ switch (hdr->type) {
+ case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
+ if (!add_spa(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_MEMORY_MAP:
+ if (!add_memdev(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_CONTROL_REGION:
+ if (!add_dcr(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_DATA_REGION:
+ if (!add_bdw(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_INTERLEAVE:
+ if (!add_idt(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
+ if (!add_flush(acpi_desc, prev, table))
+ return err;
+ break;
+ case ACPI_NFIT_TYPE_SMBIOS:
+ dev_dbg(dev, "%s: smbios\n", __func__);
+ break;
+ default:
+ dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
+ break;
+ }
+
+ return table + hdr->length;
+}
+
+static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_mem *nfit_mem)
+{
+ u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ u16 dcr = nfit_mem->dcr->region_index;
+ struct nfit_spa *nfit_spa;
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ u16 range_index = nfit_spa->spa->range_index;
+ int type = nfit_spa_type(nfit_spa->spa);
+ struct nfit_memdev *nfit_memdev;
+
+ if (type != NFIT_SPA_BDW)
+ continue;
+
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ if (nfit_memdev->memdev->range_index != range_index)
+ continue;
+ if (nfit_memdev->memdev->device_handle != device_handle)
+ continue;
+ if (nfit_memdev->memdev->region_index != dcr)
+ continue;
+
+ nfit_mem->spa_bdw = nfit_spa->spa;
+ return;
+ }
+ }
+
+ dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
+ nfit_mem->spa_dcr->range_index);
+ nfit_mem->bdw = NULL;
+}
+
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
+{
+ u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
+ struct nfit_memdev *nfit_memdev;
+ struct nfit_bdw *nfit_bdw;
+ struct nfit_idt *nfit_idt;
+ u16 idt_idx, range_index;
+
+ list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
+ if (nfit_bdw->bdw->region_index != dcr)
+ continue;
+ nfit_mem->bdw = nfit_bdw->bdw;
+ break;
+ }
+
+ if (!nfit_mem->bdw)
+ return;
+
+ nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
+
+ if (!nfit_mem->spa_bdw)
+ return;
+
+ range_index = nfit_mem->spa_bdw->range_index;
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ if (nfit_memdev->memdev->range_index != range_index ||
+ nfit_memdev->memdev->region_index != dcr)
+ continue;
+ nfit_mem->memdev_bdw = nfit_memdev->memdev;
+ idt_idx = nfit_memdev->memdev->interleave_index;
+ list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+ if (nfit_idt->idt->interleave_index != idt_idx)
+ continue;
+ nfit_mem->idt_bdw = nfit_idt->idt;
+ break;
+ }
+ break;
+ }
+}
+
+static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
+ struct acpi_nfit_system_address *spa)
+{
+ struct nfit_mem *nfit_mem, *found;
+ struct nfit_memdev *nfit_memdev;
+ int type = nfit_spa_type(spa);
+
+ switch (type) {
+ case NFIT_SPA_DCR:
+ case NFIT_SPA_PM:
+ break;
+ default:
+ return 0;
+ }
+
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ struct nfit_flush *nfit_flush;
+ struct nfit_dcr *nfit_dcr;
+ u32 device_handle;
+ u16 dcr;
+
+ if (nfit_memdev->memdev->range_index != spa->range_index)
+ continue;
+ found = NULL;
+ dcr = nfit_memdev->memdev->region_index;
+ device_handle = nfit_memdev->memdev->device_handle;
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+ if (__to_nfit_memdev(nfit_mem)->device_handle
+ == device_handle) {
+ found = nfit_mem;
+ break;
+ }
+
+ if (found)
+ nfit_mem = found;
+ else {
+ nfit_mem = devm_kzalloc(acpi_desc->dev,
+ sizeof(*nfit_mem), GFP_KERNEL);
+ if (!nfit_mem)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&nfit_mem->list);
+ nfit_mem->acpi_desc = acpi_desc;
+ list_add(&nfit_mem->list, &acpi_desc->dimms);
+ }
+
+ list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+ if (nfit_dcr->dcr->region_index != dcr)
+ continue;
+ /*
+ * Record the control region for the dimm. For
+ * the ACPI 6.1 case, where there are separate
+ * control regions for the pmem vs blk
+ * interfaces, be sure to record the extended
+ * blk details.
+ */
+ if (!nfit_mem->dcr)
+ nfit_mem->dcr = nfit_dcr->dcr;
+ else if (nfit_mem->dcr->windows == 0
+ && nfit_dcr->dcr->windows)
+ nfit_mem->dcr = nfit_dcr->dcr;
+ break;
+ }
+
+ list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+ struct acpi_nfit_flush_address *flush;
+ u16 i;
+
+ if (nfit_flush->flush->device_handle != device_handle)
+ continue;
+ nfit_mem->nfit_flush = nfit_flush;
+ flush = nfit_flush->flush;
+ nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
+ flush->hint_count
+ * sizeof(struct resource), GFP_KERNEL);
+ if (!nfit_mem->flush_wpq)
+ return -ENOMEM;
+ for (i = 0; i < flush->hint_count; i++) {
+ struct resource *res = &nfit_mem->flush_wpq[i];
+
+ res->start = flush->hint_address[i];
+ res->end = res->start + 8 - 1;
+ }
+ break;
+ }
+
+ if (dcr && !nfit_mem->dcr) {
+ dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+ spa->range_index, dcr);
+ return -ENODEV;
+ }
+
+ if (type == NFIT_SPA_DCR) {
+ struct nfit_idt *nfit_idt;
+ u16 idt_idx;
+
+ /* multiple dimms may share a SPA when interleaved */
+ nfit_mem->spa_dcr = spa;
+ nfit_mem->memdev_dcr = nfit_memdev->memdev;
+ idt_idx = nfit_memdev->memdev->interleave_index;
+ list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+ if (nfit_idt->idt->interleave_index != idt_idx)
+ continue;
+ nfit_mem->idt_dcr = nfit_idt->idt;
+ break;
+ }
+ nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
+ } else {
+ /*
+ * A single dimm may belong to multiple SPA-PM
+ * ranges, record at least one in addition to
+ * any SPA-DCR range.
+ */
+ nfit_mem->memdev_pmem = nfit_memdev->memdev;
+ }
+ }
+
+ return 0;
+}
+
+static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
+{
+ struct nfit_mem *a = container_of(_a, typeof(*a), list);
+ struct nfit_mem *b = container_of(_b, typeof(*b), list);
+ u32 handleA, handleB;
+
+ handleA = __to_nfit_memdev(a)->device_handle;
+ handleB = __to_nfit_memdev(b)->device_handle;
+ if (handleA < handleB)
+ return -1;
+ else if (handleA > handleB)
+ return 1;
+ return 0;
+}
+
+static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nfit_spa *nfit_spa;
+
+ /*
+ * For each SPA-DCR or SPA-PMEM address range find its
+ * corresponding MEMDEV(s). From each MEMDEV find the
+ * corresponding DCR. Then, if we're operating on a SPA-DCR,
+ * try to find a SPA-BDW and a corresponding BDW that references
+ * the DCR. Throw it all into an nfit_mem object. Note, that
+ * BDWs are optional.
+ */
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ int rc;
+
+ rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
+ if (rc)
+ return rc;
+ }
+
+ list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
+
+ return 0;
+}
+
+static ssize_t revision_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
+}
+static DEVICE_ATTR_RO(revision);
+
+/*
+ * This shows the number of full Address Range Scrubs that have been
+ * completed since driver load time. Userspace can wait on this using
+ * select/poll etc. A '+' at the end indicates an ARS is in progress
+ */
+static ssize_t scrub_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ ssize_t rc = -ENXIO;
+
+ device_lock(dev);
+ nd_desc = dev_get_drvdata(dev);
+ if (nd_desc) {
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
+ (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+ }
+ device_unlock(dev);
+ return rc;
+}
+
+static ssize_t scrub_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ ssize_t rc;
+ long val;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val != 1)
+ return -EINVAL;
+
+ device_lock(dev);
+ nd_desc = dev_get_drvdata(dev);
+ if (nd_desc) {
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ rc = acpi_nfit_ars_rescan(acpi_desc);
+ }
+ device_unlock(dev);
+ if (rc)
+ return rc;
+ return size;
+}
+static DEVICE_ATTR_RW(scrub);
+
+static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
+{
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+ const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
+ | 1 << ND_CMD_ARS_STATUS;
+
+ return (nd_desc->cmd_mask & mask) == mask;
+}
+
+static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+ if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
+ return 0;
+ return a->mode;
+}
+
+static struct attribute *acpi_nfit_attributes[] = {
+ &dev_attr_revision.attr,
+ &dev_attr_scrub.attr,
+ NULL,
+};
+
+static struct attribute_group acpi_nfit_attribute_group = {
+ .name = "nfit",
+ .attrs = acpi_nfit_attributes,
+ .is_visible = nfit_visible,
+};
+
+static const struct attribute_group *acpi_nfit_attribute_groups[] = {
+ &nvdimm_bus_attribute_group,
+ &acpi_nfit_attribute_group,
+ NULL,
+};
+
+static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ return __to_nfit_memdev(nfit_mem);
+}
+
+static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ return nfit_mem->dcr;
+}
+
+static ssize_t handle_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+ return sprintf(buf, "%#x\n", memdev->device_handle);
+}
+static DEVICE_ATTR_RO(handle);
+
+static ssize_t phys_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+ return sprintf(buf, "%#x\n", memdev->physical_id);
+}
+static DEVICE_ATTR_RO(phys_id);
+
+static ssize_t vendor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t rev_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
+}
+static DEVICE_ATTR_RO(rev_id);
+
+static ssize_t device_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t subsystem_vendor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
+}
+static DEVICE_ATTR_RO(subsystem_vendor);
+
+static ssize_t subsystem_rev_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n",
+ be16_to_cpu(dcr->subsystem_revision_id));
+}
+static DEVICE_ATTR_RO(subsystem_rev_id);
+
+static ssize_t subsystem_device_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
+}
+static DEVICE_ATTR_RO(subsystem_device);
+
+static int num_nvdimm_formats(struct nvdimm *nvdimm)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ int formats = 0;
+
+ if (nfit_mem->memdev_pmem)
+ formats++;
+ if (nfit_mem->memdev_bdw)
+ formats++;
+ return formats;
+}
+
+static ssize_t format_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
+}
+static DEVICE_ATTR_RO(format);
+
+static ssize_t format1_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 handle;
+ ssize_t rc = -ENXIO;
+ struct nfit_mem *nfit_mem;
+ struct nfit_memdev *nfit_memdev;
+ struct acpi_nfit_desc *acpi_desc;
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ acpi_desc = nfit_mem->acpi_desc;
+ handle = to_nfit_memdev(dev)->device_handle;
+
+ /* assumes DIMMs have at most 2 published interface codes */
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+ struct nfit_dcr *nfit_dcr;
+
+ if (memdev->device_handle != handle)
+ continue;
+
+ list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+ if (nfit_dcr->dcr->region_index != memdev->region_index)
+ continue;
+ if (nfit_dcr->dcr->code == dcr->code)
+ continue;
+ rc = sprintf(buf, "0x%04x\n",
+ le16_to_cpu(nfit_dcr->dcr->code));
+ break;
+ }
+ if (rc != ENXIO)
+ break;
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+ return rc;
+}
+static DEVICE_ATTR_RO(format1);
+
+static ssize_t formats_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
+}
+static DEVICE_ATTR_RO(formats);
+
+static ssize_t serial_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t family_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ if (nfit_mem->family < 0)
+ return -ENXIO;
+ return sprintf(buf, "%d\n", nfit_mem->family);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t dsm_mask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ if (nfit_mem->family < 0)
+ return -ENXIO;
+ return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
+static ssize_t flags_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u16 flags = to_nfit_memdev(dev)->flags;
+
+ return sprintf(buf, "%s%s%s%s%s\n",
+ flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
+ flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
+ flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
+ flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
+ flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
+}
+static DEVICE_ATTR_RO(flags);
+
+static ssize_t id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+ if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
+ return sprintf(buf, "%04x-%02x-%04x-%08x\n",
+ be16_to_cpu(dcr->vendor_id),
+ dcr->manufacturing_location,
+ be16_to_cpu(dcr->manufacturing_date),
+ be32_to_cpu(dcr->serial_number));
+ else
+ return sprintf(buf, "%04x-%08x\n",
+ be16_to_cpu(dcr->vendor_id),
+ be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *acpi_nfit_dimm_attributes[] = {
+ &dev_attr_handle.attr,
+ &dev_attr_phys_id.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_device.attr,
+ &dev_attr_rev_id.attr,
+ &dev_attr_subsystem_vendor.attr,
+ &dev_attr_subsystem_device.attr,
+ &dev_attr_subsystem_rev_id.attr,
+ &dev_attr_format.attr,
+ &dev_attr_formats.attr,
+ &dev_attr_format1.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_flags.attr,
+ &dev_attr_id.attr,
+ &dev_attr_family.attr,
+ &dev_attr_dsm_mask.attr,
+ NULL,
+};
+
+static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ if (!to_nfit_dcr(dev))
+ return 0;
+ if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
+ return 0;
+ return a->mode;
+}
+
+static struct attribute_group acpi_nfit_dimm_attribute_group = {
+ .name = "nfit",
+ .attrs = acpi_nfit_dimm_attributes,
+ .is_visible = acpi_nfit_dimm_attr_visible,
+};
+
+static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
+ &nvdimm_attribute_group,
+ &nd_device_attribute_group,
+ &acpi_nfit_dimm_attribute_group,
+ NULL,
+};
+
+static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
+ u32 device_handle)
+{
+ struct nfit_mem *nfit_mem;
+
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+ if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
+ return nfit_mem->nvdimm;
+
+ return NULL;
+}
+
+static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_mem *nfit_mem, u32 device_handle)
+{
+ struct acpi_device *adev, *adev_dimm;
+ struct device *dev = acpi_desc->dev;
+ unsigned long dsm_mask;
+ const u8 *uuid;
+ int i;
+
+ /* nfit test assumes 1:1 relationship between commands and dsms */
+ nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
+ nfit_mem->family = NVDIMM_FAMILY_INTEL;
+ adev = to_acpi_dev(acpi_desc);
+ if (!adev)
+ return 0;
+
+ adev_dimm = acpi_find_child_device(adev, device_handle, false);
+ nfit_mem->adev = adev_dimm;
+ if (!adev_dimm) {
+ dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
+ device_handle);
+ return force_enable_dimms ? 0 : -ENODEV;
+ }
+
+ /*
+ * Until standardization materializes we need to consider 4
+ * different command sets. Note, that checking for function0 (bit0)
+ * tells us if any commands are reachable through this uuid.
+ */
+ for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
+ if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+ break;
+
+ /* limit the supported commands to those that are publicly documented */
+ nfit_mem->family = i;
+ if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
+ dsm_mask = 0x3fe;
+ if (disable_vendor_specific)
+ dsm_mask &= ~(1 << ND_CMD_VENDOR);
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
+ dsm_mask = 0x1c3c76;
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+ dsm_mask = 0x1fe;
+ if (disable_vendor_specific)
+ dsm_mask &= ~(1 << 8);
+ } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
+ dsm_mask = 0xffffffff;
+ } else {
+ dev_dbg(dev, "unknown dimm command family\n");
+ nfit_mem->family = -1;
+ /* DSMs are optional, continue loading the driver... */
+ return 0;
+ }
+
+ uuid = to_nfit_uuid(nfit_mem->family);
+ for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
+ if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+ set_bit(i, &nfit_mem->dsm_mask);
+
+ return 0;
+}
+
+static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nfit_mem *nfit_mem;
+ int dimm_count = 0;
+
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ struct acpi_nfit_flush_address *flush;
+ unsigned long flags = 0, cmd_mask;
+ struct nvdimm *nvdimm;
+ u32 device_handle;
+ u16 mem_flags;
+ int rc;
+
+ device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
+ if (nvdimm) {
+ dimm_count++;
+ continue;
+ }
+
+ if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+ flags |= NDD_ALIASING;
+
+ mem_flags = __to_nfit_memdev(nfit_mem)->flags;
+ if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
+ flags |= NDD_UNARMED;
+
+ rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
+ if (rc)
+ continue;
+
+ /*
+ * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
+ * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
+ * userspace interface.
+ */
+ cmd_mask = 1UL << ND_CMD_CALL;
+ if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+ cmd_mask |= nfit_mem->dsm_mask;
+
+ flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
+ : NULL;
+ nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
+ acpi_nfit_dimm_attribute_groups,
+ flags, cmd_mask, flush ? flush->hint_count : 0,
+ nfit_mem->flush_wpq);
+ if (!nvdimm)
+ return -ENOMEM;
+
+ nfit_mem->nvdimm = nvdimm;
+ dimm_count++;
+
+ if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
+ continue;
+
+ dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
+ nvdimm_name(nvdimm),
+ mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
+ mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
+ mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
+ mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
+
+ }
+
+ return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+}
+
+static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+ struct acpi_device *adev;
+ int i;
+
+ nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
+ adev = to_acpi_dev(acpi_desc);
+ if (!adev)
+ return;
+
+ for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
+ if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+ set_bit(i, &nd_desc->cmd_mask);
+}
+
+static ssize_t range_index_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+ struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+
+ return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
+}
+static DEVICE_ATTR_RO(range_index);
+
+static struct attribute *acpi_nfit_region_attributes[] = {
+ &dev_attr_range_index.attr,
+ NULL,
+};
+
+static struct attribute_group acpi_nfit_region_attribute_group = {
+ .name = "nfit",
+ .attrs = acpi_nfit_region_attributes,
+};
+
+static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
+ &nd_region_attribute_group,
+ &nd_mapping_attribute_group,
+ &nd_device_attribute_group,
+ &nd_numa_attribute_group,
+ &acpi_nfit_region_attribute_group,
+ NULL,
+};
+
+/* enough info to uniquely specify an interleave set */
+struct nfit_set_info {
+ struct nfit_set_info_map {
+ u64 region_offset;
+ u32 serial_number;
+ u32 pad;
+ } mapping[0];
+};
+
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+ return sizeof(struct nfit_set_info)
+ + num_mappings * sizeof(struct nfit_set_info_map);
+}
+
+static int cmp_map(const void *m0, const void *m1)
+{
+ const struct nfit_set_info_map *map0 = m0;
+ const struct nfit_set_info_map *map1 = m1;
+
+ return memcmp(&map0->region_offset, &map1->region_offset,
+ sizeof(u64));
+}
+
+/* Retrieve the nth entry referencing this spa */
+static struct acpi_nfit_memory_map *memdev_from_spa(
+ struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+{
+ struct nfit_memdev *nfit_memdev;
+
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+ if (nfit_memdev->memdev->range_index == range_index)
+ if (n-- == 0)
+ return nfit_memdev->memdev;
+ return NULL;
+}
+
+static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+ struct nd_region_desc *ndr_desc,
+ struct acpi_nfit_system_address *spa)
+{
+ int i, spa_type = nfit_spa_type(spa);
+ struct device *dev = acpi_desc->dev;
+ struct nd_interleave_set *nd_set;
+ u16 nr = ndr_desc->num_mappings;
+ struct nfit_set_info *info;
+
+ if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
+ /* pass */;
+ else
+ return 0;
+
+ nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+ if (!nd_set)
+ return -ENOMEM;
+
+ info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ for (i = 0; i < nr; i++) {
+ struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+ struct nfit_set_info_map *map = &info->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
+ spa->range_index, i);
+
+ if (!memdev || !nfit_mem->dcr) {
+ dev_err(dev, "%s: failed to find DCR\n", __func__);
+ return -ENODEV;
+ }
+
+ map->region_offset = memdev->region_offset;
+ map->serial_number = nfit_mem->dcr->serial_number;
+ }
+
+ sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+ cmp_map, NULL);
+ nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+ ndr_desc->nd_set = nd_set;
+ devm_kfree(dev, info);
+
+ return 0;
+}
+
+static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
+{
+ struct acpi_nfit_interleave *idt = mmio->idt;
+ u32 sub_line_offset, line_index, line_offset;
+ u64 line_no, table_skip_count, table_offset;
+
+ line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
+ table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
+ line_offset = idt->line_offset[line_index]
+ * mmio->line_size;
+ table_offset = table_skip_count * mmio->table_size;
+
+ return mmio->base_offset + line_offset + table_offset + sub_line_offset;
+}
+
+static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+{
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+ u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+
+ if (mmio->num_lines)
+ offset = to_interleave_offset(offset, mmio);
+
+ return readl(mmio->addr.base + offset);
+}
+
+static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
+ resource_size_t dpa, unsigned int len, unsigned int write)
+{
+ u64 cmd, offset;
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+
+ enum {
+ BCW_OFFSET_MASK = (1ULL << 48)-1,
+ BCW_LEN_SHIFT = 48,
+ BCW_LEN_MASK = (1ULL << 8) - 1,
+ BCW_CMD_SHIFT = 56,
+ };
+
+ cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
+ len = len >> L1_CACHE_SHIFT;
+ cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
+ cmd |= ((u64) write) << BCW_CMD_SHIFT;
+
+ offset = nfit_blk->cmd_offset + mmio->size * bw;
+ if (mmio->num_lines)
+ offset = to_interleave_offset(offset, mmio);
+
+ writeq(cmd, mmio->addr.base + offset);
+ nvdimm_flush(nfit_blk->nd_region);
+
+ if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
+ readq(mmio->addr.base + offset);
+}
+
+static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
+ resource_size_t dpa, void *iobuf, size_t len, int rw,
+ unsigned int lane)
+{
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ unsigned int copied = 0;
+ u64 base_offset;
+ int rc;
+
+ base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
+ + lane * mmio->size;
+ write_blk_ctl(nfit_blk, lane, dpa, len, rw);
+ while (len) {
+ unsigned int c;
+ u64 offset;
+
+ if (mmio->num_lines) {
+ u32 line_offset;
+
+ offset = to_interleave_offset(base_offset + copied,
+ mmio);
+ div_u64_rem(offset, mmio->line_size, &line_offset);
+ c = min_t(size_t, len, mmio->line_size - line_offset);
+ } else {
+ offset = base_offset + nfit_blk->bdw_offset;
+ c = len;
+ }
+
+ if (rw)
+ memcpy_to_pmem(mmio->addr.aperture + offset,
+ iobuf + copied, c);
+ else {
+ if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
+ mmio_flush_range((void __force *)
+ mmio->addr.aperture + offset, c);
+
+ memcpy_from_pmem(iobuf + copied,
+ mmio->addr.aperture + offset, c);
+ }
+
+ copied += c;
+ len -= c;
+ }
+
+ if (rw)
+ nvdimm_flush(nfit_blk->nd_region);
+
+ rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
+ return rc;
+}
+
+static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
+ resource_size_t dpa, void *iobuf, u64 len, int rw)
+{
+ struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ struct nd_region *nd_region = nfit_blk->nd_region;
+ unsigned int lane, copied = 0;
+ int rc = 0;
+
+ lane = nd_region_acquire_lane(nd_region);
+ while (len) {
+ u64 c = min(len, mmio->size);
+
+ rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
+ iobuf + copied, c, rw, lane);
+ if (rc)
+ break;
+
+ copied += c;
+ len -= c;
+ }
+ nd_region_release_lane(nd_region, lane);
+
+ return rc;
+}
+
+static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
+ struct acpi_nfit_interleave *idt, u16 interleave_ways)
+{
+ if (idt) {
+ mmio->num_lines = idt->line_count;
+ mmio->line_size = idt->line_size;
+ if (interleave_ways == 0)
+ return -ENXIO;
+ mmio->table_size = mmio->num_lines * interleave_ways
+ * mmio->line_size;
+ }
+
+ return 0;
+}
+
+static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
+{
+ struct nd_cmd_dimm_flags flags;
+ int rc;
+
+ memset(&flags, 0, sizeof(flags));
+ rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
+ sizeof(flags), NULL);
+
+ if (rc >= 0 && flags.status == 0)
+ nfit_blk->dimm_flags = flags.flags;
+ else if (rc == -ENOTTY) {
+ /* fall back to a conservative default */
+ nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
+ rc = 0;
+ } else
+ rc = -ENXIO;
+
+ return rc;
+}
+
+static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
+ struct device *dev)
+{
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+ struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+ struct nfit_blk_mmio *mmio;
+ struct nfit_blk *nfit_blk;
+ struct nfit_mem *nfit_mem;
+ struct nvdimm *nvdimm;
+ int rc;
+
+ nvdimm = nd_blk_region_to_dimm(ndbr);
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
+ dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+ nfit_mem ? "" : " nfit_mem",
+ (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
+ (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
+ return -ENXIO;
+ }
+
+ nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
+ if (!nfit_blk)
+ return -ENOMEM;
+ nd_blk_region_set_provider_data(ndbr, nfit_blk);
+ nfit_blk->nd_region = to_nd_region(dev);
+
+ /* map block aperture memory */
+ nfit_blk->bdw_offset = nfit_mem->bdw->offset;
+ mmio = &nfit_blk->mmio[BDW];
+ mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
+ nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
+ if (!mmio->addr.base) {
+ dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+ nvdimm_name(nvdimm));
+ return -ENOMEM;
+ }
+ mmio->size = nfit_mem->bdw->size;
+ mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
+ mmio->idt = nfit_mem->idt_bdw;
+ mmio->spa = nfit_mem->spa_bdw;
+ rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
+ nfit_mem->memdev_bdw->interleave_ways);
+ if (rc) {
+ dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
+ __func__, nvdimm_name(nvdimm));
+ return rc;
+ }
+
+ /* map block control memory */
+ nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
+ nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
+ mmio = &nfit_blk->mmio[DCR];
+ mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
+ nfit_mem->spa_dcr->length);
+ if (!mmio->addr.base) {
+ dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+ nvdimm_name(nvdimm));
+ return -ENOMEM;
+ }
+ mmio->size = nfit_mem->dcr->window_size;
+ mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
+ mmio->idt = nfit_mem->idt_dcr;
+ mmio->spa = nfit_mem->spa_dcr;
+ rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
+ nfit_mem->memdev_dcr->interleave_ways);
+ if (rc) {
+ dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
+ __func__, nvdimm_name(nvdimm));
+ return rc;
+ }
+
+ rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
+ if (rc < 0) {
+ dev_dbg(dev, "%s: %s failed get DIMM flags\n",
+ __func__, nvdimm_name(nvdimm));
+ return rc;
+ }
+
+ if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
+ dev_warn(dev, "unable to guarantee persistence of writes\n");
+
+ if (mmio->line_size == 0)
+ return 0;
+
+ if ((u32) nfit_blk->cmd_offset % mmio->line_size
+ + 8 > mmio->line_size) {
+ dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
+ return -ENXIO;
+ } else if ((u32) nfit_blk->stat_offset % mmio->line_size
+ + 8 > mmio->line_size) {
+ dev_dbg(dev, "stat_offset crosses interleave boundary\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
+ struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
+{
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ int cmd_rc, rc;
+
+ cmd->address = spa->address;
+ cmd->length = spa->length;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
+ sizeof(*cmd), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
+}
+
+static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
+{
+ int rc;
+ int cmd_rc;
+ struct nd_cmd_ars_start ars_start;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+ memset(&ars_start, 0, sizeof(ars_start));
+ ars_start.address = spa->address;
+ ars_start.length = spa->length;
+ if (nfit_spa_type(spa) == NFIT_SPA_PM)
+ ars_start.type = ND_ARS_PERSISTENT;
+ else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
+ ars_start.type = ND_ARS_VOLATILE;
+ else
+ return -ENOTTY;
+
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+ sizeof(ars_start), &cmd_rc);
+
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
+}
+
+static int ars_continue(struct acpi_nfit_desc *acpi_desc)
+{
+ int rc, cmd_rc;
+ struct nd_cmd_ars_start ars_start;
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+
+ memset(&ars_start, 0, sizeof(ars_start));
+ ars_start.address = ars_status->restart_address;
+ ars_start.length = ars_status->restart_length;
+ ars_start.type = ars_status->type;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+ sizeof(ars_start), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
+}
+
+static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+ int rc, cmd_rc;
+
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
+ acpi_desc->ars_status_size, &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
+}
+
+static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
+ struct nd_cmd_ars_status *ars_status)
+{
+ int rc;
+ u32 i;
+
+ for (i = 0; i < ars_status->num_records; i++) {
+ rc = nvdimm_bus_add_poison(nvdimm_bus,
+ ars_status->records[i].err_address,
+ ars_status->records[i].length);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static void acpi_nfit_remove_resource(void *data)
+{
+ struct resource *res = data;
+
+ remove_resource(res);
+}
+
+static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
+ struct nd_region_desc *ndr_desc)
+{
+ struct resource *res, *nd_res = ndr_desc->res;
+ int is_pmem, ret;
+
+ /* No operation if the region is already registered as PMEM */
+ is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
+ IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
+ if (is_pmem == REGION_INTERSECTS)
+ return 0;
+
+ res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ res->name = "Persistent Memory";
+ res->start = nd_res->start;
+ res->end = nd_res->end;
+ res->flags = IORESOURCE_MEM;
+ res->desc = IORES_DESC_PERSISTENT_MEMORY;
+
+ ret = insert_resource(&iomem_resource, res);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(acpi_desc->dev,
+ acpi_nfit_remove_resource,
+ res);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
+ struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+ struct acpi_nfit_memory_map *memdev,
+ struct nfit_spa *nfit_spa)
+{
+ struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
+ memdev->device_handle);
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ struct nd_blk_region_desc *ndbr_desc;
+ struct nfit_mem *nfit_mem;
+ int blk_valid = 0;
+
+ if (!nvdimm) {
+ dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
+ spa->range_index, memdev->device_handle);
+ return -ENODEV;
+ }
+
+ nd_mapping->nvdimm = nvdimm;
+ switch (nfit_spa_type(spa)) {
+ case NFIT_SPA_PM:
+ case NFIT_SPA_VOLATILE:
+ nd_mapping->start = memdev->address;
+ nd_mapping->size = memdev->region_size;
+ break;
+ case NFIT_SPA_DCR:
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem || !nfit_mem->bdw) {
+ dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
+ spa->range_index, nvdimm_name(nvdimm));
+ } else {
+ nd_mapping->size = nfit_mem->bdw->capacity;
+ nd_mapping->start = nfit_mem->bdw->start_address;
+ ndr_desc->num_lanes = nfit_mem->bdw->windows;
+ blk_valid = 1;
+ }
+
+ ndr_desc->nd_mapping = nd_mapping;
+ ndr_desc->num_mappings = blk_valid;
+ ndbr_desc = to_blk_region_desc(ndr_desc);
+ ndbr_desc->enable = acpi_nfit_blk_region_enable;
+ ndbr_desc->do_io = acpi_desc->blk_do_io;
+ nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ return -ENOMEM;
+ break;
+ }
+
+ return 0;
+}
+
+static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
+{
+ return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
+ nfit_spa_type(spa) == NFIT_SPA_VCD ||
+ nfit_spa_type(spa) == NFIT_SPA_PDISK ||
+ nfit_spa_type(spa) == NFIT_SPA_PCD);
+}
+
+static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
+{
+ static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ struct nd_blk_region_desc ndbr_desc;
+ struct nd_region_desc *ndr_desc;
+ struct nfit_memdev *nfit_memdev;
+ struct nvdimm_bus *nvdimm_bus;
+ struct resource res;
+ int count = 0, rc;
+
+ if (nfit_spa->nd_region)
+ return 0;
+
+ if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
+ dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
+ __func__);
+ return 0;
+ }
+
+ memset(&res, 0, sizeof(res));
+ memset(&nd_mappings, 0, sizeof(nd_mappings));
+ memset(&ndbr_desc, 0, sizeof(ndbr_desc));
+ res.start = spa->address;
+ res.end = res.start + spa->length - 1;
+ ndr_desc = &ndbr_desc.ndr_desc;
+ ndr_desc->res = &res;
+ ndr_desc->provider_data = nfit_spa;
+ ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
+ if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+ ndr_desc->numa_node = acpi_map_pxm_to_online_node(
+ spa->proximity_domain);
+ else
+ ndr_desc->numa_node = NUMA_NO_NODE;
+
+ list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+ struct nd_mapping *nd_mapping;
+
+ if (memdev->range_index != spa->range_index)
+ continue;
+ if (count >= ND_MAX_MAPPINGS) {
+ dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
+ spa->range_index, ND_MAX_MAPPINGS);
+ return -ENXIO;
+ }
+ nd_mapping = &nd_mappings[count++];
+ rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+ memdev, nfit_spa);
+ if (rc)
+ goto out;
+ }
+
+ ndr_desc->nd_mapping = nd_mappings;
+ ndr_desc->num_mappings = count;
+ rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
+ if (rc)
+ goto out;
+
+ nvdimm_bus = acpi_desc->nvdimm_bus;
+ if (nfit_spa_type(spa) == NFIT_SPA_PM) {
+ rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
+ if (rc) {
+ dev_warn(acpi_desc->dev,
+ "failed to insert pmem resource to iomem: %d\n",
+ rc);
+ goto out;
+ }
+
+ nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
+ } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
+ nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
+ } else if (nfit_spa_is_virtual(spa)) {
+ nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
+ }
+
+ out:
+ if (rc)
+ dev_err(acpi_desc->dev, "failed to register spa range %d\n",
+ nfit_spa->spa->range_index);
+ return rc;
+}
+
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
+ u32 max_ars)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nd_cmd_ars_status *ars_status;
+
+ if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
+ memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+ return 0;
+ }
+
+ if (acpi_desc->ars_status)
+ devm_kfree(dev, acpi_desc->ars_status);
+ acpi_desc->ars_status = NULL;
+ ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+ if (!ars_status)
+ return -ENOMEM;
+ acpi_desc->ars_status = ars_status;
+ acpi_desc->ars_status_size = max_ars;
+ return 0;
+}
+
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
+{
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ int rc;
+
+ if (!nfit_spa->max_ars) {
+ struct nd_cmd_ars_cap ars_cap;
+
+ memset(&ars_cap, 0, sizeof(ars_cap));
+ rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+ if (rc < 0)
+ return rc;
+ nfit_spa->max_ars = ars_cap.max_ars_out;
+ nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+ /* check that the supported scrub types match the spa type */
+ if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
+ ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
+ return -ENOTTY;
+ else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
+ ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
+ return -ENOTTY;
+ }
+
+ if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+ return -ENOMEM;
+
+ rc = ars_get_status(acpi_desc);
+ if (rc < 0 && rc != -ENOSPC)
+ return rc;
+
+ if (ars_status_process_records(acpi_desc->nvdimm_bus,
+ acpi_desc->ars_status))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
+{
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ unsigned int overflow_retry = scrub_overflow_abort;
+ u64 init_ars_start = 0, init_ars_len = 0;
+ struct device *dev = acpi_desc->dev;
+ unsigned int tmo = scrub_timeout;
+ int rc;
+
+ if (!nfit_spa->ars_required || !nfit_spa->nd_region)
+ return;
+
+ rc = ars_start(acpi_desc, nfit_spa);
+ /*
+ * If we timed out the initial scan we'll still be busy here,
+ * and will wait another timeout before giving up permanently.
+ */
+ if (rc < 0 && rc != -EBUSY)
+ return;
+
+ do {
+ u64 ars_start, ars_len;
+
+ if (acpi_desc->cancel)
+ break;
+ rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+ if (rc == -ENOTTY)
+ break;
+ if (rc == -EBUSY && !tmo) {
+ dev_warn(dev, "range %d ars timeout, aborting\n",
+ spa->range_index);
+ break;
+ }
+
+ if (rc == -EBUSY) {
+ /*
+ * Note, entries may be appended to the list
+ * while the lock is dropped, but the workqueue
+ * being active prevents entries being deleted /
+ * freed.
+ */
+ mutex_unlock(&acpi_desc->init_mutex);
+ ssleep(1);
+ tmo--;
+ mutex_lock(&acpi_desc->init_mutex);
+ continue;
+ }
+
+ /* we got some results, but there are more pending... */
+ if (rc == -ENOSPC && overflow_retry--) {
+ if (!init_ars_len) {
+ init_ars_len = acpi_desc->ars_status->length;
+ init_ars_start = acpi_desc->ars_status->address;
+ }
+ rc = ars_continue(acpi_desc);
+ }
+
+ if (rc < 0) {
+ dev_warn(dev, "range %d ars continuation failed\n",
+ spa->range_index);
+ break;
+ }
+
+ if (init_ars_len) {
+ ars_start = init_ars_start;
+ ars_len = init_ars_len;
+ } else {
+ ars_start = acpi_desc->ars_status->address;
+ ars_len = acpi_desc->ars_status->length;
+ }
+ dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
+ spa->range_index, ars_start, ars_len);
+ /* notify the region about new poison entries */
+ nvdimm_region_notify(nfit_spa->nd_region,
+ NVDIMM_REVALIDATE_POISON);
+ break;
+ } while (1);
+}
+
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+ struct device *dev;
+ u64 init_scrub_length = 0;
+ struct nfit_spa *nfit_spa;
+ u64 init_scrub_address = 0;
+ bool init_ars_done = false;
+ struct acpi_nfit_desc *acpi_desc;
+ unsigned int tmo = scrub_timeout;
+ unsigned int overflow_retry = scrub_overflow_abort;
+
+ acpi_desc = container_of(work, typeof(*acpi_desc), work);
+ dev = acpi_desc->dev;
+
+ /*
+ * We scrub in 2 phases. The first phase waits for any platform
+ * firmware initiated scrubs to complete and then we go search for the
+ * affected spa regions to mark them scanned. In the second phase we
+ * initiate a directed scrub for every range that was not scrubbed in
+ * phase 1. If we're called for a 'rescan', we harmlessly pass through
+ * the first phase, but really only care about running phase 2, where
+ * regions can be notified of new poison.
+ */
+
+ /* process platform firmware initiated scrubs */
+ retry:
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ struct nd_cmd_ars_status *ars_status;
+ struct acpi_nfit_system_address *spa;
+ u64 ars_start, ars_len;
+ int rc;
+
+ if (acpi_desc->cancel)
+ break;
+
+ if (nfit_spa->nd_region)
+ continue;
+
+ if (init_ars_done) {
+ /*
+ * No need to re-query, we're now just
+ * reconciling all the ranges covered by the
+ * initial scrub
+ */
+ rc = 0;
+ } else
+ rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+
+ if (rc == -ENOTTY) {
+ /* no ars capability, just register spa and move on */
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ continue;
+ }
+
+ if (rc == -EBUSY && !tmo) {
+ /* fallthrough to directed scrub in phase 2 */
+ dev_warn(dev, "timeout awaiting ars results, continuing...\n");
+ break;
+ } else if (rc == -EBUSY) {
+ mutex_unlock(&acpi_desc->init_mutex);
+ ssleep(1);
+ tmo--;
+ goto retry;
+ }
+
+ /* we got some results, but there are more pending... */
+ if (rc == -ENOSPC && overflow_retry--) {
+ ars_status = acpi_desc->ars_status;
+ /*
+ * Record the original scrub range, so that we
+ * can recall all the ranges impacted by the
+ * initial scrub.
+ */
+ if (!init_scrub_length) {
+ init_scrub_length = ars_status->length;
+ init_scrub_address = ars_status->address;
+ }
+ rc = ars_continue(acpi_desc);
+ if (rc == 0) {
+ mutex_unlock(&acpi_desc->init_mutex);
+ goto retry;
+ }
+ }
+
+ if (rc < 0) {
+ /*
+ * Initial scrub failed, we'll give it one more
+ * try below...
+ */
+ break;
+ }
+
+ /* We got some final results, record completed ranges */
+ ars_status = acpi_desc->ars_status;
+ if (init_scrub_length) {
+ ars_start = init_scrub_address;
+ ars_len = ars_start + init_scrub_length;
+ } else {
+ ars_start = ars_status->address;
+ ars_len = ars_status->length;
+ }
+ spa = nfit_spa->spa;
+
+ if (!init_ars_done) {
+ init_ars_done = true;
+ dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
+ ars_start, ars_len);
+ }
+ if (ars_start <= spa->address && ars_start + ars_len
+ >= spa->address + spa->length)
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
+
+ /*
+ * For all the ranges not covered by an initial scrub we still
+ * want to see if there are errors, but it's ok to discover them
+ * asynchronously.
+ */
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ /*
+ * Flag all the ranges that still need scrubbing, but
+ * register them now to make data available.
+ */
+ if (!nfit_spa->nd_region) {
+ nfit_spa->ars_required = 1;
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
+ }
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+ acpi_desc->scrub_count++;
+ if (acpi_desc->scrub_count_state)
+ sysfs_notify_dirent(acpi_desc->scrub_count_state);
+ mutex_unlock(&acpi_desc->init_mutex);
+}
+
+static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nfit_spa *nfit_spa;
+ int rc;
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
+ /* BLK regions don't need to wait for ars results */
+ rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (rc)
+ return rc;
+ }
+
+ queue_work(nfit_wq, &acpi_desc->work);
+ return 0;
+}
+
+static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_table_prev *prev)
+{
+ struct device *dev = acpi_desc->dev;
+
+ if (!list_empty(&prev->spas) ||
+ !list_empty(&prev->memdevs) ||
+ !list_empty(&prev->dcrs) ||
+ !list_empty(&prev->bdws) ||
+ !list_empty(&prev->idts) ||
+ !list_empty(&prev->flushes)) {
+ dev_err(dev, "new nfit deletes entries (unsupported)\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
+{
+ struct device *dev = acpi_desc->dev;
+ struct kernfs_node *nfit;
+ struct device *bus_dev;
+
+ if (!ars_supported(acpi_desc->nvdimm_bus))
+ return 0;
+
+ bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+ nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
+ if (!nfit) {
+ dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
+ return -ENODEV;
+ }
+ acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
+ sysfs_put(nfit);
+ if (!acpi_desc->scrub_count_state) {
+ dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void acpi_nfit_destruct(void *data)
+{
+ struct acpi_nfit_desc *acpi_desc = data;
+ struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+
+ /*
+ * Destruct under acpi_desc_lock so that nfit_handle_mce does not
+ * race teardown
+ */
+ mutex_lock(&acpi_desc_lock);
+ acpi_desc->cancel = 1;
+ /*
+ * Bounce the nvdimm bus lock to make sure any in-flight
+ * acpi_nfit_ars_rescan() submissions have had a chance to
+ * either submit or see ->cancel set.
+ */
+ device_lock(bus_dev);
+ device_unlock(bus_dev);
+
+ flush_workqueue(nfit_wq);
+ if (acpi_desc->scrub_count_state)
+ sysfs_put(acpi_desc->scrub_count_state);
+ nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ acpi_desc->nvdimm_bus = NULL;
+ list_del(&acpi_desc->list);
+ mutex_unlock(&acpi_desc_lock);
+}
+
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_table_prev prev;
+ const void *end;
+ int rc;
+
+ if (!acpi_desc->nvdimm_bus) {
+ acpi_nfit_init_dsms(acpi_desc);
+
+ acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
+ &acpi_desc->nd_desc);
+ if (!acpi_desc->nvdimm_bus)
+ return -ENOMEM;
+
+ rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
+ acpi_desc);
+ if (rc)
+ return rc;
+
+ rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
+ if (rc)
+ return rc;
+
+ /* register this acpi_desc for mce notifications */
+ mutex_lock(&acpi_desc_lock);
+ list_add_tail(&acpi_desc->list, &acpi_descs);
+ mutex_unlock(&acpi_desc_lock);
+ }
+
+ mutex_lock(&acpi_desc->init_mutex);
+
+ INIT_LIST_HEAD(&prev.spas);
+ INIT_LIST_HEAD(&prev.memdevs);
+ INIT_LIST_HEAD(&prev.dcrs);
+ INIT_LIST_HEAD(&prev.bdws);
+ INIT_LIST_HEAD(&prev.idts);
+ INIT_LIST_HEAD(&prev.flushes);
+
+ list_cut_position(&prev.spas, &acpi_desc->spas,
+ acpi_desc->spas.prev);
+ list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
+ acpi_desc->memdevs.prev);
+ list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
+ acpi_desc->dcrs.prev);
+ list_cut_position(&prev.bdws, &acpi_desc->bdws,
+ acpi_desc->bdws.prev);
+ list_cut_position(&prev.idts, &acpi_desc->idts,
+ acpi_desc->idts.prev);
+ list_cut_position(&prev.flushes, &acpi_desc->flushes,
+ acpi_desc->flushes.prev);
+
+ end = data + sz;
+ while (!IS_ERR_OR_NULL(data))
+ data = add_table(acpi_desc, &prev, data, end);
+
+ if (IS_ERR(data)) {
+ dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
+ PTR_ERR(data));
+ rc = PTR_ERR(data);
+ goto out_unlock;
+ }
+
+ rc = acpi_nfit_check_deletions(acpi_desc, &prev);
+ if (rc)
+ goto out_unlock;
+
+ rc = nfit_mem_init(acpi_desc);
+ if (rc)
+ goto out_unlock;
+
+ rc = acpi_nfit_register_dimms(acpi_desc);
+ if (rc)
+ goto out_unlock;
+
+ rc = acpi_nfit_register_regions(acpi_desc);
+
+ out_unlock:
+ mutex_unlock(&acpi_desc->init_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_init);
+
+struct acpi_nfit_flush_work {
+ struct work_struct work;
+ struct completion cmp;
+};
+
+static void flush_probe(struct work_struct *work)
+{
+ struct acpi_nfit_flush_work *flush;
+
+ flush = container_of(work, typeof(*flush), work);
+ complete(&flush->cmp);
+}
+
+static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct device *dev = acpi_desc->dev;
+ struct acpi_nfit_flush_work flush;
+
+ /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+ device_lock(dev);
+ device_unlock(dev);
+
+ /*
+ * Scrub work could take 10s of seconds, userspace may give up so we
+ * need to be interruptible while waiting.
+ */
+ INIT_WORK_ONSTACK(&flush.work, flush_probe);
+ COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
+ queue_work(nfit_wq, &flush.work);
+ return wait_for_completion_interruptible(&flush.cmp);
+}
+
+static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+
+ if (nvdimm)
+ return 0;
+ if (cmd != ND_CMD_ARS_START)
+ return 0;
+
+ /*
+ * The kernel and userspace may race to initiate a scrub, but
+ * the scrub thread is prepared to lose that initial race. It
+ * just needs guarantees that any ars it initiates are not
+ * interrupted by any intervening start reqeusts from userspace.
+ */
+ if (work_busy(&acpi_desc->work))
+ return -EBUSY;
+
+ return 0;
+}
+
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_spa *nfit_spa;
+
+ if (work_busy(&acpi_desc->work))
+ return -EBUSY;
+
+ if (acpi_desc->cancel)
+ return 0;
+
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+ if (nfit_spa_type(spa) != NFIT_SPA_PM)
+ continue;
+
+ nfit_spa->ars_required = 1;
+ }
+ queue_work(nfit_wq, &acpi_desc->work);
+ dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ return 0;
+}
+
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+
+ dev_set_drvdata(dev, acpi_desc);
+ acpi_desc->dev = dev;
+ acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
+ nd_desc = &acpi_desc->nd_desc;
+ nd_desc->provider_name = "ACPI.NFIT";
+ nd_desc->module = THIS_MODULE;
+ nd_desc->ndctl = acpi_nfit_ctl;
+ nd_desc->flush_probe = acpi_nfit_flush_probe;
+ nd_desc->clear_to_send = acpi_nfit_clear_to_send;
+ nd_desc->attr_groups = acpi_nfit_attribute_groups;
+
+ INIT_LIST_HEAD(&acpi_desc->spas);
+ INIT_LIST_HEAD(&acpi_desc->dcrs);
+ INIT_LIST_HEAD(&acpi_desc->bdws);
+ INIT_LIST_HEAD(&acpi_desc->idts);
+ INIT_LIST_HEAD(&acpi_desc->flushes);
+ INIT_LIST_HEAD(&acpi_desc->memdevs);
+ INIT_LIST_HEAD(&acpi_desc->dimms);
+ INIT_LIST_HEAD(&acpi_desc->list);
+ mutex_init(&acpi_desc->init_mutex);
+ INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
+
+static int acpi_nfit_add(struct acpi_device *adev)
+{
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_nfit_desc *acpi_desc;
+ struct device *dev = &adev->dev;
+ struct acpi_table_header *tbl;
+ acpi_status status = AE_OK;
+ acpi_size sz;
+ int rc = 0;
+
+ status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
+ if (ACPI_FAILURE(status)) {
+ /* This is ok, we could have an nvdimm hotplugged later */
+ dev_dbg(dev, "failed to find NFIT at startup\n");
+ return 0;
+ }
+
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
+ return -ENOMEM;
+ acpi_nfit_desc_init(acpi_desc, &adev->dev);
+
+ /* Save the acpi header for exporting the revision via sysfs */
+ acpi_desc->acpi_header = *tbl;
+
+ /* Evaluate _FIT and override with that if present */
+ status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+ if (ACPI_SUCCESS(status) && buf.length > 0) {
+ union acpi_object *obj = buf.pointer;
+
+ if (obj->type == ACPI_TYPE_BUFFER)
+ rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+ obj->buffer.length);
+ else
+ dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
+ __func__, (int) obj->type);
+ kfree(buf.pointer);
+ } else
+ /* skip over the lead-in header table */
+ rc = acpi_nfit_init(acpi_desc, (void *) tbl
+ + sizeof(struct acpi_table_nfit),
+ sz - sizeof(struct acpi_table_nfit));
+ return rc;
+}
+
+static int acpi_nfit_remove(struct acpi_device *adev)
+{
+ /* see acpi_nfit_destruct */
+ return 0;
+}
+
+static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+{
+ struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct device *dev = &adev->dev;
+ union acpi_object *obj;
+ acpi_status status;
+ int ret;
+
+ dev_dbg(dev, "%s: event: %d\n", __func__, event);
+
+ device_lock(dev);
+ if (!dev->driver) {
+ /* dev->driver may be null if we're being removed */
+ dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+ goto out_unlock;
+ }
+
+ if (!acpi_desc) {
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
+ goto out_unlock;
+ acpi_nfit_desc_init(acpi_desc, &adev->dev);
+ } else {
+ /*
+ * Finish previous registration before considering new
+ * regions.
+ */
+ flush_workqueue(nfit_wq);
+ }
+
+ /* Evaluate _FIT */
+ status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "failed to evaluate _FIT\n");
+ goto out_unlock;
+ }
+
+ obj = buf.pointer;
+ if (obj->type == ACPI_TYPE_BUFFER) {
+ ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+ obj->buffer.length);
+ if (ret)
+ dev_err(dev, "failed to merge updated NFIT\n");
+ } else
+ dev_err(dev, "Invalid _FIT\n");
+ kfree(buf.pointer);
+
+ out_unlock:
+ device_unlock(dev);
+}
+
+static const struct acpi_device_id acpi_nfit_ids[] = {
+ { "ACPI0012", 0 },
+ { "", 0 },
+};
+MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
+
+static struct acpi_driver acpi_nfit_driver = {
+ .name = KBUILD_MODNAME,
+ .ids = acpi_nfit_ids,
+ .ops = {
+ .add = acpi_nfit_add,
+ .remove = acpi_nfit_remove,
+ .notify = acpi_nfit_notify,
+ },
+};
+
+static __init int nfit_init(void)
+{
+ BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
+ BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
+
+ acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
+ acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
+ acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
+ acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
+ acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
+ acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
+ acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
+ acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
+ acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
+ acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+ acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+ acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+ acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+
+ nfit_wq = create_singlethread_workqueue("nfit");
+ if (!nfit_wq)
+ return -ENOMEM;
+
+ nfit_mce_register();
+
+ return acpi_bus_register_driver(&acpi_nfit_driver);
+}
+
+static __exit void nfit_exit(void)
+{
+ nfit_mce_unregister();
+ acpi_bus_unregister_driver(&acpi_nfit_driver);
+ destroy_workqueue(nfit_wq);
+ WARN_ON(!list_empty(&acpi_descs));
+}
+
+module_init(nfit_init);
+module_exit(nfit_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
--- /dev/null
+/*
+ * NFIT - Machine Check Handler
+ *
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/notifier.h>
+#include <linux/acpi.h>
+#include <asm/mce.h>
+#include "nfit.h"
+
+static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ struct acpi_nfit_desc *acpi_desc;
+ struct nfit_spa *nfit_spa;
+
+ /* We only care about memory errors */
+ if (!(mce->status & MCACOD))
+ return NOTIFY_DONE;
+
+ /*
+ * mce->addr contains the physical addr accessed that caused the
+ * machine check. We need to walk through the list of NFITs, and see
+ * if any of them matches that address, and only then start a scrub.
+ */
+ mutex_lock(&acpi_desc_lock);
+ list_for_each_entry(acpi_desc, &acpi_descs, list) {
+ struct device *dev = acpi_desc->dev;
+ int found_match = 0;
+
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+ if (nfit_spa_type(spa) == NFIT_SPA_PM)
+ continue;
+ /* find the spa that covers the mce addr */
+ if (spa->address > mce->addr)
+ continue;
+ if ((spa->address + spa->length - 1) < mce->addr)
+ continue;
+ found_match = 1;
+ dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
+ __func__, spa->range_index, spa->address,
+ spa->length);
+ /*
+ * We can break at the first match because we're going
+ * to rescan all the SPA ranges. There shouldn't be any
+ * aliasing anyway.
+ */
+ break;
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ /*
+ * We can ignore an -EBUSY here because if an ARS is already
+ * in progress, just let that be the last authoritative one
+ */
+ if (found_match)
+ acpi_nfit_ars_rescan(acpi_desc);
+ }
+
+ mutex_unlock(&acpi_desc_lock);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfit_mce_dec = {
+ .notifier_call = nfit_handle_mce,
+};
+
+void nfit_mce_register(void)
+{
+ mce_register_decode_chain(&nfit_mce_dec);
+}
+
+void nfit_mce_unregister(void)
+{
+ mce_unregister_decode_chain(&nfit_mce_dec);
+}
--- /dev/null
+/*
+ * NVDIMM Firmware Interface Table - NFIT
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_H__
+#define __NFIT_H__
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/acpi.h>
+#include <acpi/acuuid.h>
+
+/* ACPI 6.1 */
+#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
+
+/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
+#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
+
+/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
+#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
+#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
+
+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
+#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
+ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
+ | ACPI_NFIT_MEM_NOT_ARMED)
+
+enum nfit_uuids {
+ /* for simplicity alias the uuid index with the family id */
+ NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
+ NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
+ NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+ NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
+ NFIT_SPA_VOLATILE,
+ NFIT_SPA_PM,
+ NFIT_SPA_DCR,
+ NFIT_SPA_BDW,
+ NFIT_SPA_VDISK,
+ NFIT_SPA_VCD,
+ NFIT_SPA_PDISK,
+ NFIT_SPA_PCD,
+ NFIT_DEV_BUS,
+ NFIT_UUID_MAX,
+};
+
+/*
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
+ */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
+
+enum {
+ NFIT_BLK_READ_FLUSH = 1,
+ NFIT_BLK_DCR_LATCH = 2,
+ NFIT_ARS_STATUS_DONE = 0,
+ NFIT_ARS_STATUS_BUSY = 1 << 16,
+ NFIT_ARS_STATUS_NONE = 2 << 16,
+ NFIT_ARS_STATUS_INTR = 3 << 16,
+ NFIT_ARS_START_BUSY = 6,
+ NFIT_ARS_CAP_NONE = 1,
+ NFIT_ARS_F_OVERFLOW = 1,
+ NFIT_ARS_TIMEOUT = 90,
+};
+
+struct nfit_spa {
+ struct list_head list;
+ struct nd_region *nd_region;
+ unsigned int ars_required:1;
+ u32 clear_err_unit;
+ u32 max_ars;
+ struct acpi_nfit_system_address spa[0];
+};
+
+struct nfit_dcr {
+ struct list_head list;
+ struct acpi_nfit_control_region dcr[0];
+};
+
+struct nfit_bdw {
+ struct list_head list;
+ struct acpi_nfit_data_region bdw[0];
+};
+
+struct nfit_idt {
+ struct list_head list;
+ struct acpi_nfit_interleave idt[0];
+};
+
+struct nfit_flush {
+ struct list_head list;
+ struct acpi_nfit_flush_address flush[0];
+};
+
+struct nfit_memdev {
+ struct list_head list;
+ struct acpi_nfit_memory_map memdev[0];
+};
+
+/* assembled tables for a given dimm/memory-device */
+struct nfit_mem {
+ struct nvdimm *nvdimm;
+ struct acpi_nfit_memory_map *memdev_dcr;
+ struct acpi_nfit_memory_map *memdev_pmem;
+ struct acpi_nfit_memory_map *memdev_bdw;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_data_region *bdw;
+ struct acpi_nfit_system_address *spa_dcr;
+ struct acpi_nfit_system_address *spa_bdw;
+ struct acpi_nfit_interleave *idt_dcr;
+ struct acpi_nfit_interleave *idt_bdw;
+ struct nfit_flush *nfit_flush;
+ struct list_head list;
+ struct acpi_device *adev;
+ struct acpi_nfit_desc *acpi_desc;
+ struct resource *flush_wpq;
+ unsigned long dsm_mask;
+ int family;
+};
+
+struct acpi_nfit_desc {
+ struct nvdimm_bus_descriptor nd_desc;
+ struct acpi_table_header acpi_header;
+ struct mutex init_mutex;
+ struct list_head memdevs;
+ struct list_head flushes;
+ struct list_head dimms;
+ struct list_head spas;
+ struct list_head dcrs;
+ struct list_head bdws;
+ struct list_head idts;
+ struct nvdimm_bus *nvdimm_bus;
+ struct device *dev;
+ struct nd_cmd_ars_status *ars_status;
+ size_t ars_status_size;
+ struct work_struct work;
+ struct list_head list;
+ struct kernfs_node *scrub_count_state;
+ unsigned int scrub_count;
+ unsigned int cancel:1;
+ unsigned long dimm_cmd_force_en;
+ unsigned long bus_cmd_force_en;
+ int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+ void *iobuf, u64 len, int rw);
+};
+
+enum nd_blk_mmio_selector {
+ BDW,
+ DCR,
+};
+
+struct nd_blk_addr {
+ union {
+ void __iomem *base;
+ void *aperture;
+ };
+};
+
+struct nfit_blk {
+ struct nfit_blk_mmio {
+ struct nd_blk_addr addr;
+ u64 size;
+ u64 base_offset;
+ u32 line_size;
+ u32 num_lines;
+ u32 table_size;
+ struct acpi_nfit_interleave *idt;
+ struct acpi_nfit_system_address *spa;
+ } mmio[2];
+ struct nd_region *nd_region;
+ u64 bdw_offset; /* post interleave offset */
+ u64 stat_offset;
+ u64 cmd_offset;
+ u32 dimm_flags;
+};
+
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
+
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
+{
+}
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);
+
+static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
+ struct nfit_mem *nfit_mem)
+{
+ if (nfit_mem->memdev_dcr)
+ return nfit_mem->memdev_dcr;
+ return nfit_mem->memdev_pmem;
+}
+
+static inline struct acpi_nfit_desc *to_acpi_desc(
+ struct nvdimm_bus_descriptor *nd_desc)
+{
+ return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+const u8 *to_nfit_uuid(enum nfit_uuids id);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
+#endif /* __NFIT_H__ */
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
page = brd_insert_page(brd, sector);
if (!page)
return -ENOSPC;
- *kaddr = (void __pmem *)page_address(page);
+ *kaddr = page_address(page);
*pfn = page_to_pfn_t(page);
return PAGE_SIZE;
}
dax_dev->dev = dev;
- rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
- if (rc) {
- unregister_dax_dev(dev);
+ rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+ if (rc)
return rc;
- }
return 0;
if (rc)
return rc;
- rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
- if (rc) {
- dax_pmem_percpu_exit(&dax_pmem->ref);
+ rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+ &dax_pmem->ref);
+ if (rc)
return rc;
- }
addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
- rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
- if (rc) {
- dax_pmem_percpu_kill(&dax_pmem->ref);
+ rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+ &dax_pmem->ref);
+ if (rc)
return rc;
- }
nd_region = to_nd_region(dev->parent);
dax_region = alloc_dax_region(dev, nd_region->id, &res,
}
static long linear_direct_access(struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
}
static long origin_direct_access(struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
DMWARN("device does not support dax.");
return -EIO;
}
static long stripe_direct_access(struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct stripe_c *sc = ti->private;
uint32_t stripe;
}
static long io_err_direct_access(struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
return -EIO;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *map;
menuconfig LIBNVDIMM
tristate "NVDIMM (Non-Volatile Memory Device) Support"
depends on PHYS_ADDR_T_64BIT
+ depends on HAS_IOMEM
depends on BLK_DEV
help
Generic support for non-volatile memory devices including
config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
- depends on HAS_IOMEM
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
- if (devm_add_action(dev, nd_blk_release_queue, q)) {
- blk_cleanup_queue(q);
+ if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
- }
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
disk = alloc_disk(0);
if (!disk)
return -ENOMEM;
- if (devm_add_action(dev, nd_blk_release_disk, disk)) {
- put_disk(disk);
- return -ENOMEM;
- }
disk->first_minor = 0;
disk->fops = &nd_blk_fops;
set_capacity(disk, 0);
device_add_disk(dev, disk);
+ if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+ return -ENOMEM;
+
if (nsblk_meta_size(nsblk)) {
int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
{
struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
- if (dev)
- __nd_device_register(dev);
+ __nd_device_register(dev);
return dev;
}
int nvdimm_major;
static int nvdimm_bus_major;
static struct class *nd_class;
+static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
{
to_nd_device_type(dev));
}
-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
- struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
- return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
static struct module *to_bus_provider(struct device *dev)
{
/* pin bus providers while regions are enabled */
if (is_nd_pmem(dev) || is_nd_blk(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- return nvdimm_bus->module;
+ return nvdimm_bus->nd_desc->module;
}
return NULL;
}
return rc;
}
+static void nvdimm_bus_shutdown(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nd_device_driver *nd_drv = NULL;
+
+ if (dev->driver)
+ nd_drv = to_nd_device_driver(dev->driver);
+
+ if (nd_drv && nd_drv->shutdown) {
+ nd_drv->shutdown(dev);
+ dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+ dev->driver->name, dev_name(dev));
+ }
+}
+
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
device_lock(dev);
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
static struct bus_type nvdimm_bus_type = {
.name = "nd",
.uevent = nvdimm_bus_uevent,
.match = nvdimm_bus_match,
.probe = nvdimm_bus_probe,
.remove = nvdimm_bus_remove,
+ .shutdown = nvdimm_bus_shutdown,
+};
+
+static void nvdimm_bus_release(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus;
+
+ nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+ ida_simple_remove(&nd_ida, nvdimm_bus->id);
+ kfree(nvdimm_bus);
+}
+
+static bool is_nvdimm_bus(struct device *dev)
+{
+ return dev->release == nvdimm_bus_release;
+}
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+ struct device *dev;
+
+ for (dev = nd_dev; dev; dev = dev->parent)
+ if (is_nvdimm_bus(dev))
+ break;
+ dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+ if (dev)
+ return to_nvdimm_bus(dev);
+ return NULL;
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus;
+
+ nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+ WARN_ON(!is_nvdimm_bus(dev));
+ return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+ struct nvdimm_bus_descriptor *nd_desc)
+{
+ struct nvdimm_bus *nvdimm_bus;
+ int rc;
+
+ nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+ if (!nvdimm_bus)
+ return NULL;
+ INIT_LIST_HEAD(&nvdimm_bus->list);
+ INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
+ INIT_LIST_HEAD(&nvdimm_bus->poison_list);
+ init_waitqueue_head(&nvdimm_bus->probe_wait);
+ nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+ mutex_init(&nvdimm_bus->reconfig_mutex);
+ if (nvdimm_bus->id < 0) {
+ kfree(nvdimm_bus);
+ return NULL;
+ }
+ nvdimm_bus->nd_desc = nd_desc;
+ nvdimm_bus->dev.parent = parent;
+ nvdimm_bus->dev.release = nvdimm_bus_release;
+ nvdimm_bus->dev.groups = nd_desc->attr_groups;
+ nvdimm_bus->dev.bus = &nvdimm_bus_type;
+ dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+ rc = device_register(&nvdimm_bus->dev);
+ if (rc) {
+ dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+ goto err;
+ }
+
+ return nvdimm_bus;
+ err:
+ put_device(&nvdimm_bus->dev);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+ if (!nvdimm_bus)
+ return;
+ device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+static int child_unregister(struct device *dev, void *data)
+{
+ /*
+ * the singular ndctl class device per bus needs to be
+ * "device_destroy"ed, so skip it here
+ *
+ * i.e. remove classless children
+ */
+ if (dev->class)
+ /* pass */;
+ else
+ nd_device_unregister(dev, ND_SYNC);
+ return 0;
+}
+
+static void free_poison_list(struct list_head *poison_list)
+{
+ struct nd_poison *pl, *next;
+
+ list_for_each_entry_safe(pl, next, poison_list, list) {
+ list_del(&pl->list);
+ kfree(pl);
+ }
+ list_del_init(poison_list);
+}
+
+static int nd_bus_remove(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+ mutex_lock(&nvdimm_bus_list_mutex);
+ list_del_init(&nvdimm_bus->list);
+ mutex_unlock(&nvdimm_bus_list_mutex);
+
+ nd_synchronize();
+ device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ free_poison_list(&nvdimm_bus->poison_list);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+ nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+ return 0;
+}
+
+static int nd_bus_probe(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ int rc;
+
+ rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+ if (rc)
+ return rc;
+
+ mutex_lock(&nvdimm_bus_list_mutex);
+ list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+ mutex_unlock(&nvdimm_bus_list_mutex);
+
+ /* enable bus provider attributes to look up their local context */
+ dev_set_drvdata(dev, nvdimm_bus->nd_desc);
+
+ return 0;
+}
+
+static struct nd_device_driver nd_bus_driver = {
+ .probe = nd_bus_probe,
+ .remove = nd_bus_remove,
+ .drv = {
+ .name = "nd_bus",
+ .suppress_bind_attrs = true,
+ .bus = &nvdimm_bus_type,
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ },
};
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+ struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+ if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+ return true;
+
+ return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
void nd_synchronize(void)
dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
"ndctl%d", nvdimm_bus->id);
- if (IS_ERR(dev)) {
+ if (IS_ERR(dev))
dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
nvdimm_bus->id, PTR_ERR(dev));
- return PTR_ERR(dev);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(dev);
}
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
goto err_class;
}
+ rc = driver_register(&nd_bus_driver.drv);
+ if (rc)
+ goto err_nd_bus;
+
return 0;
+ err_nd_bus:
+ class_destroy(nd_class);
err_class:
unregister_chrdev(nvdimm_major, "dimmctl");
err_dimm_chrdev:
void nvdimm_bus_exit(void)
{
+ driver_unregister(&nd_bus_driver.drv);
class_destroy(nd_class);
unregister_chrdev(nvdimm_bus_major, "ndctl");
unregister_chrdev(nvdimm_major, "dimmctl");
bus_unregister(&nvdimm_bus_type);
+ ida_destroy(&nd_ida);
}
return memcpy_from_pmem(buf, nsio->addr + offset, size);
} else {
memcpy_to_pmem(nsio->addr + offset, buf, size);
- wmb_pmem();
+ nvdimm_flush(to_nd_region(ndns->dev.parent));
}
return 0;
nsio->addr = devm_memremap(dev, res->start, resource_size(res),
ARCH_MEMREMAP_PMEM);
- if (IS_ERR(nsio->addr))
- return PTR_ERR(nsio->addr);
- return 0;
+
+ return PTR_ERR_OR_ZERO(nsio->addr);
}
EXPORT_SYMBOL_GPL(devm_nsio_enable);
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include "nd-core.h"
#include "nd.h"
LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);
void nvdimm_bus_lock(struct device *dev)
{
}
EXPORT_SYMBOL(is_nvdimm_bus_locked);
+struct nvdimm_map {
+ struct nvdimm_bus *nvdimm_bus;
+ struct list_head list;
+ resource_size_t offset;
+ unsigned long flags;
+ size_t size;
+ union {
+ void *mem;
+ void __iomem *iomem;
+ };
+ struct kref kref;
+};
+
+static struct nvdimm_map *find_nvdimm_map(struct device *dev,
+ resource_size_t offset)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_map *nvdimm_map;
+
+ list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
+ if (nvdimm_map->offset == offset)
+ return nvdimm_map;
+ return NULL;
+}
+
+static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
+ resource_size_t offset, size_t size, unsigned long flags)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
+ if (!nvdimm_map)
+ return NULL;
+
+ INIT_LIST_HEAD(&nvdimm_map->list);
+ nvdimm_map->nvdimm_bus = nvdimm_bus;
+ nvdimm_map->offset = offset;
+ nvdimm_map->flags = flags;
+ nvdimm_map->size = size;
+ kref_init(&nvdimm_map->kref);
+
+ if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+ goto err_request_region;
+
+ if (flags)
+ nvdimm_map->mem = memremap(offset, size, flags);
+ else
+ nvdimm_map->iomem = ioremap(offset, size);
+
+ if (!nvdimm_map->mem)
+ goto err_map;
+
+ dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
+ __func__);
+ list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
+
+ return nvdimm_map;
+
+ err_map:
+ release_mem_region(offset, size);
+ err_request_region:
+ kfree(nvdimm_map);
+ return NULL;
+}
+
+static void nvdimm_map_release(struct kref *kref)
+{
+ struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_map = container_of(kref, struct nvdimm_map, kref);
+ nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+ dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+ list_del(&nvdimm_map->list);
+ if (nvdimm_map->flags)
+ memunmap(nvdimm_map->mem);
+ else
+ iounmap(nvdimm_map->iomem);
+ release_mem_region(nvdimm_map->offset, nvdimm_map->size);
+ kfree(nvdimm_map);
+}
+
+static void nvdimm_map_put(void *data)
+{
+ struct nvdimm_map *nvdimm_map = data;
+ struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ kref_put(&nvdimm_map->kref, nvdimm_map_release);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+/**
+ * devm_nvdimm_memremap - map a resource that is shared across regions
+ * @dev: device that will own a reference to the shared mapping
+ * @offset: physical base address of the mapping
+ * @size: mapping size
+ * @flags: memremap flags, or, if zero, perform an ioremap instead
+ */
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_bus_lock(dev);
+ nvdimm_map = find_nvdimm_map(dev, offset);
+ if (!nvdimm_map)
+ nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
+ else
+ kref_get(&nvdimm_map->kref);
+ nvdimm_bus_unlock(dev);
+
+ if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
+ return NULL;
+
+ return nvdimm_map->mem;
+}
+EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
+
u64 nd_fletcher64(void *addr, size_t len, bool le)
{
u32 *buf = addr;
}
EXPORT_SYMBOL_GPL(nd_fletcher64);
-static void nvdimm_bus_release(struct device *dev)
-{
- struct nvdimm_bus *nvdimm_bus;
-
- nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
- ida_simple_remove(&nd_ida, nvdimm_bus->id);
- kfree(nvdimm_bus);
-}
-
-struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
-{
- struct nvdimm_bus *nvdimm_bus;
-
- nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
- WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
- return nvdimm_bus;
-}
-EXPORT_SYMBOL_GPL(to_nvdimm_bus);
-
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
{
/* struct nvdimm_bus definition is private to libnvdimm */
}
EXPORT_SYMBOL_GPL(to_nd_desc);
-struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
{
- struct device *dev;
-
- for (dev = nd_dev; dev; dev = dev->parent)
- if (dev->release == nvdimm_bus_release)
- break;
- dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
- if (dev)
- return to_nvdimm_bus(dev);
- return NULL;
+ /* struct nvdimm_bus definition is private to libnvdimm */
+ return &nvdimm_bus->dev;
}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
static bool is_uuid_sep(char sep)
{
};
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
- struct nvdimm_bus_descriptor *nd_desc, struct module *module)
-{
- struct nvdimm_bus *nvdimm_bus;
- int rc;
-
- nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
- if (!nvdimm_bus)
- return NULL;
- INIT_LIST_HEAD(&nvdimm_bus->list);
- INIT_LIST_HEAD(&nvdimm_bus->poison_list);
- init_waitqueue_head(&nvdimm_bus->probe_wait);
- nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
- mutex_init(&nvdimm_bus->reconfig_mutex);
- if (nvdimm_bus->id < 0) {
- kfree(nvdimm_bus);
- return NULL;
- }
- nvdimm_bus->nd_desc = nd_desc;
- nvdimm_bus->module = module;
- nvdimm_bus->dev.parent = parent;
- nvdimm_bus->dev.release = nvdimm_bus_release;
- nvdimm_bus->dev.groups = nd_desc->attr_groups;
- dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
- rc = device_register(&nvdimm_bus->dev);
- if (rc) {
- dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
- goto err;
- }
-
- rc = nvdimm_bus_create_ndctl(nvdimm_bus);
- if (rc)
- goto err;
-
- mutex_lock(&nvdimm_bus_list_mutex);
- list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- return nvdimm_bus;
- err:
- put_device(&nvdimm_bus->dev);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
-
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
-static void free_poison_list(struct list_head *poison_list)
-{
- struct nd_poison *pl, *next;
-
- list_for_each_entry_safe(pl, next, poison_list, list) {
- list_del(&pl->list);
- kfree(pl);
- }
- list_del_init(poison_list);
-}
-
-static int child_unregister(struct device *dev, void *data)
-{
- /*
- * the singular ndctl class device per bus needs to be
- * "device_destroy"ed, so skip it here
- *
- * i.e. remove classless children
- */
- if (dev->class)
- /* pass */;
- else
- nd_device_unregister(dev, ND_SYNC);
- return 0;
-}
-
-void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
-{
- if (!nvdimm_bus)
- return;
-
- mutex_lock(&nvdimm_bus_list_mutex);
- list_del_init(&nvdimm_bus->list);
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- nd_synchronize();
- device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
-
- nvdimm_bus_lock(&nvdimm_bus->dev);
- free_poison_list(&nvdimm_bus->poison_list);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
-
- nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
- device_unregister(&nvdimm_bus->dev);
-}
-EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
if (meta_size == 0)
return 0;
- bi.profile = NULL;
+ memset(&bi, 0, sizeof(bi));
+
bi.tuple_size = meta_size;
bi.tag_size = meta_size;
nvdimm_bus_exit();
nd_region_devs_exit();
nvdimm_devs_exit();
- ida_destroy(&nd_ida);
}
MODULE_LICENSE("GPL v2");
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask)
+ unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
nvdimm->provider_data = provider_data;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
+ nvdimm->num_flush = num_flush;
+ nvdimm->flush_wpq = flush_wpq;
atomic_set(&nvdimm->busy, 0);
dev = &nvdimm->dev;
dev_set_name(dev, "nmem%d", nvdimm->id);
nd_desc.attr_groups = e820_pmem_attribute_groups;
nd_desc.provider_name = "e820";
+ nd_desc.module = THIS_MODULE;
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
if (!nvdimm_bus)
goto err;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
wait_queue_head_t probe_wait;
- struct module *module;
struct list_head list;
struct device dev;
int id, probe_active;
struct list_head poison_list;
+ struct list_head mapping_list;
struct mutex reconfig_mutex;
};
unsigned long cmd_mask;
struct device dev;
atomic_t busy;
- int id;
+ int id, num_flush;
+ struct resource *flush_wpq;
};
bool is_nvdimm(struct device *dev);
struct kref kref;
};
-struct nd_region_namespaces {
- int count;
- int active;
+struct nd_region_data {
+ int ns_count;
+ int ns_active;
+ unsigned int flush_mask;
+ void __iomem *flush_wpq[0][0];
};
static inline struct nd_namespace_index *to_namespace_index(
struct nd_blk_region {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
- void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
void *blk_provider_data;
}
#endif
int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
void __nd_iostat_start(struct bio *bio, unsigned long *start);
static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
{
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
+#include "pmem.h"
#include "pfn.h"
#include "nd.h"
-struct pmem_device {
- /* One contiguous memory region per device */
- phys_addr_t phys_addr;
- /* when non-zero this device is hosting a 'pfn' instance */
- phys_addr_t data_offset;
- u64 pfn_flags;
- void __pmem *virt_addr;
- /* immutable base size of the namespace */
- size_t size;
- /* trim size when namespace capacity has been section aligned */
- u32 pfn_pad;
- struct badblocks bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+ /*
+ * nvdimm bus services need a 'dev' parameter, and we record the device
+ * at init in bb.dev.
+ */
+ return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+ return to_nd_region(to_dev(pmem)->parent);
+}
static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
unsigned int len)
{
- struct device *dev = pmem->bb.dev;
+ struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared > 0 && cleared / 512) {
- dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+ dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
__func__, (unsigned long long) sector,
cleared / 512, cleared / 512 > 1 ? "s" : "");
badblocks_clear(&pmem->bb, sector, cleared / 512);
bool bad_pmem = false;
void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
- void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+ void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true;
return rc;
}
+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
int rc = 0;
struct bio_vec bvec;
struct bvec_iter iter;
struct pmem_device *pmem = q->queuedata;
+ struct nd_region *nd_region = to_region(pmem);
+
+ if (bio->bi_rw & REQ_FLUSH)
+ nvdimm_flush(nd_region);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
if (do_acct)
nd_iostat_end(bio, start);
- if (bio_data_dir(bio))
- wmb_pmem();
+ if (bio->bi_rw & REQ_FUA)
+ nvdimm_flush(nd_region);
bio_endio(bio);
return BLK_QC_T_NONE;
int rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
- if (rw & WRITE)
- wmb_pmem();
/*
* The ->rw_page interface is subtle and tricky. The core
return rc;
}
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
resource_size_t offset = sector * 512 + pmem->data_offset;
blk_cleanup_queue(q);
}
-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
{
del_gendisk(disk);
put_disk(disk);
struct nd_namespace_common *ndns)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL;
struct resource *res = &nsio->res;
struct nd_pfn *nd_pfn = NULL;
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
- if (!arch_has_wmb_pmem())
+ if (nvdimm_has_flush(nd_region) < 0)
dev_warn(dev, "unable to guarantee persistence of writes\n");
if (!devm_request_mem_region(dev, res->start, resource_size(res),
* At release time the queue must be dead before
* devm_memremap_pages is unwound
*/
- if (devm_add_action(dev, pmem_release_queue, q)) {
- blk_cleanup_queue(q);
+ if (devm_add_action_or_reset(dev, pmem_release_queue, q))
return -ENOMEM;
- }
if (IS_ERR(addr))
return PTR_ERR(addr);
- pmem->virt_addr = (void __pmem *) addr;
+ pmem->virt_addr = addr;
+ blk_queue_write_cache(q, true, true);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
disk = alloc_disk_node(0, nid);
if (!disk)
return -ENOMEM;
- if (devm_add_action(dev, pmem_release_disk, disk)) {
- put_disk(disk);
- return -ENOMEM;
- }
disk->fops = &pmem_fops;
disk->queue = q;
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
disk->bb = &pmem->bb;
device_add_disk(dev, disk);
+
+ if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+ return -ENOMEM;
+
revalidate_disk(disk);
return 0;
{
if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev));
+ nvdimm_flush(to_nd_region(dev->parent));
+
return 0;
}
+static void nd_pmem_shutdown(struct device *dev)
+{
+ nvdimm_flush(to_nd_region(dev->parent));
+}
+
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
- struct nd_region *nd_region = to_nd_region(dev->parent);
struct pmem_device *pmem = dev_get_drvdata(dev);
+ struct nd_region *nd_region = to_region(pmem);
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
.probe = nd_pmem_probe,
.remove = nd_pmem_remove,
.notify = nd_pmem_notify,
+ .shutdown = nd_pmem_shutdown,
.drv = {
.name = "nd_pmem",
},
--- /dev/null
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size);
+/* this definition is in it's own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+ /* One contiguous memory region per device */
+ phys_addr_t phys_addr;
+ /* when non-zero this device is hosting a 'pfn' instance */
+ phys_addr_t data_offset;
+ u64 pfn_flags;
+ void *virt_addr;
+ /* immutable base size of the namespace */
+ size_t size;
+ /* trim size when namespace capacity has been section aligned */
+ u32 pfn_pad;
+ struct badblocks bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
{
int err, rc;
static unsigned long once;
- struct nd_region_namespaces *num_ns;
+ struct nd_region_data *ndrd;
struct nd_region *nd_region = to_nd_region(dev);
if (nd_region->num_lanes > num_online_cpus()
nd_region->num_lanes);
}
+ rc = nd_region_activate(nd_region);
+ if (rc)
+ return rc;
+
rc = nd_blk_region_init(nd_region);
if (rc)
return rc;
rc = nd_region_register_namespaces(nd_region, &err);
- num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
- if (!num_ns)
- return -ENOMEM;
-
if (rc < 0)
return rc;
- num_ns->active = rc;
- num_ns->count = rc + err;
- dev_set_drvdata(dev, num_ns);
+ ndrd = dev_get_drvdata(dev);
+ ndrd->ns_active = rc;
+ ndrd->ns_count = rc + err;
if (rc && err && rc == err)
return -ENODEV;
{
struct nd_region *nd_region = to_nd_region(dev);
+ device_for_each_child(dev, NULL, child_unregister);
+
/* flush attribute readers and disable */
nvdimm_bus_lock(dev);
nd_region->ns_seed = NULL;
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
- device_for_each_child(dev, NULL, child_unregister);
return 0;
}
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+ struct nd_region_data *ndrd)
+{
+ int i, j;
+
+ dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+ nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+ for (i = 0; i < nvdimm->num_flush; i++) {
+ struct resource *res = &nvdimm->flush_wpq[i];
+ unsigned long pfn = PHYS_PFN(res->start);
+ void __iomem *flush_page;
+
+ /* check if flush hints share a page */
+ for (j = 0; j < i; j++) {
+ struct resource *res_j = &nvdimm->flush_wpq[j];
+ unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+ if (pfn == pfn_j)
+ break;
+ }
+
+ if (j < i)
+ flush_page = (void __iomem *) ((unsigned long)
+ ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+ else
+ flush_page = devm_nvdimm_ioremap(dev,
+ PHYS_PFN(pfn), PAGE_SIZE);
+ if (!flush_page)
+ return -ENXIO;
+ ndrd->flush_wpq[dimm][i] = flush_page
+ + (res->start & ~PAGE_MASK);
+ }
+
+ return 0;
+}
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+ int i, num_flush = 0;
+ struct nd_region_data *ndrd;
+ struct device *dev = &nd_region->dev;
+ size_t flush_data_size = sizeof(void *);
+
+ nvdimm_bus_lock(&nd_region->dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ /* at least one null hint slot per-dimm for the "no-hint" case */
+ flush_data_size += sizeof(void *);
+ num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+ if (!nvdimm->num_flush)
+ continue;
+ flush_data_size += nvdimm->num_flush * sizeof(void *);
+ }
+ nvdimm_bus_unlock(&nd_region->dev);
+
+ ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+ if (!ndrd)
+ return -ENOMEM;
+ dev_set_drvdata(dev, ndrd);
+
+ ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
static void nd_region_release(struct device *dev)
{
static ssize_t init_namespaces_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+ struct nd_region_data *ndrd = dev_get_drvdata(dev);
ssize_t rc;
nvdimm_bus_lock(dev);
- if (num_ns)
- rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+ if (ndrd)
+ rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
else
rc = -ENXIO;
nvdimm_bus_unlock(dev);
if (is_nd_pmem(dev))
return;
-
- to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
if (ndbr) {
nd_region = &ndbr->nd_region;
ndbr->enable = ndbr_desc->enable;
- ndbr->disable = ndbr_desc->disable;
ndbr->do_io = ndbr_desc->do_io;
}
region_buf = ndbr;
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+ struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+ int i, idx;
+
+ /*
+ * Try to encourage some diversity in flush hint addresses
+ * across cpus assuming a limited number of flush hints.
+ */
+ idx = this_cpu_read(flush_idx);
+ idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+ /*
+ * The first wmb() is needed to 'sfence' all previous writes
+ * such that they are architecturally visible for the platform
+ * buffer flush. Note that we've already arranged for pmem
+ * writes to avoid the cache via arch_memcpy_to_pmem(). The
+ * final wmb() ensures ordering for the NVDIMM flush write.
+ */
+ wmb();
+ for (i = 0; i < nd_region->ndr_mappings; i++)
+ if (ndrd->flush_wpq[i][0])
+ writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+ wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability can not be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+ struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+ int i;
+
+ /* no nvdimm == flushing capability unknown */
+ if (nd_region->ndr_mappings == 0)
+ return -ENXIO;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++)
+ /* flush hints present, flushing required */
+ if (ndrd->flush_wpq[i][0])
+ return 1;
+
+ /*
+ * The platform defines dimm devices without hints, assume
+ * platform persistence mechanism like ADR
+ */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
+
void __exit nd_region_devs_exit(void)
{
ida_destroy(®ion_ida);
static blk_qc_t dcssblk_make_request(struct request_queue *q,
struct bio *bio);
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, pfn_t *pfn, long size);
+ void **kaddr, pfn_t *pfn, long size);
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
static long
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct dcssblk_dev_info *dev_info;
unsigned long offset, dev_sz;
return -ENODEV;
dev_sz = dev_info->end - dev_info->start;
offset = secnum * 512;
- *kaddr = (void __pmem *) (dev_info->start + offset);
+ *kaddr = (void *) dev_info->start + offset;
*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
return dev_sz - offset;
struct request_queue *q = bdev->bd_queue;
long rc = -EIO;
- dax->addr = (void __pmem *) ERR_PTR(-EIO);
+ dax->addr = ERR_PTR(-EIO);
if (blk_queue_enter(q, true) != 0)
return rc;
rc = bdev_direct_access(bdev, dax);
if (rc < 0) {
- dax->addr = (void __pmem *) ERR_PTR(rc);
+ dax->addr = ERR_PTR(rc);
blk_queue_exit(q);
return rc;
}
struct buffer_head *bh)
{
loff_t pos = start, max = start, bh_max = start;
- bool hole = false, need_wmb = false;
+ bool hole = false;
struct block_device *bdev = NULL;
int rw = iov_iter_rw(iter), rc;
long map_len = 0;
struct blk_dax_ctl dax = {
- .addr = (void __pmem *) ERR_PTR(-EIO),
+ .addr = ERR_PTR(-EIO),
};
unsigned blkbits = inode->i_blkbits;
sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
if (iov_iter_rw(iter) == WRITE) {
len = copy_from_iter_pmem(dax.addr, max - pos, iter);
- need_wmb = true;
} else if (!hole)
len = copy_to_iter((void __force *) dax.addr, max - pos,
iter);
dax.addr += len;
}
- if (need_wmb)
- wmb_pmem();
dax_unmap_atomic(bdev, &dax);
return (pos == start) ? rc : pos - start;
return ret;
}
}
- wmb_pmem();
return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
if (dax_map_atomic(bdev, &dax) < 0)
return PTR_ERR(dax.addr);
clear_pmem(dax.addr + offset, length);
- wmb_pmem();
dax_unmap_atomic(bdev, &dax);
}
return 0;
*/
struct blk_dax_ctl {
sector_t sector;
- void __pmem *addr;
+ void *addr;
long size;
pfn_t pfn;
};
int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
- long (*direct_access)(struct block_device *, sector_t, void __pmem **,
- pfn_t *, long);
+ long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+ long);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
# define __percpu __attribute__((noderef, address_space(3)))
-# define __pmem __attribute__((noderef, address_space(5)))
#ifdef CONFIG_SPARSE_RCU_POINTER
# define __rcu __attribute__((noderef, address_space(4)))
#else /* CONFIG_SPARSE_RCU_POINTER */
# define __cond_lock(x,c) (c)
# define __percpu
# define __rcu
-# define __pmem
# define __private
# define ACCESS_PRIVATE(p, member) ((p)->member)
#endif /* __CHECKER__ */
* >= 0 : the number of bytes accessible at the address
*/
typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size);
+ void **kaddr, pfn_t *pfn, long size);
void dm_error(const char *message);
struct nd_namespace_label;
struct nvdimm_drvdata;
+
struct nd_mapping {
struct nvdimm *nvdimm;
struct nd_namespace_label **labels;
struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups;
unsigned long cmd_mask;
+ struct module *module;
char *provider_name;
ndctl_fn ndctl;
int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
unsigned long flags;
};
+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+ resource_size_t offset, size_t size)
+{
+ return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
struct nvdimm_bus;
struct module;
struct device;
struct nd_blk_region;
struct nd_blk_region_desc {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
- void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
struct nd_region_desc ndr_desc;
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
- struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
- __nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+ struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
struct nvdimm *to_nvdimm(struct device *dev);
struct nd_region *to_nd_region(struct device *dev);
struct nd_blk_region *to_nd_blk_region(struct device *dev);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
const char *nvdimm_name(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask);
+ unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
#endif /* __LIBNVDIMM_H__ */
unsigned long type;
int (*probe)(struct device *dev);
int (*remove)(struct device *dev);
+ void (*shutdown)(struct device *dev);
void (*notify)(struct device *dev, enum nvdimm_event event);
};
struct nd_namespace_common common;
struct resource res;
resource_size_t size;
- void __pmem *addr;
+ void *addr;
struct badblocks bb;
};
return __pfn_to_pfn_t(pfn, 0);
}
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+ return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
static inline bool pfn_t_has_page(pfn_t pfn)
{
* calling these symbols with arch_has_pmem_api() and redirect to the
* implementation in asm/pmem.h.
*/
-static inline bool __arch_has_wmb_pmem(void)
-{
- return false;
-}
-
-static inline void arch_wmb_pmem(void)
-{
- BUG();
-}
-
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
BUG();
}
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
- size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
BUG();
return -EFAULT;
}
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
BUG();
return 0;
}
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
{
BUG();
}
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
BUG();
}
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
{
BUG();
}
return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
}
-static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
- size_t size)
-{
- memcpy(dst, (void __force *) src, size);
- return 0;
-}
-
/*
* memcpy_from_pmem - read from persistent memory with error handling
* @dst: destination buffer
*
* Returns 0 on success negative error code on failure.
*/
-static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
- size_t size)
+static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
{
if (arch_has_pmem_api())
return arch_memcpy_from_pmem(dst, src, size);
else
- return default_memcpy_from_pmem(dst, src, size);
-}
-
-/**
- * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
- *
- * For a given cpu implementation within an architecture it is possible
- * that wmb_pmem() resolves to a nop. In the case this returns
- * false, pmem api users are unable to ensure durability and may want to
- * fall back to a different data consistency model, or otherwise notify
- * the user.
- */
-static inline bool arch_has_wmb_pmem(void)
-{
- return arch_has_pmem_api() && __arch_has_wmb_pmem();
-}
-
-/*
- * These defaults seek to offer decent performance and minimize the
- * window between i/o completion and writes being durable on media.
- * However, it is undefined / architecture specific whether
- * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
- * making data durable relative to i/o completion.
- */
-static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t size)
-{
- memcpy((void __force *) dst, src, size);
-}
-
-static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
- size_t bytes, struct iov_iter *i)
-{
- return copy_from_iter_nocache((void __force *)addr, bytes, i);
-}
-
-static inline void default_clear_pmem(void __pmem *addr, size_t size)
-{
- if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
- clear_page((void __force *)addr);
- else
- memset((void __force *)addr, 0, size);
+ memcpy(dst, src, size);
+ return 0;
}
/**
* being effectively evicted from, or never written to, the processor
* cache hierarchy after the copy completes. After memcpy_to_pmem()
* data may still reside in cpu or platform buffers, so this operation
- * must be followed by a wmb_pmem().
+ * must be followed by a blkdev_issue_flush() on the pmem block device.
*/
-static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
{
if (arch_has_pmem_api())
arch_memcpy_to_pmem(dst, src, n);
else
- default_memcpy_to_pmem(dst, src, n);
-}
-
-/**
- * wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of memcpy_to_pmem() operations this drains data from
- * cpu write buffers and any platform (memory controller) buffers to
- * ensure that written data is durable on persistent memory media.
- */
-static inline void wmb_pmem(void)
-{
- if (arch_has_wmb_pmem())
- arch_wmb_pmem();
- else
- wmb();
+ memcpy(dst, src, n);
}
/**
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
if (arch_has_pmem_api())
return arch_copy_from_iter_pmem(addr, bytes, i);
- return default_copy_from_iter_pmem(addr, bytes, i);
+ return copy_from_iter_nocache(addr, bytes, i);
}
/**
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline void clear_pmem(void __pmem *addr, size_t size)
+static inline void clear_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_clear_pmem(addr, size);
else
- default_clear_pmem(addr, size);
+ memset(addr, 0, size);
}
/**
* For platforms that support clearing poison this flushes any poisoned
* ranges out of the cache
*/
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_invalidate_pmem(addr, size);
* @size: number of bytes to write back
*
* Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_wb_cache_pmem(addr, size);
#define NVDIMM_FAMILY_INTEL 0
#define NVDIMM_FAMILY_HPE1 1
#define NVDIMM_FAMILY_HPE2 2
+#define NVDIMM_FAMILY_MSFT 3
#define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\
struct nd_cmd_pkg)
}
EXPORT_SYMBOL(devm_memunmap);
-pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
- return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-EXPORT_SYMBOL(phys_to_pfn_t);
-
#ifdef CONFIG_ZONE_DEVICE
static DEFINE_MUTEX(pgmap_lock);
static RADIX_TREE(pgmap_radix, GFP_KERNEL);
__kernel|
__force|
__iomem|
- __pmem|
__must_check|
__init_refok|
__kprobes|
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16
"0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8 \tpcommit ",},
"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8 \tpcommit ",},
#endif /* #ifndef __x86_64__ */
- /* pcommit */
-
- asm volatile("pcommit");
-
/* Following line is a marker for the awk script - do not change */
asm volatile("rdtsc"); /* Stop here */
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16
ldflags-y += --wrap=__request_region
ldflags-y += --wrap=__release_region
ldflags-y += --wrap=devm_memremap_pages
-ldflags-y += --wrap=phys_to_pfn_t
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
-ACPI_SRC := $(DRIVERS)/acpi
+ACPI_SRC := $(DRIVERS)/acpi/nfit
DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_DEV_DAX) += dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
-nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
nfit-y += config_check.o
nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
nd_pmem-y += config_check.o
nd_btt-y := $(NVDIMM_SRC)/btt.o
BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
--- /dev/null
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size)
+{
+ struct pmem_device *pmem = bdev->bd_queue->queuedata;
+ resource_size_t offset = sector * 512 + pmem->data_offset;
+
+ if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+ return -EIO;
+
+ /*
+ * Limit dax to a single page at a time given vmalloc()-backed
+ * in the nfit_test case.
+ */
+ if (get_nfit_res(pmem->phys_addr + offset)) {
+ struct page *page;
+
+ *kaddr = pmem->virt_addr + offset;
+ page = vmalloc_to_page(pmem->virt_addr + offset);
+ *pfn = page_to_pfn_t(page);
+ dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
+ "%s: sector: %#llx pfn: %#lx\n", __func__,
+ (unsigned long long) sector, page_to_pfn(page));
+
+ return PAGE_SIZE;
+ }
+
+ *kaddr = pmem->virt_addr + offset;
+ *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+ /*
+ * If badblocks are present, limit known good range to the
+ * requested range.
+ */
+ if (unlikely(pmem->bb.count))
+ return size;
+ return pmem->size - pmem->pfn_pad - offset;
+}
ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
obj-m += nfit_test.o
obj-m += nfit_test_iomap.o
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
+#include <linux/memremap.h>
#include <linux/rculist.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <linux/pfn_t.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "nfit_test.h"
return NULL;
}
-static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
{
struct nfit_test_resource *res;
return res;
}
+EXPORT_SYMBOL(get_nfit_res);
void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
}
EXPORT_SYMBOL(__wrap_devm_memremap);
-#ifdef __HAVE_ARCH_PTE_DEVMAP
-#include <linux/memremap.h>
-#include <linux/pfn_t.h>
-
void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
return phys_to_pfn_t(addr, flags);
}
EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-#else
-/* to be removed post 4.5-rc1 */
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
-{
- resource_size_t offset = res->start;
- struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
- if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
- return devm_memremap_pages(dev, res);
-}
-EXPORT_SYMBOL(__wrap_devm_memremap_pages);
-#endif
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags)
}
EXPORT_SYMBOL(__wrap___request_region);
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
struct resource *__wrap___devm_request_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n, const char *name)
enum {
NUM_PM = 3,
NUM_DCR = 5,
+ NUM_HINTS = 8,
NUM_BDW = NUM_DCR,
NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
DIMM_SIZE = SZ_32M,
LABEL_SIZE = SZ_128K,
+ SPA_VCD_SIZE = SZ_4M,
SPA0_SIZE = DIMM_SIZE,
SPA1_SIZE = DIMM_SIZE*2,
SPA2_SIZE = DIMM_SIZE,
list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock);
- if (is_vmalloc_addr(nfit_res->buf))
- vfree(nfit_res->buf);
- else
- dma_free_coherent(nfit_res->dev, resource_size(res),
- nfit_res->buf, res->start);
+ vfree(nfit_res->buf);
kfree(res);
kfree(nfit_res);
}
return nfit_res->buf;
err:
- if (buf && !is_vmalloc_addr(buf))
- dma_free_coherent(dev, size, buf, *dma);
- else if (buf)
+ if (buf)
vfree(buf);
kfree(res);
kfree(nfit_res);
return __test_alloc(t, size, dma, buf);
}
-static void *test_alloc_coherent(struct nfit_test *t, size_t size,
- dma_addr_t *dma)
-{
- struct device *dev = &t->pdev.dev;
- void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
-
- return __test_alloc(t, size, dma, buf);
-}
-
static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
{
int i;
+ offsetof(struct acpi_nfit_control_region,
window_size) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
- + sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+ + (sizeof(struct acpi_nfit_flush_address)
+ + sizeof(u64) * NUM_HINTS) * NUM_DCR;
int i;
t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
return -ENOMEM;
t->nfit_size = nfit_size;
- t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+ t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
if (!t->spa_set[0])
return -ENOMEM;
- t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+ t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
if (!t->spa_set[1])
return -ENOMEM;
- t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
+ t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
if (!t->spa_set[2])
return -ENOMEM;
return -ENOMEM;
sprintf(t->label[i], "label%d", i);
- t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+ t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+ &t->flush_dma[i]);
if (!t->flush[i])
return -ENOMEM;
}
static int nfit_test1_alloc(struct nfit_test *t)
{
- size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ sizeof(struct acpi_nfit_memory_map)
+ offsetof(struct acpi_nfit_control_region, window_size);
return -ENOMEM;
t->nfit_size = nfit_size;
- t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+ t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
if (!t->spa_set[0])
return -ENOMEM;
+ t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->valid_fields = 1;
+ dcr->manufacturing_location = 0xa;
+ dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
static void nfit_test0_setup(struct nfit_test *t)
{
+ const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+ + (sizeof(u64) * NUM_HINTS);
struct acpi_nfit_desc *acpi_desc;
struct acpi_nfit_memory_map *memdev;
void *nfit_buf = t->nfit_buf;
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
- unsigned int offset;
+ unsigned int offset, i;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 0+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[0];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 1+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[1];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 2+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[2];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 3+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[3];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 4+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[0];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 5+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[1];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 6+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[2];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 7+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[3];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
/* flush0 (dimm0) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
- flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->header.length = flush_hint_size;
flush->device_handle = handle[0];
- flush->hint_count = 1;
- flush->hint_address[0] = t->flush_dma[0];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
/* flush1 (dimm1) */
- flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+ flush = nfit_buf + offset + flush_hint_size * 1;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
- flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->header.length = flush_hint_size;
flush->device_handle = handle[1];
- flush->hint_count = 1;
- flush->hint_address[0] = t->flush_dma[1];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
/* flush2 (dimm2) */
- flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+ flush = nfit_buf + offset + flush_hint_size * 2;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
- flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->header.length = flush_hint_size;
flush->device_handle = handle[2];
- flush->hint_count = 1;
- flush->hint_address[0] = t->flush_dma[2];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
/* flush3 (dimm3) */
- flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+ flush = nfit_buf + offset + flush_hint_size * 3;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
- flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->header.length = flush_hint_size;
flush->device_handle = handle[3];
- flush->hint_count = 1;
- flush->hint_address[0] = t->flush_dma[3];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
if (t->setup_hotplug) {
- offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
+ offset = offset + flush_hint_size * 4;
/* dcr-descriptor4: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 8+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[4];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 9+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~handle[4];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
/* flush3 (dimm4) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
- flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->header.length = flush_hint_size;
flush->device_handle = handle[4];
- flush->hint_count = 1;
- flush->hint_address[0] = t->flush_dma[4];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[4]
+ + i * sizeof(u64);
}
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
spa->address = t->spa_set_dma[0];
spa->length = SPA2_SIZE;
- offset += sizeof(*spa);
+ /* virtual cd region */
+ spa = nfit_buf + sizeof(*spa);
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+ spa->range_index = 0;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA_VCD_SIZE;
+
+ offset += sizeof(*spa) * 2;
/* mem-region0 (spa0, dimm0) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 0+1;
- dcr->vendor_id = 0xabcd;
- dcr->device_id = 0;
- dcr->revision_id = 1;
+ dcr_common_init(dcr);
dcr->serial_number = ~0;
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
nfit_test->setup(nfit_test);
acpi_desc = &nfit_test->acpi_desc;
acpi_nfit_desc_init(acpi_desc, &pdev->dev);
- acpi_desc->nfit = nfit_test->nfit_buf;
acpi_desc->blk_do_io = nfit_test_blk_do_io;
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = NULL;
+ nd_desc->module = THIS_MODULE;
nd_desc->ndctl = nfit_test_ctl;
- acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
- if (!acpi_desc->nvdimm_bus)
- return -ENXIO;
- rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
- if (rc) {
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+ nfit_test->nfit_size);
+ if (rc)
return rc;
- }
if (nfit_test->setup != nfit_test0_setup)
return 0;
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
- rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
- if (rc) {
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+ nfit_test->nfit_size);
+ if (rc)
return rc;
- }
return 0;
}
static int nfit_test_remove(struct platform_device *pdev)
{
- struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
- struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
-
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-
return 0;
}
.id_table = nfit_test_id,
};
-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
static __init int nfit_test_init(void)
{
int rc, i;
for (i = 0; i < NUM_NFITS; i++) {
struct nfit_test *nfit_test;
struct platform_device *pdev;
- static int once;
nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
if (!nfit_test) {
goto err_register;
instances[i] = nfit_test;
-
- if (!once++) {
- dma_addr_t dma;
- void *buf;
-
- buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
- GFP_KERNEL);
- if (!buf) {
- rc = -ENOMEM;
- dev_warn(&pdev->dev, "need 128M of free cma\n");
- goto err_register;
- }
- dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
- }
}
rc = platform_driver_register(&nfit_test_driver);
*/
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
+#include <linux/list.h>
struct nfit_test_resource {
struct list_head list;
void __wrap_iounmap(volatile void __iomem *addr);
void nfit_test_setup(nfit_test_lookup_fn lookup);
void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
#endif