{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
uint64_t rc, token;
+ uint64_t saved = 0;
/*
* When the hypervisor cannot map all the requested memory in a single
* hcall it returns H_BUSY; retry with the continuation token.
*/
rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
p->blocks, BIND_ANY_ADDR, token);
token = ret[0];
+ if (!saved)
+ saved = ret[1];
cond_resched();
} while (rc == H_BUSY);
return -ENXIO;
}
- p->bound_addr = ret[1];
+ p->bound_addr = saved;
dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.attr_groups = region_attr_groups;
ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+ ndr_desc.target_node = ndr_desc.numa_node;
ndr_desc.res = &p->res;
ndr_desc.of_node = p->dn;
ndr_desc.provider_data = p;
#include <acpi/nfit.h>
#include "intel.h"
#include "nfit.h"
-#include "intel.h"
/*
* For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
* irrelevant.
*/
module_param(no_init_ars, bool, 0644);
MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+static bool force_labels;
+module_param(force_labels, bool, 0444);
+MODULE_PARM_DESC(force_labels, "Opt-in to labels despite missing methods");
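+/* 0444: settable only at load time, e.g. "modprobe nfit force_labels=Y" */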
+
LIST_HEAD(acpi_descs);
DEFINE_MUTEX(acpi_desc_lock);
}
EXPORT_SYMBOL(to_nfit_uuid);
-static struct acpi_nfit_desc *to_acpi_nfit_desc(
- struct nvdimm_bus_descriptor *nd_desc)
-{
- return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
-}
-
static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
{
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
return true;
}
+static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
+ struct nd_cmd_pkg *call_pkg)
+{
+ if (call_pkg) {
+ int i;
+
+ if (nfit_mem && nfit_mem->family != call_pkg->nd_family)
+ return -ENOTTY;
+
+ for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
+ if (call_pkg->nd_reserved2[i])
+ return -EINVAL;
+ return call_pkg->nd_command;
+ }
+
+ /* In the !call_pkg case, bus commands == bus functions */
+ if (!nfit_mem)
+ return cmd;
+
+ /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */
+ if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+ return cmd;
+
+ /*
+ * Force function number validation to fail since 0 is never
+ * published as a valid function in dsm_mask.
+ */
+ return 0;
+}
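+/*
+ * Illustration (not part of the patch): ND_CMD_GET_CONFIG_SIZE (4) on an
+ * NVDIMM_FAMILY_INTEL DIMM passes through as DSM function 4, while the
+ * same ioctl on any other family maps to 0, which never matches dsm_mask
+ * because function 0 is reserved for interrogation.
+ */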
+
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
union acpi_object in_obj, in_buf, *out_obj;
const struct nd_cmd_desc *desc = NULL;
unsigned long cmd_mask, dsm_mask;
u32 offset, fw_status = 0;
acpi_handle handle;
- unsigned int func;
const guid_t *guid;
- int rc, i;
+ int func, rc, i;
if (cmd_rc)
*cmd_rc = -EINVAL;
- func = cmd;
- if (cmd == ND_CMD_CALL) {
- call_pkg = buf;
- func = call_pkg->nd_command;
- for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
- if (call_pkg->nd_reserved2[i])
- return -EINVAL;
- }
+ if (cmd == ND_CMD_CALL)
+ call_pkg = buf;
+ func = cmd_to_func(nfit_mem, cmd, call_pkg);
+ if (func < 0)
+ return func;
if (nvdimm) {
struct acpi_device *adev = nfit_mem->adev;
if (!adev)
return -ENOTTY;
- if (call_pkg && nfit_mem->family != call_pkg->nd_family)
- return -ENOTTY;
dimm_name = nvdimm_name(nvdimm);
cmd_name = nvdimm_cmd_name(cmd);
cmd_name = nvdimm_bus_cmd_name(cmd);
cmd_mask = nd_desc->cmd_mask;
- dsm_mask = cmd_mask;
- if (cmd == ND_CMD_CALL)
- dsm_mask = nd_desc->bus_dsm_mask;
+ dsm_mask = nd_desc->bus_dsm_mask;
desc = nd_cmd_bus_desc(cmd);
guid = to_nfit_uuid(NFIT_DEV_BUS);
handle = adev->handle;
if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
return -ENOTTY;
- if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
+ /*
+ * Check for a valid command. For ND_CMD_CALL, we also have to
+ * make sure that the DSM function is supported.
+ */
+ if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask))
+ return -ENOTTY;
+ else if (!test_bit(cmd, &cmd_mask))
return -ENOTTY;
in_obj.type = ACPI_TYPE_PACKAGE;
return -EINVAL;
}
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+ dimm_name, cmd_name, out_obj->type);
+ rc = -EINVAL;
+ goto out;
+ }
+
if (call_pkg) {
call_pkg->nd_fw_size = out_obj->buffer.length;
memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
return 0;
}
- if (out_obj->package.type != ACPI_TYPE_BUFFER) {
- dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
- dimm_name, cmd_name, out_obj->type);
- rc = -EINVAL;
- goto out;
- }
-
dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
cmd_name, out_obj->buffer.length);
print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
struct acpi_nfit_memory_map *memdev;
struct acpi_nfit_desc *acpi_desc;
struct nfit_mem *nfit_mem;
+ u16 physical_id;
mutex_lock(&acpi_desc_lock);
list_for_each_entry(acpi_desc, &acpi_descs, list) {
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
memdev = __to_nfit_memdev(nfit_mem);
if (memdev->device_handle == device_handle) {
+ *flags = memdev->flags;
+ physical_id = memdev->physical_id;
mutex_unlock(&acpi_desc->init_mutex);
mutex_unlock(&acpi_desc_lock);
- *flags = memdev->flags;
- return memdev->physical_id;
+ return physical_id;
}
}
mutex_unlock(&acpi_desc->init_mutex);
struct device_attribute *attr, char *buf)
{
struct nvdimm_bus_descriptor *nd_desc;
+ struct acpi_nfit_desc *acpi_desc;
ssize_t rc = -ENXIO;
+ bool busy;
device_lock(dev);
nd_desc = dev_get_drvdata(dev);
- if (nd_desc) {
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+ if (!nd_desc) {
+ device_unlock(dev);
+ return rc;
+ }
+ acpi_desc = to_acpi_desc(nd_desc);
- mutex_lock(&acpi_desc->init_mutex);
- rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
- acpi_desc->scrub_busy
- && !acpi_desc->cancel ? "+\n" : "\n");
- mutex_unlock(&acpi_desc->init_mutex);
+ mutex_lock(&acpi_desc->init_mutex);
+ busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags)
+ && !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
+ rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n");
+ /* Allow an admin to poll the busy state at a higher rate */
+ if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL,
+ &acpi_desc->scrub_flags)) {
+ acpi_desc->scrub_tmo = 1;
+ mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ);
}
+
+ mutex_unlock(&acpi_desc->init_mutex);
device_unlock(dev);
return rc;
}
__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
{
+ struct device *dev = &nfit_mem->adev->dev;
struct nd_intel_smart smart = { 0 };
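+ /*
+ * Input here is an empty buffer; the health record arrives in the
+ * DSM output object and is copied into 'smart' below.
+ */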
union acpi_object in_buf = {
- .type = ACPI_TYPE_BUFFER,
- .buffer.pointer = (char *) &smart,
- .buffer.length = sizeof(smart),
+ .buffer.type = ACPI_TYPE_BUFFER,
+ .buffer.length = 0,
};
union acpi_object in_obj = {
- .type = ACPI_TYPE_PACKAGE,
+ .package.type = ACPI_TYPE_PACKAGE,
.package.count = 1,
.package.elements = &in_buf,
};
return;
out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
- if (!out_obj)
+ if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER
+ || out_obj->buffer.length < sizeof(smart)) {
+ dev_dbg(dev->parent, "%s: failed to retrieve initial health\n",
+ dev_name(dev));
+ ACPI_FREE(out_obj);
return;
+ }
+ memcpy(&smart, out_obj->buffer.pointer, sizeof(smart));
+ ACPI_FREE(out_obj);
if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
if (smart.shutdown_state)
set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
nfit_mem->dirty_shutdown = smart.shutdown_count;
}
- ACPI_FREE(out_obj);
}
static void populate_shutdown_status(struct nfit_mem *nfit_mem)
dev_set_drvdata(&adev_dimm->dev, nfit_mem);
/*
- * Until standardization materializes we need to consider 4
- * different command sets. Note, that checking for function0 (bit0)
- * tells us if any commands are reachable through this GUID.
+ * There are 4 "legacy" NVDIMM command sets
+ * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
+ * an EFI working group was established to constrain this
+ * proliferation. The nfit driver probes for the supported command
+ * set by GUID. Note, if you're a platform developer looking to add
+ * a new command set to this probe, consider using an existing set,
+ * or otherwise seek approval to publish the command set at
+ * http://www.uefi.org/RFIC_LIST.
+ *
+ * Note, that checking for function0 (bit0) tells us if any commands
+ * are reachable through this GUID.
*/
for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
dsm_mask &= ~(1 << 8);
} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
dsm_mask = 0xffffffff;
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
+ dsm_mask = 0x1f;
} else {
dev_dbg(dev, "unknown dimm command family\n");
nfit_mem->family = -1;
return 0;
}
+ /*
+ * Function 0 is the command interrogation function, don't
+ * export it to potential userspace use, and enable it to be
+ * used as an error value in acpi_nfit_ctl().
+ */
+ dsm_mask &= ~1UL;
+
guid = to_nfit_uuid(nfit_mem->family);
for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
if (acpi_check_dsm(adev_dimm->handle, guid,
| 1 << ND_CMD_SET_CONFIG_DATA;
if (family == NVDIMM_FAMILY_INTEL
&& (dsm_mask & label_mask) == label_mask)
- return 0;
+ /* skip _LS{I,R,W} enabling */;
+ else {
+ if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+ && acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
+ dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+ set_bit(NFIT_MEM_LSR, &nfit_mem->flags);
+ }
- if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
- && acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
- dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
- set_bit(NFIT_MEM_LSR, &nfit_mem->flags);
- }
+ if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
+ && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
+ dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+ set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
+ }
- if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
- && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
- dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
- set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
+ /*
+ * Quirk read-only label configurations to preserve
+ * access to label-less namespaces by default.
+ */
+ if (!test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
+ && !force_labels) {
+ dev_dbg(dev, "%s: No _LSW, disable labels\n",
+ dev_name(&adev_dimm->dev));
+ clear_bit(NFIT_MEM_LSR, &nfit_mem->flags);
+ } else
+ dev_dbg(dev, "%s: Force enable labels\n",
+ dev_name(&adev_dimm->dev));
}
populate_shutdown_status(nfit_mem);
cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
}
+ /* Quirk to ignore LOCAL for labels on HYPERV DIMMs */
+ if (nfit_mem->family == NVDIMM_FAMILY_HYPERV)
+ set_bit(NDD_NOBLK, &flags);
+
if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
continue;
- dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n",
+ dev_err(acpi_desc->dev, "Error found in NVDIMM %s flags:%s%s%s%s%s\n",
nvdimm_name(nvdimm),
mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
if (!nvdimm)
continue;
- rc = nvdimm_security_setup_events(nvdimm);
- if (rc < 0)
- dev_warn(acpi_desc->dev,
- "security event setup failed: %d\n", rc);
-
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set)
return -ENOMEM;
- ndr_desc->nd_set = nd_set;
guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid);
info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
if (rc < 0)
return rc;
- return cmd_rc;
+ if (cmd_rc < 0)
+ return cmd_rc;
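+ /* an ARS start just succeeded; results read back from here on are fresh */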
+ set_bit(ARS_VALID, &acpi_desc->scrub_flags);
+ return 0;
}
static int ars_continue(struct acpi_nfit_desc *acpi_desc)
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
- memset(&ars_start, 0, sizeof(ars_start));
- ars_start.address = ars_status->restart_address;
- ars_start.length = ars_status->restart_length;
- ars_start.type = ars_status->type;
- ars_start.flags = acpi_desc->ars_start_flags;
+ ars_start = (struct nd_cmd_ars_start) {
+ .address = ars_status->restart_address,
+ .length = ars_status->restart_length,
+ .type = ars_status->type,
+ };
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
sizeof(ars_start), &cmd_rc);
if (rc < 0)
*/
if (ars_status->out_length < 44)
return 0;
+
+ /*
+ * Ignore potentially stale results that are only refreshed
+ * after a start-ARS event.
+ */
+ if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) {
+ dev_dbg(acpi_desc->dev, "skip %d stale records\n",
+ ars_status->num_records);
+ return 0;
+ }
+
for (i = 0; i < ars_status->num_records; i++) {
/* only process full records */
if (ars_status->out_length
ndr_desc->res = &res;
ndr_desc->provider_data = nfit_spa;
ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
- if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+ if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
ndr_desc->numa_node = acpi_map_pxm_to_online_node(
spa->proximity_domain);
- else
+ ndr_desc->target_node = acpi_map_pxm_to_node(
+ spa->proximity_domain);
+ } else {
ndr_desc->numa_node = NUMA_NO_NODE;
+ ndr_desc->target_node = NUMA_NO_NODE;
+ }
/*
* Persistence domain bits are hierarchical, if
{
int rc;
- if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
+ if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
return acpi_nfit_register_region(acpi_desc, nfit_spa);
set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
- set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
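+ /* no_init_ars only opts out of the long scrub; short ARS still runs */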
+ if (!no_init_ars)
+ set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
switch (acpi_nfit_query_poison(acpi_desc)) {
case 0:
+ case -ENOSPC:
case -EAGAIN:
rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
/* shouldn't happen, try again later */
break;
case -EBUSY:
case -ENOMEM:
- case -ENOSPC:
/*
* BIOS was using ARS, wait for it to complete (or
* resources to become available) and then perform our
lockdep_assert_held(&acpi_desc->init_mutex);
- if (acpi_desc->cancel)
+ if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags))
return 0;
if (query_rc == -EBUSY) {
{
lockdep_assert_held(&acpi_desc->init_mutex);
- acpi_desc->scrub_busy = 1;
+ set_bit(ARS_BUSY, &acpi_desc->scrub_flags);
/* note this should only be set from within the workqueue */
if (tmo)
acpi_desc->scrub_tmo = tmo;
{
lockdep_assert_held(&acpi_desc->init_mutex);
- acpi_desc->scrub_busy = 0;
+ clear_bit(ARS_BUSY, &acpi_desc->scrub_flags);
acpi_desc->scrub_count++;
if (acpi_desc->scrub_count_state)
sysfs_notify_dirent(acpi_desc->scrub_count_state);
else
notify_ars_done(acpi_desc);
memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
+ clear_bit(ARS_POLL, &acpi_desc->scrub_flags);
mutex_unlock(&acpi_desc->init_mutex);
}
struct nfit_spa *nfit_spa;
int rc;
+ set_bit(ARS_VALID, &acpi_desc->scrub_flags);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
switch (nfit_spa_type(nfit_spa->spa)) {
case NFIT_SPA_VOLATILE:
static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
struct device *dev = acpi_desc->dev;
/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd)
{
- struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
if (nvdimm)
return 0;
struct nfit_spa *nfit_spa;
mutex_lock(&acpi_desc->init_mutex);
- if (acpi_desc->cancel) {
+ if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) {
mutex_unlock(&acpi_desc->init_mutex);
return 0;
}
mutex_unlock(&acpi_desc_lock);
mutex_lock(&acpi_desc->init_mutex);
- acpi_desc->cancel = 1;
+ set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
cancel_delayed_work_sync(&acpi_desc->dwork);
mutex_unlock(&acpi_desc->init_mutex);
guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+ guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
return node;
}
+ EXPORT_SYMBOL(acpi_map_pxm_to_node);
/**
* acpi_map_pxm_to_online_node - Map proximity ID to online node
{
struct acpi_srat_mem_affinity *p =
(struct acpi_srat_mem_affinity *)header;
- pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
- (unsigned long)p->base_address,
- (unsigned long)p->length,
+ pr_debug("SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n",
+ (unsigned long long)p->base_address,
+ (unsigned long long)p->length,
p->proximity_domain,
(p->flags & ACPI_SRAT_MEM_ENABLED) ?
"enabled" : "disabled",
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/fs.h>
+ #include "dax-private.h"
static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
{
struct dax_device *dax_dev;
bool dax_enabled = false;
+ pgoff_t pgoff, pgoff_end;
struct request_queue *q;
- pgoff_t pgoff;
- int err, id;
- pfn_t pfn;
- long len;
char buf[BDEVNAME_SIZE];
+ void *kaddr, *end_kaddr;
+ pfn_t pfn, end_pfn;
+ sector_t last_page;
+ long len, len2;
+ int err, id;
if (blocksize != PAGE_SIZE) {
pr_debug("%s: error: unsupported blocksize for dax\n",
return false;
}
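+ /*
+ * Also validate the end of the device: take the index of its last
+ * page, converted to a 512-byte sector (PAGE_SIZE / 512 == 8 with
+ * 4K pages) for bdev_dax_pgoff().
+ */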
+ last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
+ err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
+ if (err) {
+ pr_debug("%s: error: unaligned partition for dax\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
if (!dax_dev) {
pr_debug("%s: error: device does not support dax\n",
}
id = dax_read_lock();
- len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn);
+ len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+ len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
dax_read_unlock(id);
put_dax(dax_dev);
- if (len < 1) {
+ if (len < 1 || len2 < 1) {
pr_debug("%s: error: dax access failed (%ld)\n",
- bdevname(bdev, buf), len);
+ bdevname(bdev, buf), len < 1 ? len : len2);
return false;
}
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
dax_enabled = true;
- } else if (pfn_t_devmap(pfn)) {
- struct dev_pagemap *pgmap;
+ } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
+ struct dev_pagemap *pgmap, *end_pgmap;
pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
- if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+ end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
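+ /*
+ * fsdax requires the first and last pages to share one
+ * MEMORY_DEVICE_FS_DAX pagemap, with struct pages that match the
+ * kernel addresses dax_direct_access() returned.
+ */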
+ if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
+ && pfn_t_to_page(pfn)->pgmap == pgmap
+ && pfn_t_to_page(end_pfn)->pgmap == pgmap
+ && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
+ && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
dax_enabled = true;
put_dev_pagemap(pgmap);
+ put_dev_pagemap(end_pgmap);
+
}
if (!dax_enabled) {
spin_lock(&dax_host_lock);
hlist_del_init(&dax_dev->list);
spin_unlock(&dax_host_lock);
-
- dax_dev->private = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);
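+ /* re-mark a device alive, undoing a previous kill_dax() */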
+ void run_dax(struct dax_device *dax_dev)
+ {
+ set_bit(DAXDEV_ALIVE, &dax_dev->flags);
+ }
+ EXPORT_SYMBOL_GPL(run_dax);
+
static struct inode *dax_alloc_inode(struct super_block *sb)
{
struct dax_device *dax_dev;
void *dax_get_private(struct dax_device *dax_dev)
{
+ if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
+ return NULL;
return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);
inode_init_once(inode);
}
- static int __dax_fs_init(void)
+ static int dax_fs_init(void)
{
int rc;
return rc;
}
- static void __dax_fs_exit(void)
+ static void dax_fs_exit(void)
{
kern_unmount(dax_mnt);
unregister_filesystem(&dax_fs_type);
kmem_cache_destroy(dax_cache);
}
- static int __init dax_fs_init(void)
+ static int __init dax_core_init(void)
{
int rc;
- rc = __dax_fs_init();
+ rc = dax_fs_init();
if (rc)
return rc;
rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
if (rc)
- __dax_fs_exit();
- return rc;
+ goto err_chrdev;
+
+ rc = dax_bus_init();
+ if (rc)
+ goto err_bus;
+ return 0;
+
+ err_bus:
+ unregister_chrdev_region(dax_devt, MINORMASK+1);
+ err_chrdev:
+ dax_fs_exit();
+ return rc;
}
- static void __exit dax_fs_exit(void)
+ static void __exit dax_core_exit(void)
{
unregister_chrdev_region(dax_devt, MINORMASK+1);
ida_destroy(&dax_minor_ida);
- __dax_fs_exit();
+ dax_fs_exit();
}
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
- subsys_initcall(dax_fs_init);
- module_exit(dax_fs_exit);
+ subsys_initcall(dax_core_init);
+ module_exit(dax_core_exit);
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
- int id, num_lanes, ro, numa_node;
+ int id, num_lanes, ro, numa_node, target_node;
void *provider_data;
struct kernfs_node *bb_state;
struct badblocks bb;
void nvdimm_set_aliasing(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
+int nvdimm_security_setup_events(struct device *dev);
#if IS_ENABLED(CONFIG_NVDIMM_KEYS)
int nvdimm_security_unlock(struct device *dev);
#else
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.attr_groups = region_attr_groups;
ndr_desc.numa_node = dev_to_node(&pdev->dev);
+ ndr_desc.target_node = ndr_desc.numa_node;
ndr_desc.res = &pdev->resource[i];
ndr_desc.of_node = np;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
.remove = of_pmem_region_remove,
.driver = {
.name = "of_pmem",
- .owner = THIS_MODULE,
.of_match_table = of_pmem_region_match,
},
};
if (test_bit(NDD_UNARMED, &nvdimm->flags))
ro = 1;
+
+ if (test_bit(NDD_NOBLK, &nvdimm->flags)
+ && dev_type == &nd_blk_device_type) {
+ dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
+ caller, dev_name(&nvdimm->dev), i);
+ return NULL;
+ }
}
if (dev_type == &nd_blk_device_type) {
nd_region->flags = ndr_desc->flags;
nd_region->ro = ro;
nd_region->numa_node = ndr_desc->numa_node;
+ nd_region->target_node = ndr_desc->target_node;
ida_init(&nd_region->ns_ida);
ida_init(&nd_region->btt_ida);
ida_init(&nd_region->pfn_ida);
#ifdef CONFIG_ACPI_NUMA
int acpi_map_pxm_to_online_node(int pxm);
+ int acpi_map_pxm_to_node(int pxm);
int acpi_get_node(acpi_handle handle);
#else
static inline int acpi_map_pxm_to_online_node(int pxm)
{
return 0;
}
+ static inline int acpi_map_pxm_to_node(int pxm)
+ {
+ return 0;
+ }
static inline int acpi_get_node(acpi_handle handle)
{
return 0;
#if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG)
__printf(3, 4)
void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...);
-#else
-#define __acpi_handle_debug(descriptor, handle, fmt, ...) \
- acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__);
#endif
/*
#else
#if defined(CONFIG_DYNAMIC_DEBUG)
#define acpi_handle_debug(handle, fmt, ...) \
-do { \
- DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
- if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \
- __acpi_handle_debug(&descriptor, handle, pr_fmt(fmt), \
- ##__VA_ARGS__); \
-} while (0)
+ _dynamic_func_call(fmt, __acpi_handle_debug, \
+ handle, pr_fmt(fmt), ##__VA_ARGS__)
#else
#define acpi_handle_debug(handle, fmt, ...) \
({ \
/* Ignore IoRestriction field */
#define ACPI_GPIO_QUIRK_NO_IO_RESTRICTION BIT(0)
+/*
+ * When ACPI GPIO mapping table is in use the index parameter inside it
+ * refers to the GPIO resource in _CRS method. That index has no
+ * distinction of actual type of the resource. When consumer wants to
+ * get GpioIo type explicitly, this quirk may be used.
+ */
+#define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1)
unsigned int quirks;
};
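+/*
+ * Illustrative usage (hypothetical names): if _CRS index 0 describes a
+ * line as both GpioInt and GpioIo, a consumer can request the GpioIo view:
+ *
+ *	static const struct acpi_gpio_params example_gpio = { 0, 0, false };
+ *	static const struct acpi_gpio_mapping example_map[] = {
+ *		{ "example-gpios", &example_gpio, 1,
+ *		  ACPI_GPIO_QUIRK_ONLY_GPIOIO },
+ *		{ }
+ *	};
+ */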
}
#endif
-#if defined(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C)
-bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
- struct acpi_resource_i2c_serialbus **i2c);
-#else
-static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
- struct acpi_resource_i2c_serialbus **i2c)
-{
- return false;
-}
-#endif
-
/* Device properties */
#ifdef CONFIG_ACPI
NDD_SECURITY_OVERWRITE = 3,
/* tracking whether or not there is a pending device reference */
NDD_WORK_PENDING = 4,
+ /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
+ NDD_NOBLK = 5,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
void *provider_data;
int num_lanes;
int numa_node;
+ int target_node;
unsigned long flags;
struct device_node *of_node;
};
}
enum nvdimm_security_state {
+ NVDIMM_SECURITY_ERROR = -1,
NVDIMM_SECURITY_DISABLED,
NVDIMM_SECURITY_UNLOCKED,
NVDIMM_SECURITY_LOCKED,
cmd_mask, num_flush, flush_wpq, NULL, NULL);
}
-int nvdimm_security_setup_events(struct nvdimm *nvdimm);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
int (*func)(struct resource *, void *))
{
struct resource res;
- int ret = -1;
+ int ret = -EINVAL;
while (start < end &&
!find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
arg, func);
}
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
-
/*
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
* It is to be used only for System RAM.
+ *
+ * This will find System RAM ranges that are children of top-level resources
+ * in addition to top-level System RAM resources.
*/
int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *))
unsigned long flags;
struct resource res;
unsigned long pfn, end_pfn;
- int ret = -1;
+ int ret = -EINVAL;
start = (u64) start_pfn << PAGE_SHIFT;
end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
while (start < end &&
!find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
- true, &res)) {
+ false, &res)) {
pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
end_pfn = (res.end + 1) >> PAGE_SHIFT;
if (end_pfn > pfn)
return ret;
}
-#endif
-
static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
{
return 1;
conflict = __request_resource(parent, res);
if (!conflict)
break;
+ /*
+ * mm/hmm.c reserves physical addresses which then
+ * become unavailable to other users. Conflicts are
+ * not expected. Warn to aid debugging if encountered.
+ */
+ if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
+ pr_warn("Unaddressable device %s %pR conflicts with %pR",
+ conflict->name, conflict, res);
+ }
if (conflict != parent) {
if (!(conflict->flags & IORESOURCE_BUSY)) {
parent = conflict;
* and restore_online_page_callback() for generic callback restore.
*/
-static void generic_online_page(struct page *page);
+static void generic_online_page(struct page *page, unsigned int order);
static online_page_callback_t online_page_callback = generic_online_page;
static DEFINE_MUTEX(online_page_callback_lock);
cpus_read_unlock();
}
+u64 max_mem_size = U64_MAX;
+
/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
{
- struct resource *res, *conflict;
+ struct resource *res;
+ unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ char *resource_name = "System RAM";
- res = kzalloc(sizeof(struct resource), GFP_KERNEL);
- if (!res)
- return ERR_PTR(-ENOMEM);
-
- res->name = "System RAM";
- res->start = start;
- res->end = start + size - 1;
- res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- conflict = request_resource_conflict(&iomem_resource, res);
- if (conflict) {
- if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
- pr_debug("Device unaddressable memory block "
- "memory hotplug at %#010llx !\n",
- (unsigned long long)start);
- }
- pr_debug("System RAM resource %pR cannot be added\n", res);
- kfree(res);
+ if (start + size > max_mem_size)
+ return ERR_PTR(-E2BIG);
+
+ /*
+ * Request ownership of the new memory range. This might be
+ * a child of an existing resource that was present but
+ * not marked as busy.
+ */
+ res = __request_region(&iomem_resource, start, size,
+ resource_name, flags);
+
+ if (!res) {
+ pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
+ start, start + size);
return ERR_PTR(-EEXIST);
}
return res;
}
EXPORT_SYMBOL_GPL(__online_page_free);
-static void generic_online_page(struct page *page)
+static void generic_online_page(struct page *page, unsigned int order)
+{
+ kernel_map_pages(page, 1 << order, 1);
+ __free_pages_core(page, order);
+ totalram_pages_add(1UL << order);
+#ifdef CONFIG_HIGHMEM
+ if (PageHighMem(page))
+ totalhigh_pages_add(1UL << order);
+#endif
+}
+
+static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
{
- __online_page_set_limits(page);
- __online_page_increment_counters(page);
- __online_page_free(page);
+ unsigned long end = start + nr_pages;
+ int order, onlined_pages = 0;
+
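+ /*
+ * Hand out the largest power-of-two chunks the remaining span
+ * allows, capped at MAX_ORDER - 1, so pages are freed to the
+ * buddy allocator in big batches.
+ */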
+ while (start < end) {
+ order = min(MAX_ORDER - 1,
+ get_order(PFN_PHYS(end) - PFN_PHYS(start)));
+ (*online_page_callback)(pfn_to_page(start), order);
+
+ onlined_pages += (1UL << order);
+ start += (1UL << order);
+ }
+ return onlined_pages;
}
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
- unsigned long i;
unsigned long onlined_pages = *(unsigned long *)arg;
- struct page *page;
if (PageReserved(pfn_to_page(start_pfn)))
- for (i = 0; i < nr_pages; i++) {
- page = pfn_to_page(start_pfn + i);
- (*online_page_callback)(page);
- onlined_pages++;
- }
+ onlined_pages += online_pages_blocks(start_pfn, nr_pages);
online_mem_sections(start_pfn, start_pfn + nr_pages);
{
int nid = zone_to_nid(zone);
- arg->status_change_nid = -1;
- arg->status_change_nid_normal = -1;
- arg->status_change_nid_high = -1;
+ arg->status_change_nid = NUMA_NO_NODE;
+ arg->status_change_nid_normal = NUMA_NO_NODE;
+ arg->status_change_nid_high = NUMA_NO_NODE;
if (!node_state(nid, N_MEMORY))
arg->status_change_nid = nid;
return PageBuddy(page) && page_order(page) >= pageblock_order;
}
-/* Return the start of the next active pageblock after a given page */
-static struct page *next_active_pageblock(struct page *page)
+/* Return the pfn of the start of the next active pageblock after a given pfn */
+static unsigned long next_active_pageblock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
+
/* Ensure the starting page is pageblock-aligned */
- BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+ BUG_ON(pfn & (pageblock_nr_pages - 1));
/* If the entire pageblock is free, move to the end of free page */
if (pageblock_free(page)) {
/* be careful. we don't have locks, page_order can be changed.*/
order = page_order(page);
if ((order < MAX_ORDER) && (order >= pageblock_order))
- return page + (1 << order);
+ return pfn + (1 << order);
}
- return page + pageblock_nr_pages;
+ return pfn + pageblock_nr_pages;
}
-static bool is_pageblock_removable_nolock(struct page *page)
+static bool is_pageblock_removable_nolock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
struct zone *zone;
- unsigned long pfn;
/*
* We have to be careful here because we are iterating over memory
/* Checks if this range of memory is likely to be hot-removable. */
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
- struct page *page = pfn_to_page(start_pfn);
- struct page *end_page = page + nr_pages;
+ unsigned long end_pfn, pfn;
+
+ end_pfn = min(start_pfn + nr_pages,
+ zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
/* Check the starting page of each pageblock within the range */
- for (; page < end_page; page = next_active_pageblock(page)) {
- if (!is_pageblock_removable_nolock(page))
+ for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
+ if (!is_pageblock_removable_nolock(pfn))
return false;
cond_resched();
}
i++;
if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
continue;
+ /* Check if we got outside of the zone */
+ if (zone && !zone_spans_pfn(zone, pfn + i))
+ return 0;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
return 0;
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
unsigned long pfn;
- struct page *page;
+
for (pfn = start; pfn < end; pfn++) {
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (PageLRU(page))
- return pfn;
- if (__PageMovable(page))
- return pfn;
- if (PageHuge(page)) {
- if (hugepage_migration_supported(page_hstate(page)) &&
- page_huge_active(page))
- return pfn;
- else
- pfn = round_up(pfn + 1,
- 1 << compound_order(page)) - 1;
- }
- }
+ struct page *page, *head;
+ unsigned long skip;
+
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageLRU(page))
+ return pfn;
+ if (__PageMovable(page))
+ return pfn;
+
+ if (!PageHuge(page))
+ continue;
+ head = compound_head(page);
+ if (hugepage_migration_supported(page_hstate(head)) &&
+ page_huge_active(head))
+ return pfn;
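+ /*
+ * Not movable: skip the remaining tail pages of this (possibly
+ * gigantic) hugetlb page; the loop's pfn++ supplies the last step.
+ */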
+ skip = (1 << compound_order(head)) - (page - head);
+ pfn += skip - 1;
}
return 0;
}
{
unsigned long pfn;
struct page *page;
- int not_managed = 0;
int ret = 0;
LIST_HEAD(source);
if (PageHuge(page)) {
struct page *head = compound_head(page);
- pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
if (compound_order(head) > PFN_SECTION_SHIFT) {
ret = -EBUSY;
break;
}
- isolate_huge_page(page, &source);
+ pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+ isolate_huge_page(head, &source);
continue;
} else if (PageTransHuge(page))
pfn = page_to_pfn(compound_head(page))
else
ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
if (!ret) { /* Success */
- put_page(page);
list_add_tail(&page->lru, &source);
if (!__PageMovable(page))
inc_node_page_state(page, NR_ISOLATED_ANON +
} else {
pr_warn("failed to isolate pfn %lx\n", pfn);
dump_page(page, "isolation failed");
- put_page(page);
- /* Because we don't have big zone->lock. we should
- check this again here. */
- if (page_count(page)) {
- not_managed++;
- ret = -EBUSY;
- break;
- }
}
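+ /* drop the reference taken when this page was pinned for isolation */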
+ put_page(page);
}
if (!list_empty(&source)) {
- if (not_managed) {
- putback_movable_pages(&source);
- goto out;
- }
-
/* Allocate a new page from the nearest neighbor node */
ret = migrate_pages(&source, new_node_page, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
putback_movable_pages(&source);
}
}
-out:
+
return ret;
}
unsigned long present_pages = 0;
enum zone_type zt;
- arg->status_change_nid = -1;
- arg->status_change_nid_normal = -1;
- arg->status_change_nid_high = -1;
+ arg->status_change_nid = NUMA_NO_NODE;
+ arg->status_change_nid_normal = NUMA_NO_NODE;
+ arg->status_change_nid_high = NUMA_NO_NODE;
/*
* Check whether node_states[N_NORMAL_MEMORY] will be changed.
we assume this for now. */
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) {
- mem_hotplug_done();
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
if (ret) {
- mem_hotplug_done();
reason = "failure to isolate range";
goto failed_removal;
}
cond_resched();
lru_add_drain_all();
- drain_all_pages(zone);
pfn = scan_movable_pages(pfn, end_pfn);
if (pfn) {