Commit | Line | Data |
---|---|---|
e1455744 | 1 | /* |
cd03412a | 2 | * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. |
e1455744 DW |
3 | * |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | */ | |
ac515c08 | 13 | #include <linux/memremap.h> |
e1455744 DW |
14 | #include <linux/blkdev.h> |
15 | #include <linux/device.h> | |
16 | #include <linux/genhd.h> | |
17 | #include <linux/sizes.h> | |
18 | #include <linux/slab.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/mm.h> | |
21 | #include "nd-core.h" | |
22 | #include "pfn.h" | |
23 | #include "nd.h" | |
24 | ||
25 | static void nd_pfn_release(struct device *dev) | |
26 | { | |
27 | struct nd_region *nd_region = to_nd_region(dev->parent); | |
28 | struct nd_pfn *nd_pfn = to_nd_pfn(dev); | |
29 | ||
426824d6 | 30 | dev_dbg(dev, "trace\n"); |
e1455744 DW |
31 | nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); |
32 | ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); | |
33 | kfree(nd_pfn->uuid); | |
34 | kfree(nd_pfn); | |
35 | } | |
36 | ||
37 | static struct device_type nd_pfn_device_type = { | |
38 | .name = "nd_pfn", | |
39 | .release = nd_pfn_release, | |
40 | }; | |
41 | ||
42 | bool is_nd_pfn(struct device *dev) | |
43 | { | |
44 | return dev ? dev->type == &nd_pfn_device_type : false; | |
45 | } | |
46 | EXPORT_SYMBOL(is_nd_pfn); | |
47 | ||
48 | struct nd_pfn *to_nd_pfn(struct device *dev) | |
49 | { | |
50 | struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); | |
51 | ||
52 | WARN_ON(!is_nd_pfn(dev)); | |
53 | return nd_pfn; | |
54 | } | |
55 | EXPORT_SYMBOL(to_nd_pfn); | |
56 | ||
57 | static ssize_t mode_show(struct device *dev, | |
58 | struct device_attribute *attr, char *buf) | |
59 | { | |
cd03412a | 60 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
61 | |
62 | switch (nd_pfn->mode) { | |
63 | case PFN_MODE_RAM: | |
64 | return sprintf(buf, "ram\n"); | |
65 | case PFN_MODE_PMEM: | |
66 | return sprintf(buf, "pmem\n"); | |
67 | default: | |
68 | return sprintf(buf, "none\n"); | |
69 | } | |
70 | } | |
71 | ||
72 | static ssize_t mode_store(struct device *dev, | |
73 | struct device_attribute *attr, const char *buf, size_t len) | |
74 | { | |
cd03412a | 75 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
76 | ssize_t rc = 0; |
77 | ||
78 | device_lock(dev); | |
79 | nvdimm_bus_lock(dev); | |
80 | if (dev->driver) | |
81 | rc = -EBUSY; | |
82 | else { | |
83 | size_t n = len - 1; | |
84 | ||
85 | if (strncmp(buf, "pmem\n", n) == 0 | |
86 | || strncmp(buf, "pmem", n) == 0) { | |
d2c0f041 | 87 | nd_pfn->mode = PFN_MODE_PMEM; |
e1455744 DW |
88 | } else if (strncmp(buf, "ram\n", n) == 0 |
89 | || strncmp(buf, "ram", n) == 0) | |
90 | nd_pfn->mode = PFN_MODE_RAM; | |
91 | else if (strncmp(buf, "none\n", n) == 0 | |
92 | || strncmp(buf, "none", n) == 0) | |
93 | nd_pfn->mode = PFN_MODE_NONE; | |
94 | else | |
95 | rc = -EINVAL; | |
96 | } | |
426824d6 DW |
97 | dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, |
98 | buf[len - 1] == '\n' ? "" : "\n"); | |
e1455744 DW |
99 | nvdimm_bus_unlock(dev); |
100 | device_unlock(dev); | |
101 | ||
102 | return rc ? rc : len; | |
103 | } | |
104 | static DEVICE_ATTR_RW(mode); | |
105 | ||
315c5625 DW |
106 | static ssize_t align_show(struct device *dev, |
107 | struct device_attribute *attr, char *buf) | |
108 | { | |
cd03412a | 109 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
315c5625 | 110 | |
af7d9f0c | 111 | return sprintf(buf, "%ld\n", nd_pfn->align); |
315c5625 DW |
112 | } |
113 | ||
1fdadbeb OH |
114 | static const unsigned long *nd_pfn_supported_alignments(void) |
115 | { | |
116 | /* | |
117 | * This needs to be a non-static variable because the *_SIZE | |
118 | * macros aren't always constants. | |
119 | */ | |
120 | const unsigned long supported_alignments[] = { | |
121 | PAGE_SIZE, | |
122 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
123 | HPAGE_PMD_SIZE, | |
124 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | |
125 | HPAGE_PUD_SIZE, | |
126 | #endif | |
127 | #endif | |
128 | 0, | |
129 | }; | |
130 | static unsigned long data[ARRAY_SIZE(supported_alignments)]; | |
131 | ||
132 | memcpy(data, supported_alignments, sizeof(data)); | |
133 | ||
134 | return data; | |
135 | } | |
136 | ||
315c5625 DW |
137 | static ssize_t align_store(struct device *dev, |
138 | struct device_attribute *attr, const char *buf, size_t len) | |
139 | { | |
cd03412a | 140 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
315c5625 DW |
141 | ssize_t rc; |
142 | ||
143 | device_lock(dev); | |
144 | nvdimm_bus_lock(dev); | |
f13d2b61 DW |
145 | rc = nd_size_select_store(dev, buf, &nd_pfn->align, |
146 | nd_pfn_supported_alignments()); | |
426824d6 DW |
147 | dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, |
148 | buf[len - 1] == '\n' ? "" : "\n"); | |
315c5625 DW |
149 | nvdimm_bus_unlock(dev); |
150 | device_unlock(dev); | |
151 | ||
152 | return rc ? rc : len; | |
153 | } | |
154 | static DEVICE_ATTR_RW(align); | |
155 | ||
e1455744 DW |
156 | static ssize_t uuid_show(struct device *dev, |
157 | struct device_attribute *attr, char *buf) | |
158 | { | |
cd03412a | 159 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
160 | |
161 | if (nd_pfn->uuid) | |
162 | return sprintf(buf, "%pUb\n", nd_pfn->uuid); | |
163 | return sprintf(buf, "\n"); | |
164 | } | |
165 | ||
166 | static ssize_t uuid_store(struct device *dev, | |
167 | struct device_attribute *attr, const char *buf, size_t len) | |
168 | { | |
cd03412a | 169 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
170 | ssize_t rc; |
171 | ||
172 | device_lock(dev); | |
173 | rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); | |
426824d6 DW |
174 | dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, |
175 | buf[len - 1] == '\n' ? "" : "\n"); | |
e1455744 DW |
176 | device_unlock(dev); |
177 | ||
178 | return rc ? rc : len; | |
179 | } | |
180 | static DEVICE_ATTR_RW(uuid); | |
181 | ||
182 | static ssize_t namespace_show(struct device *dev, | |
183 | struct device_attribute *attr, char *buf) | |
184 | { | |
cd03412a | 185 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
186 | ssize_t rc; |
187 | ||
188 | nvdimm_bus_lock(dev); | |
189 | rc = sprintf(buf, "%s\n", nd_pfn->ndns | |
190 | ? dev_name(&nd_pfn->ndns->dev) : ""); | |
191 | nvdimm_bus_unlock(dev); | |
192 | return rc; | |
193 | } | |
194 | ||
195 | static ssize_t namespace_store(struct device *dev, | |
196 | struct device_attribute *attr, const char *buf, size_t len) | |
197 | { | |
cd03412a | 198 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
e1455744 DW |
199 | ssize_t rc; |
200 | ||
e1455744 | 201 | device_lock(dev); |
4ca8b57a | 202 | nvdimm_bus_lock(dev); |
e1455744 | 203 | rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); |
426824d6 DW |
204 | dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, |
205 | buf[len - 1] == '\n' ? "" : "\n"); | |
e1455744 | 206 | nvdimm_bus_unlock(dev); |
4ca8b57a | 207 | device_unlock(dev); |
e1455744 DW |
208 | |
209 | return rc; | |
210 | } | |
211 | static DEVICE_ATTR_RW(namespace); | |
212 | ||
f6ed58c7 DW |
213 | static ssize_t resource_show(struct device *dev, |
214 | struct device_attribute *attr, char *buf) | |
215 | { | |
cd03412a | 216 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
f6ed58c7 DW |
217 | ssize_t rc; |
218 | ||
219 | device_lock(dev); | |
220 | if (dev->driver) { | |
221 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; | |
222 | u64 offset = __le64_to_cpu(pfn_sb->dataoff); | |
223 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
224 | u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); | |
225 | struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); | |
226 | ||
227 | rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start | |
228 | + start_pad + offset); | |
229 | } else { | |
230 | /* no address to convey if the pfn instance is disabled */ | |
231 | rc = -ENXIO; | |
232 | } | |
233 | device_unlock(dev); | |
234 | ||
235 | return rc; | |
236 | } | |
237 | static DEVICE_ATTR_RO(resource); | |
238 | ||
239 | static ssize_t size_show(struct device *dev, | |
240 | struct device_attribute *attr, char *buf) | |
241 | { | |
cd03412a | 242 | struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); |
f6ed58c7 DW |
243 | ssize_t rc; |
244 | ||
245 | device_lock(dev); | |
246 | if (dev->driver) { | |
247 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; | |
248 | u64 offset = __le64_to_cpu(pfn_sb->dataoff); | |
249 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
250 | u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); | |
251 | u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); | |
252 | struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); | |
253 | ||
254 | rc = sprintf(buf, "%llu\n", (unsigned long long) | |
255 | resource_size(&nsio->res) - start_pad | |
256 | - end_trunc - offset); | |
257 | } else { | |
258 | /* no size to convey if the pfn instance is disabled */ | |
259 | rc = -ENXIO; | |
260 | } | |
261 | device_unlock(dev); | |
262 | ||
263 | return rc; | |
264 | } | |
265 | static DEVICE_ATTR_RO(size); | |
266 | ||
1fdadbeb OH |
267 | static ssize_t supported_alignments_show(struct device *dev, |
268 | struct device_attribute *attr, char *buf) | |
269 | { | |
270 | return nd_size_select_show(0, nd_pfn_supported_alignments(), buf); | |
271 | } | |
272 | static DEVICE_ATTR_RO(supported_alignments); | |
273 | ||
e1455744 DW |
274 | static struct attribute *nd_pfn_attributes[] = { |
275 | &dev_attr_mode.attr, | |
276 | &dev_attr_namespace.attr, | |
277 | &dev_attr_uuid.attr, | |
315c5625 | 278 | &dev_attr_align.attr, |
f6ed58c7 DW |
279 | &dev_attr_resource.attr, |
280 | &dev_attr_size.attr, | |
1fdadbeb | 281 | &dev_attr_supported_alignments.attr, |
e1455744 DW |
282 | NULL, |
283 | }; | |
284 | ||
26417ae4 DW |
285 | static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n) |
286 | { | |
287 | if (a == &dev_attr_resource.attr) | |
288 | return 0400; | |
289 | return a->mode; | |
290 | } | |
291 | ||
cd03412a | 292 | struct attribute_group nd_pfn_attribute_group = { |
e1455744 | 293 | .attrs = nd_pfn_attributes, |
26417ae4 | 294 | .is_visible = pfn_visible, |
e1455744 DW |
295 | }; |
296 | ||
297 | static const struct attribute_group *nd_pfn_attribute_groups[] = { | |
298 | &nd_pfn_attribute_group, | |
299 | &nd_device_attribute_group, | |
300 | &nd_numa_attribute_group, | |
301 | NULL, | |
302 | }; | |
303 | ||
cd03412a | 304 | struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, |
e1455744 DW |
305 | struct nd_namespace_common *ndns) |
306 | { | |
0cbfeef2 | 307 | struct device *dev; |
e1455744 | 308 | |
cd03412a | 309 | if (!nd_pfn) |
e1455744 DW |
310 | return NULL; |
311 | ||
cd03412a | 312 | nd_pfn->mode = PFN_MODE_NONE; |
0dd69643 | 313 | nd_pfn->align = PFN_DEFAULT_ALIGNMENT; |
cd03412a DW |
314 | dev = &nd_pfn->dev; |
315 | device_initialize(&nd_pfn->dev); | |
316 | if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { | |
426824d6 DW |
317 | dev_dbg(&ndns->dev, "failed, already claimed by %s\n", |
318 | dev_name(ndns->claim)); | |
cd03412a | 319 | put_device(dev); |
e1455744 | 320 | return NULL; |
cd03412a DW |
321 | } |
322 | return dev; | |
323 | } | |
324 | ||
325 | static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) | |
326 | { | |
327 | struct nd_pfn *nd_pfn; | |
328 | struct device *dev; | |
e1455744 DW |
329 | |
330 | nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); | |
331 | if (!nd_pfn) | |
332 | return NULL; | |
333 | ||
334 | nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); | |
335 | if (nd_pfn->id < 0) { | |
336 | kfree(nd_pfn); | |
337 | return NULL; | |
338 | } | |
339 | ||
e1455744 DW |
340 | dev = &nd_pfn->dev; |
341 | dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); | |
e1455744 | 342 | dev->groups = nd_pfn_attribute_groups; |
cd03412a DW |
343 | dev->type = &nd_pfn_device_type; |
344 | dev->parent = &nd_region->dev; | |
345 | ||
346 | return nd_pfn; | |
e1455744 DW |
347 | } |
348 | ||
349 | struct device *nd_pfn_create(struct nd_region *nd_region) | |
350 | { | |
cd03412a DW |
351 | struct nd_pfn *nd_pfn; |
352 | struct device *dev; | |
353 | ||
c9e582aa | 354 | if (!is_memory(&nd_region->dev)) |
cd03412a | 355 | return NULL; |
e1455744 | 356 | |
cd03412a DW |
357 | nd_pfn = nd_pfn_alloc(nd_region); |
358 | dev = nd_pfn_devinit(nd_pfn, NULL); | |
e1455744 | 359 | |
cd03412a | 360 | __nd_device_register(dev); |
e1455744 DW |
361 | return dev; |
362 | } | |
363 | ||
48af2f7e VV |
364 | /* |
365 | * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap | |
366 | * space associated with the namespace. If the memmap is set to DRAM, then | |
367 | * this is a no-op. Since the memmap area is freshly initialized during | |
368 | * probe, we have an opportunity to clear any badblocks in this area. | |
369 | */ | |
370 | static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) | |
371 | { | |
372 | struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); | |
373 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
374 | void *zero_page = page_address(ZERO_PAGE(0)); | |
375 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; | |
376 | int num_bad, meta_num, rc, bb_present; | |
377 | sector_t first_bad, meta_start; | |
378 | struct nd_namespace_io *nsio; | |
379 | ||
380 | if (nd_pfn->mode != PFN_MODE_PMEM) | |
381 | return 0; | |
382 | ||
383 | nsio = to_nd_namespace_io(&ndns->dev); | |
384 | meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; | |
385 | meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; | |
386 | ||
387 | do { | |
388 | unsigned long zero_len; | |
389 | u64 nsoff; | |
390 | ||
391 | bb_present = badblocks_check(&nd_region->bb, meta_start, | |
392 | meta_num, &first_bad, &num_bad); | |
393 | if (bb_present) { | |
394 | dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n", | |
395 | num_bad, first_bad); | |
396 | nsoff = ALIGN_DOWN((nd_region->ndr_start | |
397 | + (first_bad << 9)) - nsio->res.start, | |
398 | PAGE_SIZE); | |
399 | zero_len = ALIGN(num_bad << 9, PAGE_SIZE); | |
400 | while (zero_len) { | |
401 | unsigned long chunk = min(zero_len, PAGE_SIZE); | |
402 | ||
403 | rc = nvdimm_write_bytes(ndns, nsoff, zero_page, | |
404 | chunk, 0); | |
405 | if (rc) | |
406 | break; | |
407 | ||
408 | zero_len -= chunk; | |
409 | nsoff += chunk; | |
410 | } | |
411 | if (rc) { | |
412 | dev_err(&nd_pfn->dev, | |
413 | "error clearing %x badblocks at %lx\n", | |
414 | num_bad, first_bad); | |
415 | return rc; | |
416 | } | |
417 | } | |
418 | } while (bb_present); | |
419 | ||
420 | return 0; | |
421 | } | |
422 | ||
c5ed9268 | 423 | int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) |
e1455744 | 424 | { |
e1455744 | 425 | u64 checksum, offset; |
1ee6667c | 426 | enum nd_pfn_mode mode; |
a34d5e8a | 427 | struct nd_namespace_io *nsio; |
19deaa21 | 428 | unsigned long align, start_pad; |
a34d5e8a DW |
429 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; |
430 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
431 | const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); | |
e1455744 DW |
432 | |
433 | if (!pfn_sb || !ndns) | |
434 | return -ENODEV; | |
435 | ||
c9e582aa | 436 | if (!is_memory(nd_pfn->dev.parent)) |
e1455744 DW |
437 | return -ENODEV; |
438 | ||
3ae3d67b | 439 | if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) |
e1455744 DW |
440 | return -ENXIO; |
441 | ||
c5ed9268 | 442 | if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) |
e1455744 DW |
443 | return -ENODEV; |
444 | ||
445 | checksum = le64_to_cpu(pfn_sb->checksum); | |
446 | pfn_sb->checksum = 0; | |
447 | if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) | |
448 | return -ENODEV; | |
449 | pfn_sb->checksum = cpu_to_le64(checksum); | |
450 | ||
a34d5e8a DW |
451 | if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) |
452 | return -ENODEV; | |
453 | ||
cfe30b87 DW |
454 | if (__le16_to_cpu(pfn_sb->version_minor) < 1) { |
455 | pfn_sb->start_pad = 0; | |
456 | pfn_sb->end_trunc = 0; | |
457 | } | |
458 | ||
45a0dac0 DW |
459 | if (__le16_to_cpu(pfn_sb->version_minor) < 2) |
460 | pfn_sb->align = 0; | |
461 | ||
e1455744 DW |
462 | switch (le32_to_cpu(pfn_sb->mode)) { |
463 | case PFN_MODE_RAM: | |
e1455744 | 464 | case PFN_MODE_PMEM: |
45eb570a | 465 | break; |
e1455744 DW |
466 | default: |
467 | return -ENXIO; | |
468 | } | |
469 | ||
1ee6667c DW |
470 | align = le32_to_cpu(pfn_sb->align); |
471 | offset = le64_to_cpu(pfn_sb->dataoff); | |
19deaa21 | 472 | start_pad = le32_to_cpu(pfn_sb->start_pad); |
1ee6667c DW |
473 | if (align == 0) |
474 | align = 1UL << ilog2(offset); | |
475 | mode = le32_to_cpu(pfn_sb->mode); | |
476 | ||
e1455744 | 477 | if (!nd_pfn->uuid) { |
1ee6667c DW |
478 | /* |
479 | * When probing a namepace via nd_pfn_probe() the uuid | |
480 | * is NULL (see: nd_pfn_devinit()) we init settings from | |
481 | * pfn_sb | |
482 | */ | |
e1455744 DW |
483 | nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); |
484 | if (!nd_pfn->uuid) | |
485 | return -ENOMEM; | |
1ee6667c DW |
486 | nd_pfn->align = align; |
487 | nd_pfn->mode = mode; | |
e1455744 | 488 | } else { |
1ee6667c DW |
489 | /* |
490 | * When probing a pfn / dax instance we validate the | |
491 | * live settings against the pfn_sb | |
492 | */ | |
e1455744 | 493 | if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) |
e5670563 | 494 | return -ENODEV; |
1ee6667c DW |
495 | |
496 | /* | |
497 | * If the uuid validates, but other settings mismatch | |
498 | * return EINVAL because userspace has managed to change | |
499 | * the configuration without specifying new | |
500 | * identification. | |
501 | */ | |
502 | if (nd_pfn->align != align || nd_pfn->mode != mode) { | |
503 | dev_err(&nd_pfn->dev, | |
504 | "init failed, settings mismatch\n"); | |
505 | dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", | |
506 | nd_pfn->align, align, nd_pfn->mode, | |
507 | mode); | |
508 | return -EINVAL; | |
509 | } | |
e1455744 DW |
510 | } |
511 | ||
1ee6667c | 512 | if (align > nvdimm_namespace_capacity(ndns)) { |
315c5625 | 513 | dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", |
1ee6667c | 514 | align, nvdimm_namespace_capacity(ndns)); |
315c5625 DW |
515 | return -EINVAL; |
516 | } | |
517 | ||
e1455744 DW |
518 | /* |
519 | * These warnings are verbose because they can only trigger in | |
520 | * the case where the physical address alignment of the | |
521 | * namespace has changed since the pfn superblock was | |
522 | * established. | |
523 | */ | |
e1455744 | 524 | nsio = to_nd_namespace_io(&ndns->dev); |
9f1e8cee | 525 | if (offset >= resource_size(&nsio->res)) { |
e1455744 DW |
526 | dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", |
527 | dev_name(&ndns->dev)); | |
528 | return -EBUSY; | |
529 | } | |
530 | ||
19deaa21 | 531 | if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) |
5e24c9fd | 532 | || !IS_ALIGNED(offset, PAGE_SIZE)) { |
1ee6667c DW |
533 | dev_err(&nd_pfn->dev, |
534 | "bad offset: %#llx dax disabled align: %#lx\n", | |
535 | offset, align); | |
315c5625 DW |
536 | return -ENXIO; |
537 | } | |
538 | ||
48af2f7e | 539 | return nd_pfn_clear_memmap_errors(nd_pfn); |
e1455744 | 540 | } |
32ab0a3f | 541 | EXPORT_SYMBOL(nd_pfn_validate); |
e1455744 | 542 | |
200c79da | 543 | int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) |
e1455744 DW |
544 | { |
545 | int rc; | |
e1455744 | 546 | struct nd_pfn *nd_pfn; |
bd032943 | 547 | struct device *pfn_dev; |
e1455744 DW |
548 | struct nd_pfn_sb *pfn_sb; |
549 | struct nd_region *nd_region = to_nd_region(ndns->dev.parent); | |
550 | ||
551 | if (ndns->force_raw) | |
552 | return -ENODEV; | |
553 | ||
b3fde74e DW |
554 | switch (ndns->claim_class) { |
555 | case NVDIMM_CCLASS_NONE: | |
556 | case NVDIMM_CCLASS_PFN: | |
557 | break; | |
558 | default: | |
559 | return -ENODEV; | |
560 | } | |
561 | ||
e1455744 | 562 | nvdimm_bus_lock(&ndns->dev); |
cd03412a DW |
563 | nd_pfn = nd_pfn_alloc(nd_region); |
564 | pfn_dev = nd_pfn_devinit(nd_pfn, ndns); | |
e1455744 | 565 | nvdimm_bus_unlock(&ndns->dev); |
bd032943 | 566 | if (!pfn_dev) |
e1455744 | 567 | return -ENOMEM; |
bd032943 DW |
568 | pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); |
569 | nd_pfn = to_nd_pfn(pfn_dev); | |
e1455744 | 570 | nd_pfn->pfn_sb = pfn_sb; |
c5ed9268 | 571 | rc = nd_pfn_validate(nd_pfn, PFN_SIG); |
426824d6 | 572 | dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); |
e1455744 | 573 | if (rc < 0) { |
452bae0a | 574 | nd_detach_ndns(pfn_dev, &nd_pfn->ndns); |
bd032943 | 575 | put_device(pfn_dev); |
e1455744 | 576 | } else |
bd032943 | 577 | __nd_device_register(pfn_dev); |
e1455744 DW |
578 | |
579 | return rc; | |
580 | } | |
581 | EXPORT_SYMBOL(nd_pfn_probe); | |
ac515c08 DW |
582 | |
583 | /* | |
584 | * We hotplug memory at section granularity, pad the reserved area from | |
585 | * the previous section base to the namespace base address. | |
586 | */ | |
587 | static unsigned long init_altmap_base(resource_size_t base) | |
588 | { | |
589 | unsigned long base_pfn = PHYS_PFN(base); | |
590 | ||
591 | return PFN_SECTION_ALIGN_DOWN(base_pfn); | |
592 | } | |
593 | ||
594 | static unsigned long init_altmap_reserve(resource_size_t base) | |
595 | { | |
596 | unsigned long reserve = PHYS_PFN(SZ_8K); | |
597 | unsigned long base_pfn = PHYS_PFN(base); | |
598 | ||
599 | reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); | |
600 | return reserve; | |
601 | } | |
602 | ||
e8d51348 | 603 | static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) |
ac515c08 | 604 | { |
e8d51348 CH |
605 | struct resource *res = &pgmap->res; |
606 | struct vmem_altmap *altmap = &pgmap->altmap; | |
ac515c08 DW |
607 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; |
608 | u64 offset = le64_to_cpu(pfn_sb->dataoff); | |
609 | u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); | |
610 | u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); | |
611 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
612 | struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); | |
613 | resource_size_t base = nsio->res.start + start_pad; | |
614 | struct vmem_altmap __altmap = { | |
615 | .base_pfn = init_altmap_base(base), | |
616 | .reserve = init_altmap_reserve(base), | |
617 | }; | |
618 | ||
619 | memcpy(res, &nsio->res, sizeof(*res)); | |
620 | res->start += start_pad; | |
621 | res->end -= end_trunc; | |
622 | ||
ac515c08 DW |
623 | if (nd_pfn->mode == PFN_MODE_RAM) { |
624 | if (offset < SZ_8K) | |
e8d51348 | 625 | return -EINVAL; |
ac515c08 | 626 | nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); |
e8d51348 | 627 | pgmap->altmap_valid = false; |
ac515c08 | 628 | } else if (nd_pfn->mode == PFN_MODE_PMEM) { |
d5483fed DW |
629 | nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) |
630 | - offset) / PAGE_SIZE); | |
ac515c08 DW |
631 | if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) |
632 | dev_info(&nd_pfn->dev, | |
633 | "number of pfns truncated from %lld to %ld\n", | |
634 | le64_to_cpu(nd_pfn->pfn_sb->npfns), | |
635 | nd_pfn->npfns); | |
636 | memcpy(altmap, &__altmap, sizeof(*altmap)); | |
637 | altmap->free = PHYS_PFN(offset - SZ_8K); | |
638 | altmap->alloc = 0; | |
e8d51348 | 639 | pgmap->altmap_valid = true; |
ac515c08 | 640 | } else |
e8d51348 | 641 | return -ENXIO; |
ac515c08 | 642 | |
e8d51348 | 643 | return 0; |
ac515c08 DW |
644 | } |
645 | ||
41fce90f DW |
646 | static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) |
647 | { | |
648 | return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), | |
649 | ALIGN_DOWN(phys, nd_pfn->align)); | |
650 | } | |
651 | ||
ae86cbfe DW |
652 | /* |
653 | * Check if pmem collides with 'System RAM', or other regions when | |
654 | * section aligned. Trim it accordingly. | |
655 | */ | |
656 | static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) | |
657 | { | |
658 | struct nd_namespace_common *ndns = nd_pfn->ndns; | |
659 | struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); | |
660 | struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); | |
661 | const resource_size_t start = nsio->res.start; | |
662 | const resource_size_t end = start + resource_size(&nsio->res); | |
663 | resource_size_t adjust, size; | |
664 | ||
665 | *start_pad = 0; | |
666 | *end_trunc = 0; | |
667 | ||
668 | adjust = start - PHYS_SECTION_ALIGN_DOWN(start); | |
669 | size = resource_size(&nsio->res) + adjust; | |
670 | if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, | |
671 | IORES_DESC_NONE) == REGION_MIXED | |
672 | || nd_region_conflict(nd_region, start - adjust, size)) | |
673 | *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; | |
674 | ||
675 | /* Now check that end of the range does not collide. */ | |
676 | adjust = PHYS_SECTION_ALIGN_UP(end) - end; | |
677 | size = resource_size(&nsio->res) + adjust; | |
678 | if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, | |
679 | IORES_DESC_NONE) == REGION_MIXED | |
680 | || !IS_ALIGNED(end, nd_pfn->align) | |
681 | || nd_region_conflict(nd_region, start, size + adjust)) | |
682 | *end_trunc = end - phys_pmem_align_down(nd_pfn, end); | |
683 | } | |
684 | ||
ac515c08 DW |
685 | static int nd_pfn_init(struct nd_pfn *nd_pfn) |
686 | { | |
52ac23b2 | 687 | u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; |
ac515c08 | 688 | struct nd_namespace_common *ndns = nd_pfn->ndns; |
ae86cbfe | 689 | struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); |
ac515c08 | 690 | resource_size_t start, size; |
ac515c08 | 691 | struct nd_region *nd_region; |
ae86cbfe | 692 | u32 start_pad, end_trunc; |
ac515c08 DW |
693 | struct nd_pfn_sb *pfn_sb; |
694 | unsigned long npfns; | |
695 | phys_addr_t offset; | |
c5ed9268 | 696 | const char *sig; |
ac515c08 DW |
697 | u64 checksum; |
698 | int rc; | |
699 | ||
700 | pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); | |
701 | if (!pfn_sb) | |
702 | return -ENOMEM; | |
703 | ||
704 | nd_pfn->pfn_sb = pfn_sb; | |
c5ed9268 DW |
705 | if (is_nd_dax(&nd_pfn->dev)) |
706 | sig = DAX_SIG; | |
707 | else | |
708 | sig = PFN_SIG; | |
709 | rc = nd_pfn_validate(nd_pfn, sig); | |
ac515c08 DW |
710 | if (rc != -ENODEV) |
711 | return rc; | |
712 | ||
713 | /* no info block, do init */; | |
714 | nd_region = to_nd_region(nd_pfn->dev.parent); | |
715 | if (nd_region->ro) { | |
716 | dev_info(&nd_pfn->dev, | |
717 | "%s is read-only, unable to init metadata\n", | |
718 | dev_name(&nd_region->dev)); | |
719 | return -ENXIO; | |
720 | } | |
721 | ||
722 | memset(pfn_sb, 0, sizeof(*pfn_sb)); | |
723 | ||
ae86cbfe | 724 | trim_pfn_device(nd_pfn, &start_pad, &end_trunc); |
ac515c08 | 725 | if (start_pad + end_trunc) |
41fce90f | 726 | dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", |
ac515c08 DW |
727 | dev_name(&ndns->dev), start_pad + end_trunc); |
728 | ||
729 | /* | |
730 | * Note, we use 64 here for the standard size of struct page, | |
731 | * debugging options may cause it to be larger in which case the | |
732 | * implementation will limit the pfns advertised through | |
733 | * ->direct_access() to those that are included in the memmap. | |
734 | */ | |
ae86cbfe | 735 | start = nsio->res.start + start_pad; |
ac515c08 | 736 | size = resource_size(&nsio->res); |
d5483fed DW |
737 | npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) |
738 | / PAGE_SIZE); | |
594d6d96 | 739 | if (nd_pfn->mode == PFN_MODE_PMEM) { |
594d6d96 | 740 | /* |
0dd69643 OH |
741 | * The altmap should be padded out to the block size used |
742 | * when populating the vmemmap. This *should* be equal to | |
743 | * PMD_SIZE for most architectures. | |
594d6d96 | 744 | */ |
bfb34527 | 745 | offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, |
0dd69643 | 746 | max(nd_pfn->align, PMD_SIZE)) - start; |
594d6d96 | 747 | } else if (nd_pfn->mode == PFN_MODE_RAM) |
52ac23b2 DW |
748 | offset = ALIGN(start + SZ_8K + dax_label_reserve, |
749 | nd_pfn->align) - start; | |
ac515c08 DW |
750 | else |
751 | return -ENXIO; | |
752 | ||
753 | if (offset + start_pad + end_trunc >= size) { | |
754 | dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", | |
755 | dev_name(&ndns->dev)); | |
756 | return -ENXIO; | |
757 | } | |
758 | ||
759 | npfns = (size - offset - start_pad - end_trunc) / SZ_4K; | |
760 | pfn_sb->mode = cpu_to_le32(nd_pfn->mode); | |
761 | pfn_sb->dataoff = cpu_to_le64(offset); | |
762 | pfn_sb->npfns = cpu_to_le64(npfns); | |
c5ed9268 | 763 | memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); |
ac515c08 DW |
764 | memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); |
765 | memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); | |
766 | pfn_sb->version_major = cpu_to_le16(1); | |
45a0dac0 | 767 | pfn_sb->version_minor = cpu_to_le16(2); |
ac515c08 DW |
768 | pfn_sb->start_pad = cpu_to_le32(start_pad); |
769 | pfn_sb->end_trunc = cpu_to_le32(end_trunc); | |
45a0dac0 | 770 | pfn_sb->align = cpu_to_le32(nd_pfn->align); |
ac515c08 DW |
771 | checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); |
772 | pfn_sb->checksum = cpu_to_le64(checksum); | |
773 | ||
3ae3d67b | 774 | return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); |
ac515c08 DW |
775 | } |
776 | ||
777 | /* | |
778 | * Determine the effective resource range and vmem_altmap from an nd_pfn | |
779 | * instance. | |
780 | */ | |
e8d51348 | 781 | int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) |
ac515c08 DW |
782 | { |
783 | int rc; | |
784 | ||
785 | if (!nd_pfn->uuid || !nd_pfn->ndns) | |
e8d51348 | 786 | return -ENODEV; |
ac515c08 DW |
787 | |
788 | rc = nd_pfn_init(nd_pfn); | |
789 | if (rc) | |
e8d51348 | 790 | return rc; |
ac515c08 | 791 | |
e8d51348 CH |
792 | /* we need a valid pfn_sb before we can init a dev_pagemap */ |
793 | return __nvdimm_setup_pfn(nd_pfn, pgmap); | |
ac515c08 DW |
794 | } |
795 | EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); |