Commit | Line | Data |
---|---|---|
b5beae5e OH |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #define pr_fmt(fmt) "papr-scm: " fmt | |
4 | ||
5 | #include <linux/of.h> | |
6 | #include <linux/kernel.h> | |
7 | #include <linux/module.h> | |
8 | #include <linux/ioport.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/ndctl.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/libnvdimm.h> | |
13 | #include <linux/platform_device.h> | |
0d7fc080 | 14 | #include <linux/delay.h> |
b5beae5e OH |
15 | |
16 | #include <asm/plpar_wrappers.h> | |
17 | ||
/*
 * Address argument handed to H_SCM_BIND_MEM by drc_pmem_bind().
 * NOTE(review): presumably asks the hypervisor to pick the address - confirm
 * against the PAPR definition of the hcall.
 */
#define BIND_ANY_ADDR (~0ul)

/* Command mask passed to nvdimm_create(): one bit per supported ND_CMD_*. */
#define PAPR_SCM_DIMM_CMD_MASK			\
	((1ul << ND_CMD_GET_CONFIG_SIZE) |	\
	 (1ul << ND_CMD_GET_CONFIG_DATA) |	\
	 (1ul << ND_CMD_SET_CONFIG_DATA))
24 | ||
25 | struct papr_scm_priv { | |
26 | struct platform_device *pdev; | |
27 | struct device_node *dn; | |
28 | uint32_t drc_index; | |
29 | uint64_t blocks; | |
30 | uint64_t block_size; | |
31 | int metadata_size; | |
2a0ffbd4 | 32 | bool is_volatile; |
b5beae5e OH |
33 | |
34 | uint64_t bound_addr; | |
35 | ||
36 | struct nvdimm_bus_descriptor bus_desc; | |
37 | struct nvdimm_bus *bus; | |
38 | struct nvdimm *nvdimm; | |
39 | struct resource res; | |
40 | struct nd_region *region; | |
41 | struct nd_interleave_set nd_set; | |
42 | }; | |
43 | ||
44 | static int drc_pmem_bind(struct papr_scm_priv *p) | |
45 | { | |
46 | unsigned long ret[PLPAR_HCALL_BUFSIZE]; | |
5a3840a4 | 47 | uint64_t saved = 0; |
3a855b7a VJ |
48 | uint64_t token; |
49 | int64_t rc; | |
b5beae5e OH |
50 | |
51 | /* | |
52 | * When the hypervisor cannot map all the requested memory in a single | |
53 | * hcall it returns H_BUSY and we call again with the token until | |
54 | * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS | |
55 | * leave the system in an undefined state, so we wait. | |
56 | */ | |
57 | token = 0; | |
58 | ||
59 | do { | |
60 | rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, | |
61 | p->blocks, BIND_ANY_ADDR, token); | |
409dd7dc | 62 | token = ret[0]; |
5a3840a4 OH |
63 | if (!saved) |
64 | saved = ret[1]; | |
b5beae5e OH |
65 | cond_resched(); |
66 | } while (rc == H_BUSY); | |
67 | ||
68 | if (rc) { | |
3a855b7a VJ |
69 | /* H_OVERLAP needs a separate error path */ |
70 | if (rc == H_OVERLAP) | |
71 | return -EBUSY; | |
72 | ||
b5beae5e OH |
73 | dev_err(&p->pdev->dev, "bind err: %lld\n", rc); |
74 | return -ENXIO; | |
75 | } | |
76 | ||
5a3840a4 | 77 | p->bound_addr = saved; |
b5beae5e OH |
78 | |
79 | dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); | |
80 | ||
81 | return 0; | |
82 | } | |
83 | ||
84 | static int drc_pmem_unbind(struct papr_scm_priv *p) | |
85 | { | |
86 | unsigned long ret[PLPAR_HCALL_BUFSIZE]; | |
0d7fc080 VJ |
87 | uint64_t token = 0; |
88 | int64_t rc; | |
b5beae5e | 89 | |
0d7fc080 | 90 | dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); |
b5beae5e | 91 | |
0d7fc080 | 92 | /* NB: unbind has the same retry requirements as drc_pmem_bind() */ |
b5beae5e | 93 | do { |
0d7fc080 VJ |
94 | |
95 | /* Unbind of all SCM resources associated with drcIndex */ | |
96 | rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, | |
97 | p->drc_index, token); | |
409dd7dc | 98 | token = ret[0]; |
0d7fc080 VJ |
99 | |
100 | /* Check if we are stalled for some time */ | |
101 | if (H_IS_LONG_BUSY(rc)) { | |
102 | msleep(get_longbusy_msecs(rc)); | |
103 | rc = H_BUSY; | |
104 | } else if (rc == H_BUSY) { | |
105 | cond_resched(); | |
106 | } | |
107 | ||
b5beae5e OH |
108 | } while (rc == H_BUSY); |
109 | ||
110 | if (rc) | |
111 | dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); | |
0d7fc080 VJ |
112 | else |
113 | dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", | |
114 | p->drc_index); | |
b5beae5e | 115 | |
0d7fc080 | 116 | return rc == H_SUCCESS ? 0 : -ENXIO; |
b5beae5e OH |
117 | } |
118 | ||
119 | static int papr_scm_meta_get(struct papr_scm_priv *p, | |
53e80bd0 | 120 | struct nd_cmd_get_config_data_hdr *hdr) |
b5beae5e OH |
121 | { |
122 | unsigned long data[PLPAR_HCALL_BUFSIZE]; | |
53e80bd0 AK |
123 | unsigned long offset, data_offset; |
124 | int len, read; | |
b5beae5e OH |
125 | int64_t ret; |
126 | ||
53e80bd0 | 127 | if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) |
b5beae5e OH |
128 | return -EINVAL; |
129 | ||
53e80bd0 AK |
130 | for (len = hdr->in_length; len; len -= read) { |
131 | ||
132 | data_offset = hdr->in_length - len; | |
133 | offset = hdr->in_offset + data_offset; | |
134 | ||
135 | if (len >= 8) | |
136 | read = 8; | |
137 | else if (len >= 4) | |
138 | read = 4; | |
139 | else if (len >= 2) | |
140 | read = 2; | |
141 | else | |
142 | read = 1; | |
143 | ||
144 | ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, | |
145 | offset, read); | |
146 | ||
147 | if (ret == H_PARAMETER) /* bad DRC index */ | |
148 | return -ENODEV; | |
149 | if (ret) | |
150 | return -EINVAL; /* other invalid parameter */ | |
151 | ||
152 | switch (read) { | |
153 | case 8: | |
154 | *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); | |
155 | break; | |
156 | case 4: | |
157 | *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); | |
158 | break; | |
159 | ||
160 | case 2: | |
161 | *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); | |
162 | break; | |
163 | ||
164 | case 1: | |
165 | *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); | |
166 | break; | |
167 | } | |
168 | } | |
b5beae5e OH |
169 | return 0; |
170 | } | |
171 | ||
172 | static int papr_scm_meta_set(struct papr_scm_priv *p, | |
53e80bd0 | 173 | struct nd_cmd_set_config_hdr *hdr) |
b5beae5e | 174 | { |
53e80bd0 AK |
175 | unsigned long offset, data_offset; |
176 | int len, wrote; | |
177 | unsigned long data; | |
178 | __be64 data_be; | |
b5beae5e OH |
179 | int64_t ret; |
180 | ||
53e80bd0 | 181 | if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) |
b5beae5e OH |
182 | return -EINVAL; |
183 | ||
53e80bd0 AK |
184 | for (len = hdr->in_length; len; len -= wrote) { |
185 | ||
186 | data_offset = hdr->in_length - len; | |
187 | offset = hdr->in_offset + data_offset; | |
188 | ||
189 | if (len >= 8) { | |
190 | data = *(uint64_t *)(hdr->in_buf + data_offset); | |
191 | data_be = cpu_to_be64(data); | |
192 | wrote = 8; | |
193 | } else if (len >= 4) { | |
194 | data = *(uint32_t *)(hdr->in_buf + data_offset); | |
195 | data &= 0xffffffff; | |
196 | data_be = cpu_to_be32(data); | |
197 | wrote = 4; | |
198 | } else if (len >= 2) { | |
199 | data = *(uint16_t *)(hdr->in_buf + data_offset); | |
200 | data &= 0xffff; | |
201 | data_be = cpu_to_be16(data); | |
202 | wrote = 2; | |
203 | } else { | |
204 | data_be = *(uint8_t *)(hdr->in_buf + data_offset); | |
205 | data_be &= 0xff; | |
206 | wrote = 1; | |
207 | } | |
208 | ||
209 | ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, | |
210 | offset, data_be, wrote); | |
211 | if (ret == H_PARAMETER) /* bad DRC index */ | |
212 | return -ENODEV; | |
213 | if (ret) | |
214 | return -EINVAL; /* other invalid parameter */ | |
215 | } | |
b5beae5e OH |
216 | |
217 | return 0; | |
218 | } | |
219 | ||
220 | int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, | |
221 | unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) | |
222 | { | |
223 | struct nd_cmd_get_config_size *get_size_hdr; | |
224 | struct papr_scm_priv *p; | |
225 | ||
226 | /* Only dimm-specific calls are supported atm */ | |
227 | if (!nvdimm) | |
228 | return -EINVAL; | |
229 | ||
230 | p = nvdimm_provider_data(nvdimm); | |
231 | ||
232 | switch (cmd) { | |
233 | case ND_CMD_GET_CONFIG_SIZE: | |
234 | get_size_hdr = buf; | |
235 | ||
236 | get_size_hdr->status = 0; | |
53e80bd0 | 237 | get_size_hdr->max_xfer = 8; |
b5beae5e OH |
238 | get_size_hdr->config_size = p->metadata_size; |
239 | *cmd_rc = 0; | |
240 | break; | |
241 | ||
242 | case ND_CMD_GET_CONFIG_DATA: | |
243 | *cmd_rc = papr_scm_meta_get(p, buf); | |
244 | break; | |
245 | ||
246 | case ND_CMD_SET_CONFIG_DATA: | |
247 | *cmd_rc = papr_scm_meta_set(p, buf); | |
248 | break; | |
249 | ||
250 | default: | |
251 | return -EINVAL; | |
252 | } | |
253 | ||
254 | dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
259 | static const struct attribute_group *region_attr_groups[] = { | |
260 | &nd_region_attribute_group, | |
261 | &nd_device_attribute_group, | |
262 | &nd_mapping_attribute_group, | |
263 | &nd_numa_attribute_group, | |
264 | NULL, | |
265 | }; | |
266 | ||
267 | static const struct attribute_group *bus_attr_groups[] = { | |
268 | &nvdimm_bus_attribute_group, | |
269 | NULL, | |
270 | }; | |
271 | ||
272 | static const struct attribute_group *papr_scm_dimm_groups[] = { | |
273 | &nvdimm_attribute_group, | |
274 | &nd_device_attribute_group, | |
275 | NULL, | |
276 | }; | |
277 | ||
da1115fd AK |
278 | static inline int papr_scm_node(int node) |
279 | { | |
280 | int min_dist = INT_MAX, dist; | |
281 | int nid, min_node; | |
282 | ||
283 | if ((node == NUMA_NO_NODE) || node_online(node)) | |
284 | return node; | |
285 | ||
286 | min_node = first_online_node; | |
287 | for_each_online_node(nid) { | |
288 | dist = node_distance(node, nid); | |
289 | if (dist < min_dist) { | |
290 | min_dist = dist; | |
291 | min_node = nid; | |
292 | } | |
293 | } | |
294 | return min_node; | |
295 | } | |
296 | ||
b5beae5e OH |
297 | static int papr_scm_nvdimm_init(struct papr_scm_priv *p) |
298 | { | |
299 | struct device *dev = &p->pdev->dev; | |
300 | struct nd_mapping_desc mapping; | |
301 | struct nd_region_desc ndr_desc; | |
302 | unsigned long dimm_flags; | |
da1115fd | 303 | int target_nid, online_nid; |
b5beae5e OH |
304 | |
305 | p->bus_desc.ndctl = papr_scm_ndctl; | |
306 | p->bus_desc.module = THIS_MODULE; | |
307 | p->bus_desc.of_node = p->pdev->dev.of_node; | |
308 | p->bus_desc.attr_groups = bus_attr_groups; | |
309 | p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); | |
310 | ||
311 | if (!p->bus_desc.provider_name) | |
312 | return -ENOMEM; | |
313 | ||
314 | p->bus = nvdimm_bus_register(NULL, &p->bus_desc); | |
315 | if (!p->bus) { | |
316 | dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); | |
317 | return -ENXIO; | |
318 | } | |
319 | ||
320 | dimm_flags = 0; | |
321 | set_bit(NDD_ALIASING, &dimm_flags); | |
322 | ||
323 | p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups, | |
324 | dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); | |
325 | if (!p->nvdimm) { | |
326 | dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); | |
327 | goto err; | |
328 | } | |
329 | ||
b0d65a8c OH |
330 | if (nvdimm_bus_check_dimm_count(p->bus, 1)) |
331 | goto err; | |
332 | ||
b5beae5e OH |
333 | /* now add the region */ |
334 | ||
335 | memset(&mapping, 0, sizeof(mapping)); | |
336 | mapping.nvdimm = p->nvdimm; | |
337 | mapping.start = 0; | |
338 | mapping.size = p->blocks * p->block_size; // XXX: potential overflow? | |
339 | ||
340 | memset(&ndr_desc, 0, sizeof(ndr_desc)); | |
341 | ndr_desc.attr_groups = region_attr_groups; | |
da1115fd AK |
342 | target_nid = dev_to_node(&p->pdev->dev); |
343 | online_nid = papr_scm_node(target_nid); | |
344 | ndr_desc.numa_node = online_nid; | |
345 | ndr_desc.target_node = target_nid; | |
b5beae5e OH |
346 | ndr_desc.res = &p->res; |
347 | ndr_desc.of_node = p->dn; | |
348 | ndr_desc.provider_data = p; | |
349 | ndr_desc.mapping = &mapping; | |
350 | ndr_desc.num_mappings = 1; | |
351 | ndr_desc.nd_set = &p->nd_set; | |
352 | set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); | |
353 | ||
2a0ffbd4 AK |
354 | if (p->is_volatile) |
355 | p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); | |
356 | else | |
357 | p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); | |
b5beae5e OH |
358 | if (!p->region) { |
359 | dev_err(dev, "Error registering region %pR from %pOF\n", | |
360 | ndr_desc.res, p->dn); | |
361 | goto err; | |
362 | } | |
da1115fd AK |
363 | if (target_nid != online_nid) |
364 | dev_info(dev, "Region registered with target node %d and online node %d", | |
365 | target_nid, online_nid); | |
b5beae5e OH |
366 | |
367 | return 0; | |
368 | ||
369 | err: nvdimm_bus_unregister(p->bus); | |
370 | kfree(p->bus_desc.provider_name); | |
371 | return -ENXIO; | |
372 | } | |
373 | ||
374 | static int papr_scm_probe(struct platform_device *pdev) | |
375 | { | |
b5beae5e | 376 | struct device_node *dn = pdev->dev.of_node; |
683ec0e0 OH |
377 | u32 drc_index, metadata_size; |
378 | u64 blocks, block_size; | |
b5beae5e | 379 | struct papr_scm_priv *p; |
43001c52 OH |
380 | const char *uuid_str; |
381 | u64 uuid[2]; | |
b5beae5e OH |
382 | int rc; |
383 | ||
384 | /* check we have all the required DT properties */ | |
385 | if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { | |
386 | dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); | |
387 | return -ENODEV; | |
388 | } | |
389 | ||
683ec0e0 OH |
390 | if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { |
391 | dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); | |
392 | return -ENODEV; | |
393 | } | |
394 | ||
395 | if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { | |
396 | dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); | |
b5beae5e OH |
397 | return -ENODEV; |
398 | } | |
399 | ||
43001c52 OH |
400 | if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { |
401 | dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); | |
402 | return -ENODEV; | |
403 | } | |
404 | ||
2a0ffbd4 | 405 | |
b5beae5e OH |
406 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
407 | if (!p) | |
408 | return -ENOMEM; | |
409 | ||
410 | /* optional DT properties */ | |
411 | of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); | |
412 | ||
413 | p->dn = dn; | |
414 | p->drc_index = drc_index; | |
683ec0e0 OH |
415 | p->block_size = block_size; |
416 | p->blocks = blocks; | |
2a0ffbd4 | 417 | p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); |
b5beae5e | 418 | |
43001c52 OH |
419 | /* We just need to ensure that set cookies are unique across */ |
420 | uuid_parse(uuid_str, (uuid_t *) uuid); | |
259a948c AK |
421 | /* |
422 | * cookie1 and cookie2 are not really little endian | |
423 | * we store a little endian representation of the | |
424 | * uuid str so that we can compare this with the label | |
425 | * area cookie irrespective of the endian config with which | |
426 | * the kernel is built. | |
427 | */ | |
428 | p->nd_set.cookie1 = cpu_to_le64(uuid[0]); | |
429 | p->nd_set.cookie2 = cpu_to_le64(uuid[1]); | |
43001c52 | 430 | |
b5beae5e OH |
431 | /* might be zero */ |
432 | p->metadata_size = metadata_size; | |
433 | p->pdev = pdev; | |
434 | ||
435 | /* request the hypervisor to bind this region to somewhere in memory */ | |
436 | rc = drc_pmem_bind(p); | |
3a855b7a VJ |
437 | |
438 | /* If phyp says drc memory still bound then force unbound and retry */ | |
439 | if (rc == -EBUSY) { | |
440 | dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); | |
441 | drc_pmem_unbind(p); | |
442 | rc = drc_pmem_bind(p); | |
443 | } | |
444 | ||
b5beae5e OH |
445 | if (rc) |
446 | goto err; | |
447 | ||
448 | /* setup the resource for the newly bound range */ | |
449 | p->res.start = p->bound_addr; | |
59613526 | 450 | p->res.end = p->bound_addr + p->blocks * p->block_size - 1; |
b5beae5e OH |
451 | p->res.name = pdev->name; |
452 | p->res.flags = IORESOURCE_MEM; | |
453 | ||
454 | rc = papr_scm_nvdimm_init(p); | |
455 | if (rc) | |
456 | goto err2; | |
457 | ||
458 | platform_set_drvdata(pdev, p); | |
459 | ||
460 | return 0; | |
461 | ||
462 | err2: drc_pmem_unbind(p); | |
463 | err: kfree(p); | |
464 | return rc; | |
465 | } | |
466 | ||
467 | static int papr_scm_remove(struct platform_device *pdev) | |
468 | { | |
469 | struct papr_scm_priv *p = platform_get_drvdata(pdev); | |
470 | ||
471 | nvdimm_bus_unregister(p->bus); | |
472 | drc_pmem_unbind(p); | |
473 | kfree(p); | |
474 | ||
475 | return 0; | |
476 | } | |
477 | ||
478 | static const struct of_device_id papr_scm_match[] = { | |
479 | { .compatible = "ibm,pmemory" }, | |
480 | { }, | |
481 | }; | |
482 | ||
483 | static struct platform_driver papr_scm_driver = { | |
484 | .probe = papr_scm_probe, | |
485 | .remove = papr_scm_remove, | |
486 | .driver = { | |
487 | .name = "papr_scm", | |
488 | .owner = THIS_MODULE, | |
489 | .of_match_table = papr_scm_match, | |
490 | }, | |
491 | }; | |
492 | ||
493 | module_platform_driver(papr_scm_driver); | |
494 | MODULE_DEVICE_TABLE(of, papr_scm_match); | |
495 | MODULE_LICENSE("GPL"); | |
496 | MODULE_AUTHOR("IBM Corporation"); |