drivers/cxl/core/region.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/slab.h>
8 #include <linux/uuid.h>
9 #include <linux/sort.h>
10 #include <linux/idr.h>
11 #include <cxlmem.h>
12 #include <cxl.h>
13 #include "core.h"
14
15 /**
16  * DOC: cxl core region
17  *
18  * CXL Regions represent mapped memory capacity in system physical address
19  * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
20  * Memory ranges, Regions represent the capacity actively mapped by the HDM
21  * Decoder Capability structures throughout the Host Bridges, Switches, and
22  * Endpoints in the topology.
23  *
24  * Region configuration has ordering constraints. UUID may be set at any time
25  * but is only visible for persistent regions.
26  * 1. Interleave granularity
27  * 2. Interleave size
28  * 3. Decoder targets
29  */
30
31 static struct cxl_region *to_cxl_region(struct device *dev);
32
33 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
34                          char *buf)
35 {
36         struct cxl_region *cxlr = to_cxl_region(dev);
37         struct cxl_region_params *p = &cxlr->params;
38         ssize_t rc;
39
40         rc = down_read_interruptible(&cxl_region_rwsem);
41         if (rc)
42                 return rc;
43         if (cxlr->mode != CXL_DECODER_PMEM)
44                 rc = sysfs_emit(buf, "\n");
45         else
46                 rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
47         up_read(&cxl_region_rwsem);
48
49         return rc;
50 }
51
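/*
 * bus_for_each_dev() callback: return -EBUSY if any region already
 * claims the uuid passed in via @data
 */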
52 static int is_dup(struct device *match, void *data)
53 {
54         struct cxl_region_params *p;
55         struct cxl_region *cxlr;
56         uuid_t *uuid = data;
57
58         if (!is_cxl_region(match))
59                 return 0;
60
61         lockdep_assert_held(&cxl_region_rwsem);
62         cxlr = to_cxl_region(match);
63         p = &cxlr->params;
64
65         if (uuid_equal(&p->uuid, uuid)) {
66                 dev_dbg(match, "already has uuid: %pUb\n", uuid);
67                 return -EBUSY;
68         }
69
70         return 0;
71 }
72
73 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
74                           const char *buf, size_t len)
75 {
76         struct cxl_region *cxlr = to_cxl_region(dev);
77         struct cxl_region_params *p = &cxlr->params;
78         uuid_t temp;
79         ssize_t rc;
80
81         if (len != UUID_STRING_LEN + 1)
82                 return -EINVAL;
83
84         rc = uuid_parse(buf, &temp);
85         if (rc)
86                 return rc;
87
88         if (uuid_is_null(&temp))
89                 return -EINVAL;
90
91         rc = down_write_killable(&cxl_region_rwsem);
92         if (rc)
93                 return rc;
94
95         if (uuid_equal(&p->uuid, &temp))
96                 goto out;
97
98         rc = -EBUSY;
99         if (p->state >= CXL_CONFIG_ACTIVE)
100                 goto out;
101
102         rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
103         if (rc < 0)
104                 goto out;
105
106         uuid_copy(&p->uuid, &temp);
107 out:
108         up_write(&cxl_region_rwsem);
109
110         if (rc)
111                 return rc;
112         return len;
113 }
114 static DEVICE_ATTR_RW(uuid);
115
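/* Lookup the region reference, if any, that @port holds for @cxlr */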
116 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
117                                           struct cxl_region *cxlr)
118 {
119         return xa_load(&port->regions, (unsigned long)cxlr);
120 }
121
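/*
 * Invalidate CPU caches covering CXL memory. If the platform lacks the
 * capability this fails, unless the test override is enabled.
 */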
122 static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
123 {
124         if (!cpu_cache_has_invalidate_memregion()) {
125                 if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
126                         dev_info_once(
127                                 &cxlr->dev,
128                                 "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
129                         return 0;
130                 } else {
131                         dev_err(&cxlr->dev,
132                                 "Failed to synchronize CPU cache state\n");
133                         return -ENXIO;
134                 }
135         }
136
137         cpu_cache_invalidate_memregion(IORES_DESC_CXL);
138         return 0;
139 }
140
141 static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
142 {
143         struct cxl_region_params *p = &cxlr->params;
144         int i, rc = 0;
145
146         /*
147          * Before region teardown, attempt to flush, and if the flush
148          * fails, cancel the region teardown for data consistency
149          * concerns
150          */
151         rc = cxl_region_invalidate_memregion(cxlr);
152         if (rc)
153                 return rc;
154
155         for (i = count - 1; i >= 0; i--) {
156                 struct cxl_endpoint_decoder *cxled = p->targets[i];
157                 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
158                 struct cxl_port *iter = cxled_to_port(cxled);
159                 struct cxl_dev_state *cxlds = cxlmd->cxlds;
160                 struct cxl_ep *ep;
161
162                 if (cxlds->rcd)
163                         goto endpoint_reset;
164
165                 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
166                         iter = to_cxl_port(iter->dev.parent);
167
168                 for (ep = cxl_ep_load(iter, cxlmd); iter;
169                      iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
170                         struct cxl_region_ref *cxl_rr;
171                         struct cxl_decoder *cxld;
172
173                         cxl_rr = cxl_rr_load(iter, cxlr);
174                         cxld = cxl_rr->decoder;
175                         if (cxld->reset)
176                                 rc = cxld->reset(cxld);
177                         if (rc)
178                                 return rc;
179                         set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
180                 }
181
182 endpoint_reset:
183                 rc = cxled->cxld.reset(&cxled->cxld);
184                 if (rc)
185                         return rc;
186                 set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
187         }
188
189         /* all decoders associated with this region have been torn down */
190         clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
191
192         return 0;
193 }
194
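/*
 * Commit @cxld via its ->commit() op. Only single-target (passthrough)
 * switch decoders may legitimately lack one.
 */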
195 static int commit_decoder(struct cxl_decoder *cxld)
196 {
197         struct cxl_switch_decoder *cxlsd = NULL;
198
199         if (cxld->commit)
200                 return cxld->commit(cxld);
201
202         if (is_switch_decoder(&cxld->dev))
203                 cxlsd = to_cxl_switch_decoder(&cxld->dev);
204
205         if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
206                           "->commit() is required\n"))
207                 return -ENXIO;
208         return 0;
209 }
210
211 static int cxl_region_decode_commit(struct cxl_region *cxlr)
212 {
213         struct cxl_region_params *p = &cxlr->params;
214         int i, rc = 0;
215
216         for (i = 0; i < p->nr_targets; i++) {
217                 struct cxl_endpoint_decoder *cxled = p->targets[i];
218                 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
219                 struct cxl_region_ref *cxl_rr;
220                 struct cxl_decoder *cxld;
221                 struct cxl_port *iter;
222                 struct cxl_ep *ep;
223
224                 /* commit bottom up */
225                 for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
226                      iter = to_cxl_port(iter->dev.parent)) {
227                         cxl_rr = cxl_rr_load(iter, cxlr);
228                         cxld = cxl_rr->decoder;
229                         rc = commit_decoder(cxld);
230                         if (rc)
231                                 break;
232                 }
233
234                 if (rc) {
235                         /* programming @iter failed, teardown */
236                         for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
237                              iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
238                                 cxl_rr = cxl_rr_load(iter, cxlr);
239                                 cxld = cxl_rr->decoder;
240                                 if (cxld->reset)
241                                         cxld->reset(cxld);
242                         }
243
244                         cxled->cxld.reset(&cxled->cxld);
245                         goto err;
246                 }
247         }
248
249         return 0;
250
251 err:
252         /* undo the targets that were successfully committed */
253         cxl_region_decode_reset(cxlr, i);
254         return rc;
255 }
256
257 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
258                             const char *buf, size_t len)
259 {
260         struct cxl_region *cxlr = to_cxl_region(dev);
261         struct cxl_region_params *p = &cxlr->params;
262         bool commit;
263         ssize_t rc;
264
265         rc = kstrtobool(buf, &commit);
266         if (rc)
267                 return rc;
268
269         rc = down_write_killable(&cxl_region_rwsem);
270         if (rc)
271                 return rc;
272
273         /* Already in the requested state? */
274         if (commit && p->state >= CXL_CONFIG_COMMIT)
275                 goto out;
276         if (!commit && p->state < CXL_CONFIG_COMMIT)
277                 goto out;
278
279         /* Not ready to commit? */
280         if (commit && p->state < CXL_CONFIG_ACTIVE) {
281                 rc = -ENXIO;
282                 goto out;
283         }
284
285         /*
286          * Invalidate caches before region setup to drop any speculative
287          * consumption of this address space
288          */
289         rc = cxl_region_invalidate_memregion(cxlr);
290         if (rc)
291                 goto out;
292
293         if (commit) {
294                 rc = cxl_region_decode_commit(cxlr);
295                 if (rc == 0)
296                         p->state = CXL_CONFIG_COMMIT;
297         } else {
298                 p->state = CXL_CONFIG_RESET_PENDING;
299                 up_write(&cxl_region_rwsem);
300                 device_release_driver(&cxlr->dev);
301                 down_write(&cxl_region_rwsem);
302
303                 /*
304                  * The lock was dropped, so need to revalidate that the reset is
305                  * still pending.
306                  */
307                 if (p->state == CXL_CONFIG_RESET_PENDING) {
308                         rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
309                         /*
310                          * Revert to committed since there may still be active
311                          * decoders associated with this region, or move forward
312                          * to active to mark the reset successful
313                          */
314                         if (rc)
315                                 p->state = CXL_CONFIG_COMMIT;
316                         else
317                                 p->state = CXL_CONFIG_ACTIVE;
318                 }
319         }
320
321 out:
322         up_write(&cxl_region_rwsem);
323
324         if (rc)
325                 return rc;
326         return len;
327 }
328
329 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
330                            char *buf)
331 {
332         struct cxl_region *cxlr = to_cxl_region(dev);
333         struct cxl_region_params *p = &cxlr->params;
334         ssize_t rc;
335
336         rc = down_read_interruptible(&cxl_region_rwsem);
337         if (rc)
338                 return rc;
339         rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
340         up_read(&cxl_region_rwsem);
341
342         return rc;
343 }
344 static DEVICE_ATTR_RW(commit);
345
346 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
347                                   int n)
348 {
349         struct device *dev = kobj_to_dev(kobj);
350         struct cxl_region *cxlr = to_cxl_region(dev);
351
352         /*
353          * Support tooling that expects to find a 'uuid' attribute for all
354          * regions regardless of mode.
355          */
356         if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
357                 return 0444;
358         return a->mode;
359 }
360
361 static ssize_t interleave_ways_show(struct device *dev,
362                                     struct device_attribute *attr, char *buf)
363 {
364         struct cxl_region *cxlr = to_cxl_region(dev);
365         struct cxl_region_params *p = &cxlr->params;
366         ssize_t rc;
367
368         rc = down_read_interruptible(&cxl_region_rwsem);
369         if (rc)
370                 return rc;
371         rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
372         up_read(&cxl_region_rwsem);
373
374         return rc;
375 }
376
377 static const struct attribute_group *get_cxl_region_target_group(void);
378
379 static ssize_t interleave_ways_store(struct device *dev,
380                                      struct device_attribute *attr,
381                                      const char *buf, size_t len)
382 {
383         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
384         struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
385         struct cxl_region *cxlr = to_cxl_region(dev);
386         struct cxl_region_params *p = &cxlr->params;
387         unsigned int val, save;
388         int rc;
389         u8 iw;
390
391         rc = kstrtouint(buf, 0, &val);
392         if (rc)
393                 return rc;
394
395         rc = ways_to_eiw(val, &iw);
396         if (rc)
397                 return rc;
398
399         /*
400          * Even for x3, x6, and x12 interleaves the region interleave must be a
401          * power of 2 multiple of the host bridge interleave.
402          */
403         if (!is_power_of_2(val / cxld->interleave_ways) ||
404             (val % cxld->interleave_ways)) {
405                 dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
406                 return -EINVAL;
407         }
408
409         rc = down_write_killable(&cxl_region_rwsem);
410         if (rc)
411                 return rc;
412         if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
413                 rc = -EBUSY;
414                 goto out;
415         }
416
417         save = p->interleave_ways;
418         p->interleave_ways = val;
419         rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
420         if (rc)
421                 p->interleave_ways = save;
422 out:
423         up_write(&cxl_region_rwsem);
424         if (rc)
425                 return rc;
426         return len;
427 }
428 static DEVICE_ATTR_RW(interleave_ways);
429
430 static ssize_t interleave_granularity_show(struct device *dev,
431                                            struct device_attribute *attr,
432                                            char *buf)
433 {
434         struct cxl_region *cxlr = to_cxl_region(dev);
435         struct cxl_region_params *p = &cxlr->params;
436         ssize_t rc;
437
438         rc = down_read_interruptible(&cxl_region_rwsem);
439         if (rc)
440                 return rc;
441         rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
442         up_read(&cxl_region_rwsem);
443
444         return rc;
445 }
446
447 static ssize_t interleave_granularity_store(struct device *dev,
448                                             struct device_attribute *attr,
449                                             const char *buf, size_t len)
450 {
451         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
452         struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
453         struct cxl_region *cxlr = to_cxl_region(dev);
454         struct cxl_region_params *p = &cxlr->params;
455         int rc, val;
456         u16 ig;
457
458         rc = kstrtoint(buf, 0, &val);
459         if (rc)
460                 return rc;
461
462         rc = granularity_to_eig(val, &ig);
463         if (rc)
464                 return rc;
465
466         /*
467          * When the host-bridge is interleaved, disallow region granularity !=
468          * root granularity. Regions with a granularity less than the root
469          * interleave result in needing multiple endpoints to support a single
470          * slot in the interleave (possible to support in the future). Regions
471          * with a granularity greater than the root interleave result in invalid
472          * DPA translations (invalid to support).
473          */
474         if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
475                 return -EINVAL;
476
477         rc = down_write_killable(&cxl_region_rwsem);
478         if (rc)
479                 return rc;
480         if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
481                 rc = -EBUSY;
482                 goto out;
483         }
484
485         p->interleave_granularity = val;
486 out:
487         up_write(&cxl_region_rwsem);
488         if (rc)
489                 return rc;
490         return len;
491 }
492 static DEVICE_ATTR_RW(interleave_granularity);
493
494 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
495                              char *buf)
496 {
497         struct cxl_region *cxlr = to_cxl_region(dev);
498         struct cxl_region_params *p = &cxlr->params;
499         u64 resource = -1ULL;
500         ssize_t rc;
501
502         rc = down_read_interruptible(&cxl_region_rwsem);
503         if (rc)
504                 return rc;
505         if (p->res)
506                 resource = p->res->start;
507         rc = sysfs_emit(buf, "%#llx\n", resource);
508         up_read(&cxl_region_rwsem);
509
510         return rc;
511 }
512 static DEVICE_ATTR_RO(resource);
513
514 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
515                          char *buf)
516 {
517         struct cxl_region *cxlr = to_cxl_region(dev);
518
519         return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
520 }
521 static DEVICE_ATTR_RO(mode);
522
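/*
 * Reserve @size bytes of free host physical address space for @cxlr out
 * of the root decoder's window.
 */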
523 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
524 {
525         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
526         struct cxl_region_params *p = &cxlr->params;
527         struct resource *res;
528         u64 remainder = 0;
529
530         lockdep_assert_held_write(&cxl_region_rwsem);
531
532         /* Nothing to do... */
533         if (p->res && resource_size(p->res) == size)
534                 return 0;
535
536         /* To change size the old size must be freed first */
537         if (p->res)
538                 return -EBUSY;
539
540         if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
541                 return -EBUSY;
542
543         /* ways, granularity and uuid (if PMEM) need to be set before HPA */
544         if (!p->interleave_ways || !p->interleave_granularity ||
545             (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
546                 return -ENXIO;
547
548         div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
549         if (remainder)
550                 return -EINVAL;
551
552         res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
553                                     dev_name(&cxlr->dev));
554         if (IS_ERR(res)) {
555                 dev_dbg(&cxlr->dev,
556                         "HPA allocation error (%ld) for size:%pap in %s %pr\n",
557                         PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
558                 return PTR_ERR(res);
559         }
560
561         p->res = res;
562         p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
563
564         return 0;
565 }
566
567 static void cxl_region_iomem_release(struct cxl_region *cxlr)
568 {
569         struct cxl_region_params *p = &cxlr->params;
570
571         if (device_is_registered(&cxlr->dev))
572                 lockdep_assert_held_write(&cxl_region_rwsem);
573         if (p->res) {
574                 /*
575                  * Autodiscovered regions may not have been able to insert their
576                  * resource.
577                  */
578                 if (p->res->parent)
579                         remove_resource(p->res);
580                 kfree(p->res);
581                 p->res = NULL;
582         }
583 }
584
585 static int free_hpa(struct cxl_region *cxlr)
586 {
587         struct cxl_region_params *p = &cxlr->params;
588
589         lockdep_assert_held_write(&cxl_region_rwsem);
590
591         if (!p->res)
592                 return 0;
593
594         if (p->state >= CXL_CONFIG_ACTIVE)
595                 return -EBUSY;
596
597         cxl_region_iomem_release(cxlr);
598         p->state = CXL_CONFIG_IDLE;
599         return 0;
600 }
601
602 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
603                           const char *buf, size_t len)
604 {
605         struct cxl_region *cxlr = to_cxl_region(dev);
606         u64 val;
607         int rc;
608
609         rc = kstrtou64(buf, 0, &val);
610         if (rc)
611                 return rc;
612
613         rc = down_write_killable(&cxl_region_rwsem);
614         if (rc)
615                 return rc;
616
617         if (val)
618                 rc = alloc_hpa(cxlr, val);
619         else
620                 rc = free_hpa(cxlr);
621         up_write(&cxl_region_rwsem);
622
623         if (rc)
624                 return rc;
625
626         return len;
627 }
628
629 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
630                          char *buf)
631 {
632         struct cxl_region *cxlr = to_cxl_region(dev);
633         struct cxl_region_params *p = &cxlr->params;
634         u64 size = 0;
635         ssize_t rc;
636
637         rc = down_read_interruptible(&cxl_region_rwsem);
638         if (rc)
639                 return rc;
640         if (p->res)
641                 size = resource_size(p->res);
642         rc = sysfs_emit(buf, "%#llx\n", size);
643         up_read(&cxl_region_rwsem);
644
645         return rc;
646 }
647 static DEVICE_ATTR_RW(size);
648
649 static struct attribute *cxl_region_attrs[] = {
650         &dev_attr_uuid.attr,
651         &dev_attr_commit.attr,
652         &dev_attr_interleave_ways.attr,
653         &dev_attr_interleave_granularity.attr,
654         &dev_attr_resource.attr,
655         &dev_attr_size.attr,
656         &dev_attr_mode.attr,
657         NULL,
658 };
659
660 static const struct attribute_group cxl_region_group = {
661         .attrs = cxl_region_attrs,
662         .is_visible = cxl_region_visible,
663 };
664
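/*
 * Helper for the targetN attributes: emit the name of the endpoint
 * decoder assigned at interleave position @pos, or a blank line if none.
 */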
665 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
666 {
667         struct cxl_region_params *p = &cxlr->params;
668         struct cxl_endpoint_decoder *cxled;
669         int rc;
670
671         rc = down_read_interruptible(&cxl_region_rwsem);
672         if (rc)
673                 return rc;
674
675         if (pos >= p->interleave_ways) {
676                 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
677                         p->interleave_ways);
678                 rc = -ENXIO;
679                 goto out;
680         }
681
682         cxled = p->targets[pos];
683         if (!cxled)
684                 rc = sysfs_emit(buf, "\n");
685         else
686                 rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
687 out:
688         up_read(&cxl_region_rwsem);
689
690         return rc;
691 }
692
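/*
 * device_find_child() callback: pick the lowest-id switch decoder that
 * is not already assigned to a region.
 */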
693 static int match_free_decoder(struct device *dev, void *data)
694 {
695         struct cxl_decoder *cxld;
696         int *id = data;
697
698         if (!is_switch_decoder(dev))
699                 return 0;
700
701         cxld = to_cxl_decoder(dev);
702
703         /* enforce ordered allocation */
704         if (cxld->id != *id)
705                 return 0;
706
707         if (!cxld->region)
708                 return 1;
709
710         (*id)++;
711
712         return 0;
713 }
714
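/*
 * device_find_child() callback: pick the switch decoder whose HPA range
 * exactly matches the region's resource.
 */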
715 static int match_auto_decoder(struct device *dev, void *data)
716 {
717         struct cxl_region_params *p = data;
718         struct cxl_decoder *cxld;
719         struct range *r;
720
721         if (!is_switch_decoder(dev))
722                 return 0;
723
724         cxld = to_cxl_decoder(dev);
725         r = &cxld->hpa_range;
726
727         if (p->res && p->res->start == r->start && p->res->end == r->end)
728                 return 1;
729
730         return 0;
731 }
732
733 static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
734                                                    struct cxl_region *cxlr)
735 {
736         struct device *dev;
737         int id = 0;
738
739         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
740                 dev = device_find_child(&port->dev, &cxlr->params,
741                                         match_auto_decoder);
742         else
743                 dev = device_find_child(&port->dev, &id, match_free_decoder);
744         if (!dev)
745                 return NULL;
746         /*
747          * This decoder is pinned (stays registered) as long as the endpoint
748          * decoder is registered, and endpoint decoder unregistration holds the
749          * cxl_region_rwsem over unregister events, so no need to hold on to
750          * this extra reference.
751          */
752         put_device(dev);
753         return to_cxl_decoder(dev);
754 }
755
756 static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
757                                                struct cxl_region *cxlr)
758 {
759         struct cxl_region_params *p = &cxlr->params;
760         struct cxl_region_ref *cxl_rr, *iter;
761         unsigned long index;
762         int rc;
763
764         xa_for_each(&port->regions, index, iter) {
765                 struct cxl_region_params *ip = &iter->region->params;
766
767                 if (!ip->res)
768                         continue;
769
770                 if (ip->res->start > p->res->start) {
771                         dev_dbg(&cxlr->dev,
772                                 "%s: HPA order violation %s:%pr vs %pr\n",
773                                 dev_name(&port->dev),
774                                 dev_name(&iter->region->dev), ip->res, p->res);
775                         return ERR_PTR(-EBUSY);
776                 }
777         }
778
779         cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
780         if (!cxl_rr)
781                 return ERR_PTR(-ENOMEM);
782         cxl_rr->port = port;
783         cxl_rr->region = cxlr;
784         cxl_rr->nr_targets = 1;
785         xa_init(&cxl_rr->endpoints);
786
787         rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
788         if (rc) {
789                 dev_dbg(&cxlr->dev,
790                         "%s: failed to track region reference: %d\n",
791                         dev_name(&port->dev), rc);
792                 kfree(cxl_rr);
793                 return ERR_PTR(rc);
794         }
795
796         return cxl_rr;
797 }
798
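/*
 * Sever the decoder's link to the region and drop the region reference
 * taken in cxl_rr_ep_add().
 */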
799 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
800 {
801         struct cxl_region *cxlr = cxl_rr->region;
802         struct cxl_decoder *cxld = cxl_rr->decoder;
803
804         if (!cxld)
805                 return;
806
807         dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
808         if (cxld->region == cxlr) {
809                 cxld->region = NULL;
810                 put_device(&cxlr->dev);
811         }
812 }
813
814 static void free_region_ref(struct cxl_region_ref *cxl_rr)
815 {
816         struct cxl_port *port = cxl_rr->port;
817         struct cxl_region *cxlr = cxl_rr->region;
818
819         cxl_rr_free_decoder(cxl_rr);
820         xa_erase(&port->regions, (unsigned long)cxlr);
821         xa_destroy(&cxl_rr->endpoints);
822         kfree(cxl_rr);
823 }
824
825 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
826                          struct cxl_endpoint_decoder *cxled)
827 {
828         int rc;
829         struct cxl_port *port = cxl_rr->port;
830         struct cxl_region *cxlr = cxl_rr->region;
831         struct cxl_decoder *cxld = cxl_rr->decoder;
832         struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
833
834         if (ep) {
835                 rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
836                                GFP_KERNEL);
837                 if (rc)
838                         return rc;
839         }
840         cxl_rr->nr_eps++;
841
842         if (!cxld->region) {
843                 cxld->region = cxlr;
844                 get_device(&cxlr->dev);
845         }
846
847         return 0;
848 }
849
850 static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
851                                 struct cxl_endpoint_decoder *cxled,
852                                 struct cxl_region_ref *cxl_rr)
853 {
854         struct cxl_decoder *cxld;
855
856         if (port == cxled_to_port(cxled))
857                 cxld = &cxled->cxld;
858         else
859                 cxld = cxl_region_find_decoder(port, cxlr);
860         if (!cxld) {
861                 dev_dbg(&cxlr->dev, "%s: no decoder available\n",
862                         dev_name(&port->dev));
863                 return -EBUSY;
864         }
865
866         if (cxld->region) {
867                 dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
868                         dev_name(&port->dev), dev_name(&cxld->dev),
869                         dev_name(&cxld->region->dev));
870                 return -EBUSY;
871         }
872
873         /*
874          * Endpoints should already match the region type, but backstop that
875          * assumption with an assertion. Switch-decoders change mapping-type
876          * based on what is mapped when they are assigned to a region.
877          */
878         dev_WARN_ONCE(&cxlr->dev,
879                       port == cxled_to_port(cxled) &&
880                               cxld->target_type != cxlr->type,
881                       "%s:%s mismatch decoder type %d -> %d\n",
882                       dev_name(&cxled_to_memdev(cxled)->dev),
883                       dev_name(&cxld->dev), cxld->target_type, cxlr->type);
884         cxld->target_type = cxlr->type;
885         cxl_rr->decoder = cxld;
886         return 0;
887 }
888
889 /**
890  * cxl_port_attach_region() - track a region's interest in a port by endpoint
891  * @port: port to add a new region reference 'struct cxl_region_ref'
892  * @cxlr: region to attach to @port
893  * @cxled: endpoint decoder used to create or further pin a region reference
894  * @pos: interleave position of @cxled in @cxlr
895  *
896  * The attach event is an opportunity to validate CXL decode setup
897  * constraints and record metadata needed for programming HDM decoders,
898  * in particular decoder target lists.
899  *
900  * The steps are:
901  *
902  * - validate that there are no other regions with a higher HPA already
903  *   associated with @port
904  * - establish a region reference if one is not already present
905  *
906  *   - additionally allocate a decoder instance that will host @cxlr on
907  *     @port
908  *
909  * - pin the region reference by the endpoint
910  * - account for how many entries in @port's target list are needed to
911  *   cover all of the added endpoints.
912  */
913 static int cxl_port_attach_region(struct cxl_port *port,
914                                   struct cxl_region *cxlr,
915                                   struct cxl_endpoint_decoder *cxled, int pos)
916 {
917         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
918         struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
919         struct cxl_region_ref *cxl_rr;
920         bool nr_targets_inc = false;
921         struct cxl_decoder *cxld;
922         unsigned long index;
923         int rc = -EBUSY;
924
925         lockdep_assert_held_write(&cxl_region_rwsem);
926
927         cxl_rr = cxl_rr_load(port, cxlr);
928         if (cxl_rr) {
929                 struct cxl_ep *ep_iter;
930                 int found = 0;
931
932                 /*
933                  * Walk the existing endpoints that have been attached to
934                  * @cxlr at @port and see if they share the same 'next' port
935                  * in the downstream direction. I.e. endpoints that share a
936                  * common upstream switch.
937                  */
938                 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
939                         if (ep_iter == ep)
940                                 continue;
941                         if (ep_iter->next == ep->next) {
942                                 found++;
943                                 break;
944                         }
945                 }
946
947                 /*
948                  * New target port, or @port is an endpoint port that always
949                  * accounts its own local decode as a target.
950                  */
951                 if (!found || !ep->next) {
952                         cxl_rr->nr_targets++;
953                         nr_targets_inc = true;
954                 }
955         } else {
956                 cxl_rr = alloc_region_ref(port, cxlr);
957                 if (IS_ERR(cxl_rr)) {
958                         dev_dbg(&cxlr->dev,
959                                 "%s: failed to allocate region reference\n",
960                                 dev_name(&port->dev));
961                         return PTR_ERR(cxl_rr);
962                 }
963                 nr_targets_inc = true;
964
965                 rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
966                 if (rc)
967                         goto out_erase;
968         }
969         cxld = cxl_rr->decoder;
970
971         rc = cxl_rr_ep_add(cxl_rr, cxled);
972         if (rc) {
973                 dev_dbg(&cxlr->dev,
974                         "%s: failed to track endpoint %s:%s reference\n",
975                         dev_name(&port->dev), dev_name(&cxlmd->dev),
976                         dev_name(&cxld->dev));
977                 goto out_erase;
978         }
979
980         dev_dbg(&cxlr->dev,
981                 "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
982                 dev_name(port->uport_dev), dev_name(&port->dev),
983                 dev_name(&cxld->dev), dev_name(&cxlmd->dev),
984                 dev_name(&cxled->cxld.dev), pos,
985                 ep ? ep->next ? dev_name(ep->next->uport_dev) :
986                                       dev_name(&cxlmd->dev) :
987                            "none",
988                 cxl_rr->nr_eps, cxl_rr->nr_targets);
989
990         return 0;
991 out_erase:
992         if (nr_targets_inc)
993                 cxl_rr->nr_targets--;
994         if (cxl_rr->nr_eps == 0)
995                 free_region_ref(cxl_rr);
996         return rc;
997 }
998
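/*
 * Undo cxl_port_attach_region(): drop @cxled's pin on @port's region
 * reference and free it once the last endpoint is gone.
 */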
999 static void cxl_port_detach_region(struct cxl_port *port,
1000                                    struct cxl_region *cxlr,
1001                                    struct cxl_endpoint_decoder *cxled)
1002 {
1003         struct cxl_region_ref *cxl_rr;
1004         struct cxl_ep *ep = NULL;
1005
1006         lockdep_assert_held_write(&cxl_region_rwsem);
1007
1008         cxl_rr = cxl_rr_load(port, cxlr);
1009         if (!cxl_rr)
1010                 return;
1011
1012         /*
1013          * Endpoint ports do not carry cxl_ep references, and they
1014          * never target more than one endpoint by definition
1015          */
1016         if (cxl_rr->decoder == &cxled->cxld)
1017                 cxl_rr->nr_eps--;
1018         else
1019                 ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1020         if (ep) {
1021                 struct cxl_ep *ep_iter;
1022                 unsigned long index;
1023                 int found = 0;
1024
1025                 cxl_rr->nr_eps--;
1026                 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1027                         if (ep_iter->next == ep->next) {
1028                                 found++;
1029                                 break;
1030                         }
1031                 }
1032                 if (!found)
1033                         cxl_rr->nr_targets--;
1034         }
1035
1036         if (cxl_rr->nr_eps == 0)
1037                 free_region_ref(cxl_rr);
1038 }
1039
1040 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1041                            struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1042                            int distance)
1043 {
1044         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1045         struct cxl_region *cxlr = cxl_rr->region;
1046         struct cxl_region_params *p = &cxlr->params;
1047         struct cxl_endpoint_decoder *cxled_peer;
1048         struct cxl_port *port = cxl_rr->port;
1049         struct cxl_memdev *cxlmd_peer;
1050         struct cxl_ep *ep_peer;
1051         int pos = cxled->pos;
1052
1053         /*
1054          * If this position wants to share a dport with the last endpoint mapped
1055          * then that endpoint, at index 'position - distance', must also be
1056          * mapped by this dport.
1057          */
1058         if (pos < distance) {
1059                 dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1060                         dev_name(port->uport_dev), dev_name(&port->dev),
1061                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1062                 return -ENXIO;
1063         }
1064         cxled_peer = p->targets[pos - distance];
1065         cxlmd_peer = cxled_to_memdev(cxled_peer);
1066         ep_peer = cxl_ep_load(port, cxlmd_peer);
1067         if (ep->dport != ep_peer->dport) {
1068                 dev_dbg(&cxlr->dev,
1069                         "%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1070                         dev_name(port->uport_dev), dev_name(&port->dev),
1071                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1072                         dev_name(&cxlmd_peer->dev),
1073                         dev_name(&cxled_peer->cxld.dev));
1074                 return -ENXIO;
1075         }
1076
1077         return 0;
1078 }
1079
1080 static int cxl_port_setup_targets(struct cxl_port *port,
1081                                   struct cxl_region *cxlr,
1082                                   struct cxl_endpoint_decoder *cxled)
1083 {
1084         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1085         int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
1086         struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1087         struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1088         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1089         struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1090         struct cxl_region_params *p = &cxlr->params;
1091         struct cxl_decoder *cxld = cxl_rr->decoder;
1092         struct cxl_switch_decoder *cxlsd;
1093         u16 eig, peig;
1094         u8 eiw, peiw;
1095
1096         /*
1097          * While root level decoders support x3, x6, x12, switch level
1098          * decoders only support powers of 2 up to x16.
1099          */
1100         if (!is_power_of_2(cxl_rr->nr_targets)) {
1101                 dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1102                         dev_name(port->uport_dev), dev_name(&port->dev),
1103                         cxl_rr->nr_targets);
1104                 return -EINVAL;
1105         }
1106
1107         cxlsd = to_cxl_switch_decoder(&cxld->dev);
1108         if (cxl_rr->nr_targets_set) {
1109                 int i, distance;
1110
1111                 /*
1112                  * Passthrough decoders impose no distance requirements between
1113                  * peers
1114                  */
1115                 if (cxl_rr->nr_targets == 1)
1116                         distance = 0;
1117                 else
1118                         distance = p->nr_targets / cxl_rr->nr_targets;
1119                 for (i = 0; i < cxl_rr->nr_targets_set; i++)
1120                         if (ep->dport == cxlsd->target[i]) {
1121                                 rc = check_last_peer(cxled, ep, cxl_rr,
1122                                                      distance);
1123                                 if (rc)
1124                                         return rc;
1125                                 goto out_target_set;
1126                         }
1127                 goto add_target;
1128         }
1129
1130         if (is_cxl_root(parent_port)) {
1131                 /*
1132                  * Root decoder IG is always set to the value in the CFMWS,
1133                  * which may differ from this region's IG.  We can use the
1134                  * region's IG here since interleave_granularity_store()
1135                  * does not allow interleaved host-bridges with
1136                  * root IG != region IG.
1137                  */
1138                 parent_ig = p->interleave_granularity;
1139                 parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1140                 /*
1141                  * For purposes of address bit routing, use power-of-2 math for
1142                  * switch ports.
1143                  */
1144                 if (!is_power_of_2(parent_iw))
1145                         parent_iw /= 3;
1146         } else {
1147                 struct cxl_region_ref *parent_rr;
1148                 struct cxl_decoder *parent_cxld;
1149
1150                 parent_rr = cxl_rr_load(parent_port, cxlr);
1151                 parent_cxld = parent_rr->decoder;
1152                 parent_ig = parent_cxld->interleave_granularity;
1153                 parent_iw = parent_cxld->interleave_ways;
1154         }
1155
1156         rc = granularity_to_eig(parent_ig, &peig);
1157         if (rc) {
1158                 dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1159                         dev_name(parent_port->uport_dev),
1160                         dev_name(&parent_port->dev), parent_ig);
1161                 return rc;
1162         }
1163
1164         rc = ways_to_eiw(parent_iw, &peiw);
1165         if (rc) {
1166                 dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1167                         dev_name(parent_port->uport_dev),
1168                         dev_name(&parent_port->dev), parent_iw);
1169                 return rc;
1170         }
1171
1172         iw = cxl_rr->nr_targets;
1173         rc = ways_to_eiw(iw, &eiw);
1174         if (rc) {
1175                 dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1176                         dev_name(port->uport_dev), dev_name(&port->dev), iw);
1177                 return rc;
1178         }
1179
1180         /*
1181          * Interleave granularity is a multiple of @parent_port granularity.
1182          * Multiplier is the parent port interleave ways.
1183          */
1184         rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1185         if (rc) {
1186                 dev_dbg(&cxlr->dev,
1187                         "%s: invalid granularity calculation (%d * %d)\n",
1188                         dev_name(&parent_port->dev), parent_ig, parent_iw);
1189                 return rc;
1190         }
1191
1192         rc = eig_to_granularity(eig, &ig);
1193         if (rc) {
1194                 dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1195                         dev_name(port->uport_dev), dev_name(&port->dev),
1196                         256 << eig);
1197                 return rc;
1198         }
1199
1200         if (iw > 8 || iw > cxlsd->nr_targets) {
1201                 dev_dbg(&cxlr->dev,
1202                         "%s:%s:%s: ways: %d overflows targets: %d\n",
1203                         dev_name(port->uport_dev), dev_name(&port->dev),
1204                         dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1205                 return -ENXIO;
1206         }
1207
1208         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1209                 if (cxld->interleave_ways != iw ||
1210                     cxld->interleave_granularity != ig ||
1211                     cxld->hpa_range.start != p->res->start ||
1212                     cxld->hpa_range.end != p->res->end ||
1213                     ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1214                         dev_err(&cxlr->dev,
1215                                 "%s:%s %s expected iw: %d ig: %d %pr\n",
1216                                 dev_name(port->uport_dev), dev_name(&port->dev),
1217                                 __func__, iw, ig, p->res);
1218                         dev_err(&cxlr->dev,
1219                                 "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1220                                 dev_name(port->uport_dev), dev_name(&port->dev),
1221                                 __func__, cxld->interleave_ways,
1222                                 cxld->interleave_granularity,
1223                                 (cxld->flags & CXL_DECODER_F_ENABLE) ?
1224                                         "enabled" :
1225                                         "disabled",
1226                                 cxld->hpa_range.start, cxld->hpa_range.end);
1227                         return -ENXIO;
1228                 }
1229         } else {
1230                 cxld->interleave_ways = iw;
1231                 cxld->interleave_granularity = ig;
1232                 cxld->hpa_range = (struct range) {
1233                         .start = p->res->start,
1234                         .end = p->res->end,
1235                 };
1236         }
1237         dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1238                 dev_name(&port->dev), iw, ig);
1239 add_target:
1240         if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1241                 dev_dbg(&cxlr->dev,
1242                         "%s:%s: targets full trying to add %s:%s at %d\n",
1243                         dev_name(port->uport_dev), dev_name(&port->dev),
1244                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1245                 return -ENXIO;
1246         }
1247         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1248                 if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1249                         dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1250                                 dev_name(port->uport_dev), dev_name(&port->dev),
1251                                 dev_name(&cxlsd->cxld.dev),
1252                                 dev_name(ep->dport->dport_dev),
1253                                 cxl_rr->nr_targets_set);
1254                         return -ENXIO;
1255                 }
1256         } else
1257                 cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1258         inc = 1;
1259 out_target_set:
1260         cxl_rr->nr_targets_set += inc;
1261         dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1262                 dev_name(port->uport_dev), dev_name(&port->dev),
1263                 cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1264                 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1265
1266         return 0;
1267 }
1268
1269 static void cxl_port_reset_targets(struct cxl_port *port,
1270                                    struct cxl_region *cxlr)
1271 {
1272         struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1273         struct cxl_decoder *cxld;
1274
1275         /*
1276          * After the last endpoint has been detached the entire cxl_rr may now
1277          * be gone.
1278          */
1279         if (!cxl_rr)
1280                 return;
1281         cxl_rr->nr_targets_set = 0;
1282
1283         cxld = cxl_rr->decoder;
1284         cxld->hpa_range = (struct range) {
1285                 .start = 0,
1286                 .end = -1,
1287         };
1288 }
1289
1290 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1291 {
1292         struct cxl_region_params *p = &cxlr->params;
1293         struct cxl_endpoint_decoder *cxled;
1294         struct cxl_dev_state *cxlds;
1295         struct cxl_memdev *cxlmd;
1296         struct cxl_port *iter;
1297         struct cxl_ep *ep;
1298         int i;
1299
1300         /*
1301          * In the auto-discovery case skip automatic teardown since the
1302          * address space is already active
1303          */
1304         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1305                 return;
1306
1307         for (i = 0; i < p->nr_targets; i++) {
1308                 cxled = p->targets[i];
1309                 cxlmd = cxled_to_memdev(cxled);
1310                 cxlds = cxlmd->cxlds;
1311
1312                 if (cxlds->rcd)
1313                         continue;
1314
1315                 iter = cxled_to_port(cxled);
1316                 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1317                         iter = to_cxl_port(iter->dev.parent);
1318
1319                 for (ep = cxl_ep_load(iter, cxlmd); iter;
1320                      iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1321                         cxl_port_reset_targets(iter, cxlr);
1322         }
1323 }
1324
1325 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1326 {
1327         struct cxl_region_params *p = &cxlr->params;
1328         struct cxl_endpoint_decoder *cxled;
1329         struct cxl_dev_state *cxlds;
1330         int i, rc, rch = 0, vh = 0;
1331         struct cxl_memdev *cxlmd;
1332         struct cxl_port *iter;
1333         struct cxl_ep *ep;
1334
1335         for (i = 0; i < p->nr_targets; i++) {
1336                 cxled = p->targets[i];
1337                 cxlmd = cxled_to_memdev(cxled);
1338                 cxlds = cxlmd->cxlds;
1339
1340                 /* validate that all targets agree on topology */
1341                 if (!cxlds->rcd) {
1342                         vh++;
1343                 } else {
1344                         rch++;
1345                         continue;
1346                 }
1347
1348                 iter = cxled_to_port(cxled);
1349                 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1350                         iter = to_cxl_port(iter->dev.parent);
1351
1352                 /*
1353                  * Descend the topology tree programming / validating
1354                  * targets while looking for conflicts.
1355                  */
1356                 for (ep = cxl_ep_load(iter, cxlmd); iter;
1357                      iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1358                         rc = cxl_port_setup_targets(iter, cxlr, cxled);
1359                         if (rc) {
1360                                 cxl_region_teardown_targets(cxlr);
1361                                 return rc;
1362                         }
1363                 }
1364         }
1365
1366         if (rch && vh) {
1367                 dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1368                 cxl_region_teardown_targets(cxlr);
1369                 return -ENXIO;
1370         }
1371
1372         return 0;
1373 }
1374
1375 static int cxl_region_validate_position(struct cxl_region *cxlr,
1376                                         struct cxl_endpoint_decoder *cxled,
1377                                         int pos)
1378 {
1379         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1380         struct cxl_region_params *p = &cxlr->params;
1381         int i;
1382
1383         if (pos < 0 || pos >= p->interleave_ways) {
1384                 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1385                         p->interleave_ways);
1386                 return -ENXIO;
1387         }
1388
1389         if (p->targets[pos] == cxled)
1390                 return 0;
1391
1392         if (p->targets[pos]) {
1393                 struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1394                 struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1395
1396                 dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1397                         pos, dev_name(&cxlmd_target->dev),
1398                         dev_name(&cxled_target->cxld.dev));
1399                 return -EBUSY;
1400         }
1401
1402         for (i = 0; i < p->interleave_ways; i++) {
1403                 struct cxl_endpoint_decoder *cxled_target;
1404                 struct cxl_memdev *cxlmd_target;
1405
1406                 cxled_target = p->targets[i];
1407                 if (!cxled_target)
1408                         continue;
1409
1410                 cxlmd_target = cxled_to_memdev(cxled_target);
1411                 if (cxlmd_target == cxlmd) {
1412                         dev_dbg(&cxlr->dev,
1413                                 "%s already specified at position %d via: %s\n",
1414                                 dev_name(&cxlmd->dev), pos,
1415                                 dev_name(&cxled_target->cxld.dev));
1416                         return -EBUSY;
1417                 }
1418         }
1419
1420         return 0;
1421 }
1422
1423 static int cxl_region_attach_position(struct cxl_region *cxlr,
1424                                       struct cxl_root_decoder *cxlrd,
1425                                       struct cxl_endpoint_decoder *cxled,
1426                                       const struct cxl_dport *dport, int pos)
1427 {
1428         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1429         struct cxl_port *iter;
1430         int rc;
1431
1432         if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1433                 dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1434                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1435                         dev_name(&cxlrd->cxlsd.cxld.dev));
1436                 return -ENXIO;
1437         }
1438
1439         for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1440              iter = to_cxl_port(iter->dev.parent)) {
1441                 rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1442                 if (rc)
1443                         goto err;
1444         }
1445
1446         return 0;
1447
1448 err:
1449         for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1450              iter = to_cxl_port(iter->dev.parent))
1451                 cxl_port_detach_region(iter, cxlr, cxled);
1452         return rc;
1453 }
1454
1455 static int cxl_region_attach_auto(struct cxl_region *cxlr,
1456                                   struct cxl_endpoint_decoder *cxled, int pos)
1457 {
1458         struct cxl_region_params *p = &cxlr->params;
1459
1460         if (cxled->state != CXL_DECODER_STATE_AUTO) {
1461                 dev_err(&cxlr->dev,
1462                         "%s: unable to add decoder to autodetected region\n",
1463                         dev_name(&cxled->cxld.dev));
1464                 return -EINVAL;
1465         }
1466
1467         if (pos >= 0) {
1468                 dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1469                         dev_name(&cxled->cxld.dev), pos);
1470                 return -EINVAL;
1471         }
1472
1473         if (p->nr_targets >= p->interleave_ways) {
1474                 dev_err(&cxlr->dev, "%s: no more target slots available\n",
1475                         dev_name(&cxled->cxld.dev));
1476                 return -ENXIO;
1477         }
1478
1479         /*
1480          * Temporarily record the endpoint decoder into the target array. Yes,
1481          * this means that userspace can view devices in the wrong position
1482          * before the region activates, and must be careful to understand when
1483          * it might be racing region autodiscovery.
1484          */
1485         pos = p->nr_targets;
1486         p->targets[pos] = cxled;
1487         cxled->pos = pos;
1488         p->nr_targets++;
1489
1490         return 0;
1491 }
1492
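/* sort() comparator: order endpoint decoders by interleave position */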
1493 static int cmp_interleave_pos(const void *a, const void *b)
1494 {
1495         struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1496         struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1497
1498         return cxled_a->pos - cxled_b->pos;
1499 }
1500
1501 static struct cxl_port *next_port(struct cxl_port *port)
1502 {
1503         if (!port->parent_dport)
1504                 return NULL;
1505         return port->parent_dport->port;
1506 }
1507
1508 static int match_switch_decoder_by_range(struct device *dev, void *data)
1509 {
1510         struct cxl_switch_decoder *cxlsd;
1511         struct range *r1, *r2 = data;
1512
1513         if (!is_switch_decoder(dev))
1514                 return 0;
1515
1516         cxlsd = to_cxl_switch_decoder(dev);
1517         r1 = &cxlsd->cxld.hpa_range;
1518
1519         if (is_root_decoder(dev))
1520                 return range_contains(r1, r2);
1521         return (r1->start == r2->start && r1->end == r2->end);
1522 }
1523
1524 static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1525                              int *pos, int *ways)
1526 {
1527         struct cxl_switch_decoder *cxlsd;
1528         struct cxl_port *parent;
1529         struct device *dev;
1530         int rc = -ENXIO;
1531
1532         parent = next_port(port);
1533         if (!parent)
1534                 return rc;
1535
1536         dev = device_find_child(&parent->dev, range,
1537                                 match_switch_decoder_by_range);
1538         if (!dev) {
1539                 dev_err(port->uport_dev,
1540                         "failed to find decoder mapping %#llx-%#llx\n",
1541                         range->start, range->end);
1542                 return rc;
1543         }
1544         cxlsd = to_cxl_switch_decoder(dev);
1545         *ways = cxlsd->cxld.interleave_ways;
1546
1547         for (int i = 0; i < *ways; i++) {
1548                 if (cxlsd->target[i] == port->parent_dport) {
1549                         *pos = i;
1550                         rc = 0;
1551                         break;
1552                 }
1553         }
1554         put_device(dev);
1555
1556         return rc;
1557 }
1558
1559 /**
1560  * cxl_calc_interleave_pos() - calculate an endpoint position in a region
1561  * @cxled: endpoint decoder member of given region
1562  *
1563  * The endpoint position is calculated by traversing the topology from
1564  * the endpoint to the root decoder and iteratively applying this
1565  * calculation:
1566  *
1567  *    position = position * parent_ways + parent_pos;
1568  *
1569  * ...where @position is inferred from switch and root decoder target lists.
1570  *
1571  * Return: position >= 0 on success
1572  *         -ENXIO on failure
1573  */
1574 static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
1575 {
1576         struct cxl_port *iter, *port = cxled_to_port(cxled);
1577         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1578         struct range *range = &cxled->cxld.hpa_range;
1579         int parent_ways = 0, parent_pos = 0, pos = 0;
1580         int rc;
1581
1582         /*
1583          * Example: the expected interleave order of the 4-way region shown
1584          * below is: mem0, mem2, mem1, mem3
1585          *
1586          *                root_port
1587          *                 /      \
1588          *      host_bridge_0    host_bridge_1
1589          *        |    |           |    |
1590          *       mem0 mem1        mem2 mem3
1591          *
1592          * In the example the calculator will iterate twice. The first iteration
1593          * uses the mem position in the host-bridge and the ways of the host-
1594          * bridge to generate the first, or local, position. The second
1595          * iteration uses the host-bridge position in the root_port and the ways
1596          * of the root_port to refine the position.
1597          *
1598          * A trace of the calculation per endpoint looks like this:
1599          * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
1600          *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
1601          *       pos: 0                   pos: 1
1602          *
1603          * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
1604          *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
1605          *       pos: 2                   pos: 3
1606          *
1607          * Note that while this example is simple, the method applies to more
1608          * complex topologies, including those with switches.
1609          */
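        /*
         * A hypothetical switched trace (illustrative only, not the topology
         * above): an 8-way region where the root interleaves across 2 host
         * bridges, each host bridge across 2 switches, and each switch
         * across 2 endpoints. For an endpoint at switch position 1, whose
         * switch sits at host-bridge position 0, and whose host bridge sits
         * at root position 1, the loop below yields:
         *
         *      pos = 0 * 2 + 1 = 1     (switch level)
         *      pos = 1 * 2 + 0 = 2     (host-bridge level)
         *      pos = 2 * 2 + 1 = 5     (root level)
         */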
1610
1611         /* Iterate from endpoint to root_port refining the position */
1612         for (iter = port; iter; iter = next_port(iter)) {
1613                 if (is_cxl_root(iter))
1614                         break;
1615
1616                 rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
1617                 if (rc)
1618                         return rc;
1619
1620                 pos = pos * parent_ways + parent_pos;
1621         }
1622
1623         dev_dbg(&cxlmd->dev,
1624                 "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1625                 dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1626                 dev_name(&port->dev), range->start, range->end, pos);
1627
1628         return pos;
1629 }
1630
1631 static int cxl_region_sort_targets(struct cxl_region *cxlr)
1632 {
1633         struct cxl_region_params *p = &cxlr->params;
1634         int i, rc = 0;
1635
1636         for (i = 0; i < p->nr_targets; i++) {
1637                 struct cxl_endpoint_decoder *cxled = p->targets[i];
1638
1639                 cxled->pos = cxl_calc_interleave_pos(cxled);
1640                 /*
1641                  * Record that sorting failed, but still continue to calc
1642                  * cxled->pos so that follow-on code paths can reliably
1643                  * do p->targets[cxled->pos] to self-reference their entry.
1644                  */
1645                 if (cxled->pos < 0)
1646                         rc = -ENXIO;
1647         }
1648         /* Keep the cxlr target list in interleave position order */
1649         sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1650              cmp_interleave_pos, NULL);
1651
1652         dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1653         return rc;
1654 }
1655
1656 static int cxl_region_attach(struct cxl_region *cxlr,
1657                              struct cxl_endpoint_decoder *cxled, int pos)
1658 {
1659         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1660         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1661         struct cxl_region_params *p = &cxlr->params;
1662         struct cxl_port *ep_port, *root_port;
1663         struct cxl_dport *dport;
1664         int rc = -ENXIO;
1665
1666         if (cxled->mode != cxlr->mode) {
1667                 dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
1668                         dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
1669                 return -EINVAL;
1670         }
1671
1672         if (cxled->mode == CXL_DECODER_DEAD) {
1673                 dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1674                 return -ENODEV;
1675         }
1676
1677         /* all full of members, or interleave config not established? */
1678         if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1679                 dev_dbg(&cxlr->dev, "region already active\n");
1680                 return -EBUSY;
1681         } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1682                 dev_dbg(&cxlr->dev, "interleave config missing\n");
1683                 return -ENXIO;
1684         }
1685
1686         if (p->nr_targets >= p->interleave_ways) {
1687                 dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
1688                         p->nr_targets);
1689                 return -EINVAL;
1690         }
1691
1692         ep_port = cxled_to_port(cxled);
1693         root_port = cxlrd_to_port(cxlrd);
1694         dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1695         if (!dport) {
1696                 dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1697                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1698                         dev_name(cxlr->dev.parent));
1699                 return -ENXIO;
1700         }
1701
1702         if (cxled->cxld.target_type != cxlr->type) {
1703                 dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1704                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1705                         cxled->cxld.target_type, cxlr->type);
1706                 return -ENXIO;
1707         }
1708
1709         if (!cxled->dpa_res) {
1710                 dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1711                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1712                 return -ENXIO;
1713         }
1714
1715         if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1716             resource_size(p->res)) {
1717                 dev_dbg(&cxlr->dev,
1718                         "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1719                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1720                         (u64)resource_size(cxled->dpa_res), p->interleave_ways,
1721                         (u64)resource_size(p->res));
1722                 return -EINVAL;
1723         }
1724
1725         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1726                 int i;
1727
1728                 rc = cxl_region_attach_auto(cxlr, cxled, pos);
1729                 if (rc)
1730                         return rc;
1731
1732                 /* await more targets to arrive... */
1733                 if (p->nr_targets < p->interleave_ways)
1734                         return 0;
1735
1736                 /*
1737                  * All targets are here, which implies all PCI enumeration that
1738                  * affects this region has been completed. Walk the topology to
1739                  * sort the devices into their relative region decode position.
1740                  */
1741                 rc = cxl_region_sort_targets(cxlr);
1742                 if (rc)
1743                         return rc;
1744
1745                 for (i = 0; i < p->nr_targets; i++) {
1746                         cxled = p->targets[i];
1747                         ep_port = cxled_to_port(cxled);
1748                         dport = cxl_find_dport_by_dev(root_port,
1749                                                       ep_port->host_bridge);
1750                         rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
1751                                                         dport, i);
1752                         if (rc)
1753                                 return rc;
1754                 }
1755
1756                 rc = cxl_region_setup_targets(cxlr);
1757                 if (rc)
1758                         return rc;
1759
1760                 /*
1761                  * If target setup succeeds in the autodiscovery case
1762                  * then the region is already committed.
1763                  */
1764                 p->state = CXL_CONFIG_COMMIT;
1765
1766                 return 0;
1767         }
1768
1769         rc = cxl_region_validate_position(cxlr, cxled, pos);
1770         if (rc)
1771                 return rc;
1772
1773         rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
1774         if (rc)
1775                 return rc;
1776
1777         p->targets[pos] = cxled;
1778         cxled->pos = pos;
1779         p->nr_targets++;
1780
1781         if (p->nr_targets == p->interleave_ways) {
1782                 rc = cxl_region_setup_targets(cxlr);
1783                 if (rc)
1784                         return rc;
1785                 p->state = CXL_CONFIG_ACTIVE;
1786         }
1787
1788         cxled->cxld.interleave_ways = p->interleave_ways;
1789         cxled->cxld.interleave_granularity = p->interleave_granularity;
1790         cxled->cxld.hpa_range = (struct range) {
1791                 .start = p->res->start,
1792                 .end = p->res->end,
1793         };
1794
1795         if (p->nr_targets != p->interleave_ways)
1796                 return 0;
1797
1798         /*
1799          * Test the auto-discovery position calculator function
1800          * against this successfully created user-defined region.
1801          * A fail message here means that this interleave config
1802          * will fail when presented as CXL_REGION_F_AUTO.
1803          */
1804         for (int i = 0; i < p->nr_targets; i++) {
1805                 struct cxl_endpoint_decoder *cxled = p->targets[i];
1806                 int test_pos;
1807
1808                 test_pos = cxl_calc_interleave_pos(cxled);
1809                 dev_dbg(&cxled->cxld.dev,
1810                         "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
1811                         (test_pos == cxled->pos) ? "success" : "fail",
1812                         test_pos, cxled->pos);
1813         }
1814
1815         return 0;
1816 }
1817
1818 static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
1819 {
1820         struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
1821         struct cxl_region *cxlr = cxled->cxld.region;
1822         struct cxl_region_params *p;
1823         int rc = 0;
1824
1825         lockdep_assert_held_write(&cxl_region_rwsem);
1826
1827         if (!cxlr)
1828                 return 0;
1829
1830         p = &cxlr->params;
1831         get_device(&cxlr->dev);
1832
1833         if (p->state > CXL_CONFIG_ACTIVE) {
1834                 /*
1835                  * TODO: tear down all impacted regions if a device is
1836                  * removed out of order
1837                  */
1838                 rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1839                 if (rc)
1840                         goto out;
1841                 p->state = CXL_CONFIG_ACTIVE;
1842         }
1843
1844         for (iter = ep_port; !is_cxl_root(iter);
1845              iter = to_cxl_port(iter->dev.parent))
1846                 cxl_port_detach_region(iter, cxlr, cxled);
1847
1848         if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
1849             p->targets[cxled->pos] != cxled) {
1850                 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1851
1852                 dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
1853                               dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1854                               cxled->pos);
1855                 goto out;
1856         }
1857
1858         if (p->state == CXL_CONFIG_ACTIVE) {
1859                 p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
1860                 cxl_region_teardown_targets(cxlr);
1861         }
1862         p->targets[cxled->pos] = NULL;
1863         p->nr_targets--;
1864         cxled->cxld.hpa_range = (struct range) {
1865                 .start = 0,
1866                 .end = -1,
1867         };
1868
1869         /* notify the region driver that one of its targets has departed */
1870         up_write(&cxl_region_rwsem);
1871         device_release_driver(&cxlr->dev);
1872         down_write(&cxl_region_rwsem);
1873 out:
1874         put_device(&cxlr->dev);
1875         return rc;
1876 }
1877
1878 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
1879 {
1880         down_write(&cxl_region_rwsem);
1881         cxled->mode = CXL_DECODER_DEAD;
1882         cxl_region_detach(cxled);
1883         up_write(&cxl_region_rwsem);
1884 }
1885
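/*
 * Note: @state selects how the region lock is taken below. The sysfs store
 * path passes TASK_INTERRUPTIBLE (killable wait); region autodiscovery via
 * cxl_add_to_region() passes TASK_UNINTERRUPTIBLE (unconditional wait).
 */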
1886 static int attach_target(struct cxl_region *cxlr,
1887                          struct cxl_endpoint_decoder *cxled, int pos,
1888                          unsigned int state)
1889 {
1890         int rc = 0;
1891
1892         if (state == TASK_INTERRUPTIBLE)
1893                 rc = down_write_killable(&cxl_region_rwsem);
1894         else
1895                 down_write(&cxl_region_rwsem);
1896         if (rc)
1897                 return rc;
1898
1899         down_read(&cxl_dpa_rwsem);
1900         rc = cxl_region_attach(cxlr, cxled, pos);
1901         up_read(&cxl_dpa_rwsem);
1902         up_write(&cxl_region_rwsem);
1903         return rc;
1904 }
1905
1906 static int detach_target(struct cxl_region *cxlr, int pos)
1907 {
1908         struct cxl_region_params *p = &cxlr->params;
1909         int rc;
1910
1911         rc = down_write_killable(&cxl_region_rwsem);
1912         if (rc)
1913                 return rc;
1914
1915         if (pos >= p->interleave_ways) {
1916                 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1917                         p->interleave_ways);
1918                 rc = -ENXIO;
1919                 goto out;
1920         }
1921
1922         if (!p->targets[pos]) {
1923                 rc = 0;
1924                 goto out;
1925         }
1926
1927         rc = cxl_region_detach(p->targets[pos]);
1928 out:
1929         up_write(&cxl_region_rwsem);
1930         return rc;
1931 }
1932
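/*
 * Illustrative sysfs usage (paths and device names assumed, not taken from
 * this file): writing an endpoint decoder name attaches it at position N,
 * and writing an empty string detaches it, e.g.:
 *
 *   echo decoder3.0 > /sys/bus/cxl/devices/region0/target0
 *   echo "" > /sys/bus/cxl/devices/region0/target0
 */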
1933 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
1934                             size_t len)
1935 {
1936         int rc;
1937
1938         if (sysfs_streq(buf, "\n"))
1939                 rc = detach_target(cxlr, pos);
1940         else {
1941                 struct device *dev;
1942
1943                 dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
1944                 if (!dev)
1945                         return -ENODEV;
1946
1947                 if (!is_endpoint_decoder(dev)) {
1948                         rc = -EINVAL;
1949                         goto out;
1950                 }
1951
1952                 rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
1953                                    TASK_INTERRUPTIBLE);
1954 out:
1955                 put_device(dev);
1956         }
1957
1958         if (rc < 0)
1959                 return rc;
1960         return len;
1961 }
1962
1963 #define TARGET_ATTR_RW(n)                                              \
1964 static ssize_t target##n##_show(                                       \
1965         struct device *dev, struct device_attribute *attr, char *buf)  \
1966 {                                                                      \
1967         return show_targetN(to_cxl_region(dev), buf, (n));             \
1968 }                                                                      \
1969 static ssize_t target##n##_store(struct device *dev,                   \
1970                                  struct device_attribute *attr,        \
1971                                  const char *buf, size_t len)          \
1972 {                                                                      \
1973         return store_targetN(to_cxl_region(dev), buf, (n), len);       \
1974 }                                                                      \
1975 static DEVICE_ATTR_RW(target##n)
1976
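/*
 * For example, TARGET_ATTR_RW(0) defines the target0_show()/target0_store()
 * wrappers around show_targetN()/store_targetN() and the dev_attr_target0
 * attribute referenced in target_attrs[] below.
 */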
1977 TARGET_ATTR_RW(0);
1978 TARGET_ATTR_RW(1);
1979 TARGET_ATTR_RW(2);
1980 TARGET_ATTR_RW(3);
1981 TARGET_ATTR_RW(4);
1982 TARGET_ATTR_RW(5);
1983 TARGET_ATTR_RW(6);
1984 TARGET_ATTR_RW(7);
1985 TARGET_ATTR_RW(8);
1986 TARGET_ATTR_RW(9);
1987 TARGET_ATTR_RW(10);
1988 TARGET_ATTR_RW(11);
1989 TARGET_ATTR_RW(12);
1990 TARGET_ATTR_RW(13);
1991 TARGET_ATTR_RW(14);
1992 TARGET_ATTR_RW(15);
1993
1994 static struct attribute *target_attrs[] = {
1995         &dev_attr_target0.attr,
1996         &dev_attr_target1.attr,
1997         &dev_attr_target2.attr,
1998         &dev_attr_target3.attr,
1999         &dev_attr_target4.attr,
2000         &dev_attr_target5.attr,
2001         &dev_attr_target6.attr,
2002         &dev_attr_target7.attr,
2003         &dev_attr_target8.attr,
2004         &dev_attr_target9.attr,
2005         &dev_attr_target10.attr,
2006         &dev_attr_target11.attr,
2007         &dev_attr_target12.attr,
2008         &dev_attr_target13.attr,
2009         &dev_attr_target14.attr,
2010         &dev_attr_target15.attr,
2011         NULL,
2012 };
2013
2014 static umode_t cxl_region_target_visible(struct kobject *kobj,
2015                                          struct attribute *a, int n)
2016 {
2017         struct device *dev = kobj_to_dev(kobj);
2018         struct cxl_region *cxlr = to_cxl_region(dev);
2019         struct cxl_region_params *p = &cxlr->params;
2020
2021         if (n < p->interleave_ways)
2022                 return a->mode;
2023         return 0;
2024 }
2025
2026 static const struct attribute_group cxl_region_target_group = {
2027         .attrs = target_attrs,
2028         .is_visible = cxl_region_target_visible,
2029 };
2030
2031 static const struct attribute_group *get_cxl_region_target_group(void)
2032 {
2033         return &cxl_region_target_group;
2034 }
2035
2036 static const struct attribute_group *region_groups[] = {
2037         &cxl_base_attribute_group,
2038         &cxl_region_group,
2039         &cxl_region_target_group,
2040         NULL,
2041 };
2042
2043 static void cxl_region_release(struct device *dev)
2044 {
2045         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
2046         struct cxl_region *cxlr = to_cxl_region(dev);
2047         int id = atomic_read(&cxlrd->region_id);
2048
2049         /*
2050          * Try to reuse the recently idled id rather than the cached
2051          * next id to prevent the region id space from increasing
2052          * unnecessarily.
2053          */
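        /*
         * For example (hypothetical ids): if region5 is released while the
         * cached next id is 6, the cmpxchg below rolls the cache back to 5
         * and frees the previously cached id 6, so the next region created
         * under this root decoder reuses id 5.
         */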
2054         if (cxlr->id < id)
2055                 if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2056                         memregion_free(id);
2057                         goto out;
2058                 }
2059
2060         memregion_free(cxlr->id);
2061 out:
2062         put_device(dev->parent);
2063         kfree(cxlr);
2064 }
2065
2066 const struct device_type cxl_region_type = {
2067         .name = "cxl_region",
2068         .release = cxl_region_release,
2069         .groups = region_groups
2070 };
2071
2072 bool is_cxl_region(struct device *dev)
2073 {
2074         return dev->type == &cxl_region_type;
2075 }
2076 EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
2077
2078 static struct cxl_region *to_cxl_region(struct device *dev)
2079 {
2080         if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2081                           "not a cxl_region device\n"))
2082                 return NULL;
2083
2084         return container_of(dev, struct cxl_region, dev);
2085 }
2086
2087 static void unregister_region(void *_cxlr)
2088 {
2089         struct cxl_region *cxlr = _cxlr;
2090         struct cxl_region_params *p = &cxlr->params;
2091         int i;
2092
2093         device_del(&cxlr->dev);
2094
2095         /*
2096          * Now that region sysfs is shut down, the parameter block is
2097          * read-only, so there is no need to hold the region rwsem to
2098          * access the region parameters.
2099          */
2100         for (i = 0; i < p->interleave_ways; i++)
2101                 detach_target(cxlr, i);
2102
2103         cxl_region_iomem_release(cxlr);
2104         put_device(&cxlr->dev);
2105 }
2106
2107 static struct lock_class_key cxl_region_key;
2108
2109 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2110 {
2111         struct cxl_region *cxlr;
2112         struct device *dev;
2113
2114         cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
2115         if (!cxlr) {
2116                 memregion_free(id);
2117                 return ERR_PTR(-ENOMEM);
2118         }
2119
2120         dev = &cxlr->dev;
2121         device_initialize(dev);
2122         lockdep_set_class(&dev->mutex, &cxl_region_key);
2123         dev->parent = &cxlrd->cxlsd.cxld.dev;
2124         /*
2125          * Keep root decoder pinned through cxl_region_release to fixup
2126          * region id allocations
2127          */
2128         get_device(dev->parent);
2129         device_set_pm_not_required(dev);
2130         dev->bus = &cxl_bus_type;
2131         dev->type = &cxl_region_type;
2132         cxlr->id = id;
2133
2134         return cxlr;
2135 }
2136
2137 /**
2138  * devm_cxl_add_region - Adds a region to a decoder
2139  * @cxlrd: root decoder
2140  * @id: memregion id to create; released via memregion_free() on failure
2141  * @mode: mode for the endpoint decoders of this region
2142  * @type: select whether this is an expander (type-3) or accelerator (type-2)
2143  *
2144  * This is the second step of region initialization. Regions exist within an
2145  * address space which is mapped by a @cxlrd.
2146  *
2147  * Return: 0 if the region was added to the @cxlrd, else returns negative error
2148  * code. The region will be named "regionZ" where Z is the unique region number.
2149  */
2150 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
2151                                               int id,
2152                                               enum cxl_decoder_mode mode,
2153                                               enum cxl_decoder_type type)
2154 {
2155         struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2156         struct cxl_region *cxlr;
2157         struct device *dev;
2158         int rc;
2159
2160         switch (mode) {
2161         case CXL_DECODER_RAM:
2162         case CXL_DECODER_PMEM:
2163                 break;
2164         default:
2165                 dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2166                 return ERR_PTR(-EINVAL);
2167         }
2168
2169         cxlr = cxl_region_alloc(cxlrd, id);
2170         if (IS_ERR(cxlr))
2171                 return cxlr;
2172         cxlr->mode = mode;
2173         cxlr->type = type;
2174
2175         dev = &cxlr->dev;
2176         rc = dev_set_name(dev, "region%d", id);
2177         if (rc)
2178                 goto err;
2179
2180         rc = device_add(dev);
2181         if (rc)
2182                 goto err;
2183
2184         rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
2185         if (rc)
2186                 return ERR_PTR(rc);
2187
2188         dev_dbg(port->uport_dev, "%s: created %s\n",
2189                 dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
2190         return cxlr;
2191
2192 err:
2193         put_device(dev);
2194         return ERR_PTR(rc);
2195 }
2196
2197 static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2198 {
2199         return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2200 }
2201
2202 static ssize_t create_pmem_region_show(struct device *dev,
2203                                        struct device_attribute *attr, char *buf)
2204 {
2205         return __create_region_show(to_cxl_root_decoder(dev), buf);
2206 }
2207
2208 static ssize_t create_ram_region_show(struct device *dev,
2209                                       struct device_attribute *attr, char *buf)
2210 {
2211         return __create_region_show(to_cxl_root_decoder(dev), buf);
2212 }
2213
2214 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2215                                           enum cxl_decoder_mode mode, int id)
2216 {
2217         int rc;
2218
2219         rc = memregion_alloc(GFP_KERNEL);
2220         if (rc < 0)
2221                 return ERR_PTR(rc);
2222
2223         if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2224                 memregion_free(rc);
2225                 return ERR_PTR(-EBUSY);
2226         }
2227
2228         return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2229 }
2230
2231 static ssize_t create_pmem_region_store(struct device *dev,
2232                                         struct device_attribute *attr,
2233                                         const char *buf, size_t len)
2234 {
2235         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2236         struct cxl_region *cxlr;
2237         int rc, id;
2238
2239         rc = sscanf(buf, "region%d\n", &id);
2240         if (rc != 1)
2241                 return -EINVAL;
2242
2243         cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
2244         if (IS_ERR(cxlr))
2245                 return PTR_ERR(cxlr);
2246
2247         return len;
2248 }
2249 DEVICE_ATTR_RW(create_pmem_region);
2250
2251 static ssize_t create_ram_region_store(struct device *dev,
2252                                        struct device_attribute *attr,
2253                                        const char *buf, size_t len)
2254 {
2255         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2256         struct cxl_region *cxlr;
2257         int rc, id;
2258
2259         rc = sscanf(buf, "region%d\n", &id);
2260         if (rc != 1)
2261                 return -EINVAL;
2262
2263         cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
2264         if (IS_ERR(cxlr))
2265                 return PTR_ERR(cxlr);
2266
2267         return len;
2268 }
2269 DEVICE_ATTR_RW(create_ram_region);
2270
2271 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2272                            char *buf)
2273 {
2274         struct cxl_decoder *cxld = to_cxl_decoder(dev);
2275         ssize_t rc;
2276
2277         rc = down_read_interruptible(&cxl_region_rwsem);
2278         if (rc)
2279                 return rc;
2280
2281         if (cxld->region)
2282                 rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2283         else
2284                 rc = sysfs_emit(buf, "\n");
2285         up_read(&cxl_region_rwsem);
2286
2287         return rc;
2288 }
2289 DEVICE_ATTR_RO(region);
2290
2291 static struct cxl_region *
2292 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2293 {
2294         struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2295         struct device *region_dev;
2296
2297         region_dev = device_find_child_by_name(&cxld->dev, name);
2298         if (!region_dev)
2299                 return ERR_PTR(-ENODEV);
2300
2301         return to_cxl_region(region_dev);
2302 }
2303
2304 static ssize_t delete_region_store(struct device *dev,
2305                                    struct device_attribute *attr,
2306                                    const char *buf, size_t len)
2307 {
2308         struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2309         struct cxl_port *port = to_cxl_port(dev->parent);
2310         struct cxl_region *cxlr;
2311
2312         cxlr = cxl_find_region_by_name(cxlrd, buf);
2313         if (IS_ERR(cxlr))
2314                 return PTR_ERR(cxlr);
2315
2316         devm_release_action(port->uport_dev, unregister_region, cxlr);
2317         put_device(&cxlr->dev);
2318
2319         return len;
2320 }
2321 DEVICE_ATTR_WO(delete_region);
2322
2323 static void cxl_pmem_region_release(struct device *dev)
2324 {
2325         struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2326         int i;
2327
2328         for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2329                 struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2330
2331                 put_device(&cxlmd->dev);
2332         }
2333
2334         kfree(cxlr_pmem);
2335 }
2336
2337 static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
2338         &cxl_base_attribute_group,
2339         NULL,
2340 };
2341
2342 const struct device_type cxl_pmem_region_type = {
2343         .name = "cxl_pmem_region",
2344         .release = cxl_pmem_region_release,
2345         .groups = cxl_pmem_region_attribute_groups,
2346 };
2347
2348 bool is_cxl_pmem_region(struct device *dev)
2349 {
2350         return dev->type == &cxl_pmem_region_type;
2351 }
2352 EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
2353
2354 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
2355 {
2356         if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
2357                           "not a cxl_pmem_region device\n"))
2358                 return NULL;
2359         return container_of(dev, struct cxl_pmem_region, dev);
2360 }
2361 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
2362
2363 struct cxl_poison_context {
2364         struct cxl_port *port;
2365         enum cxl_decoder_mode mode;
2366         u64 offset;
2367 };
2368
2369 static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
2370                                    struct cxl_poison_context *ctx)
2371 {
2372         struct cxl_dev_state *cxlds = cxlmd->cxlds;
2373         u64 offset, length;
2374         int rc = 0;
2375
2376         /*
2377          * Collect poison for the remaining unmapped resources
2378          * after poison is collected by committed endpoints.
2379          *
2380          * Knowing that PMEM must always follow RAM, get poison
2381          * for unmapped resources based on the last decoder's mode:
2382          *      ram: scan remains of ram range, then any pmem range
2383          *      pmem: scan remains of pmem range
2384          */
2385
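        /*
         * Hypothetical example: a device with a 1G ram_res followed by a 2G
         * pmem_res, where the last committed decoder was ram mode and ended
         * at a 512M DPA offset (ctx->offset). The ram branch below scans the
         * remaining 512M of ram_res, then the pmem branch scans the whole
         * pmem_res.
         */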
2386         if (ctx->mode == CXL_DECODER_RAM) {
2387                 offset = ctx->offset;
2388                 length = resource_size(&cxlds->ram_res) - offset;
2389                 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2390                 if (rc == -EFAULT)
2391                         rc = 0;
2392                 if (rc)
2393                         return rc;
2394         }
2395         if (ctx->mode == CXL_DECODER_PMEM) {
2396                 offset = ctx->offset;
2397                 length = resource_size(&cxlds->dpa_res) - offset;
2398                 if (!length)
2399                         return 0;
2400         } else if (resource_size(&cxlds->pmem_res)) {
2401                 offset = cxlds->pmem_res.start;
2402                 length = resource_size(&cxlds->pmem_res);
2403         } else {
2404                 return 0;
2405         }
2406
2407         return cxl_mem_get_poison(cxlmd, offset, length, NULL);
2408 }
2409
2410 static int poison_by_decoder(struct device *dev, void *arg)
2411 {
2412         struct cxl_poison_context *ctx = arg;
2413         struct cxl_endpoint_decoder *cxled;
2414         struct cxl_memdev *cxlmd;
2415         u64 offset, length;
2416         int rc = 0;
2417
2418         if (!is_endpoint_decoder(dev))
2419                 return rc;
2420
2421         cxled = to_cxl_endpoint_decoder(dev);
2422         if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
2423                 return rc;
2424
2425         /*
2426          * Regions are only created with single mode decoders: pmem or ram.
2427          * Linux does not support mixed mode decoders. This means that
2428          * reading poison per endpoint decoder adheres to the requirement
2429          * that poison reads of pmem and ram must be separated.
2430          * CXL 3.0 Spec 8.2.9.8.4.1
2431          */
2432         if (cxled->mode == CXL_DECODER_MIXED) {
2433                 dev_dbg(dev, "poison list read unsupported in mixed mode\n");
2434                 return rc;
2435         }
2436
2437         cxlmd = cxled_to_memdev(cxled);
2438         if (cxled->skip) {
2439                 offset = cxled->dpa_res->start - cxled->skip;
2440                 length = cxled->skip;
2441                 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2442                 if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
2443                         rc = 0;
2444                 if (rc)
2445                         return rc;
2446         }
2447
2448         offset = cxled->dpa_res->start;
2449         length = cxled->dpa_res->end - offset + 1;
2450         rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
2451         if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
2452                 rc = 0;
2453         if (rc)
2454                 return rc;
2455
2456         /* Iterate until commit_end is reached */
2457         if (cxled->cxld.id == ctx->port->commit_end) {
2458                 ctx->offset = cxled->dpa_res->end + 1;
2459                 ctx->mode = cxled->mode;
2460                 return 1;
2461         }
2462
2463         return 0;
2464 }
2465
2466 int cxl_get_poison_by_endpoint(struct cxl_port *port)
2467 {
2468         struct cxl_poison_context ctx;
2469         int rc = 0;
2470
2471         ctx = (struct cxl_poison_context) {
2472                 .port = port
2473         };
2474
2475         rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2476         if (rc == 1)
2477                 rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2478                                              &ctx);
2479
2480         return rc;
2481 }
2482
2483 static struct lock_class_key cxl_pmem_region_key;
2484
2485 static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
2486 {
2487         struct cxl_region_params *p = &cxlr->params;
2488         struct cxl_nvdimm_bridge *cxl_nvb;
2489         struct cxl_pmem_region *cxlr_pmem;
2490         struct device *dev;
2491         int i;
2492
2493         down_read(&cxl_region_rwsem);
2494         if (p->state != CXL_CONFIG_COMMIT) {
2495                 cxlr_pmem = ERR_PTR(-ENXIO);
2496                 goto out;
2497         }
2498
2499         cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
2500                             GFP_KERNEL);
2501         if (!cxlr_pmem) {
2502                 cxlr_pmem = ERR_PTR(-ENOMEM);
2503                 goto out;
2504         }
2505
2506         cxlr_pmem->hpa_range.start = p->res->start;
2507         cxlr_pmem->hpa_range.end = p->res->end;
2508
2509         /* Snapshot the region configuration underneath the cxl_region_rwsem */
2510         cxlr_pmem->nr_mappings = p->nr_targets;
2511         for (i = 0; i < p->nr_targets; i++) {
2512                 struct cxl_endpoint_decoder *cxled = p->targets[i];
2513                 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2514                 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
2515
2516                 /*
2517                  * Regions never span CXL root devices, so by definition the
2518                  * bridge for one device is the same for all.
2519                  */
2520                 if (i == 0) {
2521                         cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
2522                         if (!cxl_nvb) {
2523                                 cxlr_pmem = ERR_PTR(-ENODEV);
2524                                 goto out;
2525                         }
2526                         cxlr->cxl_nvb = cxl_nvb;
2527                 }
2528                 m->cxlmd = cxlmd;
2529                 get_device(&cxlmd->dev);
2530                 m->start = cxled->dpa_res->start;
2531                 m->size = resource_size(cxled->dpa_res);
2532                 m->position = i;
2533         }
2534
2535         dev = &cxlr_pmem->dev;
2536         cxlr_pmem->cxlr = cxlr;
2537         cxlr->cxlr_pmem = cxlr_pmem;
2538         device_initialize(dev);
2539         lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
2540         device_set_pm_not_required(dev);
2541         dev->parent = &cxlr->dev;
2542         dev->bus = &cxl_bus_type;
2543         dev->type = &cxl_pmem_region_type;
2544 out:
2545         up_read(&cxl_region_rwsem);
2546
2547         return cxlr_pmem;
2548 }
2549
2550 static void cxl_dax_region_release(struct device *dev)
2551 {
2552         struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
2553
2554         kfree(cxlr_dax);
2555 }
2556
2557 static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
2558         &cxl_base_attribute_group,
2559         NULL,
2560 };
2561
2562 const struct device_type cxl_dax_region_type = {
2563         .name = "cxl_dax_region",
2564         .release = cxl_dax_region_release,
2565         .groups = cxl_dax_region_attribute_groups,
2566 };
2567
2568 static bool is_cxl_dax_region(struct device *dev)
2569 {
2570         return dev->type == &cxl_dax_region_type;
2571 }
2572
2573 struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
2574 {
2575         if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
2576                           "not a cxl_dax_region device\n"))
2577                 return NULL;
2578         return container_of(dev, struct cxl_dax_region, dev);
2579 }
2580 EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
2581
2582 static struct lock_class_key cxl_dax_region_key;
2583
2584 static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
2585 {
2586         struct cxl_region_params *p = &cxlr->params;
2587         struct cxl_dax_region *cxlr_dax;
2588         struct device *dev;
2589
2590         down_read(&cxl_region_rwsem);
2591         if (p->state != CXL_CONFIG_COMMIT) {
2592                 cxlr_dax = ERR_PTR(-ENXIO);
2593                 goto out;
2594         }
2595
2596         cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
2597         if (!cxlr_dax) {
2598                 cxlr_dax = ERR_PTR(-ENOMEM);
2599                 goto out;
2600         }
2601
2602         cxlr_dax->hpa_range.start = p->res->start;
2603         cxlr_dax->hpa_range.end = p->res->end;
2604
2605         dev = &cxlr_dax->dev;
2606         cxlr_dax->cxlr = cxlr;
2607         device_initialize(dev);
2608         lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
2609         device_set_pm_not_required(dev);
2610         dev->parent = &cxlr->dev;
2611         dev->bus = &cxl_bus_type;
2612         dev->type = &cxl_dax_region_type;
2613 out:
2614         up_read(&cxl_region_rwsem);
2615
2616         return cxlr_dax;
2617 }
2618
2619 static void cxlr_pmem_unregister(void *_cxlr_pmem)
2620 {
2621         struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
2622         struct cxl_region *cxlr = cxlr_pmem->cxlr;
2623         struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2624
2625         /*
2626          * Either the bridge is in ->remove() context under the device_lock(),
2627          * or cxlr_release_nvdimm() is cancelling the bridge's release action
2628          * for @cxlr_pmem and doing it itself (while manually holding the bridge
2629          * lock).
2630          */
2631         device_lock_assert(&cxl_nvb->dev);
2632         cxlr->cxlr_pmem = NULL;
2633         cxlr_pmem->cxlr = NULL;
2634         device_unregister(&cxlr_pmem->dev);
2635 }
2636
2637 static void cxlr_release_nvdimm(void *_cxlr)
2638 {
2639         struct cxl_region *cxlr = _cxlr;
2640         struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2641
2642         device_lock(&cxl_nvb->dev);
2643         if (cxlr->cxlr_pmem)
2644                 devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
2645                                     cxlr->cxlr_pmem);
2646         device_unlock(&cxl_nvb->dev);
2647         cxlr->cxl_nvb = NULL;
2648         put_device(&cxl_nvb->dev);
2649 }
2650
2651 /**
2652  * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
2653  * @cxlr: parent CXL region for this pmem region bridge device
2654  *
2655  * Return: 0 on success, negative error code on failure.
2656  */
2657 static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
2658 {
2659         struct cxl_pmem_region *cxlr_pmem;
2660         struct cxl_nvdimm_bridge *cxl_nvb;
2661         struct device *dev;
2662         int rc;
2663
2664         cxlr_pmem = cxl_pmem_region_alloc(cxlr);
2665         if (IS_ERR(cxlr_pmem))
2666                 return PTR_ERR(cxlr_pmem);
2667         cxl_nvb = cxlr->cxl_nvb;
2668
2669         dev = &cxlr_pmem->dev;
2670         rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
2671         if (rc)
2672                 goto err;
2673
2674         rc = device_add(dev);
2675         if (rc)
2676                 goto err;
2677
2678         dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2679                 dev_name(dev));
2680
2681         device_lock(&cxl_nvb->dev);
2682         if (cxl_nvb->dev.driver)
2683                 rc = devm_add_action_or_reset(&cxl_nvb->dev,
2684                                               cxlr_pmem_unregister, cxlr_pmem);
2685         else
2686                 rc = -ENXIO;
2687         device_unlock(&cxl_nvb->dev);
2688
2689         if (rc)
2690                 goto err_bridge;
2691
2692         /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
2693         return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
2694
2695 err:
2696         put_device(dev);
2697 err_bridge:
2698         put_device(&cxl_nvb->dev);
2699         cxlr->cxl_nvb = NULL;
2700         return rc;
2701 }
2702
2703 static void cxlr_dax_unregister(void *_cxlr_dax)
2704 {
2705         struct cxl_dax_region *cxlr_dax = _cxlr_dax;
2706
2707         device_unregister(&cxlr_dax->dev);
2708 }
2709
2710 static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
2711 {
2712         struct cxl_dax_region *cxlr_dax;
2713         struct device *dev;
2714         int rc;
2715
2716         cxlr_dax = cxl_dax_region_alloc(cxlr);
2717         if (IS_ERR(cxlr_dax))
2718                 return PTR_ERR(cxlr_dax);
2719
2720         dev = &cxlr_dax->dev;
2721         rc = dev_set_name(dev, "dax_region%d", cxlr->id);
2722         if (rc)
2723                 goto err;
2724
2725         rc = device_add(dev);
2726         if (rc)
2727                 goto err;
2728
2729         dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2730                 dev_name(dev));
2731
2732         return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
2733                                         cxlr_dax);
2734 err:
2735         put_device(dev);
2736         return rc;
2737 }
2738
2739 static int match_root_decoder_by_range(struct device *dev, void *data)
2740 {
2741         struct range *r1, *r2 = data;
2742         struct cxl_root_decoder *cxlrd;
2743
2744         if (!is_root_decoder(dev))
2745                 return 0;
2746
2747         cxlrd = to_cxl_root_decoder(dev);
2748         r1 = &cxlrd->cxlsd.cxld.hpa_range;
2749         return range_contains(r1, r2);
2750 }
2751
2752 static int match_region_by_range(struct device *dev, void *data)
2753 {
2754         struct cxl_region_params *p;
2755         struct cxl_region *cxlr;
2756         struct range *r = data;
2757         int rc = 0;
2758
2759         if (!is_cxl_region(dev))
2760                 return 0;
2761
2762         cxlr = to_cxl_region(dev);
2763         p = &cxlr->params;
2764
2765         down_read(&cxl_region_rwsem);
2766         if (p->res && p->res->start == r->start && p->res->end == r->end)
2767                 rc = 1;
2768         up_read(&cxl_region_rwsem);
2769
2770         return rc;
2771 }
2772
2773 /* Establish an empty region covering the given HPA range */
2774 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
2775                                            struct cxl_endpoint_decoder *cxled)
2776 {
2777         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2778         struct cxl_port *port = cxlrd_to_port(cxlrd);
2779         struct range *hpa = &cxled->cxld.hpa_range;
2780         struct cxl_region_params *p;
2781         struct cxl_region *cxlr;
2782         struct resource *res;
2783         int rc;
2784
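        /*
         * Note: __create_region() returns -EBUSY when another thread wins the
         * race to consume the currently cached region id, so retry with a
         * freshly sampled id until creation succeeds or fails for another
         * reason.
         */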
2785         do {
2786                 cxlr = __create_region(cxlrd, cxled->mode,
2787                                        atomic_read(&cxlrd->region_id));
2788         } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
2789
2790         if (IS_ERR(cxlr)) {
2791                 dev_err(cxlmd->dev.parent,
2792                         "%s:%s: %s failed assign region: %ld\n",
2793                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2794                         __func__, PTR_ERR(cxlr));
2795                 return cxlr;
2796         }
2797
2798         down_write(&cxl_region_rwsem);
2799         p = &cxlr->params;
2800         if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
2801                 dev_err(cxlmd->dev.parent,
2802                         "%s:%s: %s autodiscovery interrupted\n",
2803                         dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2804                         __func__);
2805                 rc = -EBUSY;
2806                 goto err;
2807         }
2808
2809         set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
2810
2811         res = kmalloc(sizeof(*res), GFP_KERNEL);
2812         if (!res) {
2813                 rc = -ENOMEM;
2814                 goto err;
2815         }
2816
2817         *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
2818                                     dev_name(&cxlr->dev));
2819         rc = insert_resource(cxlrd->res, res);
2820         if (rc) {
2821                 /*
2822                  * Platform-firmware may not have split resources like "System
2823                  * RAM" on CXL window boundaries, see cxl_region_iomem_release()
2824                  */
2825                 dev_warn(cxlmd->dev.parent,
2826                          "%s:%s: %s %s cannot insert resource\n",
2827                          dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2828                          __func__, dev_name(&cxlr->dev));
2829         }
2830
2831         p->res = res;
2832         p->interleave_ways = cxled->cxld.interleave_ways;
2833         p->interleave_granularity = cxled->cxld.interleave_granularity;
2834         p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
2835
2836         rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
2837         if (rc)
2838                 goto err;
2839
2840         dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
2841                 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
2842                 dev_name(&cxlr->dev), p->res, p->interleave_ways,
2843                 p->interleave_granularity);
2844
2845         /* ...to match put_device() in cxl_add_to_region() */
2846         get_device(&cxlr->dev);
2847         up_write(&cxl_region_rwsem);
2848
2849         return cxlr;
2850
2851 err:
2852         up_write(&cxl_region_rwsem);
2853         devm_release_action(port->uport_dev, unregister_region, cxlr);
2854         return ERR_PTR(rc);
2855 }
2856
2857 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
2858 {
2859         struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2860         struct range *hpa = &cxled->cxld.hpa_range;
2861         struct cxl_decoder *cxld = &cxled->cxld;
2862         struct device *cxlrd_dev, *region_dev;
2863         struct cxl_root_decoder *cxlrd;
2864         struct cxl_region_params *p;
2865         struct cxl_region *cxlr;
2866         bool attach = false;
2867         int rc;
2868
2869         cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
2870                                       match_root_decoder_by_range);
2871         if (!cxlrd_dev) {
2872                 dev_err(cxlmd->dev.parent,
2873                         "%s:%s no CXL window for range %#llx:%#llx\n",
2874                         dev_name(&cxlmd->dev), dev_name(&cxld->dev),
2875                         cxld->hpa_range.start, cxld->hpa_range.end);
2876                 return -ENXIO;
2877         }
2878
2879         cxlrd = to_cxl_root_decoder(cxlrd_dev);
2880
2881         /*
2882          * Ensure that if multiple threads race to construct_region() for @hpa,
2883          * one does the construction and the others add to that.
2884          */
2885         mutex_lock(&cxlrd->range_lock);
2886         region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
2887                                        match_region_by_range);
2888         if (!region_dev) {
2889                 cxlr = construct_region(cxlrd, cxled);
2890                 region_dev = &cxlr->dev;
2891         } else
2892                 cxlr = to_cxl_region(region_dev);
2893         mutex_unlock(&cxlrd->range_lock);
2894
2895         rc = PTR_ERR_OR_ZERO(cxlr);
2896         if (rc)
2897                 goto out;
2898
2899         attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
2900
2901         down_read(&cxl_region_rwsem);
2902         p = &cxlr->params;
2903         attach = p->state == CXL_CONFIG_COMMIT;
2904         up_read(&cxl_region_rwsem);
2905
2906         if (attach) {
2907                 /*
2908                  * If device_attach() fails the range may still be active via
2909                  * the platform-firmware memory map, otherwise the driver for
2910                  * regions is local to this file, so driver matching can't fail.
2911                  */
2912                 if (device_attach(&cxlr->dev) < 0)
2913                         dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
2914                                 p->res);
2915         }
2916
2917         put_device(region_dev);
2918 out:
2919         put_device(cxlrd_dev);
2920         return rc;
2921 }
2922 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
2923
2924 static int is_system_ram(struct resource *res, void *arg)
2925 {
2926         struct cxl_region *cxlr = arg;
2927         struct cxl_region_params *p = &cxlr->params;
2928
2929         dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
2930         return 1;
2931 }
2932
2933 static int cxl_region_probe(struct device *dev)
2934 {
2935         struct cxl_region *cxlr = to_cxl_region(dev);
2936         struct cxl_region_params *p = &cxlr->params;
2937         int rc;
2938
2939         rc = down_read_interruptible(&cxl_region_rwsem);
2940         if (rc) {
2941                 dev_dbg(&cxlr->dev, "probe interrupted\n");
2942                 return rc;
2943         }
2944
2945         if (p->state < CXL_CONFIG_COMMIT) {
2946                 dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
2947                 rc = -ENXIO;
2948                 goto out;
2949         }
2950
2951         if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
2952                 dev_err(&cxlr->dev,
2953                         "failed to activate, re-commit region and retry\n");
2954                 rc = -ENXIO;
2955                 goto out;
2956         }
2957
2958         /*
2959          * From this point on any path that changes the region's state away from
2960          * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
2961          */
2962 out:
2963         up_read(&cxl_region_rwsem);
2964
2965         if (rc)
2966                 return rc;
2967
2968         switch (cxlr->mode) {
2969         case CXL_DECODER_PMEM:
2970                 return devm_cxl_add_pmem_region(cxlr);
2971         case CXL_DECODER_RAM:
2972                 /*
2973                  * The region cannot be managed by CXL if any portion of
2974                  * it is already online as 'System RAM'.
2975                  */
2976                 if (walk_iomem_res_desc(IORES_DESC_NONE,
2977                                         IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
2978                                         p->res->start, p->res->end, cxlr,
2979                                         is_system_ram) > 0)
2980                         return 0;
2981                 return devm_cxl_add_dax_region(cxlr);
2982         default:
2983                 dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
2984                         cxlr->mode);
2985                 return -ENXIO;
2986         }
2987 }
2988
2989 static struct cxl_driver cxl_region_driver = {
2990         .name = "cxl_region",
2991         .probe = cxl_region_probe,
2992         .id = CXL_DEVICE_REGION,
2993 };
2994
2995 int cxl_region_init(void)
2996 {
2997         return cxl_driver_register(&cxl_region_driver);
2998 }
2999
3000 void cxl_region_exit(void)
3001 {
3002         cxl_driver_unregister(&cxl_region_driver);
3003 }
3004
3005 MODULE_IMPORT_NS(CXL);
3006 MODULE_IMPORT_NS(DEVMEM);
3007 MODULE_ALIAS_CXL(CXL_DEVICE_REGION);