Commit | Line | Data |
---|---|---|
d87f36a0 | 1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
5b5c4e40 | 2 | /* |
d87f36a0 | 3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
5b5c4e40 EP |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the "Software"), | |
7 | * to deal in the Software without restriction, including without limitation | |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
9 | * and/or sell copies of the Software, and to permit persons to whom the | |
10 | * Software is furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
21 | * OTHER DEALINGS IN THE SOFTWARE. | |
22 | */ | |
23 | ||
24 | #include <linux/types.h> | |
25 | #include <linux/kernel.h> | |
26 | #include <linux/pci.h> | |
27 | #include <linux/errno.h> | |
28 | #include <linux/acpi.h> | |
29 | #include <linux/hash.h> | |
30 | #include <linux/cpufreq.h> | |
f7c826ad | 31 | #include <linux/log2.h> |
520b8fb7 FK |
32 | #include <linux/dmi.h> |
33 | #include <linux/atomic.h> | |
5b5c4e40 EP |
34 | |
35 | #include "kfd_priv.h" | |
36 | #include "kfd_crat.h" | |
37 | #include "kfd_topology.h" | |
851a645e | 38 | #include "kfd_device_queue_manager.h" |
5a75ea56 | 39 | #include "kfd_svm.h" |
fc7f1d96 | 40 | #include "kfd_debug.h" |
5b87245f | 41 | #include "amdgpu_amdkfd.h" |
0dee45a2 | 42 | #include "amdgpu_ras.h" |
0f28cca8 | 43 | #include "amdgpu.h" |
5b5c4e40 | 44 | |
4f449311 HK |
45 | /* topology_device_list - Master list of all topology devices */ |
46 | static struct list_head topology_device_list; | |
520b8fb7 | 47 | static struct kfd_system_properties sys_props; |
5b5c4e40 EP |
48 | |
49 | static DECLARE_RWSEM(topology_lock); | |
46d18d51 | 50 | static uint32_t topology_crat_proximity_domain; |
5b5c4e40 | 51 | |
46d18d51 | 52 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( |
3a87177e HK |
53 | uint32_t proximity_domain) |
54 | { | |
55 | struct kfd_topology_device *top_dev; | |
56 | struct kfd_topology_device *device = NULL; | |
57 | ||
3a87177e HK |
58 | list_for_each_entry(top_dev, &topology_device_list, list) |
59 | if (top_dev->proximity_domain == proximity_domain) { | |
60 | device = top_dev; | |
61 | break; | |
62 | } | |
63 | ||
46d18d51 MJ |
64 | return device; |
65 | } | |
66 | ||
67 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain( | |
68 | uint32_t proximity_domain) | |
69 | { | |
70 | struct kfd_topology_device *device = NULL; | |
71 | ||
72 | down_read(&topology_lock); | |
73 | ||
74 | device = kfd_topology_device_by_proximity_domain_no_lock( | |
75 | proximity_domain); | |
3a87177e HK |
76 | up_read(&topology_lock); |
77 | ||
78 | return device; | |
79 | } | |
80 | ||
44d8cc6f | 81 | struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) |
5b5c4e40 | 82 | { |
44d8cc6f YZ |
83 | struct kfd_topology_device *top_dev = NULL; |
84 | struct kfd_topology_device *ret = NULL; | |
5b5c4e40 EP |
85 | |
86 | down_read(&topology_lock); | |
87 | ||
88 | list_for_each_entry(top_dev, &topology_device_list, list) | |
89 | if (top_dev->gpu_id == gpu_id) { | |
44d8cc6f | 90 | ret = top_dev; |
5b5c4e40 EP |
91 | break; |
92 | } | |
93 | ||
94 | up_read(&topology_lock); | |
95 | ||
44d8cc6f YZ |
96 | return ret; |
97 | } | |
98 | ||
8dc1db31 | 99 | struct kfd_node *kfd_device_by_id(uint32_t gpu_id) |
44d8cc6f YZ |
100 | { |
101 | struct kfd_topology_device *top_dev; | |
102 | ||
103 | top_dev = kfd_topology_device_by_id(gpu_id); | |
104 | if (!top_dev) | |
105 | return NULL; | |
106 | ||
107 | return top_dev->gpu; | |
5b5c4e40 EP |
108 | } |
109 | ||
8dc1db31 | 110 | struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) |
5b5c4e40 EP |
111 | { |
112 | struct kfd_topology_device *top_dev; | |
8dc1db31 | 113 | struct kfd_node *device = NULL; |
5b5c4e40 EP |
114 | |
115 | down_read(&topology_lock); | |
116 | ||
117 | list_for_each_entry(top_dev, &topology_device_list, list) | |
d69a3b76 | 118 | if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { |
5b5c4e40 EP |
119 | device = top_dev->gpu; |
120 | break; | |
121 | } | |
122 | ||
123 | up_read(&topology_lock); | |
124 | ||
125 | return device; | |
126 | } | |
127 | ||
3a87177e | 128 | /* Called with write topology_lock acquired */ |
5b5c4e40 EP |
129 | static void kfd_release_topology_device(struct kfd_topology_device *dev) |
130 | { | |
131 | struct kfd_mem_properties *mem; | |
132 | struct kfd_cache_properties *cache; | |
133 | struct kfd_iolink_properties *iolink; | |
0f28cca8 | 134 | struct kfd_iolink_properties *p2plink; |
f4757347 | 135 | struct kfd_perf_properties *perf; |
5b5c4e40 | 136 | |
5b5c4e40 EP |
137 | list_del(&dev->list); |
138 | ||
139 | while (dev->mem_props.next != &dev->mem_props) { | |
140 | mem = container_of(dev->mem_props.next, | |
141 | struct kfd_mem_properties, list); | |
142 | list_del(&mem->list); | |
143 | kfree(mem); | |
144 | } | |
145 | ||
146 | while (dev->cache_props.next != &dev->cache_props) { | |
147 | cache = container_of(dev->cache_props.next, | |
148 | struct kfd_cache_properties, list); | |
149 | list_del(&cache->list); | |
150 | kfree(cache); | |
151 | } | |
152 | ||
153 | while (dev->io_link_props.next != &dev->io_link_props) { | |
154 | iolink = container_of(dev->io_link_props.next, | |
155 | struct kfd_iolink_properties, list); | |
156 | list_del(&iolink->list); | |
157 | kfree(iolink); | |
158 | } | |
159 | ||
0f28cca8 RE |
160 | while (dev->p2p_link_props.next != &dev->p2p_link_props) { |
161 | p2plink = container_of(dev->p2p_link_props.next, | |
162 | struct kfd_iolink_properties, list); | |
163 | list_del(&p2plink->list); | |
164 | kfree(p2plink); | |
165 | } | |
166 | ||
f4757347 AL |
167 | while (dev->perf_props.next != &dev->perf_props) { |
168 | perf = container_of(dev->perf_props.next, | |
169 | struct kfd_perf_properties, list); | |
170 | list_del(&perf->list); | |
171 | kfree(perf); | |
172 | } | |
173 | ||
5b5c4e40 | 174 | kfree(dev); |
5b5c4e40 EP |
175 | } |
176 | ||
4f449311 | 177 | void kfd_release_topology_device_list(struct list_head *device_list) |
5b5c4e40 EP |
178 | { |
179 | struct kfd_topology_device *dev; | |
180 | ||
4f449311 HK |
181 | while (!list_empty(device_list)) { |
182 | dev = list_first_entry(device_list, | |
183 | struct kfd_topology_device, list); | |
5b5c4e40 | 184 | kfd_release_topology_device(dev); |
4f449311 | 185 | } |
5b5c4e40 EP |
186 | } |
187 | ||
4f449311 HK |
188 | static void kfd_release_live_view(void) |
189 | { | |
190 | kfd_release_topology_device_list(&topology_device_list); | |
5b5c4e40 EP |
191 | memset(&sys_props, 0, sizeof(sys_props)); |
192 | } | |
193 | ||
4f449311 HK |
194 | struct kfd_topology_device *kfd_create_topology_device( |
195 | struct list_head *device_list) | |
5b5c4e40 EP |
196 | { |
197 | struct kfd_topology_device *dev; | |
198 | ||
199 | dev = kfd_alloc_struct(dev); | |
4eacc26b | 200 | if (!dev) { |
5b5c4e40 | 201 | pr_err("No memory to allocate a topology device"); |
16b9201c | 202 | return NULL; |
5b5c4e40 EP |
203 | } |
204 | ||
205 | INIT_LIST_HEAD(&dev->mem_props); | |
206 | INIT_LIST_HEAD(&dev->cache_props); | |
207 | INIT_LIST_HEAD(&dev->io_link_props); | |
0f28cca8 | 208 | INIT_LIST_HEAD(&dev->p2p_link_props); |
f4757347 | 209 | INIT_LIST_HEAD(&dev->perf_props); |
5b5c4e40 | 210 | |
4f449311 | 211 | list_add_tail(&dev->list, device_list); |
5b5c4e40 EP |
212 | |
213 | return dev; | |
16b9201c | 214 | } |
5b5c4e40 | 215 | |
5b5c4e40 | 216 | |
/*
 * Helpers for sysfs show() callbacks. Each one appends formatted text to
 * @buffer at offset @offs and advances @offs by the number of characters
 * actually written; the whole expression evaluates to the new @offs.
 *
 * sysfs_emit_at() is used instead of snprintf(): snprintf() returns the
 * length that *would* have been written, so on truncation @offs could move
 * past PAGE_SIZE and make the remaining-size argument wrap around.
 * sysfs_emit_at() never writes beyond the page and returns the real count.
 */
#define sysfs_show_gen_prop(buffer, offs, fmt, ...)		\
		(offs += sysfs_emit_at(buffer, offs,		\
				       fmt, __VA_ARGS__))
#define sysfs_show_32bit_prop(buffer, offs, name, value) \
		sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, offs, name, value) \
		sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, offs, value) \
		sysfs_show_gen_prop(buffer, offs, "%u\n", value)
#define sysfs_show_str_val(buffer, offs, value) \
		sysfs_show_gen_prop(buffer, offs, "%s\n", value)
5b5c4e40 EP |
228 | |
229 | static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, | |
230 | char *buffer) | |
231 | { | |
83a13ef5 | 232 | int offs = 0; |
5b5c4e40 EP |
233 | |
234 | /* Making sure that the buffer is an empty string */ | |
235 | buffer[0] = 0; | |
236 | ||
237 | if (attr == &sys_props.attr_genid) { | |
83a13ef5 FK |
238 | sysfs_show_32bit_val(buffer, offs, |
239 | sys_props.generation_count); | |
5b5c4e40 | 240 | } else if (attr == &sys_props.attr_props) { |
83a13ef5 FK |
241 | sysfs_show_64bit_prop(buffer, offs, "platform_oem", |
242 | sys_props.platform_oem); | |
243 | sysfs_show_64bit_prop(buffer, offs, "platform_id", | |
244 | sys_props.platform_id); | |
245 | sysfs_show_64bit_prop(buffer, offs, "platform_rev", | |
246 | sys_props.platform_rev); | |
5b5c4e40 | 247 | } else { |
83a13ef5 | 248 | offs = -EINVAL; |
5b5c4e40 EP |
249 | } |
250 | ||
83a13ef5 | 251 | return offs; |
5b5c4e40 EP |
252 | } |
253 | ||
/*
 * kobj_type release handler shared by the topology kobject types in this
 * file: frees the kobject's memory with kfree().
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
258 | ||
5b5c4e40 EP |
259 | static const struct sysfs_ops sysprops_ops = { |
260 | .show = sysprops_show, | |
261 | }; | |
262 | ||
4fa01c63 | 263 | static const struct kobj_type sysprops_type = { |
5108d768 | 264 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
265 | .sysfs_ops = &sysprops_ops, |
266 | }; | |
267 | ||
268 | static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, | |
269 | char *buffer) | |
270 | { | |
83a13ef5 | 271 | int offs = 0; |
5b5c4e40 EP |
272 | struct kfd_iolink_properties *iolink; |
273 | ||
274 | /* Making sure that the buffer is an empty string */ | |
275 | buffer[0] = 0; | |
276 | ||
277 | iolink = container_of(attr, struct kfd_iolink_properties, attr); | |
6b855f7b HK |
278 | if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) |
279 | return -EPERM; | |
83a13ef5 FK |
280 | sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); |
281 | sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); | |
282 | sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); | |
283 | sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); | |
284 | sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); | |
285 | sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); | |
286 | sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); | |
287 | sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); | |
288 | sysfs_show_32bit_prop(buffer, offs, "min_bandwidth", | |
289 | iolink->min_bandwidth); | |
290 | sysfs_show_32bit_prop(buffer, offs, "max_bandwidth", | |
291 | iolink->max_bandwidth); | |
292 | sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", | |
293 | iolink->rec_transfer_size); | |
294 | sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); | |
295 | ||
296 | return offs; | |
5b5c4e40 EP |
297 | } |
298 | ||
299 | static const struct sysfs_ops iolink_ops = { | |
300 | .show = iolink_show, | |
301 | }; | |
302 | ||
4fa01c63 | 303 | static const struct kobj_type iolink_type = { |
5108d768 | 304 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
305 | .sysfs_ops = &iolink_ops, |
306 | }; | |
307 | ||
308 | static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, | |
309 | char *buffer) | |
310 | { | |
83a13ef5 | 311 | int offs = 0; |
5b5c4e40 EP |
312 | struct kfd_mem_properties *mem; |
313 | ||
314 | /* Making sure that the buffer is an empty string */ | |
315 | buffer[0] = 0; | |
316 | ||
317 | mem = container_of(attr, struct kfd_mem_properties, attr); | |
6b855f7b HK |
318 | if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) |
319 | return -EPERM; | |
83a13ef5 FK |
320 | sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); |
321 | sysfs_show_64bit_prop(buffer, offs, "size_in_bytes", | |
322 | mem->size_in_bytes); | |
323 | sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); | |
324 | sysfs_show_32bit_prop(buffer, offs, "width", mem->width); | |
325 | sysfs_show_32bit_prop(buffer, offs, "mem_clk_max", | |
326 | mem->mem_clk_max); | |
327 | ||
328 | return offs; | |
5b5c4e40 EP |
329 | } |
330 | ||
331 | static const struct sysfs_ops mem_ops = { | |
332 | .show = mem_show, | |
333 | }; | |
334 | ||
4fa01c63 | 335 | static const struct kobj_type mem_type = { |
5108d768 | 336 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
337 | .sysfs_ops = &mem_ops, |
338 | }; | |
339 | ||
340 | static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, | |
341 | char *buffer) | |
342 | { | |
83a13ef5 | 343 | int offs = 0; |
bc0c75a3 | 344 | uint32_t i, j; |
5b5c4e40 EP |
345 | struct kfd_cache_properties *cache; |
346 | ||
347 | /* Making sure that the buffer is an empty string */ | |
348 | buffer[0] = 0; | |
5b5c4e40 | 349 | cache = container_of(attr, struct kfd_cache_properties, attr); |
6b855f7b HK |
350 | if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) |
351 | return -EPERM; | |
83a13ef5 | 352 | sysfs_show_32bit_prop(buffer, offs, "processor_id_low", |
5b5c4e40 | 353 | cache->processor_id_low); |
83a13ef5 FK |
354 | sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); |
355 | sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); | |
356 | sysfs_show_32bit_prop(buffer, offs, "cache_line_size", | |
357 | cache->cacheline_size); | |
358 | sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag", | |
359 | cache->cachelines_per_tag); | |
360 | sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); | |
361 | sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); | |
362 | sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); | |
c0cc999f | 363 | |
83a13ef5 | 364 | offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); |
c0cc999f | 365 | for (i = 0; i < cache->sibling_map_size; i++) |
83a13ef5 | 366 | for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) |
bc0c75a3 | 367 | /* Check each bit */ |
83a13ef5 | 368 | offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", |
c0cc999f | 369 | (cache->sibling_map[i] >> j) & 1); |
83a13ef5 | 370 | |
bc0c75a3 | 371 | /* Replace the last "," with end of line */ |
83a13ef5 FK |
372 | buffer[offs-1] = '\n'; |
373 | return offs; | |
5b5c4e40 EP |
374 | } |
375 | ||
376 | static const struct sysfs_ops cache_ops = { | |
377 | .show = kfd_cache_show, | |
378 | }; | |
379 | ||
4fa01c63 | 380 | static const struct kobj_type cache_type = { |
5108d768 | 381 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
382 | .sysfs_ops = &cache_ops, |
383 | }; | |
384 | ||
f4757347 AL |
385 | /****** Sysfs of Performance Counters ******/ |
386 | ||
387 | struct kfd_perf_attr { | |
388 | struct kobj_attribute attr; | |
389 | uint32_t data; | |
390 | }; | |
391 | ||
392 | static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, | |
393 | char *buf) | |
394 | { | |
83a13ef5 | 395 | int offs = 0; |
f4757347 AL |
396 | struct kfd_perf_attr *attr; |
397 | ||
398 | buf[0] = 0; | |
399 | attr = container_of(attrs, struct kfd_perf_attr, attr); | |
400 | if (!attr->data) /* invalid data for PMC */ | |
401 | return 0; | |
402 | else | |
83a13ef5 | 403 | return sysfs_show_32bit_val(buf, offs, attr->data); |
f4757347 AL |
404 | } |
405 | ||
406 | #define KFD_PERF_DESC(_name, _data) \ | |
407 | { \ | |
408 | .attr = __ATTR(_name, 0444, perf_show, NULL), \ | |
409 | .data = _data, \ | |
410 | } | |
411 | ||
412 | static struct kfd_perf_attr perf_attr_iommu[] = { | |
413 | KFD_PERF_DESC(max_concurrent, 0), | |
414 | KFD_PERF_DESC(num_counters, 0), | |
415 | KFD_PERF_DESC(counter_ids, 0), | |
416 | }; | |
417 | /****************************************/ | |
418 | ||
5b5c4e40 EP |
419 | static ssize_t node_show(struct kobject *kobj, struct attribute *attr, |
420 | char *buffer) | |
421 | { | |
83a13ef5 | 422 | int offs = 0; |
5b5c4e40 | 423 | struct kfd_topology_device *dev; |
f7c826ad | 424 | uint32_t log_max_watch_addr; |
5b5c4e40 EP |
425 | |
426 | /* Making sure that the buffer is an empty string */ | |
427 | buffer[0] = 0; | |
428 | ||
429 | if (strcmp(attr->name, "gpu_id") == 0) { | |
430 | dev = container_of(attr, struct kfd_topology_device, | |
431 | attr_gpuid); | |
6b855f7b HK |
432 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
433 | return -EPERM; | |
83a13ef5 | 434 | return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); |
f7c826ad AS |
435 | } |
436 | ||
437 | if (strcmp(attr->name, "name") == 0) { | |
5b5c4e40 EP |
438 | dev = container_of(attr, struct kfd_topology_device, |
439 | attr_name); | |
c181159a | 440 | |
6b855f7b HK |
441 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
442 | return -EPERM; | |
83a13ef5 | 443 | return sysfs_show_str_val(buffer, offs, dev->node_props.name); |
f7c826ad | 444 | } |
5b5c4e40 | 445 | |
f7c826ad AS |
446 | dev = container_of(attr, struct kfd_topology_device, |
447 | attr_props); | |
6b855f7b HK |
448 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
449 | return -EPERM; | |
83a13ef5 FK |
450 | sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", |
451 | dev->node_props.cpu_cores_count); | |
452 | sysfs_show_32bit_prop(buffer, offs, "simd_count", | |
97e3c6a8 | 453 | dev->gpu ? dev->node_props.simd_count : 0); |
83a13ef5 FK |
454 | sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", |
455 | dev->node_props.mem_banks_count); | |
456 | sysfs_show_32bit_prop(buffer, offs, "caches_count", | |
457 | dev->node_props.caches_count); | |
458 | sysfs_show_32bit_prop(buffer, offs, "io_links_count", | |
459 | dev->node_props.io_links_count); | |
0f28cca8 RE |
460 | sysfs_show_32bit_prop(buffer, offs, "p2p_links_count", |
461 | dev->node_props.p2p_links_count); | |
83a13ef5 FK |
462 | sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", |
463 | dev->node_props.cpu_core_id_base); | |
464 | sysfs_show_32bit_prop(buffer, offs, "simd_id_base", | |
465 | dev->node_props.simd_id_base); | |
466 | sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd", | |
467 | dev->node_props.max_waves_per_simd); | |
468 | sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb", | |
469 | dev->node_props.lds_size_in_kb); | |
470 | sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb", | |
471 | dev->node_props.gds_size_in_kb); | |
472 | sysfs_show_32bit_prop(buffer, offs, "num_gws", | |
473 | dev->node_props.num_gws); | |
474 | sysfs_show_32bit_prop(buffer, offs, "wave_front_size", | |
475 | dev->node_props.wave_front_size); | |
476 | sysfs_show_32bit_prop(buffer, offs, "array_count", | |
f38f147a | 477 | dev->gpu ? (dev->node_props.array_count * |
c4050ff1 | 478 | NUM_XCC(dev->gpu->xcc_mask)) : 0); |
83a13ef5 FK |
479 | sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", |
480 | dev->node_props.simd_arrays_per_engine); | |
481 | sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", | |
482 | dev->node_props.cu_per_simd_array); | |
483 | sysfs_show_32bit_prop(buffer, offs, "simd_per_cu", | |
484 | dev->node_props.simd_per_cu); | |
485 | sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu", | |
486 | dev->node_props.max_slots_scratch_cu); | |
9d6fa9c7 GS |
487 | sysfs_show_32bit_prop(buffer, offs, "gfx_target_version", |
488 | dev->node_props.gfx_target_version); | |
83a13ef5 FK |
489 | sysfs_show_32bit_prop(buffer, offs, "vendor_id", |
490 | dev->node_props.vendor_id); | |
491 | sysfs_show_32bit_prop(buffer, offs, "device_id", | |
492 | dev->node_props.device_id); | |
493 | sysfs_show_32bit_prop(buffer, offs, "location_id", | |
494 | dev->node_props.location_id); | |
495 | sysfs_show_32bit_prop(buffer, offs, "domain", | |
496 | dev->node_props.domain); | |
497 | sysfs_show_32bit_prop(buffer, offs, "drm_render_minor", | |
498 | dev->node_props.drm_render_minor); | |
499 | sysfs_show_64bit_prop(buffer, offs, "hive_id", | |
500 | dev->node_props.hive_id); | |
501 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines", | |
502 | dev->node_props.num_sdma_engines); | |
503 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines", | |
504 | dev->node_props.num_sdma_xgmi_engines); | |
505 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine", | |
506 | dev->node_props.num_sdma_queues_per_engine); | |
507 | sysfs_show_32bit_prop(buffer, offs, "num_cp_queues", | |
508 | dev->node_props.num_cp_queues); | |
f7c826ad AS |
509 | |
510 | if (dev->gpu) { | |
511 | log_max_watch_addr = | |
8dc1db31 | 512 | __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); |
f7c826ad AS |
513 | |
514 | if (log_max_watch_addr) { | |
515 | dev->node_props.capability |= | |
516 | HSA_CAP_WATCH_POINTS_SUPPORTED; | |
517 | ||
518 | dev->node_props.capability |= | |
519 | ((log_max_watch_addr << | |
520 | HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & | |
521 | HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); | |
5b5c4e40 EP |
522 | } |
523 | ||
7eb0502a | 524 | if (dev->gpu->adev->asic_type == CHIP_TONGA) |
413e85d5 BG |
525 | dev->node_props.capability |= |
526 | HSA_CAP_AQL_QUEUE_DOUBLE_MAP; | |
527 | ||
83a13ef5 | 528 | sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", |
3a87177e | 529 | dev->node_props.max_engine_clk_fcompute); |
42e08c78 | 530 | |
83a13ef5 | 531 | sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); |
f7c826ad | 532 | |
83a13ef5 | 533 | sysfs_show_32bit_prop(buffer, offs, "fw_version", |
8dc1db31 | 534 | dev->gpu->kfd->mec_fw_version); |
83a13ef5 FK |
535 | sysfs_show_32bit_prop(buffer, offs, "capability", |
536 | dev->node_props.capability); | |
d230f1bf JK |
537 | sysfs_show_64bit_prop(buffer, offs, "debug_prop", |
538 | dev->node_props.debug_prop); | |
83a13ef5 | 539 | sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", |
8dc1db31 | 540 | dev->gpu->kfd->sdma_fw_version); |
11964258 | 541 | sysfs_show_64bit_prop(buffer, offs, "unique_id", |
02274fc0 | 542 | dev->gpu->adev->unique_id); |
74c5b85d | 543 | sysfs_show_32bit_prop(buffer, offs, "num_xcc", |
c4050ff1 | 544 | NUM_XCC(dev->gpu->xcc_mask)); |
5b5c4e40 EP |
545 | } |
546 | ||
83a13ef5 FK |
547 | return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", |
548 | cpufreq_quick_get_max(0)/1000); | |
5b5c4e40 EP |
549 | } |
550 | ||
551 | static const struct sysfs_ops node_ops = { | |
552 | .show = node_show, | |
553 | }; | |
554 | ||
4fa01c63 | 555 | static const struct kobj_type node_type = { |
5108d768 | 556 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
557 | .sysfs_ops = &node_ops, |
558 | }; | |
559 | ||
/*
 * Tear down a single-attribute kobject: remove @attr from @kobj, unlink
 * the kobject from sysfs, then drop a reference to it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);
	kobject_del(kobj);
	kobject_put(kobj);
}
566 | ||
567 | static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) | |
568 | { | |
0f28cca8 | 569 | struct kfd_iolink_properties *p2plink; |
5b5c4e40 EP |
570 | struct kfd_iolink_properties *iolink; |
571 | struct kfd_cache_properties *cache; | |
572 | struct kfd_mem_properties *mem; | |
f4757347 | 573 | struct kfd_perf_properties *perf; |
5b5c4e40 | 574 | |
5b5c4e40 EP |
575 | if (dev->kobj_iolink) { |
576 | list_for_each_entry(iolink, &dev->io_link_props, list) | |
577 | if (iolink->kobj) { | |
578 | kfd_remove_sysfs_file(iolink->kobj, | |
579 | &iolink->attr); | |
16b9201c | 580 | iolink->kobj = NULL; |
5b5c4e40 EP |
581 | } |
582 | kobject_del(dev->kobj_iolink); | |
583 | kobject_put(dev->kobj_iolink); | |
16b9201c | 584 | dev->kobj_iolink = NULL; |
5b5c4e40 EP |
585 | } |
586 | ||
0f28cca8 RE |
587 | if (dev->kobj_p2plink) { |
588 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) | |
589 | if (p2plink->kobj) { | |
590 | kfd_remove_sysfs_file(p2plink->kobj, | |
591 | &p2plink->attr); | |
592 | p2plink->kobj = NULL; | |
593 | } | |
594 | kobject_del(dev->kobj_p2plink); | |
595 | kobject_put(dev->kobj_p2plink); | |
596 | dev->kobj_p2plink = NULL; | |
597 | } | |
598 | ||
5b5c4e40 EP |
599 | if (dev->kobj_cache) { |
600 | list_for_each_entry(cache, &dev->cache_props, list) | |
601 | if (cache->kobj) { | |
602 | kfd_remove_sysfs_file(cache->kobj, | |
603 | &cache->attr); | |
16b9201c | 604 | cache->kobj = NULL; |
5b5c4e40 EP |
605 | } |
606 | kobject_del(dev->kobj_cache); | |
607 | kobject_put(dev->kobj_cache); | |
16b9201c | 608 | dev->kobj_cache = NULL; |
5b5c4e40 EP |
609 | } |
610 | ||
611 | if (dev->kobj_mem) { | |
612 | list_for_each_entry(mem, &dev->mem_props, list) | |
613 | if (mem->kobj) { | |
614 | kfd_remove_sysfs_file(mem->kobj, &mem->attr); | |
16b9201c | 615 | mem->kobj = NULL; |
5b5c4e40 EP |
616 | } |
617 | kobject_del(dev->kobj_mem); | |
618 | kobject_put(dev->kobj_mem); | |
16b9201c | 619 | dev->kobj_mem = NULL; |
5b5c4e40 EP |
620 | } |
621 | ||
f4757347 AL |
622 | if (dev->kobj_perf) { |
623 | list_for_each_entry(perf, &dev->perf_props, list) { | |
624 | kfree(perf->attr_group); | |
625 | perf->attr_group = NULL; | |
626 | } | |
627 | kobject_del(dev->kobj_perf); | |
628 | kobject_put(dev->kobj_perf); | |
629 | dev->kobj_perf = NULL; | |
630 | } | |
631 | ||
5b5c4e40 EP |
632 | if (dev->kobj_node) { |
633 | sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); | |
634 | sysfs_remove_file(dev->kobj_node, &dev->attr_name); | |
635 | sysfs_remove_file(dev->kobj_node, &dev->attr_props); | |
636 | kobject_del(dev->kobj_node); | |
637 | kobject_put(dev->kobj_node); | |
16b9201c | 638 | dev->kobj_node = NULL; |
5b5c4e40 EP |
639 | } |
640 | } | |
641 | ||
642 | static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, | |
643 | uint32_t id) | |
644 | { | |
0f28cca8 | 645 | struct kfd_iolink_properties *p2plink; |
5b5c4e40 EP |
646 | struct kfd_iolink_properties *iolink; |
647 | struct kfd_cache_properties *cache; | |
648 | struct kfd_mem_properties *mem; | |
f4757347 | 649 | struct kfd_perf_properties *perf; |
5b5c4e40 | 650 | int ret; |
f4757347 AL |
651 | uint32_t i, num_attrs; |
652 | struct attribute **attrs; | |
5b5c4e40 | 653 | |
32fa8219 FK |
654 | if (WARN_ON(dev->kobj_node)) |
655 | return -EEXIST; | |
656 | ||
5b5c4e40 EP |
657 | /* |
658 | * Creating the sysfs folders | |
659 | */ | |
5b5c4e40 EP |
660 | dev->kobj_node = kfd_alloc_struct(dev->kobj_node); |
661 | if (!dev->kobj_node) | |
662 | return -ENOMEM; | |
663 | ||
664 | ret = kobject_init_and_add(dev->kobj_node, &node_type, | |
665 | sys_props.kobj_nodes, "%d", id); | |
20eca012 QW |
666 | if (ret < 0) { |
667 | kobject_put(dev->kobj_node); | |
5b5c4e40 | 668 | return ret; |
20eca012 | 669 | } |
5b5c4e40 EP |
670 | |
671 | dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); | |
672 | if (!dev->kobj_mem) | |
673 | return -ENOMEM; | |
674 | ||
675 | dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); | |
676 | if (!dev->kobj_cache) | |
677 | return -ENOMEM; | |
678 | ||
679 | dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); | |
680 | if (!dev->kobj_iolink) | |
681 | return -ENOMEM; | |
682 | ||
0f28cca8 RE |
683 | dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); |
684 | if (!dev->kobj_p2plink) | |
685 | return -ENOMEM; | |
686 | ||
f4757347 AL |
687 | dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); |
688 | if (!dev->kobj_perf) | |
689 | return -ENOMEM; | |
690 | ||
5b5c4e40 EP |
691 | /* |
692 | * Creating sysfs files for node properties | |
693 | */ | |
694 | dev->attr_gpuid.name = "gpu_id"; | |
695 | dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; | |
696 | sysfs_attr_init(&dev->attr_gpuid); | |
697 | dev->attr_name.name = "name"; | |
698 | dev->attr_name.mode = KFD_SYSFS_FILE_MODE; | |
699 | sysfs_attr_init(&dev->attr_name); | |
700 | dev->attr_props.name = "properties"; | |
701 | dev->attr_props.mode = KFD_SYSFS_FILE_MODE; | |
702 | sysfs_attr_init(&dev->attr_props); | |
703 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); | |
704 | if (ret < 0) | |
705 | return ret; | |
706 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); | |
707 | if (ret < 0) | |
708 | return ret; | |
709 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); | |
710 | if (ret < 0) | |
711 | return ret; | |
712 | ||
713 | i = 0; | |
714 | list_for_each_entry(mem, &dev->mem_props, list) { | |
715 | mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
716 | if (!mem->kobj) | |
717 | return -ENOMEM; | |
718 | ret = kobject_init_and_add(mem->kobj, &mem_type, | |
719 | dev->kobj_mem, "%d", i); | |
20eca012 QW |
720 | if (ret < 0) { |
721 | kobject_put(mem->kobj); | |
5b5c4e40 | 722 | return ret; |
20eca012 | 723 | } |
5b5c4e40 EP |
724 | |
725 | mem->attr.name = "properties"; | |
726 | mem->attr.mode = KFD_SYSFS_FILE_MODE; | |
727 | sysfs_attr_init(&mem->attr); | |
728 | ret = sysfs_create_file(mem->kobj, &mem->attr); | |
729 | if (ret < 0) | |
730 | return ret; | |
731 | i++; | |
732 | } | |
733 | ||
734 | i = 0; | |
735 | list_for_each_entry(cache, &dev->cache_props, list) { | |
736 | cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
737 | if (!cache->kobj) | |
738 | return -ENOMEM; | |
739 | ret = kobject_init_and_add(cache->kobj, &cache_type, | |
740 | dev->kobj_cache, "%d", i); | |
20eca012 QW |
741 | if (ret < 0) { |
742 | kobject_put(cache->kobj); | |
5b5c4e40 | 743 | return ret; |
20eca012 | 744 | } |
5b5c4e40 EP |
745 | |
746 | cache->attr.name = "properties"; | |
747 | cache->attr.mode = KFD_SYSFS_FILE_MODE; | |
748 | sysfs_attr_init(&cache->attr); | |
749 | ret = sysfs_create_file(cache->kobj, &cache->attr); | |
750 | if (ret < 0) | |
751 | return ret; | |
752 | i++; | |
753 | } | |
754 | ||
755 | i = 0; | |
756 | list_for_each_entry(iolink, &dev->io_link_props, list) { | |
757 | iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
758 | if (!iolink->kobj) | |
759 | return -ENOMEM; | |
760 | ret = kobject_init_and_add(iolink->kobj, &iolink_type, | |
761 | dev->kobj_iolink, "%d", i); | |
20eca012 QW |
762 | if (ret < 0) { |
763 | kobject_put(iolink->kobj); | |
5b5c4e40 | 764 | return ret; |
20eca012 | 765 | } |
5b5c4e40 EP |
766 | |
767 | iolink->attr.name = "properties"; | |
768 | iolink->attr.mode = KFD_SYSFS_FILE_MODE; | |
769 | sysfs_attr_init(&iolink->attr); | |
770 | ret = sysfs_create_file(iolink->kobj, &iolink->attr); | |
771 | if (ret < 0) | |
772 | return ret; | |
773 | i++; | |
f4757347 AL |
774 | } |
775 | ||
0f28cca8 RE |
776 | i = 0; |
777 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) { | |
778 | p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
779 | if (!p2plink->kobj) | |
780 | return -ENOMEM; | |
781 | ret = kobject_init_and_add(p2plink->kobj, &iolink_type, | |
782 | dev->kobj_p2plink, "%d", i); | |
783 | if (ret < 0) { | |
784 | kobject_put(p2plink->kobj); | |
785 | return ret; | |
786 | } | |
787 | ||
788 | p2plink->attr.name = "properties"; | |
789 | p2plink->attr.mode = KFD_SYSFS_FILE_MODE; | |
1f9d1ff1 | 790 | sysfs_attr_init(&p2plink->attr); |
0f28cca8 RE |
791 | ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); |
792 | if (ret < 0) | |
793 | return ret; | |
794 | i++; | |
795 | } | |
796 | ||
f4757347 | 797 | /* All hardware blocks have the same number of attributes. */ |
3f866f5f | 798 | num_attrs = ARRAY_SIZE(perf_attr_iommu); |
f4757347 AL |
799 | list_for_each_entry(perf, &dev->perf_props, list) { |
800 | perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) | |
801 | * num_attrs + sizeof(struct attribute_group), | |
802 | GFP_KERNEL); | |
803 | if (!perf->attr_group) | |
804 | return -ENOMEM; | |
805 | ||
806 | attrs = (struct attribute **)(perf->attr_group + 1); | |
807 | if (!strcmp(perf->block_name, "iommu")) { | |
808 | /* Information of IOMMU's num_counters and counter_ids is shown | |
809 | * under /sys/bus/event_source/devices/amd_iommu. We don't | |
810 | * duplicate here. | |
811 | */ | |
812 | perf_attr_iommu[0].data = perf->max_concurrent; | |
813 | for (i = 0; i < num_attrs; i++) | |
814 | attrs[i] = &perf_attr_iommu[i].attr.attr; | |
815 | } | |
816 | perf->attr_group->name = perf->block_name; | |
817 | perf->attr_group->attrs = attrs; | |
818 | ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); | |
819 | if (ret < 0) | |
820 | return ret; | |
821 | } | |
5b5c4e40 EP |
822 | |
823 | return 0; | |
824 | } | |
825 | ||
3a87177e | 826 | /* Called with write topology lock acquired */ |
5b5c4e40 EP |
827 | static int kfd_build_sysfs_node_tree(void) |
828 | { | |
829 | struct kfd_topology_device *dev; | |
830 | int ret; | |
831 | uint32_t i = 0; | |
832 | ||
833 | list_for_each_entry(dev, &topology_device_list, list) { | |
8dfead6c | 834 | ret = kfd_build_sysfs_node_entry(dev, i); |
5b5c4e40 EP |
835 | if (ret < 0) |
836 | return ret; | |
837 | i++; | |
838 | } | |
839 | ||
840 | return 0; | |
841 | } | |
842 | ||
3a87177e | 843 | /* Called with write topology lock acquired */ |
5b5c4e40 EP |
844 | static void kfd_remove_sysfs_node_tree(void) |
845 | { | |
846 | struct kfd_topology_device *dev; | |
847 | ||
848 | list_for_each_entry(dev, &topology_device_list, list) | |
849 | kfd_remove_sysfs_node_entry(dev); | |
850 | } | |
851 | ||
852 | static int kfd_topology_update_sysfs(void) | |
853 | { | |
854 | int ret; | |
855 | ||
4eacc26b | 856 | if (!sys_props.kobj_topology) { |
5b5c4e40 EP |
857 | sys_props.kobj_topology = |
858 | kfd_alloc_struct(sys_props.kobj_topology); | |
859 | if (!sys_props.kobj_topology) | |
860 | return -ENOMEM; | |
861 | ||
862 | ret = kobject_init_and_add(sys_props.kobj_topology, | |
863 | &sysprops_type, &kfd_device->kobj, | |
864 | "topology"); | |
20eca012 QW |
865 | if (ret < 0) { |
866 | kobject_put(sys_props.kobj_topology); | |
5b5c4e40 | 867 | return ret; |
20eca012 | 868 | } |
5b5c4e40 EP |
869 | |
870 | sys_props.kobj_nodes = kobject_create_and_add("nodes", | |
871 | sys_props.kobj_topology); | |
872 | if (!sys_props.kobj_nodes) | |
873 | return -ENOMEM; | |
874 | ||
875 | sys_props.attr_genid.name = "generation_id"; | |
876 | sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; | |
877 | sysfs_attr_init(&sys_props.attr_genid); | |
878 | ret = sysfs_create_file(sys_props.kobj_topology, | |
879 | &sys_props.attr_genid); | |
880 | if (ret < 0) | |
881 | return ret; | |
882 | ||
883 | sys_props.attr_props.name = "system_properties"; | |
884 | sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; | |
885 | sysfs_attr_init(&sys_props.attr_props); | |
886 | ret = sysfs_create_file(sys_props.kobj_topology, | |
887 | &sys_props.attr_props); | |
888 | if (ret < 0) | |
889 | return ret; | |
890 | } | |
891 | ||
892 | kfd_remove_sysfs_node_tree(); | |
893 | ||
894 | return kfd_build_sysfs_node_tree(); | |
895 | } | |
896 | ||
897 | static void kfd_topology_release_sysfs(void) | |
898 | { | |
899 | kfd_remove_sysfs_node_tree(); | |
900 | if (sys_props.kobj_topology) { | |
901 | sysfs_remove_file(sys_props.kobj_topology, | |
902 | &sys_props.attr_genid); | |
903 | sysfs_remove_file(sys_props.kobj_topology, | |
904 | &sys_props.attr_props); | |
905 | if (sys_props.kobj_nodes) { | |
906 | kobject_del(sys_props.kobj_nodes); | |
907 | kobject_put(sys_props.kobj_nodes); | |
16b9201c | 908 | sys_props.kobj_nodes = NULL; |
5b5c4e40 EP |
909 | } |
910 | kobject_del(sys_props.kobj_topology); | |
911 | kobject_put(sys_props.kobj_topology); | |
16b9201c | 912 | sys_props.kobj_topology = NULL; |
5b5c4e40 EP |
913 | } |
914 | } | |
915 | ||
4f449311 HK |
916 | /* Called with write topology_lock acquired */ |
917 | static void kfd_topology_update_device_list(struct list_head *temp_list, | |
918 | struct list_head *master_list) | |
919 | { | |
920 | while (!list_empty(temp_list)) { | |
921 | list_move_tail(temp_list->next, master_list); | |
922 | sys_props.num_devices++; | |
923 | } | |
924 | } | |
925 | ||
520b8fb7 FK |
926 | static void kfd_debug_print_topology(void) |
927 | { | |
928 | struct kfd_topology_device *dev; | |
929 | ||
930 | down_read(&topology_lock); | |
931 | ||
932 | dev = list_last_entry(&topology_device_list, | |
933 | struct kfd_topology_device, list); | |
934 | if (dev) { | |
935 | if (dev->node_props.cpu_cores_count && | |
936 | dev->node_props.simd_count) { | |
937 | pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", | |
938 | dev->node_props.device_id, | |
939 | dev->node_props.vendor_id); | |
940 | } else if (dev->node_props.cpu_cores_count) | |
941 | pr_info("Topology: Add CPU node\n"); | |
942 | else if (dev->node_props.simd_count) | |
943 | pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", | |
944 | dev->node_props.device_id, | |
945 | dev->node_props.vendor_id); | |
946 | } | |
947 | up_read(&topology_lock); | |
948 | } | |
949 | ||
950 | /* Helper function for intializing platform_xx members of | |
951 | * kfd_system_properties. Uses OEM info from the last CPU/APU node. | |
952 | */ | |
953 | static void kfd_update_system_properties(void) | |
954 | { | |
955 | struct kfd_topology_device *dev; | |
956 | ||
957 | down_read(&topology_lock); | |
958 | dev = list_last_entry(&topology_device_list, | |
959 | struct kfd_topology_device, list); | |
960 | if (dev) { | |
961 | sys_props.platform_id = | |
962 | (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; | |
963 | sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); | |
964 | sys_props.platform_rev = dev->oem_revision; | |
965 | } | |
966 | up_read(&topology_lock); | |
967 | } | |
968 | ||
969 | static void find_system_memory(const struct dmi_header *dm, | |
970 | void *private) | |
971 | { | |
972 | struct kfd_mem_properties *mem; | |
973 | u16 mem_width, mem_clock; | |
974 | struct kfd_topology_device *kdev = | |
975 | (struct kfd_topology_device *)private; | |
976 | const u8 *dmi_data = (const u8 *)(dm + 1); | |
977 | ||
978 | if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { | |
979 | mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); | |
980 | mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); | |
981 | list_for_each_entry(mem, &kdev->mem_props, list) { | |
982 | if (mem_width != 0xFFFF && mem_width != 0) | |
983 | mem->width = mem_width; | |
984 | if (mem_clock != 0) | |
985 | mem->mem_clk_max = mem_clock; | |
986 | } | |
987 | } | |
988 | } | |
f4757347 | 989 | |
520b8fb7 FK |
990 | /* kfd_add_non_crat_information - Add information that is not currently |
991 | * defined in CRAT but is necessary for KFD topology | |
992 | * @dev - topology device to which addition info is added | |
993 | */ | |
994 | static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) | |
995 | { | |
996 | /* Check if CPU only node. */ | |
997 | if (!kdev->gpu) { | |
998 | /* Add system memory information */ | |
999 | dmi_walk(find_system_memory, kdev); | |
1000 | } | |
1001 | /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ | |
1002 | } | |
1003 | ||
5b5c4e40 EP |
1004 | int kfd_topology_init(void) |
1005 | { | |
16b9201c | 1006 | void *crat_image = NULL; |
5b5c4e40 EP |
1007 | size_t image_size = 0; |
1008 | int ret; | |
4f449311 | 1009 | struct list_head temp_topology_device_list; |
520b8fb7 FK |
1010 | int cpu_only_node = 0; |
1011 | struct kfd_topology_device *kdev; | |
1012 | int proximity_domain; | |
5b5c4e40 | 1013 | |
4f449311 HK |
1014 | /* topology_device_list - Master list of all topology devices |
1015 | * temp_topology_device_list - temporary list created while parsing CRAT | |
1016 | * or VCRAT. Once parsing is complete the contents of list is moved to | |
1017 | * topology_device_list | |
5b5c4e40 | 1018 | */ |
4f449311 HK |
1019 | |
1020 | /* Initialize the head for the both the lists */ | |
5b5c4e40 | 1021 | INIT_LIST_HEAD(&topology_device_list); |
4f449311 | 1022 | INIT_LIST_HEAD(&temp_topology_device_list); |
5b5c4e40 | 1023 | init_rwsem(&topology_lock); |
5b5c4e40 EP |
1024 | |
1025 | memset(&sys_props, 0, sizeof(sys_props)); | |
1026 | ||
520b8fb7 FK |
1027 | /* Proximity domains in ACPI CRAT tables start counting at |
1028 | * 0. The same should be true for virtual CRAT tables created | |
1029 | * at this stage. GPUs added later in kfd_topology_add_device | |
1030 | * use a counter. | |
1031 | */ | |
1032 | proximity_domain = 0; | |
1033 | ||
c99a2e7a AD |
1034 | ret = kfd_create_crat_image_virtual(&crat_image, &image_size, |
1035 | COMPUTE_UNIT_CPU, NULL, | |
1036 | proximity_domain); | |
1037 | cpu_only_node = 1; | |
1038 | if (ret) { | |
1039 | pr_err("Error creating VCRAT table for CPU\n"); | |
1040 | return ret; | |
520b8fb7 FK |
1041 | } |
1042 | ||
c99a2e7a AD |
1043 | ret = kfd_parse_crat_table(crat_image, |
1044 | &temp_topology_device_list, | |
1045 | proximity_domain); | |
1046 | if (ret) { | |
1047 | pr_err("Error parsing VCRAT table for CPU\n"); | |
1048 | goto err; | |
5b5c4e40 EP |
1049 | } |
1050 | ||
f4757347 AL |
1051 | kdev = list_first_entry(&temp_topology_device_list, |
1052 | struct kfd_topology_device, list); | |
f4757347 | 1053 | |
8e05247d | 1054 | down_write(&topology_lock); |
4f449311 HK |
1055 | kfd_topology_update_device_list(&temp_topology_device_list, |
1056 | &topology_device_list); | |
46d18d51 | 1057 | topology_crat_proximity_domain = sys_props.num_devices-1; |
8e05247d HK |
1058 | ret = kfd_topology_update_sysfs(); |
1059 | up_write(&topology_lock); | |
1060 | ||
4f449311 HK |
1061 | if (!ret) { |
1062 | sys_props.generation_count++; | |
520b8fb7 FK |
1063 | kfd_update_system_properties(); |
1064 | kfd_debug_print_topology(); | |
4f449311 | 1065 | } else |
8e05247d HK |
1066 | pr_err("Failed to update topology in sysfs ret=%d\n", ret); |
1067 | ||
520b8fb7 FK |
1068 | /* For nodes with GPU, this information gets added |
1069 | * when GPU is detected (kfd_topology_add_device). | |
1070 | */ | |
1071 | if (cpu_only_node) { | |
1072 | /* Add additional information to CPU only node created above */ | |
1073 | down_write(&topology_lock); | |
1074 | kdev = list_first_entry(&topology_device_list, | |
1075 | struct kfd_topology_device, list); | |
1076 | up_write(&topology_lock); | |
1077 | kfd_add_non_crat_information(kdev); | |
1078 | } | |
1079 | ||
5b5c4e40 | 1080 | err: |
8e05247d | 1081 | kfd_destroy_crat_image(crat_image); |
5b5c4e40 EP |
1082 | return ret; |
1083 | } | |
1084 | ||
1085 | void kfd_topology_shutdown(void) | |
1086 | { | |
4f449311 | 1087 | down_write(&topology_lock); |
5b5c4e40 EP |
1088 | kfd_topology_release_sysfs(); |
1089 | kfd_release_live_view(); | |
4f449311 | 1090 | up_write(&topology_lock); |
5b5c4e40 EP |
1091 | } |
1092 | ||
8dc1db31 | 1093 | static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) |
5b5c4e40 EP |
1094 | { |
1095 | uint32_t hashout; | |
74c5b85d | 1096 | uint32_t buf[8]; |
585f0e6c | 1097 | uint64_t local_mem_size; |
5b5c4e40 EP |
1098 | int i; |
1099 | ||
1100 | if (!gpu) | |
1101 | return 0; | |
1102 | ||
315e29ec MJ |
1103 | local_mem_size = gpu->local_mem_info.local_mem_size_private + |
1104 | gpu->local_mem_info.local_mem_size_public; | |
d69a3b76 MJ |
1105 | buf[0] = gpu->adev->pdev->devfn; |
1106 | buf[1] = gpu->adev->pdev->subsystem_vendor | | |
1107 | (gpu->adev->pdev->subsystem_device << 16); | |
1108 | buf[2] = pci_domain_nr(gpu->adev->pdev->bus); | |
1109 | buf[3] = gpu->adev->pdev->device; | |
1110 | buf[4] = gpu->adev->pdev->bus->number; | |
585f0e6c EC |
1111 | buf[5] = lower_32_bits(local_mem_size); |
1112 | buf[6] = upper_32_bits(local_mem_size); | |
c4050ff1 | 1113 | buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); |
5b5c4e40 | 1114 | |
74c5b85d | 1115 | for (i = 0, hashout = 0; i < 8; i++) |
5b5c4e40 EP |
1116 | hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); |
1117 | ||
1118 | return hashout; | |
1119 | } | |
3a87177e HK |
1120 | /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If |
1121 | * the GPU device is not already present in the topology device | |
1122 | * list then return NULL. This means a new topology device has to | |
1123 | * be created for this GPU. | |
3a87177e | 1124 | */ |
8dc1db31 | 1125 | static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) |
5b5c4e40 EP |
1126 | { |
1127 | struct kfd_topology_device *dev; | |
16b9201c | 1128 | struct kfd_topology_device *out_dev = NULL; |
171bc67e HK |
1129 | struct kfd_mem_properties *mem; |
1130 | struct kfd_cache_properties *cache; | |
1131 | struct kfd_iolink_properties *iolink; | |
0f28cca8 | 1132 | struct kfd_iolink_properties *p2plink; |
5b5c4e40 | 1133 | |
b8fe0524 FK |
1134 | list_for_each_entry(dev, &topology_device_list, list) { |
1135 | /* Discrete GPUs need their own topology device list | |
1136 | * entries. Don't assign them to CPU/APU nodes. | |
1137 | */ | |
c99a2e7a | 1138 | if (dev->node_props.cpu_cores_count) |
b8fe0524 FK |
1139 | continue; |
1140 | ||
4eacc26b | 1141 | if (!dev->gpu && (dev->node_props.simd_count > 0)) { |
5b5c4e40 EP |
1142 | dev->gpu = gpu; |
1143 | out_dev = dev; | |
171bc67e HK |
1144 | |
1145 | list_for_each_entry(mem, &dev->mem_props, list) | |
1146 | mem->gpu = dev->gpu; | |
1147 | list_for_each_entry(cache, &dev->cache_props, list) | |
1148 | cache->gpu = dev->gpu; | |
1149 | list_for_each_entry(iolink, &dev->io_link_props, list) | |
1150 | iolink->gpu = dev->gpu; | |
0f28cca8 RE |
1151 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) |
1152 | p2plink->gpu = dev->gpu; | |
5b5c4e40 EP |
1153 | break; |
1154 | } | |
b8fe0524 | 1155 | } |
5b5c4e40 EP |
1156 | return out_dev; |
1157 | } | |
1158 | ||
/*
 * Placeholder: currently does nothing with @gpu_id and @arrival.
 */
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/*
	 * TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
1166 | ||
3a87177e HK |
1167 | /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, |
1168 | * patch this after CRAT parsing. | |
1169 | */ | |
1170 | static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) | |
1171 | { | |
1172 | struct kfd_mem_properties *mem; | |
1173 | struct kfd_local_mem_info local_mem_info; | |
1174 | ||
1175 | if (!dev) | |
1176 | return; | |
1177 | ||
1178 | /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with | |
1179 | * single bank of VRAM local memory. | |
1180 | * for dGPUs - VCRAT reports only one bank of Local Memory | |
1181 | * for APUs - If CRAT from ACPI reports more than one bank, then | |
1182 | * all the banks will report the same mem_clk_max information | |
1183 | */ | |
315e29ec | 1184 | amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, |
9a3ce1a7 | 1185 | dev->gpu->xcp); |
3a87177e HK |
1186 | |
1187 | list_for_each_entry(mem, &dev->mem_props, list) | |
1188 | mem->mem_clk_max = local_mem_info.mem_clk_max; | |
1189 | } | |
1190 | ||
bdd24657 JK |
1191 | static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, |
1192 | struct kfd_topology_device *target_gpu_dev, | |
1193 | struct kfd_iolink_properties *link) | |
3a87177e | 1194 | { |
bdd24657 JK |
1195 | /* xgmi always supports atomics between links. */ |
1196 | if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) | |
3a87177e HK |
1197 | return; |
1198 | ||
bdd24657 JK |
1199 | /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */ |
1200 | if (target_gpu_dev) { | |
1201 | uint32_t cap; | |
1202 | ||
d69a3b76 | 1203 | pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, |
deb68983 | 1204 | PCI_EXP_DEVCAP2, &cap); |
d35f00d8 | 1205 | |
deb68983 JK |
1206 | if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | |
1207 | PCI_EXP_DEVCAP2_ATOMIC_COMP64))) | |
bdd24657 | 1208 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
deb68983 | 1209 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
bdd24657 JK |
1210 | /* set gpu (dev) flags. */ |
1211 | } else { | |
8dc1db31 | 1212 | if (!dev->gpu->kfd->pci_atomic_requested || |
7eb0502a | 1213 | dev->gpu->adev->asic_type == CHIP_HAWAII) |
bdd24657 | 1214 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
deb68983 JK |
1215 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1216 | } | |
bdd24657 JK |
1217 | } |
1218 | ||
c9cfbf7f EH |
1219 | static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, |
1220 | struct kfd_iolink_properties *outbound_link, | |
1221 | struct kfd_iolink_properties *inbound_link) | |
1222 | { | |
1223 | /* CPU -> GPU with PCIe */ | |
1224 | if (!to_dev->gpu && | |
1225 | inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) | |
1226 | inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; | |
1227 | ||
1228 | if (to_dev->gpu) { | |
1229 | /* GPU <-> GPU with PCIe and | |
1230 | * Vega20 with XGMI | |
1231 | */ | |
1232 | if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || | |
1233 | (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && | |
046e674b | 1234 | KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { |
c9cfbf7f EH |
1235 | outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; |
1236 | inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; | |
1237 | } | |
1238 | } | |
1239 | } | |
1240 | ||
bdd24657 JK |
1241 | static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) |
1242 | { | |
1243 | struct kfd_iolink_properties *link, *inbound_link; | |
1244 | struct kfd_topology_device *peer_dev; | |
1245 | ||
1246 | if (!dev || !dev->gpu) | |
1247 | return; | |
d35f00d8 EH |
1248 | |
1249 | /* GPU only creates direct links so apply flags setting to all */ | |
1250 | list_for_each_entry(link, &dev->io_link_props, list) { | |
bdd24657 JK |
1251 | link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1252 | kfd_set_iolink_no_atomics(dev, NULL, link); | |
1253 | peer_dev = kfd_topology_device_by_proximity_domain( | |
d35f00d8 | 1254 | link->node_to); |
bdd24657 JK |
1255 | |
1256 | if (!peer_dev) | |
1257 | continue; | |
1258 | ||
a0af5dbd | 1259 | /* Include the CPU peer in GPU hive if connected over xGMI. */ |
b2ef2fdf | 1260 | if (!peer_dev->gpu && |
1698e200 JK |
1261 | link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { |
1262 | /* | |
1263 | * If the GPU is not part of a GPU hive, use its pci | |
1264 | * device location as the hive ID to bind with the CPU. | |
1265 | */ | |
1266 | if (!dev->node_props.hive_id) | |
1267 | dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); | |
a0af5dbd | 1268 | peer_dev->node_props.hive_id = dev->node_props.hive_id; |
1698e200 | 1269 | } |
a0af5dbd | 1270 | |
bdd24657 JK |
1271 | list_for_each_entry(inbound_link, &peer_dev->io_link_props, |
1272 | list) { | |
1273 | if (inbound_link->node_to != link->node_from) | |
1274 | continue; | |
1275 | ||
1276 | inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; | |
1277 | kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); | |
c9cfbf7f | 1278 | kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); |
d35f00d8 EH |
1279 | } |
1280 | } | |
0f28cca8 RE |
1281 | |
1282 | /* Create indirect links so apply flags setting to all */ | |
1283 | list_for_each_entry(link, &dev->p2p_link_props, list) { | |
1284 | link->flags = CRAT_IOLINK_FLAGS_ENABLED; | |
1285 | kfd_set_iolink_no_atomics(dev, NULL, link); | |
1286 | peer_dev = kfd_topology_device_by_proximity_domain( | |
1287 | link->node_to); | |
1288 | ||
1289 | if (!peer_dev) | |
1290 | continue; | |
1291 | ||
1292 | list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, | |
1293 | list) { | |
1294 | if (inbound_link->node_to != link->node_from) | |
1295 | continue; | |
1296 | ||
1297 | inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; | |
1298 | kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); | |
1299 | kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); | |
1300 | } | |
1301 | } | |
1302 | } | |
1303 | ||
1304 | static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, | |
1305 | struct kfd_iolink_properties *p2plink) | |
1306 | { | |
1307 | int ret; | |
1308 | ||
1309 | p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
1310 | if (!p2plink->kobj) | |
1311 | return -ENOMEM; | |
1312 | ||
1313 | ret = kobject_init_and_add(p2plink->kobj, &iolink_type, | |
1314 | dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); | |
1315 | if (ret < 0) { | |
1316 | kobject_put(p2plink->kobj); | |
1317 | return ret; | |
1318 | } | |
1319 | ||
1320 | p2plink->attr.name = "properties"; | |
1321 | p2plink->attr.mode = KFD_SYSFS_FILE_MODE; | |
1322 | sysfs_attr_init(&p2plink->attr); | |
1323 | ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); | |
1324 | if (ret < 0) | |
1325 | return ret; | |
1326 | ||
1327 | return 0; | |
1328 | } | |
1329 | ||
1330 | static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) | |
1331 | { | |
7d50b92d | 1332 | struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; |
0f28cca8 | 1333 | struct kfd_iolink_properties *props = NULL, *props2 = NULL; |
0f28cca8 RE |
1334 | struct kfd_topology_device *cpu_dev; |
1335 | int ret = 0; | |
1336 | int i, num_cpu; | |
1337 | ||
1338 | num_cpu = 0; | |
1339 | list_for_each_entry(cpu_dev, &topology_device_list, list) { | |
1340 | if (cpu_dev->gpu) | |
1341 | break; | |
1342 | num_cpu++; | |
1343 | } | |
1344 | ||
1345 | gpu_link = list_first_entry(&kdev->io_link_props, | |
1346 | struct kfd_iolink_properties, list); | |
1347 | if (!gpu_link) | |
1348 | return -ENOMEM; | |
1349 | ||
1350 | for (i = 0; i < num_cpu; i++) { | |
1351 | /* CPU <--> GPU */ | |
1352 | if (gpu_link->node_to == i) | |
1353 | continue; | |
1354 | ||
1355 | /* find CPU <--> CPU links */ | |
7d50b92d | 1356 | cpu_link = NULL; |
0f28cca8 RE |
1357 | cpu_dev = kfd_topology_device_by_proximity_domain(i); |
1358 | if (cpu_dev) { | |
7d50b92d | 1359 | list_for_each_entry(tmp_link, |
0f28cca8 | 1360 | &cpu_dev->io_link_props, list) { |
7d50b92d DC |
1361 | if (tmp_link->node_to == gpu_link->node_to) { |
1362 | cpu_link = tmp_link; | |
0f28cca8 | 1363 | break; |
7d50b92d | 1364 | } |
0f28cca8 RE |
1365 | } |
1366 | } | |
1367 | ||
7d50b92d | 1368 | if (!cpu_link) |
0f28cca8 RE |
1369 | return -ENOMEM; |
1370 | ||
1371 | /* CPU <--> CPU <--> GPU, GPU node*/ | |
1372 | props = kfd_alloc_struct(props); | |
1373 | if (!props) | |
1374 | return -ENOMEM; | |
1375 | ||
1376 | memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties)); | |
1377 | props->weight = gpu_link->weight + cpu_link->weight; | |
1378 | props->min_latency = gpu_link->min_latency + cpu_link->min_latency; | |
1379 | props->max_latency = gpu_link->max_latency + cpu_link->max_latency; | |
1380 | props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); | |
1381 | props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); | |
1382 | ||
1383 | props->node_from = gpu_node; | |
1384 | props->node_to = i; | |
1385 | kdev->node_props.p2p_links_count++; | |
1386 | list_add_tail(&props->list, &kdev->p2p_link_props); | |
1387 | ret = kfd_build_p2p_node_entry(kdev, props); | |
1388 | if (ret < 0) | |
1389 | return ret; | |
1390 | ||
1391 | /* for small Bar, no CPU --> GPU in-direct links */ | |
1392 | if (kfd_dev_is_large_bar(kdev->gpu)) { | |
1393 | /* CPU <--> CPU <--> GPU, CPU node*/ | |
1394 | props2 = kfd_alloc_struct(props2); | |
1395 | if (!props2) | |
1396 | return -ENOMEM; | |
1397 | ||
1398 | memcpy(props2, props, sizeof(struct kfd_iolink_properties)); | |
1399 | props2->node_from = i; | |
1400 | props2->node_to = gpu_node; | |
1401 | props2->kobj = NULL; | |
1402 | cpu_dev->node_props.p2p_links_count++; | |
1403 | list_add_tail(&props2->list, &cpu_dev->p2p_link_props); | |
1404 | ret = kfd_build_p2p_node_entry(cpu_dev, props2); | |
1405 | if (ret < 0) | |
1406 | return ret; | |
1407 | } | |
1408 | } | |
1409 | return ret; | |
1410 | } | |
1411 | ||
#if defined(CONFIG_HSA_AMD_P2P)
/*
 * Add an indirect GPU<->GPU link, described as going from node @from to node
 * @to, to @peer's p2p link list and create its sysfs entry.
 *
 * Nothing is added (and 0 is returned) when @kdev's GPU cannot access
 * @peer's GPU. The new link combines the first io-link of each of the two
 * devices: weights and latencies add up, bandwidths take the minimum. When
 * the two io-links lead to different nodes, the link between those two nodes
 * is folded in as well, if one is found.
 *
 * Return: 0 on success or when the peer is not accessible; -ENOMEM when
 * either device has no io-link or the allocation fails; otherwise the error
 * from kfd_build_p2p_node_entry().
 */
static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
		struct kfd_topology_device *peer, int from, int to)
{
	struct kfd_iolink_properties *props = NULL;
	struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
	struct kfd_topology_device *cpu_dev;
	int ret = 0;

	if (!amdgpu_device_is_peer_accessible(
				kdev->gpu->adev,
				peer->gpu->adev))
		return ret;

	/*
	 * list_first_entry() never returns NULL; use the _or_null variant so
	 * that the empty-list checks below actually work.
	 */
	iolink1 = list_first_entry_or_null(&kdev->io_link_props,
					struct kfd_iolink_properties, list);
	if (!iolink1)
		return -ENOMEM;

	iolink2 = list_first_entry_or_null(&peer->io_link_props,
					struct kfd_iolink_properties, list);
	if (!iolink2)
		return -ENOMEM;

	props = kfd_alloc_struct(props);
	if (!props)
		return -ENOMEM;

	memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));

	props->weight = iolink1->weight + iolink2->weight;
	props->min_latency = iolink1->min_latency + iolink2->min_latency;
	props->max_latency = iolink1->max_latency + iolink2->max_latency;
	props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
	/* Both hops limit the bandwidth, so compare iolink1 with iolink2. */
	props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);

	if (iolink1->node_to != iolink2->node_to) {
		/* CPU->CPU link*/
		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
		if (cpu_dev) {
			/*
			 * Accumulate inside the loop: once the iteration runs
			 * off the end without a match, the cursor no longer
			 * points at a real link and must not be dereferenced.
			 */
			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
				if (iolink3->node_to != iolink2->node_to)
					continue;

				props->weight += iolink3->weight;
				props->min_latency += iolink3->min_latency;
				props->max_latency += iolink3->max_latency;
				props->min_bandwidth = min(props->min_bandwidth,
							   iolink3->min_bandwidth);
				props->max_bandwidth = min(props->max_bandwidth,
							   iolink3->max_bandwidth);
				break;
			}
		} else {
			WARN(1, "CPU node not found");
		}
	}

	props->node_from = from;
	props->node_to = to;
	/* The sysfs entry is named after the updated link count. */
	peer->node_props.p2p_links_count++;
	list_add_tail(&props->list, &peer->p2p_link_props);
	ret = kfd_build_p2p_node_entry(peer, props);

	return ret;
}
#endif
1477 | ||
1478 | static int kfd_dev_create_p2p_links(void) | |
1479 | { | |
1480 | struct kfd_topology_device *dev; | |
1481 | struct kfd_topology_device *new_dev; | |
914da384 AD |
1482 | #if defined(CONFIG_HSA_AMD_P2P) |
1483 | uint32_t i; | |
1484 | #endif | |
1485 | uint32_t k; | |
0f28cca8 RE |
1486 | int ret = 0; |
1487 | ||
1488 | k = 0; | |
1489 | list_for_each_entry(dev, &topology_device_list, list) | |
1490 | k++; | |
1491 | if (k < 2) | |
1492 | return 0; | |
1493 | ||
1494 | new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); | |
1495 | if (WARN_ON(!new_dev->gpu)) | |
1496 | return 0; | |
1497 | ||
1498 | k--; | |
0f28cca8 RE |
1499 | |
1500 | /* create in-direct links */ | |
1501 | ret = kfd_create_indirect_link_prop(new_dev, k); | |
1502 | if (ret < 0) | |
1503 | goto out; | |
1504 | ||
1505 | /* create p2p links */ | |
1506 | #if defined(CONFIG_HSA_AMD_P2P) | |
914da384 | 1507 | i = 0; |
0f28cca8 RE |
1508 | list_for_each_entry(dev, &topology_device_list, list) { |
1509 | if (dev == new_dev) | |
1510 | break; | |
1511 | if (!dev->gpu || !dev->gpu->adev || | |
8dc1db31 MJ |
1512 | (dev->gpu->kfd->hive_id && |
1513 | dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) | |
0f28cca8 RE |
1514 | goto next; |
1515 | ||
1516 | /* check if node(s) is/are peer accessible in one direction or bi-direction */ | |
1517 | ret = kfd_add_peer_prop(new_dev, dev, i, k); | |
1518 | if (ret < 0) | |
1519 | goto out; | |
1520 | ||
1521 | ret = kfd_add_peer_prop(dev, new_dev, k, i); | |
1522 | if (ret < 0) | |
1523 | goto out; | |
1524 | next: | |
1525 | i++; | |
1526 | } | |
1527 | #endif | |
1528 | ||
1529 | out: | |
1530 | return ret; | |
3a87177e HK |
1531 | } |
1532 | ||
c0cc999f MJ |
1533 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ |
1534 | static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, | |
1535 | struct kfd_gpu_cache_info *pcache_info, | |
1536 | struct kfd_cu_info *cu_info, | |
1537 | int cu_bitmask, | |
1538 | int cache_type, unsigned int cu_processor_id, | |
1539 | int cu_block) | |
1540 | { | |
1541 | unsigned int cu_sibling_map_mask; | |
1542 | int first_active_cu; | |
1543 | struct kfd_cache_properties *pcache = NULL; | |
1544 | ||
1545 | cu_sibling_map_mask = cu_bitmask; | |
1546 | cu_sibling_map_mask >>= cu_block; | |
1547 | cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); | |
1548 | first_active_cu = ffs(cu_sibling_map_mask); | |
1549 | ||
1550 | /* CU could be inactive. In case of shared cache find the first active | |
1551 | * CU. and incase of non-shared cache check if the CU is inactive. If | |
1552 | * inactive active skip it | |
1553 | */ | |
1554 | if (first_active_cu) { | |
1555 | pcache = kfd_alloc_struct(pcache); | |
1556 | if (!pcache) | |
1557 | return -ENOMEM; | |
1558 | ||
1559 | memset(pcache, 0, sizeof(struct kfd_cache_properties)); | |
1560 | pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); | |
1561 | pcache->cache_level = pcache_info[cache_type].cache_level; | |
1562 | pcache->cache_size = pcache_info[cache_type].cache_size; | |
1563 | ||
1564 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) | |
1565 | pcache->cache_type |= HSA_CACHE_TYPE_DATA; | |
1566 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) | |
1567 | pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; | |
1568 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) | |
1569 | pcache->cache_type |= HSA_CACHE_TYPE_CPU; | |
1570 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) | |
1571 | pcache->cache_type |= HSA_CACHE_TYPE_HSACU; | |
1572 | ||
1573 | /* Sibling map is w.r.t processor_id_low, so shift out | |
1574 | * inactive CU | |
1575 | */ | |
1576 | cu_sibling_map_mask = | |
1577 | cu_sibling_map_mask >> (first_active_cu - 1); | |
1578 | ||
1579 | pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); | |
1580 | pcache->sibling_map[1] = | |
1581 | (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); | |
1582 | pcache->sibling_map[2] = | |
1583 | (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); | |
1584 | pcache->sibling_map[3] = | |
1585 | (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); | |
1586 | ||
1587 | pcache->sibling_map_size = 4; | |
1588 | *props_ext = pcache; | |
1589 | ||
1590 | return 0; | |
1591 | } | |
1592 | return 1; | |
1593 | } | |
1594 | ||
1595 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ | |
1596 | static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, | |
1597 | struct kfd_gpu_cache_info *pcache_info, | |
1598 | struct kfd_cu_info *cu_info, | |
0752e66e MJ |
1599 | int cache_type, unsigned int cu_processor_id, |
1600 | struct kfd_node *knode) | |
c0cc999f MJ |
1601 | { |
1602 | unsigned int cu_sibling_map_mask; | |
1603 | int first_active_cu; | |
0752e66e | 1604 | int i, j, k, xcc, start, end; |
c0cc999f MJ |
1605 | struct kfd_cache_properties *pcache = NULL; |
1606 | ||
0752e66e MJ |
1607 | start = ffs(knode->xcc_mask) - 1; |
1608 | end = start + NUM_XCC(knode->xcc_mask); | |
1609 | cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; | |
c0cc999f MJ |
1610 | cu_sibling_map_mask &= |
1611 | ((1 << pcache_info[cache_type].num_cu_shared) - 1); | |
1612 | first_active_cu = ffs(cu_sibling_map_mask); | |
1613 | ||
1614 | /* CU could be inactive. In case of shared cache find the first active | |
1615 | * CU. and incase of non-shared cache check if the CU is inactive. If | |
1616 | * inactive active skip it | |
1617 | */ | |
1618 | if (first_active_cu) { | |
1619 | pcache = kfd_alloc_struct(pcache); | |
1620 | if (!pcache) | |
1621 | return -ENOMEM; | |
1622 | ||
1623 | memset(pcache, 0, sizeof(struct kfd_cache_properties)); | |
1624 | pcache->processor_id_low = cu_processor_id | |
1625 | + (first_active_cu - 1); | |
1626 | pcache->cache_level = pcache_info[cache_type].cache_level; | |
1627 | pcache->cache_size = pcache_info[cache_type].cache_size; | |
1628 | ||
1629 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) | |
1630 | pcache->cache_type |= HSA_CACHE_TYPE_DATA; | |
1631 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) | |
1632 | pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; | |
1633 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) | |
1634 | pcache->cache_type |= HSA_CACHE_TYPE_CPU; | |
1635 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) | |
1636 | pcache->cache_type |= HSA_CACHE_TYPE_HSACU; | |
1637 | ||
1638 | /* Sibling map is w.r.t processor_id_low, so shift out | |
1639 | * inactive CU | |
1640 | */ | |
1641 | cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); | |
1642 | k = 0; | |
1643 | ||
0752e66e MJ |
1644 | for (xcc = start; xcc < end; xcc++) { |
1645 | for (i = 0; i < cu_info->num_shader_engines; i++) { | |
1646 | for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { | |
1647 | pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); | |
1648 | pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); | |
1649 | pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); | |
1650 | pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); | |
1651 | k += 4; | |
1652 | ||
1653 | cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4]; | |
1654 | cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); | |
1655 | } | |
c0cc999f MJ |
1656 | } |
1657 | } | |
1658 | pcache->sibling_map_size = k; | |
1659 | *props_ext = pcache; | |
1660 | return 0; | |
1661 | } | |
1662 | return 1; | |
1663 | } | |
1664 | ||
1665 | #define KFD_MAX_CACHE_TYPES 6 | |
1666 | ||
1667 | /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info | |
1668 | * tables | |
1669 | */ | |
8dc1db31 | 1670 | static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) |
c0cc999f MJ |
1671 | { |
1672 | struct kfd_gpu_cache_info *pcache_info = NULL; | |
0752e66e | 1673 | int i, j, k, xcc, start, end; |
c0cc999f MJ |
1674 | int ct = 0; |
1675 | unsigned int cu_processor_id; | |
1676 | int ret; | |
1677 | unsigned int num_cu_shared; | |
1678 | struct kfd_cu_info cu_info; | |
1679 | struct kfd_cu_info *pcu_info; | |
1680 | int gpu_processor_id; | |
1681 | struct kfd_cache_properties *props_ext; | |
1682 | int num_of_entries = 0; | |
1683 | int num_of_cache_types = 0; | |
1684 | struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; | |
1685 | ||
1686 | amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); | |
1687 | pcu_info = &cu_info; | |
1688 | ||
1689 | gpu_processor_id = dev->node_props.simd_id_base; | |
1690 | ||
1691 | pcache_info = cache_info; | |
1692 | num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); | |
1693 | if (!num_of_cache_types) { | |
1694 | pr_warn("no cache info found\n"); | |
1695 | return; | |
1696 | } | |
1697 | ||
1698 | /* For each type of cache listed in the kfd_gpu_cache_info table, | |
1699 | * go through all available Compute Units. | |
1700 | * The [i,j,k] loop will | |
1701 | * if kfd_gpu_cache_info.num_cu_shared = 1 | |
1702 | * will parse through all available CU | |
1703 | * If (kfd_gpu_cache_info.num_cu_shared != 1) | |
1704 | * then it will consider only one CU from | |
1705 | * the shared unit | |
1706 | */ | |
0752e66e MJ |
1707 | start = ffs(kdev->xcc_mask) - 1; |
1708 | end = start + NUM_XCC(kdev->xcc_mask); | |
1709 | ||
c0cc999f MJ |
1710 | for (ct = 0; ct < num_of_cache_types; ct++) { |
1711 | cu_processor_id = gpu_processor_id; | |
1712 | if (pcache_info[ct].cache_level == 1) { | |
0752e66e MJ |
1713 | for (xcc = start; xcc < end; xcc++) { |
1714 | for (i = 0; i < pcu_info->num_shader_engines; i++) { | |
1715 | for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { | |
1716 | for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { | |
1717 | ||
1718 | ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, | |
1719 | pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, | |
1720 | cu_processor_id, k); | |
1721 | ||
1722 | if (ret < 0) | |
1723 | break; | |
1724 | ||
1725 | if (!ret) { | |
1726 | num_of_entries++; | |
1727 | list_add_tail(&props_ext->list, &dev->cache_props); | |
1728 | } | |
1729 | ||
1730 | /* Move to next CU block */ | |
1731 | num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= | |
1732 | pcu_info->num_cu_per_sh) ? | |
1733 | pcache_info[ct].num_cu_shared : | |
1734 | (pcu_info->num_cu_per_sh - k); | |
1735 | cu_processor_id += num_cu_shared; | |
c0cc999f | 1736 | } |
c0cc999f MJ |
1737 | } |
1738 | } | |
1739 | } | |
1740 | } else { | |
1741 | ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, | |
0752e66e | 1742 | pcu_info, ct, cu_processor_id, kdev); |
c0cc999f MJ |
1743 | |
1744 | if (ret < 0) | |
1745 | break; | |
1746 | ||
1747 | if (!ret) { | |
1748 | num_of_entries++; | |
1749 | list_add_tail(&props_ext->list, &dev->cache_props); | |
1750 | } | |
1751 | } | |
1752 | } | |
1753 | dev->node_props.caches_count += num_of_entries; | |
1754 | pr_debug("Added [%d] GPU cache entries\n", num_of_entries); | |
1755 | } | |
1756 | ||
8dc1db31 | 1757 | static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, |
f701acb6 FK |
1758 | struct kfd_topology_device **dev) |
1759 | { | |
1760 | int proximity_domain = ++topology_crat_proximity_domain; | |
1761 | struct list_head temp_topology_device_list; | |
1762 | void *crat_image = NULL; | |
1763 | size_t image_size = 0; | |
1764 | int res; | |
1765 | ||
1766 | res = kfd_create_crat_image_virtual(&crat_image, &image_size, | |
1767 | COMPUTE_UNIT_GPU, gpu, | |
1768 | proximity_domain); | |
1769 | if (res) { | |
1770 | pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", | |
1771 | gpu_id); | |
1772 | topology_crat_proximity_domain--; | |
1773 | goto err; | |
1774 | } | |
1775 | ||
1776 | INIT_LIST_HEAD(&temp_topology_device_list); | |
1777 | ||
1778 | res = kfd_parse_crat_table(crat_image, | |
1779 | &temp_topology_device_list, | |
1780 | proximity_domain); | |
1781 | if (res) { | |
1782 | pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", | |
1783 | gpu_id); | |
1784 | topology_crat_proximity_domain--; | |
1785 | goto err; | |
1786 | } | |
1787 | ||
1788 | kfd_topology_update_device_list(&temp_topology_device_list, | |
1789 | &topology_device_list); | |
1790 | ||
1791 | *dev = kfd_assign_gpu(gpu); | |
1792 | if (WARN_ON(!*dev)) { | |
1793 | res = -ENODEV; | |
1794 | goto err; | |
1795 | } | |
1796 | ||
1797 | /* Fill the cache affinity information here for the GPUs | |
1798 | * using VCRAT | |
1799 | */ | |
1800 | kfd_fill_cache_non_crat_info(*dev, gpu); | |
1801 | ||
1802 | /* Update the SYSFS tree, since we added another topology | |
1803 | * device | |
1804 | */ | |
1805 | res = kfd_topology_update_sysfs(); | |
1806 | if (!res) | |
1807 | sys_props.generation_count++; | |
1808 | else | |
1809 | pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", | |
1810 | gpu_id, res); | |
1811 | ||
1812 | err: | |
1813 | kfd_destroy_crat_image(crat_image); | |
1814 | return res; | |
1815 | } | |
1816 | ||
d230f1bf JK |
1817 | static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev) |
1818 | { | |
1819 | bool firmware_supported = true; | |
1820 | ||
1821 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && | |
1822 | KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { | |
09d49e14 JK |
1823 | uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & |
1824 | AMDGPU_MES_API_VERSION_MASK) >> | |
1825 | AMDGPU_MES_API_VERSION_SHIFT; | |
1826 | uint32_t mes_rev = dev->gpu->adev->mes.sched_version & | |
1827 | AMDGPU_MES_VERSION_MASK; | |
1828 | ||
1829 | firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64); | |
d230f1bf JK |
1830 | goto out; |
1831 | } | |
1832 | ||
1833 | /* | |
1834 | * Note: Any unlisted devices here are assumed to support exception handling. | |
1835 | * Add additional checks here as needed. | |
1836 | */ | |
1837 | switch (KFD_GC_VERSION(dev->gpu)) { | |
1838 | case IP_VERSION(9, 0, 1): | |
1839 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; | |
1840 | break; | |
1841 | case IP_VERSION(9, 1, 0): | |
1842 | case IP_VERSION(9, 2, 1): | |
1843 | case IP_VERSION(9, 2, 2): | |
1844 | case IP_VERSION(9, 3, 0): | |
1845 | case IP_VERSION(9, 4, 0): | |
1846 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; | |
1847 | break; | |
1848 | case IP_VERSION(9, 4, 1): | |
1849 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; | |
1850 | break; | |
1851 | case IP_VERSION(9, 4, 2): | |
1852 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; | |
1853 | break; | |
1854 | case IP_VERSION(10, 1, 10): | |
1855 | case IP_VERSION(10, 1, 2): | |
1856 | case IP_VERSION(10, 1, 1): | |
1857 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; | |
1858 | break; | |
1859 | case IP_VERSION(10, 3, 0): | |
1860 | case IP_VERSION(10, 3, 2): | |
1861 | case IP_VERSION(10, 3, 1): | |
1862 | case IP_VERSION(10, 3, 4): | |
1863 | case IP_VERSION(10, 3, 5): | |
1864 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; | |
1865 | break; | |
1866 | case IP_VERSION(10, 1, 3): | |
1867 | case IP_VERSION(10, 3, 3): | |
1868 | firmware_supported = false; | |
1869 | break; | |
1870 | default: | |
1871 | break; | |
1872 | } | |
1873 | ||
1874 | out: | |
1875 | if (firmware_supported) | |
1876 | dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; | |
1877 | } | |
1878 | ||
1879 | static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) | |
1880 | { | |
1881 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << | |
1882 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
1883 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
1884 | ||
1885 | dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | | |
1886 | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | | |
1887 | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; | |
1888 | ||
fc7f1d96 JK |
1889 | if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) |
1890 | dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; | |
1891 | ||
d230f1bf | 1892 | if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { |
567db9e0 JK |
1893 | if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) |
1894 | dev->node_props.debug_prop |= | |
1895 | HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 | | |
1896 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3; | |
1897 | else | |
1898 | dev->node_props.debug_prop |= | |
1899 | HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | | |
1900 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; | |
d230f1bf | 1901 | |
8e436326 | 1902 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) |
d230f1bf JK |
1903 | dev->node_props.capability |= |
1904 | HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; | |
1905 | } else { | |
1906 | dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | | |
1907 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; | |
1908 | ||
fc7f1d96 | 1909 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) |
d230f1bf JK |
1910 | dev->node_props.capability |= |
1911 | HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; | |
1912 | } | |
1913 | ||
1914 | kfd_topology_set_dbg_firmware_support(dev); | |
1915 | } | |
1916 | ||
8dc1db31 | 1917 | int kfd_topology_add_device(struct kfd_node *gpu) |
5b5c4e40 EP |
1918 | { |
1919 | uint32_t gpu_id; | |
1920 | struct kfd_topology_device *dev; | |
f7ce2fad | 1921 | struct kfd_cu_info cu_info; |
4f449311 | 1922 | int res = 0; |
b7675b7b GS |
1923 | int i; |
1924 | const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; | |
4f449311 | 1925 | |
5b5c4e40 | 1926 | gpu_id = kfd_generate_gpu_id(gpu); |
400a39f1 JZ |
1927 | if (gpu->xcp && !gpu->xcp->ddev) { |
1928 | dev_warn(gpu->adev->dev, | |
1929 | "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.", | |
1930 | gpu_id); | |
1931 | return 0; | |
1932 | } else { | |
1933 | pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); | |
1934 | } | |
5b5c4e40 | 1935 | |
3a87177e HK |
1936 | /* Check to see if this gpu device exists in the topology_device_list. |
1937 | * If so, assign the gpu to that device, | |
1938 | * else create a Virtual CRAT for this gpu device and then parse that | |
1939 | * CRAT to create a new topology device. Once created assign the gpu to | |
1940 | * that topology device | |
5b5c4e40 | 1941 | */ |
c0cc999f | 1942 | down_write(&topology_lock); |
5b5c4e40 | 1943 | dev = kfd_assign_gpu(gpu); |
f701acb6 FK |
1944 | if (!dev) |
1945 | res = kfd_topology_add_device_locked(gpu, gpu_id, &dev); | |
c0cc999f | 1946 | up_write(&topology_lock); |
f701acb6 FK |
1947 | if (res) |
1948 | return res; | |
5b5c4e40 EP |
1949 | |
1950 | dev->gpu_id = gpu_id; | |
1951 | gpu->id = gpu_id; | |
3a87177e | 1952 | |
0f28cca8 RE |
1953 | kfd_dev_create_p2p_links(); |
1954 | ||
3a87177e HK |
1955 | /* TODO: Move the following lines to function |
1956 | * kfd_add_non_crat_information | |
1957 | */ | |
1958 | ||
1959 | /* Fill-in additional information that is not available in CRAT but | |
1960 | * needed for the topology | |
1961 | */ | |
1962 | ||
574c4183 | 1963 | amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info); |
c181159a | 1964 | |
b7675b7b GS |
1965 | for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { |
1966 | dev->node_props.name[i] = __tolower(asic_name[i]); | |
1967 | if (asic_name[i] == '\0') | |
1968 | break; | |
1969 | } | |
1970 | dev->node_props.name[i] = '\0'; | |
c181159a | 1971 | |
3a87177e HK |
1972 | dev->node_props.simd_arrays_per_engine = |
1973 | cu_info.num_shader_arrays_per_engine; | |
1974 | ||
8dc1db31 MJ |
1975 | dev->node_props.gfx_target_version = |
1976 | gpu->kfd->device_info.gfx_target_version; | |
d69a3b76 MJ |
1977 | dev->node_props.vendor_id = gpu->adev->pdev->vendor; |
1978 | dev->node_props.device_id = gpu->adev->pdev->device; | |
c6d1ec41 | 1979 | dev->node_props.capability |= |
02274fc0 | 1980 | ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & |
c6d1ec41 | 1981 | HSA_CAP_ASIC_REVISION_MASK); |
92085240 | 1982 | |
d69a3b76 | 1983 | dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); |
92085240 JK |
1984 | if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) |
1985 | dev->node_props.location_id |= dev->gpu->node_id; | |
1986 | ||
d69a3b76 | 1987 | dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); |
3a87177e | 1988 | dev->node_props.max_engine_clk_fcompute = |
574c4183 | 1989 | amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); |
3a87177e HK |
1990 | dev->node_props.max_engine_clk_ccompute = |
1991 | cpufreq_quick_get_max(0) / 1000; | |
a476c0c6 PY |
1992 | |
1993 | if (gpu->xcp) | |
1994 | dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; | |
1995 | else | |
1996 | dev->node_props.drm_render_minor = | |
1997 | gpu->kfd->shared_resources.drm_render_minor; | |
3a87177e | 1998 | |
8dc1db31 | 1999 | dev->node_props.hive_id = gpu->kfd->hive_id; |
ee2f17f4 | 2000 | dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); |
14568cf6 | 2001 | dev->node_props.num_sdma_xgmi_engines = |
ee2f17f4 | 2002 | kfd_get_num_xgmi_sdma_engines(gpu); |
bb71c74d | 2003 | dev->node_props.num_sdma_queues_per_engine = |
8dc1db31 MJ |
2004 | gpu->kfd->device_info.num_sdma_queues_per_engine - |
2005 | gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; | |
29633d0e | 2006 | dev->node_props.num_gws = (dev->gpu->gws && |
29e76462 | 2007 | dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? |
02274fc0 | 2008 | dev->gpu->adev->gds.gws_size : 0; |
e6945304 | 2009 | dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); |
0c1690e3 | 2010 | |
3a87177e HK |
2011 | kfd_fill_mem_clk_max_info(dev); |
2012 | kfd_fill_iolink_non_crat_info(dev); | |
2013 | ||
7eb0502a | 2014 | switch (dev->gpu->adev->asic_type) { |
3a87177e HK |
2015 | case CHIP_KAVERI: |
2016 | case CHIP_HAWAII: | |
2017 | case CHIP_TONGA: | |
2018 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << | |
2019 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
2020 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
2021 | break; | |
2022 | case CHIP_CARRIZO: | |
2023 | case CHIP_FIJI: | |
2024 | case CHIP_POLARIS10: | |
2025 | case CHIP_POLARIS11: | |
846a44d7 | 2026 | case CHIP_POLARIS12: |
ed81cd6e | 2027 | case CHIP_VEGAM: |
42aa8793 | 2028 | pr_debug("Adding doorbell packet type capability\n"); |
3a87177e HK |
2029 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << |
2030 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
2031 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
2032 | break; | |
2033 | default: | |
d230f1bf | 2034 | if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) |
e4804a39 | 2035 | WARN(1, "Unexpected ASIC family %u", |
7eb0502a | 2036 | dev->gpu->adev->asic_type); |
d230f1bf JK |
2037 | else |
2038 | kfd_topology_set_capabilities(dev); | |
7639a8c4 BG |
2039 | } |
2040 | ||
1ae99eab | 2041 | /* |
2243f493 RB |
2042 | * Overwrite ATS capability according to needs_iommu_device to fix |
2043 | * potential missing corresponding bit in CRAT of BIOS. | |
2044 | */ | |
c99a2e7a | 2045 | dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; |
1ae99eab | 2046 | |
3a87177e HK |
2047 | /* Fix errors in CZ CRAT. |
2048 | * simd_count: Carrizo CRAT reports wrong simd_count, probably | |
2049 | * because it doesn't consider masked out CUs | |
70f372bf | 2050 | * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd |
3a87177e | 2051 | */ |
7eb0502a | 2052 | if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { |
3a87177e HK |
2053 | dev->node_props.simd_count = |
2054 | cu_info.simd_per_cu * cu_info.cu_active_number; | |
70f372bf | 2055 | dev->node_props.max_waves_per_simd = 10; |
70f372bf | 2056 | } |
3a87177e | 2057 | |
5436ab94 SY |
2058 | /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ |
2059 | dev->node_props.capability |= | |
56c5977e | 2060 | ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? |
5436ab94 | 2061 | HSA_CAP_SRAM_EDCSUPPORTED : 0; |
56c5977e GS |
2062 | dev->node_props.capability |= |
2063 | ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? | |
5436ab94 SY |
2064 | HSA_CAP_MEM_EDCSUPPORTED : 0; |
2065 | ||
046e674b | 2066 | if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) |
56c5977e | 2067 | dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? |
0dee45a2 | 2068 | HSA_CAP_RASEVENTNOTIFY : 0; |
0dee45a2 | 2069 | |
610dab11 | 2070 | if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) |
4c166eb9 PY |
2071 | dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; |
2072 | ||
03d400e7 AS |
2073 | if (dev->gpu->adev->gmc.is_app_apu || |
2074 | dev->gpu->adev->gmc.xgmi.connected_to_cpu) | |
2075 | dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; | |
2076 | ||
3a87177e HK |
2077 | kfd_debug_print_topology(); |
2078 | ||
7d4f8db4 | 2079 | kfd_notify_gpu_change(gpu_id, 1); |
f701acb6 | 2080 | |
7d4f8db4 | 2081 | return 0; |
5b5c4e40 EP |
2082 | } |
2083 | ||
46d18d51 MJ |
2084 | /** |
2085 | * kfd_topology_update_io_links() - Update IO links after device removal. | |
2086 | * @proximity_domain: Proximity domain value of the dev being removed. | |
2087 | * | |
2088 | * The topology list currently is arranged in increasing order of | |
2089 | * proximity domain. | |
2090 | * | |
2091 | * Two things need to be done when a device is removed: | |
2092 | * 1. All the IO links to this device need to be removed. | |
2093 | * 2. All nodes after the current device node need to move | |
2094 | * up once this device node is removed from the topology | |
2095 | * list. As a result, the proximity domain values for | |
2096 | * all nodes after the node being deleted reduce by 1. | |
2097 | * This would also cause the proximity domain values for | |
2098 | * io links to be updated based on new proximity domain | |
2099 | * values. | |
2100 | * | |
2101 | * Context: The caller must hold write topology_lock. | |
2102 | */ | |
2103 | static void kfd_topology_update_io_links(int proximity_domain) | |
2104 | { | |
2105 | struct kfd_topology_device *dev; | |
0f28cca8 | 2106 | struct kfd_iolink_properties *iolink, *p2plink, *tmp; |
46d18d51 MJ |
2107 | |
2108 | list_for_each_entry(dev, &topology_device_list, list) { | |
2109 | if (dev->proximity_domain > proximity_domain) | |
2110 | dev->proximity_domain--; | |
2111 | ||
2112 | list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { | |
2113 | /* | |
2114 | * If there is an io link to the dev being deleted | |
2115 | * then remove that IO link also. | |
2116 | */ | |
2117 | if (iolink->node_to == proximity_domain) { | |
2118 | list_del(&iolink->list); | |
46d18d51 | 2119 | dev->node_props.io_links_count--; |
98447635 MJ |
2120 | } else { |
2121 | if (iolink->node_from > proximity_domain) | |
2122 | iolink->node_from--; | |
2123 | if (iolink->node_to > proximity_domain) | |
2124 | iolink->node_to--; | |
46d18d51 MJ |
2125 | } |
2126 | } | |
0f28cca8 RE |
2127 | |
2128 | list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { | |
2129 | /* | |
2130 | * If there is a p2p link to the dev being deleted | |
2131 | * then remove that p2p link also. | |
2132 | */ | |
2133 | if (p2plink->node_to == proximity_domain) { | |
2134 | list_del(&p2plink->list); | |
2135 | dev->node_props.p2p_links_count--; | |
2136 | } else { | |
2137 | if (p2plink->node_from > proximity_domain) | |
2138 | p2plink->node_from--; | |
2139 | if (p2plink->node_to > proximity_domain) | |
2140 | p2plink->node_to--; | |
2141 | } | |
2142 | } | |
46d18d51 MJ |
2143 | } |
2144 | } | |
2145 | ||
8dc1db31 | 2146 | int kfd_topology_remove_device(struct kfd_node *gpu) |
5b5c4e40 | 2147 | { |
4f449311 | 2148 | struct kfd_topology_device *dev, *tmp; |
5b5c4e40 EP |
2149 | uint32_t gpu_id; |
2150 | int res = -ENODEV; | |
46d18d51 | 2151 | int i = 0; |
5b5c4e40 | 2152 | |
5b5c4e40 EP |
2153 | down_write(&topology_lock); |
2154 | ||
46d18d51 | 2155 | list_for_each_entry_safe(dev, tmp, &topology_device_list, list) { |
5b5c4e40 EP |
2156 | if (dev->gpu == gpu) { |
2157 | gpu_id = dev->gpu_id; | |
2158 | kfd_remove_sysfs_node_entry(dev); | |
2159 | kfd_release_topology_device(dev); | |
4f449311 | 2160 | sys_props.num_devices--; |
46d18d51 MJ |
2161 | kfd_topology_update_io_links(i); |
2162 | topology_crat_proximity_domain = sys_props.num_devices-1; | |
2163 | sys_props.generation_count++; | |
5b5c4e40 EP |
2164 | res = 0; |
2165 | if (kfd_topology_update_sysfs() < 0) | |
2166 | kfd_topology_release_sysfs(); | |
2167 | break; | |
2168 | } | |
46d18d51 MJ |
2169 | i++; |
2170 | } | |
5b5c4e40 EP |
2171 | |
2172 | up_write(&topology_lock); | |
2173 | ||
174de876 | 2174 | if (!res) |
5b5c4e40 EP |
2175 | kfd_notify_gpu_change(gpu_id, 0); |
2176 | ||
2177 | return res; | |
2178 | } | |
2179 | ||
6d82eb0e HK |
2180 | /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD |
2181 | * topology. If GPU device is found @idx, then valid kfd_dev pointer is | |
2182 | * returned through @kdev | |
2183 | * Return - 0: On success (@kdev will be NULL for non GPU nodes) | |
2184 | * -1: If end of list | |
5b5c4e40 | 2185 | */ |
8dc1db31 | 2186 | int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) |
5b5c4e40 EP |
2187 | { |
2188 | ||
2189 | struct kfd_topology_device *top_dev; | |
5b5c4e40 EP |
2190 | uint8_t device_idx = 0; |
2191 | ||
6d82eb0e | 2192 | *kdev = NULL; |
5b5c4e40 EP |
2193 | down_read(&topology_lock); |
2194 | ||
2195 | list_for_each_entry(top_dev, &topology_device_list, list) { | |
2196 | if (device_idx == idx) { | |
6d82eb0e HK |
2197 | *kdev = top_dev->gpu; |
2198 | up_read(&topology_lock); | |
2199 | return 0; | |
5b5c4e40 EP |
2200 | } |
2201 | ||
2202 | device_idx++; | |
2203 | } | |
2204 | ||
2205 | up_read(&topology_lock); | |
2206 | ||
6d82eb0e | 2207 | return -1; |
5b5c4e40 EP |
2208 | |
2209 | } | |
851a645e | 2210 | |
520b8fb7 FK |
2211 | static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) |
2212 | { | |
520b8fb7 FK |
2213 | int first_cpu_of_numa_node; |
2214 | ||
2215 | if (!cpumask || cpumask == cpu_none_mask) | |
2216 | return -1; | |
2217 | first_cpu_of_numa_node = cpumask_first(cpumask); | |
2218 | if (first_cpu_of_numa_node >= nr_cpu_ids) | |
2219 | return -1; | |
df1dd4f4 FK |
2220 | #ifdef CONFIG_X86_64 |
2221 | return cpu_data(first_cpu_of_numa_node).apicid; | |
2222 | #else | |
2223 | return first_cpu_of_numa_node; | |
2224 | #endif | |
520b8fb7 FK |
2225 | } |
2226 | ||
2227 | /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor | |
2228 | * of the given NUMA node (numa_node_id) | |
2229 | * Return -1 on failure | |
2230 | */ | |
2231 | int kfd_numa_node_to_apic_id(int numa_node_id) | |
2232 | { | |
2233 | if (numa_node_id == -1) { | |
2234 | pr_warn("Invalid NUMA Node. Use online CPU mask\n"); | |
2235 | return kfd_cpumask_to_apic_id(cpu_online_mask); | |
2236 | } | |
2237 | return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); | |
2238 | } | |
2239 | ||
851a645e FK |
2240 | #if defined(CONFIG_DEBUG_FS) |
2241 | ||
2242 | int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) | |
2243 | { | |
2244 | struct kfd_topology_device *dev; | |
2245 | unsigned int i = 0; | |
2246 | int r = 0; | |
2247 | ||
2248 | down_read(&topology_lock); | |
2249 | ||
2250 | list_for_each_entry(dev, &topology_device_list, list) { | |
2251 | if (!dev->gpu) { | |
2252 | i++; | |
2253 | continue; | |
2254 | } | |
2255 | ||
2256 | seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); | |
2257 | r = dqm_debugfs_hqds(m, dev->gpu->dqm); | |
2258 | if (r) | |
2259 | break; | |
2260 | } | |
2261 | ||
2262 | up_read(&topology_lock); | |
2263 | ||
2264 | return r; | |
2265 | } | |
2266 | ||
2267 | int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) | |
2268 | { | |
2269 | struct kfd_topology_device *dev; | |
2270 | unsigned int i = 0; | |
2271 | int r = 0; | |
2272 | ||
2273 | down_read(&topology_lock); | |
2274 | ||
2275 | list_for_each_entry(dev, &topology_device_list, list) { | |
2276 | if (!dev->gpu) { | |
2277 | i++; | |
2278 | continue; | |
2279 | } | |
2280 | ||
2281 | seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); | |
9af5379c | 2282 | r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); |
851a645e FK |
2283 | if (r) |
2284 | break; | |
2285 | } | |
2286 | ||
2287 | up_read(&topology_lock); | |
2288 | ||
2289 | return r; | |
2290 | } | |
2291 | ||
2292 | #endif |