Commit | Line | Data |
---|---|---|
5b5c4e40 EP |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | #include <linux/types.h> | |
24 | #include <linux/kernel.h> | |
25 | #include <linux/pci.h> | |
26 | #include <linux/errno.h> | |
27 | #include <linux/acpi.h> | |
28 | #include <linux/hash.h> | |
29 | #include <linux/cpufreq.h> | |
f7c826ad | 30 | #include <linux/log2.h> |
520b8fb7 FK |
31 | #include <linux/dmi.h> |
32 | #include <linux/atomic.h> | |
5b5c4e40 EP |
33 | |
34 | #include "kfd_priv.h" | |
35 | #include "kfd_crat.h" | |
36 | #include "kfd_topology.h" | |
851a645e | 37 | #include "kfd_device_queue_manager.h" |
64d1c3a4 | 38 | #include "kfd_iommu.h" |
5b87245f | 39 | #include "amdgpu_amdkfd.h" |
0dee45a2 | 40 | #include "amdgpu_ras.h" |
5b5c4e40 | 41 | |
4f449311 HK |
42 | /* topology_device_list - Master list of all topology devices */ |
43 | static struct list_head topology_device_list; | |
520b8fb7 | 44 | static struct kfd_system_properties sys_props; |
5b5c4e40 EP |
45 | |
46 | static DECLARE_RWSEM(topology_lock); | |
520b8fb7 | 47 | static atomic_t topology_crat_proximity_domain; |
5b5c4e40 | 48 | |
3a87177e HK |
49 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain( |
50 | uint32_t proximity_domain) | |
51 | { | |
52 | struct kfd_topology_device *top_dev; | |
53 | struct kfd_topology_device *device = NULL; | |
54 | ||
55 | down_read(&topology_lock); | |
56 | ||
57 | list_for_each_entry(top_dev, &topology_device_list, list) | |
58 | if (top_dev->proximity_domain == proximity_domain) { | |
59 | device = top_dev; | |
60 | break; | |
61 | } | |
62 | ||
63 | up_read(&topology_lock); | |
64 | ||
65 | return device; | |
66 | } | |
67 | ||
44d8cc6f | 68 | struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) |
5b5c4e40 | 69 | { |
44d8cc6f YZ |
70 | struct kfd_topology_device *top_dev = NULL; |
71 | struct kfd_topology_device *ret = NULL; | |
5b5c4e40 EP |
72 | |
73 | down_read(&topology_lock); | |
74 | ||
75 | list_for_each_entry(top_dev, &topology_device_list, list) | |
76 | if (top_dev->gpu_id == gpu_id) { | |
44d8cc6f | 77 | ret = top_dev; |
5b5c4e40 EP |
78 | break; |
79 | } | |
80 | ||
81 | up_read(&topology_lock); | |
82 | ||
44d8cc6f YZ |
83 | return ret; |
84 | } | |
85 | ||
86 | struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) | |
87 | { | |
88 | struct kfd_topology_device *top_dev; | |
89 | ||
90 | top_dev = kfd_topology_device_by_id(gpu_id); | |
91 | if (!top_dev) | |
92 | return NULL; | |
93 | ||
94 | return top_dev->gpu; | |
5b5c4e40 EP |
95 | } |
96 | ||
97 | struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) | |
98 | { | |
99 | struct kfd_topology_device *top_dev; | |
100 | struct kfd_dev *device = NULL; | |
101 | ||
102 | down_read(&topology_lock); | |
103 | ||
104 | list_for_each_entry(top_dev, &topology_device_list, list) | |
3704d56e | 105 | if (top_dev->gpu && top_dev->gpu->pdev == pdev) { |
5b5c4e40 EP |
106 | device = top_dev->gpu; |
107 | break; | |
108 | } | |
109 | ||
110 | up_read(&topology_lock); | |
111 | ||
112 | return device; | |
113 | } | |
114 | ||
1dde0ea9 FK |
115 | struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) |
116 | { | |
117 | struct kfd_topology_device *top_dev; | |
118 | struct kfd_dev *device = NULL; | |
119 | ||
120 | down_read(&topology_lock); | |
121 | ||
122 | list_for_each_entry(top_dev, &topology_device_list, list) | |
123 | if (top_dev->gpu && top_dev->gpu->kgd == kgd) { | |
124 | device = top_dev->gpu; | |
125 | break; | |
126 | } | |
127 | ||
128 | up_read(&topology_lock); | |
129 | ||
130 | return device; | |
131 | } | |
132 | ||
3a87177e | 133 | /* Called with write topology_lock acquired */ |
5b5c4e40 EP |
134 | static void kfd_release_topology_device(struct kfd_topology_device *dev) |
135 | { | |
136 | struct kfd_mem_properties *mem; | |
137 | struct kfd_cache_properties *cache; | |
138 | struct kfd_iolink_properties *iolink; | |
f4757347 | 139 | struct kfd_perf_properties *perf; |
5b5c4e40 | 140 | |
5b5c4e40 EP |
141 | list_del(&dev->list); |
142 | ||
143 | while (dev->mem_props.next != &dev->mem_props) { | |
144 | mem = container_of(dev->mem_props.next, | |
145 | struct kfd_mem_properties, list); | |
146 | list_del(&mem->list); | |
147 | kfree(mem); | |
148 | } | |
149 | ||
150 | while (dev->cache_props.next != &dev->cache_props) { | |
151 | cache = container_of(dev->cache_props.next, | |
152 | struct kfd_cache_properties, list); | |
153 | list_del(&cache->list); | |
154 | kfree(cache); | |
155 | } | |
156 | ||
157 | while (dev->io_link_props.next != &dev->io_link_props) { | |
158 | iolink = container_of(dev->io_link_props.next, | |
159 | struct kfd_iolink_properties, list); | |
160 | list_del(&iolink->list); | |
161 | kfree(iolink); | |
162 | } | |
163 | ||
f4757347 AL |
164 | while (dev->perf_props.next != &dev->perf_props) { |
165 | perf = container_of(dev->perf_props.next, | |
166 | struct kfd_perf_properties, list); | |
167 | list_del(&perf->list); | |
168 | kfree(perf); | |
169 | } | |
170 | ||
5b5c4e40 | 171 | kfree(dev); |
5b5c4e40 EP |
172 | } |
173 | ||
4f449311 | 174 | void kfd_release_topology_device_list(struct list_head *device_list) |
5b5c4e40 EP |
175 | { |
176 | struct kfd_topology_device *dev; | |
177 | ||
4f449311 HK |
178 | while (!list_empty(device_list)) { |
179 | dev = list_first_entry(device_list, | |
180 | struct kfd_topology_device, list); | |
5b5c4e40 | 181 | kfd_release_topology_device(dev); |
4f449311 | 182 | } |
5b5c4e40 EP |
183 | } |
184 | ||
4f449311 HK |
185 | static void kfd_release_live_view(void) |
186 | { | |
187 | kfd_release_topology_device_list(&topology_device_list); | |
5b5c4e40 EP |
188 | memset(&sys_props, 0, sizeof(sys_props)); |
189 | } | |
190 | ||
4f449311 HK |
191 | struct kfd_topology_device *kfd_create_topology_device( |
192 | struct list_head *device_list) | |
5b5c4e40 EP |
193 | { |
194 | struct kfd_topology_device *dev; | |
195 | ||
196 | dev = kfd_alloc_struct(dev); | |
4eacc26b | 197 | if (!dev) { |
5b5c4e40 | 198 | pr_err("No memory to allocate a topology device"); |
16b9201c | 199 | return NULL; |
5b5c4e40 EP |
200 | } |
201 | ||
202 | INIT_LIST_HEAD(&dev->mem_props); | |
203 | INIT_LIST_HEAD(&dev->cache_props); | |
204 | INIT_LIST_HEAD(&dev->io_link_props); | |
f4757347 | 205 | INIT_LIST_HEAD(&dev->perf_props); |
5b5c4e40 | 206 | |
4f449311 | 207 | list_add_tail(&dev->list, device_list); |
5b5c4e40 EP |
208 | |
209 | return dev; | |
16b9201c | 210 | } |
5b5c4e40 | 211 | |
5b5c4e40 | 212 | |
/*
 * Helpers for filling a sysfs show() buffer of PAGE_SIZE bytes.
 *
 * Each macro appends formatted text at buffer+offs, advances offs by the
 * number of characters actually stored, and evaluates to the new offs.
 * scnprintf() is used rather than snprintf() because snprintf() returns the
 * length that *would* have been written: after a truncated write offs would
 * exceed PAGE_SIZE and the next remaining-size computation would go negative.
 * offs must be an lvalue.
 */
#define sysfs_show_gen_prop(buffer, offs, fmt, ...)			\
		((offs) += scnprintf((buffer) + (offs),			\
				     PAGE_SIZE - (offs),		\
				     fmt, __VA_ARGS__))
#define sysfs_show_32bit_prop(buffer, offs, name, value)		\
		sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, offs, name, value)		\
		sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, offs, value)			\
		sysfs_show_gen_prop(buffer, offs, "%u\n", value)
#define sysfs_show_str_val(buffer, offs, value)				\
		sysfs_show_gen_prop(buffer, offs, "%s\n", value)
5b5c4e40 EP |
224 | |
225 | static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, | |
226 | char *buffer) | |
227 | { | |
83a13ef5 | 228 | int offs = 0; |
5b5c4e40 EP |
229 | |
230 | /* Making sure that the buffer is an empty string */ | |
231 | buffer[0] = 0; | |
232 | ||
233 | if (attr == &sys_props.attr_genid) { | |
83a13ef5 FK |
234 | sysfs_show_32bit_val(buffer, offs, |
235 | sys_props.generation_count); | |
5b5c4e40 | 236 | } else if (attr == &sys_props.attr_props) { |
83a13ef5 FK |
237 | sysfs_show_64bit_prop(buffer, offs, "platform_oem", |
238 | sys_props.platform_oem); | |
239 | sysfs_show_64bit_prop(buffer, offs, "platform_id", | |
240 | sys_props.platform_id); | |
241 | sysfs_show_64bit_prop(buffer, offs, "platform_rev", | |
242 | sys_props.platform_rev); | |
5b5c4e40 | 243 | } else { |
83a13ef5 | 244 | offs = -EINVAL; |
5b5c4e40 EP |
245 | } |
246 | ||
83a13ef5 | 247 | return offs; |
5b5c4e40 EP |
248 | } |
249 | ||
/*
 * kobject release callback shared by the topology kobj_types in this file:
 * frees the kobject's own memory.
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
254 | ||
5b5c4e40 EP |
255 | static const struct sysfs_ops sysprops_ops = { |
256 | .show = sysprops_show, | |
257 | }; | |
258 | ||
259 | static struct kobj_type sysprops_type = { | |
5108d768 | 260 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
261 | .sysfs_ops = &sysprops_ops, |
262 | }; | |
263 | ||
264 | static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, | |
265 | char *buffer) | |
266 | { | |
83a13ef5 | 267 | int offs = 0; |
5b5c4e40 EP |
268 | struct kfd_iolink_properties *iolink; |
269 | ||
270 | /* Making sure that the buffer is an empty string */ | |
271 | buffer[0] = 0; | |
272 | ||
273 | iolink = container_of(attr, struct kfd_iolink_properties, attr); | |
6b855f7b HK |
274 | if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) |
275 | return -EPERM; | |
83a13ef5 FK |
276 | sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); |
277 | sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); | |
278 | sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); | |
279 | sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); | |
280 | sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); | |
281 | sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); | |
282 | sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); | |
283 | sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); | |
284 | sysfs_show_32bit_prop(buffer, offs, "min_bandwidth", | |
285 | iolink->min_bandwidth); | |
286 | sysfs_show_32bit_prop(buffer, offs, "max_bandwidth", | |
287 | iolink->max_bandwidth); | |
288 | sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", | |
289 | iolink->rec_transfer_size); | |
290 | sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); | |
291 | ||
292 | return offs; | |
5b5c4e40 EP |
293 | } |
294 | ||
295 | static const struct sysfs_ops iolink_ops = { | |
296 | .show = iolink_show, | |
297 | }; | |
298 | ||
299 | static struct kobj_type iolink_type = { | |
5108d768 | 300 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
301 | .sysfs_ops = &iolink_ops, |
302 | }; | |
303 | ||
304 | static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, | |
305 | char *buffer) | |
306 | { | |
83a13ef5 | 307 | int offs = 0; |
5b5c4e40 EP |
308 | struct kfd_mem_properties *mem; |
309 | ||
310 | /* Making sure that the buffer is an empty string */ | |
311 | buffer[0] = 0; | |
312 | ||
313 | mem = container_of(attr, struct kfd_mem_properties, attr); | |
6b855f7b HK |
314 | if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) |
315 | return -EPERM; | |
83a13ef5 FK |
316 | sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); |
317 | sysfs_show_64bit_prop(buffer, offs, "size_in_bytes", | |
318 | mem->size_in_bytes); | |
319 | sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); | |
320 | sysfs_show_32bit_prop(buffer, offs, "width", mem->width); | |
321 | sysfs_show_32bit_prop(buffer, offs, "mem_clk_max", | |
322 | mem->mem_clk_max); | |
323 | ||
324 | return offs; | |
5b5c4e40 EP |
325 | } |
326 | ||
327 | static const struct sysfs_ops mem_ops = { | |
328 | .show = mem_show, | |
329 | }; | |
330 | ||
331 | static struct kobj_type mem_type = { | |
5108d768 | 332 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
333 | .sysfs_ops = &mem_ops, |
334 | }; | |
335 | ||
336 | static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, | |
337 | char *buffer) | |
338 | { | |
83a13ef5 | 339 | int offs = 0; |
bc0c75a3 | 340 | uint32_t i, j; |
5b5c4e40 EP |
341 | struct kfd_cache_properties *cache; |
342 | ||
343 | /* Making sure that the buffer is an empty string */ | |
344 | buffer[0] = 0; | |
345 | ||
346 | cache = container_of(attr, struct kfd_cache_properties, attr); | |
6b855f7b HK |
347 | if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) |
348 | return -EPERM; | |
83a13ef5 | 349 | sysfs_show_32bit_prop(buffer, offs, "processor_id_low", |
5b5c4e40 | 350 | cache->processor_id_low); |
83a13ef5 FK |
351 | sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); |
352 | sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); | |
353 | sysfs_show_32bit_prop(buffer, offs, "cache_line_size", | |
354 | cache->cacheline_size); | |
355 | sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag", | |
356 | cache->cachelines_per_tag); | |
357 | sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); | |
358 | sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); | |
359 | sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); | |
360 | offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); | |
bc0c75a3 | 361 | for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) |
83a13ef5 | 362 | for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) |
bc0c75a3 | 363 | /* Check each bit */ |
83a13ef5 FK |
364 | offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", |
365 | (cache->sibling_map[i] >> j) & 1); | |
366 | ||
bc0c75a3 | 367 | /* Replace the last "," with end of line */ |
83a13ef5 FK |
368 | buffer[offs-1] = '\n'; |
369 | return offs; | |
5b5c4e40 EP |
370 | } |
371 | ||
372 | static const struct sysfs_ops cache_ops = { | |
373 | .show = kfd_cache_show, | |
374 | }; | |
375 | ||
376 | static struct kobj_type cache_type = { | |
5108d768 | 377 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
378 | .sysfs_ops = &cache_ops, |
379 | }; | |
380 | ||
f4757347 AL |
381 | /****** Sysfs of Performance Counters ******/ |
382 | ||
383 | struct kfd_perf_attr { | |
384 | struct kobj_attribute attr; | |
385 | uint32_t data; | |
386 | }; | |
387 | ||
388 | static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, | |
389 | char *buf) | |
390 | { | |
83a13ef5 | 391 | int offs = 0; |
f4757347 AL |
392 | struct kfd_perf_attr *attr; |
393 | ||
394 | buf[0] = 0; | |
395 | attr = container_of(attrs, struct kfd_perf_attr, attr); | |
396 | if (!attr->data) /* invalid data for PMC */ | |
397 | return 0; | |
398 | else | |
83a13ef5 | 399 | return sysfs_show_32bit_val(buf, offs, attr->data); |
f4757347 AL |
400 | } |
401 | ||
402 | #define KFD_PERF_DESC(_name, _data) \ | |
403 | { \ | |
404 | .attr = __ATTR(_name, 0444, perf_show, NULL), \ | |
405 | .data = _data, \ | |
406 | } | |
407 | ||
408 | static struct kfd_perf_attr perf_attr_iommu[] = { | |
409 | KFD_PERF_DESC(max_concurrent, 0), | |
410 | KFD_PERF_DESC(num_counters, 0), | |
411 | KFD_PERF_DESC(counter_ids, 0), | |
412 | }; | |
413 | /****************************************/ | |
414 | ||
5b5c4e40 EP |
415 | static ssize_t node_show(struct kobject *kobj, struct attribute *attr, |
416 | char *buffer) | |
417 | { | |
83a13ef5 | 418 | int offs = 0; |
5b5c4e40 | 419 | struct kfd_topology_device *dev; |
f7c826ad | 420 | uint32_t log_max_watch_addr; |
5b5c4e40 EP |
421 | |
422 | /* Making sure that the buffer is an empty string */ | |
423 | buffer[0] = 0; | |
424 | ||
425 | if (strcmp(attr->name, "gpu_id") == 0) { | |
426 | dev = container_of(attr, struct kfd_topology_device, | |
427 | attr_gpuid); | |
6b855f7b HK |
428 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
429 | return -EPERM; | |
83a13ef5 | 430 | return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); |
f7c826ad AS |
431 | } |
432 | ||
433 | if (strcmp(attr->name, "name") == 0) { | |
5b5c4e40 EP |
434 | dev = container_of(attr, struct kfd_topology_device, |
435 | attr_name); | |
c181159a | 436 | |
6b855f7b HK |
437 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
438 | return -EPERM; | |
83a13ef5 | 439 | return sysfs_show_str_val(buffer, offs, dev->node_props.name); |
f7c826ad | 440 | } |
5b5c4e40 | 441 | |
f7c826ad AS |
442 | dev = container_of(attr, struct kfd_topology_device, |
443 | attr_props); | |
6b855f7b HK |
444 | if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) |
445 | return -EPERM; | |
83a13ef5 FK |
446 | sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", |
447 | dev->node_props.cpu_cores_count); | |
448 | sysfs_show_32bit_prop(buffer, offs, "simd_count", | |
6127896f | 449 | dev->gpu ? dev->node_props.simd_count : 0); |
83a13ef5 FK |
450 | sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", |
451 | dev->node_props.mem_banks_count); | |
452 | sysfs_show_32bit_prop(buffer, offs, "caches_count", | |
453 | dev->node_props.caches_count); | |
454 | sysfs_show_32bit_prop(buffer, offs, "io_links_count", | |
455 | dev->node_props.io_links_count); | |
456 | sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", | |
457 | dev->node_props.cpu_core_id_base); | |
458 | sysfs_show_32bit_prop(buffer, offs, "simd_id_base", | |
459 | dev->node_props.simd_id_base); | |
460 | sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd", | |
461 | dev->node_props.max_waves_per_simd); | |
462 | sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb", | |
463 | dev->node_props.lds_size_in_kb); | |
464 | sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb", | |
465 | dev->node_props.gds_size_in_kb); | |
466 | sysfs_show_32bit_prop(buffer, offs, "num_gws", | |
467 | dev->node_props.num_gws); | |
468 | sysfs_show_32bit_prop(buffer, offs, "wave_front_size", | |
469 | dev->node_props.wave_front_size); | |
470 | sysfs_show_32bit_prop(buffer, offs, "array_count", | |
471 | dev->node_props.array_count); | |
472 | sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", | |
473 | dev->node_props.simd_arrays_per_engine); | |
474 | sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", | |
475 | dev->node_props.cu_per_simd_array); | |
476 | sysfs_show_32bit_prop(buffer, offs, "simd_per_cu", | |
477 | dev->node_props.simd_per_cu); | |
478 | sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu", | |
479 | dev->node_props.max_slots_scratch_cu); | |
480 | sysfs_show_32bit_prop(buffer, offs, "vendor_id", | |
481 | dev->node_props.vendor_id); | |
482 | sysfs_show_32bit_prop(buffer, offs, "device_id", | |
483 | dev->node_props.device_id); | |
484 | sysfs_show_32bit_prop(buffer, offs, "location_id", | |
485 | dev->node_props.location_id); | |
486 | sysfs_show_32bit_prop(buffer, offs, "domain", | |
487 | dev->node_props.domain); | |
488 | sysfs_show_32bit_prop(buffer, offs, "drm_render_minor", | |
489 | dev->node_props.drm_render_minor); | |
490 | sysfs_show_64bit_prop(buffer, offs, "hive_id", | |
491 | dev->node_props.hive_id); | |
492 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines", | |
493 | dev->node_props.num_sdma_engines); | |
494 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines", | |
495 | dev->node_props.num_sdma_xgmi_engines); | |
496 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine", | |
497 | dev->node_props.num_sdma_queues_per_engine); | |
498 | sysfs_show_32bit_prop(buffer, offs, "num_cp_queues", | |
499 | dev->node_props.num_cp_queues); | |
f7c826ad AS |
500 | |
501 | if (dev->gpu) { | |
502 | log_max_watch_addr = | |
503 | __ilog2_u32(dev->gpu->device_info->num_of_watch_points); | |
504 | ||
505 | if (log_max_watch_addr) { | |
506 | dev->node_props.capability |= | |
507 | HSA_CAP_WATCH_POINTS_SUPPORTED; | |
508 | ||
509 | dev->node_props.capability |= | |
510 | ((log_max_watch_addr << | |
511 | HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & | |
512 | HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); | |
5b5c4e40 EP |
513 | } |
514 | ||
413e85d5 BG |
515 | if (dev->gpu->device_info->asic_family == CHIP_TONGA) |
516 | dev->node_props.capability |= | |
517 | HSA_CAP_AQL_QUEUE_DOUBLE_MAP; | |
518 | ||
83a13ef5 | 519 | sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", |
3a87177e | 520 | dev->node_props.max_engine_clk_fcompute); |
42e08c78 | 521 | |
83a13ef5 | 522 | sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); |
f7c826ad | 523 | |
83a13ef5 FK |
524 | sysfs_show_32bit_prop(buffer, offs, "fw_version", |
525 | dev->gpu->mec_fw_version); | |
526 | sysfs_show_32bit_prop(buffer, offs, "capability", | |
527 | dev->node_props.capability); | |
528 | sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", | |
529 | dev->gpu->sdma_fw_version); | |
11964258 KR |
530 | sysfs_show_64bit_prop(buffer, offs, "unique_id", |
531 | amdgpu_amdkfd_get_unique_id(dev->gpu->kgd)); | |
532 | ||
5b5c4e40 EP |
533 | } |
534 | ||
83a13ef5 FK |
535 | return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", |
536 | cpufreq_quick_get_max(0)/1000); | |
5b5c4e40 EP |
537 | } |
538 | ||
539 | static const struct sysfs_ops node_ops = { | |
540 | .show = node_show, | |
541 | }; | |
542 | ||
543 | static struct kobj_type node_type = { | |
5108d768 | 544 | .release = kfd_topology_kobj_release, |
5b5c4e40 EP |
545 | .sysfs_ops = &node_ops, |
546 | }; | |
547 | ||
/*
 * Remove the single attribute file @attr from @kobj, then delete the kobject
 * and drop a reference to it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
554 | ||
555 | static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) | |
556 | { | |
557 | struct kfd_iolink_properties *iolink; | |
558 | struct kfd_cache_properties *cache; | |
559 | struct kfd_mem_properties *mem; | |
f4757347 | 560 | struct kfd_perf_properties *perf; |
5b5c4e40 | 561 | |
5b5c4e40 EP |
562 | if (dev->kobj_iolink) { |
563 | list_for_each_entry(iolink, &dev->io_link_props, list) | |
564 | if (iolink->kobj) { | |
565 | kfd_remove_sysfs_file(iolink->kobj, | |
566 | &iolink->attr); | |
16b9201c | 567 | iolink->kobj = NULL; |
5b5c4e40 EP |
568 | } |
569 | kobject_del(dev->kobj_iolink); | |
570 | kobject_put(dev->kobj_iolink); | |
16b9201c | 571 | dev->kobj_iolink = NULL; |
5b5c4e40 EP |
572 | } |
573 | ||
574 | if (dev->kobj_cache) { | |
575 | list_for_each_entry(cache, &dev->cache_props, list) | |
576 | if (cache->kobj) { | |
577 | kfd_remove_sysfs_file(cache->kobj, | |
578 | &cache->attr); | |
16b9201c | 579 | cache->kobj = NULL; |
5b5c4e40 EP |
580 | } |
581 | kobject_del(dev->kobj_cache); | |
582 | kobject_put(dev->kobj_cache); | |
16b9201c | 583 | dev->kobj_cache = NULL; |
5b5c4e40 EP |
584 | } |
585 | ||
586 | if (dev->kobj_mem) { | |
587 | list_for_each_entry(mem, &dev->mem_props, list) | |
588 | if (mem->kobj) { | |
589 | kfd_remove_sysfs_file(mem->kobj, &mem->attr); | |
16b9201c | 590 | mem->kobj = NULL; |
5b5c4e40 EP |
591 | } |
592 | kobject_del(dev->kobj_mem); | |
593 | kobject_put(dev->kobj_mem); | |
16b9201c | 594 | dev->kobj_mem = NULL; |
5b5c4e40 EP |
595 | } |
596 | ||
f4757347 AL |
597 | if (dev->kobj_perf) { |
598 | list_for_each_entry(perf, &dev->perf_props, list) { | |
599 | kfree(perf->attr_group); | |
600 | perf->attr_group = NULL; | |
601 | } | |
602 | kobject_del(dev->kobj_perf); | |
603 | kobject_put(dev->kobj_perf); | |
604 | dev->kobj_perf = NULL; | |
605 | } | |
606 | ||
5b5c4e40 EP |
607 | if (dev->kobj_node) { |
608 | sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); | |
609 | sysfs_remove_file(dev->kobj_node, &dev->attr_name); | |
610 | sysfs_remove_file(dev->kobj_node, &dev->attr_props); | |
611 | kobject_del(dev->kobj_node); | |
612 | kobject_put(dev->kobj_node); | |
16b9201c | 613 | dev->kobj_node = NULL; |
5b5c4e40 EP |
614 | } |
615 | } | |
616 | ||
617 | static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, | |
618 | uint32_t id) | |
619 | { | |
620 | struct kfd_iolink_properties *iolink; | |
621 | struct kfd_cache_properties *cache; | |
622 | struct kfd_mem_properties *mem; | |
f4757347 | 623 | struct kfd_perf_properties *perf; |
5b5c4e40 | 624 | int ret; |
f4757347 AL |
625 | uint32_t i, num_attrs; |
626 | struct attribute **attrs; | |
5b5c4e40 | 627 | |
32fa8219 FK |
628 | if (WARN_ON(dev->kobj_node)) |
629 | return -EEXIST; | |
630 | ||
5b5c4e40 EP |
631 | /* |
632 | * Creating the sysfs folders | |
633 | */ | |
5b5c4e40 EP |
634 | dev->kobj_node = kfd_alloc_struct(dev->kobj_node); |
635 | if (!dev->kobj_node) | |
636 | return -ENOMEM; | |
637 | ||
638 | ret = kobject_init_and_add(dev->kobj_node, &node_type, | |
639 | sys_props.kobj_nodes, "%d", id); | |
20eca012 QW |
640 | if (ret < 0) { |
641 | kobject_put(dev->kobj_node); | |
5b5c4e40 | 642 | return ret; |
20eca012 | 643 | } |
5b5c4e40 EP |
644 | |
645 | dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); | |
646 | if (!dev->kobj_mem) | |
647 | return -ENOMEM; | |
648 | ||
649 | dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); | |
650 | if (!dev->kobj_cache) | |
651 | return -ENOMEM; | |
652 | ||
653 | dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); | |
654 | if (!dev->kobj_iolink) | |
655 | return -ENOMEM; | |
656 | ||
f4757347 AL |
657 | dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); |
658 | if (!dev->kobj_perf) | |
659 | return -ENOMEM; | |
660 | ||
5b5c4e40 EP |
661 | /* |
662 | * Creating sysfs files for node properties | |
663 | */ | |
664 | dev->attr_gpuid.name = "gpu_id"; | |
665 | dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; | |
666 | sysfs_attr_init(&dev->attr_gpuid); | |
667 | dev->attr_name.name = "name"; | |
668 | dev->attr_name.mode = KFD_SYSFS_FILE_MODE; | |
669 | sysfs_attr_init(&dev->attr_name); | |
670 | dev->attr_props.name = "properties"; | |
671 | dev->attr_props.mode = KFD_SYSFS_FILE_MODE; | |
672 | sysfs_attr_init(&dev->attr_props); | |
673 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); | |
674 | if (ret < 0) | |
675 | return ret; | |
676 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); | |
677 | if (ret < 0) | |
678 | return ret; | |
679 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); | |
680 | if (ret < 0) | |
681 | return ret; | |
682 | ||
683 | i = 0; | |
684 | list_for_each_entry(mem, &dev->mem_props, list) { | |
685 | mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
686 | if (!mem->kobj) | |
687 | return -ENOMEM; | |
688 | ret = kobject_init_and_add(mem->kobj, &mem_type, | |
689 | dev->kobj_mem, "%d", i); | |
20eca012 QW |
690 | if (ret < 0) { |
691 | kobject_put(mem->kobj); | |
5b5c4e40 | 692 | return ret; |
20eca012 | 693 | } |
5b5c4e40 EP |
694 | |
695 | mem->attr.name = "properties"; | |
696 | mem->attr.mode = KFD_SYSFS_FILE_MODE; | |
697 | sysfs_attr_init(&mem->attr); | |
698 | ret = sysfs_create_file(mem->kobj, &mem->attr); | |
699 | if (ret < 0) | |
700 | return ret; | |
701 | i++; | |
702 | } | |
703 | ||
704 | i = 0; | |
705 | list_for_each_entry(cache, &dev->cache_props, list) { | |
706 | cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
707 | if (!cache->kobj) | |
708 | return -ENOMEM; | |
709 | ret = kobject_init_and_add(cache->kobj, &cache_type, | |
710 | dev->kobj_cache, "%d", i); | |
20eca012 QW |
711 | if (ret < 0) { |
712 | kobject_put(cache->kobj); | |
5b5c4e40 | 713 | return ret; |
20eca012 | 714 | } |
5b5c4e40 EP |
715 | |
716 | cache->attr.name = "properties"; | |
717 | cache->attr.mode = KFD_SYSFS_FILE_MODE; | |
718 | sysfs_attr_init(&cache->attr); | |
719 | ret = sysfs_create_file(cache->kobj, &cache->attr); | |
720 | if (ret < 0) | |
721 | return ret; | |
722 | i++; | |
723 | } | |
724 | ||
725 | i = 0; | |
726 | list_for_each_entry(iolink, &dev->io_link_props, list) { | |
727 | iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | |
728 | if (!iolink->kobj) | |
729 | return -ENOMEM; | |
730 | ret = kobject_init_and_add(iolink->kobj, &iolink_type, | |
731 | dev->kobj_iolink, "%d", i); | |
20eca012 QW |
732 | if (ret < 0) { |
733 | kobject_put(iolink->kobj); | |
5b5c4e40 | 734 | return ret; |
20eca012 | 735 | } |
5b5c4e40 EP |
736 | |
737 | iolink->attr.name = "properties"; | |
738 | iolink->attr.mode = KFD_SYSFS_FILE_MODE; | |
739 | sysfs_attr_init(&iolink->attr); | |
740 | ret = sysfs_create_file(iolink->kobj, &iolink->attr); | |
741 | if (ret < 0) | |
742 | return ret; | |
743 | i++; | |
f4757347 AL |
744 | } |
745 | ||
746 | /* All hardware blocks have the same number of attributes. */ | |
3f866f5f | 747 | num_attrs = ARRAY_SIZE(perf_attr_iommu); |
f4757347 AL |
748 | list_for_each_entry(perf, &dev->perf_props, list) { |
749 | perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) | |
750 | * num_attrs + sizeof(struct attribute_group), | |
751 | GFP_KERNEL); | |
752 | if (!perf->attr_group) | |
753 | return -ENOMEM; | |
754 | ||
755 | attrs = (struct attribute **)(perf->attr_group + 1); | |
756 | if (!strcmp(perf->block_name, "iommu")) { | |
757 | /* Information of IOMMU's num_counters and counter_ids is shown | |
758 | * under /sys/bus/event_source/devices/amd_iommu. We don't | |
759 | * duplicate here. | |
760 | */ | |
761 | perf_attr_iommu[0].data = perf->max_concurrent; | |
762 | for (i = 0; i < num_attrs; i++) | |
763 | attrs[i] = &perf_attr_iommu[i].attr.attr; | |
764 | } | |
765 | perf->attr_group->name = perf->block_name; | |
766 | perf->attr_group->attrs = attrs; | |
767 | ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); | |
768 | if (ret < 0) | |
769 | return ret; | |
770 | } | |
5b5c4e40 EP |
771 | |
772 | return 0; | |
773 | } | |
774 | ||
3a87177e | 775 | /* Called with write topology lock acquired */ |
5b5c4e40 EP |
776 | static int kfd_build_sysfs_node_tree(void) |
777 | { | |
778 | struct kfd_topology_device *dev; | |
779 | int ret; | |
780 | uint32_t i = 0; | |
781 | ||
782 | list_for_each_entry(dev, &topology_device_list, list) { | |
8dfead6c | 783 | ret = kfd_build_sysfs_node_entry(dev, i); |
5b5c4e40 EP |
784 | if (ret < 0) |
785 | return ret; | |
786 | i++; | |
787 | } | |
788 | ||
789 | return 0; | |
790 | } | |
791 | ||
3a87177e | 792 | /* Called with write topology lock acquired */ |
5b5c4e40 EP |
793 | static void kfd_remove_sysfs_node_tree(void) |
794 | { | |
795 | struct kfd_topology_device *dev; | |
796 | ||
797 | list_for_each_entry(dev, &topology_device_list, list) | |
798 | kfd_remove_sysfs_node_entry(dev); | |
799 | } | |
800 | ||
801 | static int kfd_topology_update_sysfs(void) | |
802 | { | |
803 | int ret; | |
804 | ||
4eacc26b | 805 | if (!sys_props.kobj_topology) { |
5b5c4e40 EP |
806 | sys_props.kobj_topology = |
807 | kfd_alloc_struct(sys_props.kobj_topology); | |
808 | if (!sys_props.kobj_topology) | |
809 | return -ENOMEM; | |
810 | ||
811 | ret = kobject_init_and_add(sys_props.kobj_topology, | |
812 | &sysprops_type, &kfd_device->kobj, | |
813 | "topology"); | |
20eca012 QW |
814 | if (ret < 0) { |
815 | kobject_put(sys_props.kobj_topology); | |
5b5c4e40 | 816 | return ret; |
20eca012 | 817 | } |
5b5c4e40 EP |
818 | |
819 | sys_props.kobj_nodes = kobject_create_and_add("nodes", | |
820 | sys_props.kobj_topology); | |
821 | if (!sys_props.kobj_nodes) | |
822 | return -ENOMEM; | |
823 | ||
824 | sys_props.attr_genid.name = "generation_id"; | |
825 | sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; | |
826 | sysfs_attr_init(&sys_props.attr_genid); | |
827 | ret = sysfs_create_file(sys_props.kobj_topology, | |
828 | &sys_props.attr_genid); | |
829 | if (ret < 0) | |
830 | return ret; | |
831 | ||
832 | sys_props.attr_props.name = "system_properties"; | |
833 | sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; | |
834 | sysfs_attr_init(&sys_props.attr_props); | |
835 | ret = sysfs_create_file(sys_props.kobj_topology, | |
836 | &sys_props.attr_props); | |
837 | if (ret < 0) | |
838 | return ret; | |
839 | } | |
840 | ||
841 | kfd_remove_sysfs_node_tree(); | |
842 | ||
843 | return kfd_build_sysfs_node_tree(); | |
844 | } | |
845 | ||
846 | static void kfd_topology_release_sysfs(void) | |
847 | { | |
848 | kfd_remove_sysfs_node_tree(); | |
849 | if (sys_props.kobj_topology) { | |
850 | sysfs_remove_file(sys_props.kobj_topology, | |
851 | &sys_props.attr_genid); | |
852 | sysfs_remove_file(sys_props.kobj_topology, | |
853 | &sys_props.attr_props); | |
854 | if (sys_props.kobj_nodes) { | |
855 | kobject_del(sys_props.kobj_nodes); | |
856 | kobject_put(sys_props.kobj_nodes); | |
16b9201c | 857 | sys_props.kobj_nodes = NULL; |
5b5c4e40 EP |
858 | } |
859 | kobject_del(sys_props.kobj_topology); | |
860 | kobject_put(sys_props.kobj_topology); | |
16b9201c | 861 | sys_props.kobj_topology = NULL; |
5b5c4e40 EP |
862 | } |
863 | } | |
864 | ||
4f449311 HK |
865 | /* Called with write topology_lock acquired */ |
866 | static void kfd_topology_update_device_list(struct list_head *temp_list, | |
867 | struct list_head *master_list) | |
868 | { | |
869 | while (!list_empty(temp_list)) { | |
870 | list_move_tail(temp_list->next, master_list); | |
871 | sys_props.num_devices++; | |
872 | } | |
873 | } | |
874 | ||
520b8fb7 FK |
875 | static void kfd_debug_print_topology(void) |
876 | { | |
877 | struct kfd_topology_device *dev; | |
878 | ||
879 | down_read(&topology_lock); | |
880 | ||
881 | dev = list_last_entry(&topology_device_list, | |
882 | struct kfd_topology_device, list); | |
883 | if (dev) { | |
884 | if (dev->node_props.cpu_cores_count && | |
885 | dev->node_props.simd_count) { | |
886 | pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", | |
887 | dev->node_props.device_id, | |
888 | dev->node_props.vendor_id); | |
889 | } else if (dev->node_props.cpu_cores_count) | |
890 | pr_info("Topology: Add CPU node\n"); | |
891 | else if (dev->node_props.simd_count) | |
892 | pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", | |
893 | dev->node_props.device_id, | |
894 | dev->node_props.vendor_id); | |
895 | } | |
896 | up_read(&topology_lock); | |
897 | } | |
898 | ||
899 | /* Helper function for intializing platform_xx members of | |
900 | * kfd_system_properties. Uses OEM info from the last CPU/APU node. | |
901 | */ | |
902 | static void kfd_update_system_properties(void) | |
903 | { | |
904 | struct kfd_topology_device *dev; | |
905 | ||
906 | down_read(&topology_lock); | |
907 | dev = list_last_entry(&topology_device_list, | |
908 | struct kfd_topology_device, list); | |
909 | if (dev) { | |
910 | sys_props.platform_id = | |
911 | (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; | |
912 | sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); | |
913 | sys_props.platform_rev = dev->oem_revision; | |
914 | } | |
915 | up_read(&topology_lock); | |
916 | } | |
917 | ||
918 | static void find_system_memory(const struct dmi_header *dm, | |
919 | void *private) | |
920 | { | |
921 | struct kfd_mem_properties *mem; | |
922 | u16 mem_width, mem_clock; | |
923 | struct kfd_topology_device *kdev = | |
924 | (struct kfd_topology_device *)private; | |
925 | const u8 *dmi_data = (const u8 *)(dm + 1); | |
926 | ||
927 | if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { | |
928 | mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); | |
929 | mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); | |
930 | list_for_each_entry(mem, &kdev->mem_props, list) { | |
931 | if (mem_width != 0xFFFF && mem_width != 0) | |
932 | mem->width = mem_width; | |
933 | if (mem_clock != 0) | |
934 | mem->mem_clk_max = mem_clock; | |
935 | } | |
936 | } | |
937 | } | |
f4757347 AL |
938 | |
/*
 * Performance counter information is not part of CRAT, but it is exposed in
 * sysfs under the topology directory so that the Thunk can read it.
 * This function is called before updating the sysfs.
 *
 * Returns whatever kfd_iommu_add_perf_counters() returns for @kdev.
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
	int ret;

	/* IOMMU counters are the only counters supported so far */
	ret = kfd_iommu_add_perf_counters(kdev);

	return ret;
}
949 | ||
520b8fb7 FK |
950 | /* kfd_add_non_crat_information - Add information that is not currently |
951 | * defined in CRAT but is necessary for KFD topology | |
952 | * @dev - topology device to which addition info is added | |
953 | */ | |
954 | static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) | |
955 | { | |
956 | /* Check if CPU only node. */ | |
957 | if (!kdev->gpu) { | |
958 | /* Add system memory information */ | |
959 | dmi_walk(find_system_memory, kdev); | |
960 | } | |
961 | /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ | |
962 | } | |
963 | ||
b441093e HK |
964 | /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. |
965 | * Ignore CRAT for all other devices. AMD APU is identified if both CPU | |
966 | * and GPU cores are present. | |
967 | * @device_list - topology device list created by parsing ACPI CRAT table. | |
968 | * @return - TRUE if invalid, FALSE is valid. | |
969 | */ | |
970 | static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) | |
971 | { | |
972 | struct kfd_topology_device *dev; | |
973 | ||
974 | list_for_each_entry(dev, device_list, list) { | |
975 | if (dev->node_props.cpu_cores_count && | |
976 | dev->node_props.simd_count) | |
977 | return false; | |
978 | } | |
979 | pr_info("Ignoring ACPI CRAT on non-APU system\n"); | |
980 | return true; | |
981 | } | |
982 | ||
5b5c4e40 EP |
983 | int kfd_topology_init(void) |
984 | { | |
16b9201c | 985 | void *crat_image = NULL; |
5b5c4e40 EP |
986 | size_t image_size = 0; |
987 | int ret; | |
4f449311 | 988 | struct list_head temp_topology_device_list; |
520b8fb7 FK |
989 | int cpu_only_node = 0; |
990 | struct kfd_topology_device *kdev; | |
991 | int proximity_domain; | |
5b5c4e40 | 992 | |
4f449311 HK |
993 | /* topology_device_list - Master list of all topology devices |
994 | * temp_topology_device_list - temporary list created while parsing CRAT | |
995 | * or VCRAT. Once parsing is complete the contents of list is moved to | |
996 | * topology_device_list | |
5b5c4e40 | 997 | */ |
4f449311 HK |
998 | |
999 | /* Initialize the head for the both the lists */ | |
5b5c4e40 | 1000 | INIT_LIST_HEAD(&topology_device_list); |
4f449311 | 1001 | INIT_LIST_HEAD(&temp_topology_device_list); |
5b5c4e40 | 1002 | init_rwsem(&topology_lock); |
5b5c4e40 EP |
1003 | |
1004 | memset(&sys_props, 0, sizeof(sys_props)); | |
1005 | ||
520b8fb7 FK |
1006 | /* Proximity domains in ACPI CRAT tables start counting at |
1007 | * 0. The same should be true for virtual CRAT tables created | |
1008 | * at this stage. GPUs added later in kfd_topology_add_device | |
1009 | * use a counter. | |
1010 | */ | |
1011 | proximity_domain = 0; | |
1012 | ||
5b5c4e40 | 1013 | /* |
520b8fb7 | 1014 | * Get the CRAT image from the ACPI. If ACPI doesn't have one |
b441093e | 1015 | * or if ACPI CRAT is invalid create a virtual CRAT. |
520b8fb7 FK |
1016 | * NOTE: The current implementation expects all AMD APUs to have |
1017 | * CRAT. If no CRAT is available, it is assumed to be a CPU | |
5b5c4e40 | 1018 | */ |
8e05247d HK |
1019 | ret = kfd_create_crat_image_acpi(&crat_image, &image_size); |
1020 | if (!ret) { | |
4f449311 | 1021 | ret = kfd_parse_crat_table(crat_image, |
520b8fb7 FK |
1022 | &temp_topology_device_list, |
1023 | proximity_domain); | |
b441093e HK |
1024 | if (ret || |
1025 | kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { | |
520b8fb7 FK |
1026 | kfd_release_topology_device_list( |
1027 | &temp_topology_device_list); | |
1028 | kfd_destroy_crat_image(crat_image); | |
1029 | crat_image = NULL; | |
1030 | } | |
1031 | } | |
1032 | ||
1033 | if (!crat_image) { | |
1034 | ret = kfd_create_crat_image_virtual(&crat_image, &image_size, | |
1035 | COMPUTE_UNIT_CPU, NULL, | |
1036 | proximity_domain); | |
1037 | cpu_only_node = 1; | |
1038 | if (ret) { | |
1039 | pr_err("Error creating VCRAT table for CPU\n"); | |
1040 | return ret; | |
1041 | } | |
1042 | ||
1043 | ret = kfd_parse_crat_table(crat_image, | |
1044 | &temp_topology_device_list, | |
1045 | proximity_domain); | |
1046 | if (ret) { | |
1047 | pr_err("Error parsing VCRAT table for CPU\n"); | |
5b5c4e40 | 1048 | goto err; |
520b8fb7 | 1049 | } |
5b5c4e40 EP |
1050 | } |
1051 | ||
f4757347 AL |
1052 | kdev = list_first_entry(&temp_topology_device_list, |
1053 | struct kfd_topology_device, list); | |
1054 | kfd_add_perf_to_topology(kdev); | |
1055 | ||
8e05247d | 1056 | down_write(&topology_lock); |
4f449311 HK |
1057 | kfd_topology_update_device_list(&temp_topology_device_list, |
1058 | &topology_device_list); | |
520b8fb7 | 1059 | atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); |
8e05247d HK |
1060 | ret = kfd_topology_update_sysfs(); |
1061 | up_write(&topology_lock); | |
1062 | ||
4f449311 HK |
1063 | if (!ret) { |
1064 | sys_props.generation_count++; | |
520b8fb7 FK |
1065 | kfd_update_system_properties(); |
1066 | kfd_debug_print_topology(); | |
4f449311 | 1067 | } else |
8e05247d HK |
1068 | pr_err("Failed to update topology in sysfs ret=%d\n", ret); |
1069 | ||
520b8fb7 FK |
1070 | /* For nodes with GPU, this information gets added |
1071 | * when GPU is detected (kfd_topology_add_device). | |
1072 | */ | |
1073 | if (cpu_only_node) { | |
1074 | /* Add additional information to CPU only node created above */ | |
1075 | down_write(&topology_lock); | |
1076 | kdev = list_first_entry(&topology_device_list, | |
1077 | struct kfd_topology_device, list); | |
1078 | up_write(&topology_lock); | |
1079 | kfd_add_non_crat_information(kdev); | |
1080 | } | |
1081 | ||
5b5c4e40 | 1082 | err: |
8e05247d | 1083 | kfd_destroy_crat_image(crat_image); |
5b5c4e40 EP |
1084 | return ret; |
1085 | } | |
1086 | ||
1087 | void kfd_topology_shutdown(void) | |
1088 | { | |
4f449311 | 1089 | down_write(&topology_lock); |
5b5c4e40 EP |
1090 | kfd_topology_release_sysfs(); |
1091 | kfd_release_live_view(); | |
4f449311 | 1092 | up_write(&topology_lock); |
5b5c4e40 EP |
1093 | } |
1094 | ||
5b5c4e40 EP |
1095 | static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) |
1096 | { | |
1097 | uint32_t hashout; | |
1098 | uint32_t buf[7]; | |
585f0e6c | 1099 | uint64_t local_mem_size; |
5b5c4e40 | 1100 | int i; |
0504cccf | 1101 | struct kfd_local_mem_info local_mem_info; |
5b5c4e40 EP |
1102 | |
1103 | if (!gpu) | |
1104 | return 0; | |
1105 | ||
7cd52c91 | 1106 | amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); |
0504cccf HK |
1107 | |
1108 | local_mem_size = local_mem_info.local_mem_size_private + | |
1109 | local_mem_info.local_mem_size_public; | |
585f0e6c | 1110 | |
5b5c4e40 | 1111 | buf[0] = gpu->pdev->devfn; |
46096058 AL |
1112 | buf[1] = gpu->pdev->subsystem_vendor | |
1113 | (gpu->pdev->subsystem_device << 16); | |
1114 | buf[2] = pci_domain_nr(gpu->pdev->bus); | |
5b5c4e40 EP |
1115 | buf[3] = gpu->pdev->device; |
1116 | buf[4] = gpu->pdev->bus->number; | |
585f0e6c EC |
1117 | buf[5] = lower_32_bits(local_mem_size); |
1118 | buf[6] = upper_32_bits(local_mem_size); | |
5b5c4e40 EP |
1119 | |
1120 | for (i = 0, hashout = 0; i < 7; i++) | |
1121 | hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); | |
1122 | ||
1123 | return hashout; | |
1124 | } | |
3a87177e HK |
1125 | /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If |
1126 | * the GPU device is not already present in the topology device | |
1127 | * list then return NULL. This means a new topology device has to | |
1128 | * be created for this GPU. | |
3a87177e | 1129 | */ |
5b5c4e40 EP |
1130 | static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) |
1131 | { | |
1132 | struct kfd_topology_device *dev; | |
16b9201c | 1133 | struct kfd_topology_device *out_dev = NULL; |
171bc67e HK |
1134 | struct kfd_mem_properties *mem; |
1135 | struct kfd_cache_properties *cache; | |
1136 | struct kfd_iolink_properties *iolink; | |
5b5c4e40 | 1137 | |
3a87177e | 1138 | down_write(&topology_lock); |
b8fe0524 FK |
1139 | list_for_each_entry(dev, &topology_device_list, list) { |
1140 | /* Discrete GPUs need their own topology device list | |
1141 | * entries. Don't assign them to CPU/APU nodes. | |
1142 | */ | |
6127896f | 1143 | if (!gpu->use_iommu_v2 && |
b8fe0524 FK |
1144 | dev->node_props.cpu_cores_count) |
1145 | continue; | |
1146 | ||
4eacc26b | 1147 | if (!dev->gpu && (dev->node_props.simd_count > 0)) { |
5b5c4e40 EP |
1148 | dev->gpu = gpu; |
1149 | out_dev = dev; | |
171bc67e HK |
1150 | |
1151 | list_for_each_entry(mem, &dev->mem_props, list) | |
1152 | mem->gpu = dev->gpu; | |
1153 | list_for_each_entry(cache, &dev->cache_props, list) | |
1154 | cache->gpu = dev->gpu; | |
1155 | list_for_each_entry(iolink, &dev->io_link_props, list) | |
1156 | iolink->gpu = dev->gpu; | |
5b5c4e40 EP |
1157 | break; |
1158 | } | |
b8fe0524 | 1159 | } |
3a87177e | 1160 | up_write(&topology_lock); |
5b5c4e40 EP |
1161 | return out_dev; |
1162 | } | |
1163 | ||
/* kfd_notify_gpu_change - Placeholder for notifying user space of a GPU
 *	arriving or leaving. Currently does nothing.
 * @gpu_id - id of the GPU that changed
 * @arrival - callers in this file pass 1 after adding and 0 after removing
 */
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/*
	 * TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
1171 | ||
3a87177e HK |
1172 | /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, |
1173 | * patch this after CRAT parsing. | |
1174 | */ | |
1175 | static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) | |
1176 | { | |
1177 | struct kfd_mem_properties *mem; | |
1178 | struct kfd_local_mem_info local_mem_info; | |
1179 | ||
1180 | if (!dev) | |
1181 | return; | |
1182 | ||
1183 | /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with | |
1184 | * single bank of VRAM local memory. | |
1185 | * for dGPUs - VCRAT reports only one bank of Local Memory | |
1186 | * for APUs - If CRAT from ACPI reports more than one bank, then | |
1187 | * all the banks will report the same mem_clk_max information | |
1188 | */ | |
7cd52c91 | 1189 | amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); |
3a87177e HK |
1190 | |
1191 | list_for_each_entry(mem, &dev->mem_props, list) | |
1192 | mem->mem_clk_max = local_mem_info.mem_clk_max; | |
1193 | } | |
1194 | ||
bdd24657 JK |
1195 | static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, |
1196 | struct kfd_topology_device *target_gpu_dev, | |
1197 | struct kfd_iolink_properties *link) | |
3a87177e | 1198 | { |
bdd24657 JK |
1199 | /* xgmi always supports atomics between links. */ |
1200 | if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) | |
3a87177e HK |
1201 | return; |
1202 | ||
bdd24657 JK |
1203 | /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */ |
1204 | if (target_gpu_dev) { | |
1205 | uint32_t cap; | |
1206 | ||
1207 | pcie_capability_read_dword(target_gpu_dev->gpu->pdev, | |
deb68983 | 1208 | PCI_EXP_DEVCAP2, &cap); |
d35f00d8 | 1209 | |
deb68983 JK |
1210 | if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | |
1211 | PCI_EXP_DEVCAP2_ATOMIC_COMP64))) | |
bdd24657 | 1212 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
deb68983 | 1213 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
bdd24657 JK |
1214 | /* set gpu (dev) flags. */ |
1215 | } else { | |
deb68983 JK |
1216 | if (!dev->gpu->pci_atomic_requested || |
1217 | dev->gpu->device_info->asic_family == | |
1218 | CHIP_HAWAII) | |
bdd24657 | 1219 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
deb68983 JK |
1220 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1221 | } | |
bdd24657 JK |
1222 | } |
1223 | ||
1224 | static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) | |
1225 | { | |
1226 | struct kfd_iolink_properties *link, *inbound_link; | |
1227 | struct kfd_topology_device *peer_dev; | |
1228 | ||
1229 | if (!dev || !dev->gpu) | |
1230 | return; | |
d35f00d8 EH |
1231 | |
1232 | /* GPU only creates direct links so apply flags setting to all */ | |
1233 | list_for_each_entry(link, &dev->io_link_props, list) { | |
bdd24657 JK |
1234 | link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1235 | kfd_set_iolink_no_atomics(dev, NULL, link); | |
1236 | peer_dev = kfd_topology_device_by_proximity_domain( | |
d35f00d8 | 1237 | link->node_to); |
bdd24657 JK |
1238 | |
1239 | if (!peer_dev) | |
1240 | continue; | |
1241 | ||
1242 | list_for_each_entry(inbound_link, &peer_dev->io_link_props, | |
1243 | list) { | |
1244 | if (inbound_link->node_to != link->node_from) | |
1245 | continue; | |
1246 | ||
1247 | inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; | |
1248 | kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); | |
d35f00d8 EH |
1249 | } |
1250 | } | |
3a87177e HK |
1251 | } |
1252 | ||
5b5c4e40 EP |
1253 | int kfd_topology_add_device(struct kfd_dev *gpu) |
1254 | { | |
1255 | uint32_t gpu_id; | |
1256 | struct kfd_topology_device *dev; | |
f7ce2fad | 1257 | struct kfd_cu_info cu_info; |
4f449311 HK |
1258 | int res = 0; |
1259 | struct list_head temp_topology_device_list; | |
3a87177e HK |
1260 | void *crat_image = NULL; |
1261 | size_t image_size = 0; | |
1262 | int proximity_domain; | |
5436ab94 | 1263 | struct amdgpu_device *adev; |
4f449311 HK |
1264 | |
1265 | INIT_LIST_HEAD(&temp_topology_device_list); | |
5b5c4e40 | 1266 | |
5b5c4e40 EP |
1267 | gpu_id = kfd_generate_gpu_id(gpu); |
1268 | ||
79775b62 | 1269 | pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); |
5b5c4e40 | 1270 | |
3a87177e HK |
1271 | proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); |
1272 | ||
1273 | /* Check to see if this gpu device exists in the topology_device_list. | |
1274 | * If so, assign the gpu to that device, | |
1275 | * else create a Virtual CRAT for this gpu device and then parse that | |
1276 | * CRAT to create a new topology device. Once created assign the gpu to | |
1277 | * that topology device | |
5b5c4e40 EP |
1278 | */ |
1279 | dev = kfd_assign_gpu(gpu); | |
1280 | if (!dev) { | |
3a87177e HK |
1281 | res = kfd_create_crat_image_virtual(&crat_image, &image_size, |
1282 | COMPUTE_UNIT_GPU, gpu, | |
1283 | proximity_domain); | |
1284 | if (res) { | |
1285 | pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", | |
1286 | gpu_id); | |
1287 | return res; | |
1288 | } | |
1289 | res = kfd_parse_crat_table(crat_image, | |
1290 | &temp_topology_device_list, | |
1291 | proximity_domain); | |
1292 | if (res) { | |
1293 | pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", | |
1294 | gpu_id); | |
5b5c4e40 EP |
1295 | goto err; |
1296 | } | |
4f449311 | 1297 | |
4f449311 HK |
1298 | down_write(&topology_lock); |
1299 | kfd_topology_update_device_list(&temp_topology_device_list, | |
1300 | &topology_device_list); | |
1301 | ||
8eabaf54 KR |
1302 | /* Update the SYSFS tree, since we added another topology |
1303 | * device | |
5b5c4e40 | 1304 | */ |
3a87177e | 1305 | res = kfd_topology_update_sysfs(); |
4f449311 HK |
1306 | up_write(&topology_lock); |
1307 | ||
3a87177e HK |
1308 | if (!res) |
1309 | sys_props.generation_count++; | |
1310 | else | |
1311 | pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", | |
1312 | gpu_id, res); | |
1313 | dev = kfd_assign_gpu(gpu); | |
1314 | if (WARN_ON(!dev)) { | |
1315 | res = -ENODEV; | |
1316 | goto err; | |
1317 | } | |
5b5c4e40 EP |
1318 | } |
1319 | ||
1320 | dev->gpu_id = gpu_id; | |
1321 | gpu->id = gpu_id; | |
3a87177e HK |
1322 | |
1323 | /* TODO: Move the following lines to function | |
1324 | * kfd_add_non_crat_information | |
1325 | */ | |
1326 | ||
1327 | /* Fill-in additional information that is not available in CRAT but | |
1328 | * needed for the topology | |
1329 | */ | |
1330 | ||
7cd52c91 | 1331 | amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); |
c181159a YZ |
1332 | |
1333 | strncpy(dev->node_props.name, gpu->device_info->asic_name, | |
1334 | KFD_TOPOLOGY_PUBLIC_NAME_SIZE); | |
1335 | ||
3a87177e HK |
1336 | dev->node_props.simd_arrays_per_engine = |
1337 | cu_info.num_shader_arrays_per_engine; | |
1338 | ||
5b5c4e40 EP |
1339 | dev->node_props.vendor_id = gpu->pdev->vendor; |
1340 | dev->node_props.device_id = gpu->pdev->device; | |
c6d1ec41 JG |
1341 | dev->node_props.capability |= |
1342 | ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << | |
1343 | HSA_CAP_ASIC_REVISION_SHIFT) & | |
1344 | HSA_CAP_ASIC_REVISION_MASK); | |
babe2ef3 | 1345 | dev->node_props.location_id = pci_dev_id(gpu->pdev); |
3e58e95a | 1346 | dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); |
3a87177e | 1347 | dev->node_props.max_engine_clk_fcompute = |
7cd52c91 | 1348 | amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); |
3a87177e HK |
1349 | dev->node_props.max_engine_clk_ccompute = |
1350 | cpufreq_quick_get_max(0) / 1000; | |
7c9b7171 OZ |
1351 | dev->node_props.drm_render_minor = |
1352 | gpu->shared_resources.drm_render_minor; | |
3a87177e | 1353 | |
0c1690e3 | 1354 | dev->node_props.hive_id = gpu->hive_id; |
14568cf6 OZ |
1355 | dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; |
1356 | dev->node_props.num_sdma_xgmi_engines = | |
1357 | gpu->device_info->num_xgmi_sdma_engines; | |
bb71c74d HR |
1358 | dev->node_props.num_sdma_queues_per_engine = |
1359 | gpu->device_info->num_sdma_queues_per_engine; | |
29633d0e | 1360 | dev->node_props.num_gws = (dev->gpu->gws && |
29e76462 OZ |
1361 | dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? |
1362 | amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; | |
e6945304 | 1363 | dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); |
0c1690e3 | 1364 | |
3a87177e HK |
1365 | kfd_fill_mem_clk_max_info(dev); |
1366 | kfd_fill_iolink_non_crat_info(dev); | |
1367 | ||
1368 | switch (dev->gpu->device_info->asic_family) { | |
1369 | case CHIP_KAVERI: | |
1370 | case CHIP_HAWAII: | |
1371 | case CHIP_TONGA: | |
1372 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << | |
1373 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
1374 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
1375 | break; | |
1376 | case CHIP_CARRIZO: | |
1377 | case CHIP_FIJI: | |
1378 | case CHIP_POLARIS10: | |
1379 | case CHIP_POLARIS11: | |
846a44d7 | 1380 | case CHIP_POLARIS12: |
ed81cd6e | 1381 | case CHIP_VEGAM: |
42aa8793 | 1382 | pr_debug("Adding doorbell packet type capability\n"); |
3a87177e HK |
1383 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << |
1384 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
1385 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
1386 | break; | |
389056e5 | 1387 | case CHIP_VEGA10: |
846a44d7 | 1388 | case CHIP_VEGA12: |
22a3a294 | 1389 | case CHIP_VEGA20: |
389056e5 | 1390 | case CHIP_RAVEN: |
f5d843d4 | 1391 | case CHIP_RENOIR: |
49adcf8a | 1392 | case CHIP_ARCTURUS: |
36e22d59 | 1393 | case CHIP_ALDEBARAN: |
14328aa5 | 1394 | case CHIP_NAVI10: |
0e94b564 | 1395 | case CHIP_NAVI12: |
8099ae40 | 1396 | case CHIP_NAVI14: |
3a2f0c81 | 1397 | case CHIP_SIENNA_CICHLID: |
de89b2e4 | 1398 | case CHIP_NAVY_FLOUNDER: |
3a5e715d | 1399 | case CHIP_VANGOGH: |
eb5a34d4 | 1400 | case CHIP_DIMGREY_CAVEFISH: |
389056e5 FK |
1401 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << |
1402 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & | |
1403 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); | |
1404 | break; | |
3a87177e HK |
1405 | default: |
1406 | WARN(1, "Unexpected ASIC family %u", | |
1407 | dev->gpu->device_info->asic_family); | |
7639a8c4 BG |
1408 | } |
1409 | ||
1ae99eab OZ |
1410 | /* |
1411 | * Overwrite ATS capability according to needs_iommu_device to fix | |
1412 | * potential missing corresponding bit in CRAT of BIOS. | |
1413 | */ | |
6127896f | 1414 | if (dev->gpu->use_iommu_v2) |
1ae99eab OZ |
1415 | dev->node_props.capability |= HSA_CAP_ATS_PRESENT; |
1416 | else | |
1417 | dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; | |
1418 | ||
3a87177e HK |
1419 | /* Fix errors in CZ CRAT. |
1420 | * simd_count: Carrizo CRAT reports wrong simd_count, probably | |
1421 | * because it doesn't consider masked out CUs | |
70f372bf | 1422 | * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd |
3a87177e | 1423 | */ |
70f372bf | 1424 | if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { |
3a87177e HK |
1425 | dev->node_props.simd_count = |
1426 | cu_info.simd_per_cu * cu_info.cu_active_number; | |
70f372bf | 1427 | dev->node_props.max_waves_per_simd = 10; |
70f372bf | 1428 | } |
3a87177e | 1429 | |
5436ab94 SY |
1430 | adev = (struct amdgpu_device *)(dev->gpu->kgd); |
1431 | /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ | |
1432 | dev->node_props.capability |= | |
8ab0d6f0 | 1433 | ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? |
5436ab94 | 1434 | HSA_CAP_SRAM_EDCSUPPORTED : 0; |
8ab0d6f0 | 1435 | dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? |
5436ab94 SY |
1436 | HSA_CAP_MEM_EDCSUPPORTED : 0; |
1437 | ||
1438 | if (adev->asic_type != CHIP_VEGA10) | |
8ab0d6f0 | 1439 | dev->node_props.capability |= (adev->ras_enabled != 0) ? |
0dee45a2 | 1440 | HSA_CAP_RASEVENTNOTIFY : 0; |
0dee45a2 | 1441 | |
4c166eb9 PY |
1442 | /* SVM API and HMM page migration work together, device memory type |
1443 | * is initialized to not 0 when page migration register device memory. | |
1444 | */ | |
1445 | if (adev->kfd.dev->pgmap.type != 0) | |
1446 | dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; | |
1447 | ||
3a87177e HK |
1448 | kfd_debug_print_topology(); |
1449 | ||
4f449311 | 1450 | if (!res) |
5b5c4e40 | 1451 | kfd_notify_gpu_change(gpu_id, 1); |
4f449311 | 1452 | err: |
3a87177e | 1453 | kfd_destroy_crat_image(crat_image); |
5b5c4e40 EP |
1454 | return res; |
1455 | } | |
1456 | ||
1457 | int kfd_topology_remove_device(struct kfd_dev *gpu) | |
1458 | { | |
4f449311 | 1459 | struct kfd_topology_device *dev, *tmp; |
5b5c4e40 EP |
1460 | uint32_t gpu_id; |
1461 | int res = -ENODEV; | |
1462 | ||
5b5c4e40 EP |
1463 | down_write(&topology_lock); |
1464 | ||
4f449311 | 1465 | list_for_each_entry_safe(dev, tmp, &topology_device_list, list) |
5b5c4e40 EP |
1466 | if (dev->gpu == gpu) { |
1467 | gpu_id = dev->gpu_id; | |
1468 | kfd_remove_sysfs_node_entry(dev); | |
1469 | kfd_release_topology_device(dev); | |
4f449311 | 1470 | sys_props.num_devices--; |
5b5c4e40 EP |
1471 | res = 0; |
1472 | if (kfd_topology_update_sysfs() < 0) | |
1473 | kfd_topology_release_sysfs(); | |
1474 | break; | |
1475 | } | |
1476 | ||
1477 | up_write(&topology_lock); | |
1478 | ||
174de876 | 1479 | if (!res) |
5b5c4e40 EP |
1480 | kfd_notify_gpu_change(gpu_id, 0); |
1481 | ||
1482 | return res; | |
1483 | } | |
1484 | ||
6d82eb0e HK |
1485 | /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD |
1486 | * topology. If GPU device is found @idx, then valid kfd_dev pointer is | |
1487 | * returned through @kdev | |
1488 | * Return - 0: On success (@kdev will be NULL for non GPU nodes) | |
1489 | * -1: If end of list | |
5b5c4e40 | 1490 | */ |
6d82eb0e | 1491 | int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) |
5b5c4e40 EP |
1492 | { |
1493 | ||
1494 | struct kfd_topology_device *top_dev; | |
5b5c4e40 EP |
1495 | uint8_t device_idx = 0; |
1496 | ||
6d82eb0e | 1497 | *kdev = NULL; |
5b5c4e40 EP |
1498 | down_read(&topology_lock); |
1499 | ||
1500 | list_for_each_entry(top_dev, &topology_device_list, list) { | |
1501 | if (device_idx == idx) { | |
6d82eb0e HK |
1502 | *kdev = top_dev->gpu; |
1503 | up_read(&topology_lock); | |
1504 | return 0; | |
5b5c4e40 EP |
1505 | } |
1506 | ||
1507 | device_idx++; | |
1508 | } | |
1509 | ||
1510 | up_read(&topology_lock); | |
1511 | ||
6d82eb0e | 1512 | return -1; |
5b5c4e40 EP |
1513 | |
1514 | } | |
851a645e | 1515 | |
520b8fb7 FK |
1516 | static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) |
1517 | { | |
520b8fb7 FK |
1518 | int first_cpu_of_numa_node; |
1519 | ||
1520 | if (!cpumask || cpumask == cpu_none_mask) | |
1521 | return -1; | |
1522 | first_cpu_of_numa_node = cpumask_first(cpumask); | |
1523 | if (first_cpu_of_numa_node >= nr_cpu_ids) | |
1524 | return -1; | |
df1dd4f4 FK |
1525 | #ifdef CONFIG_X86_64 |
1526 | return cpu_data(first_cpu_of_numa_node).apicid; | |
1527 | #else | |
1528 | return first_cpu_of_numa_node; | |
1529 | #endif | |
520b8fb7 FK |
1530 | } |
1531 | ||
1532 | /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor | |
1533 | * of the given NUMA node (numa_node_id) | |
1534 | * Return -1 on failure | |
1535 | */ | |
1536 | int kfd_numa_node_to_apic_id(int numa_node_id) | |
1537 | { | |
1538 | if (numa_node_id == -1) { | |
1539 | pr_warn("Invalid NUMA Node. Use online CPU mask\n"); | |
1540 | return kfd_cpumask_to_apic_id(cpu_online_mask); | |
1541 | } | |
1542 | return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); | |
1543 | } | |
1544 | ||
6127896f HR |
1545 | void kfd_double_confirm_iommu_support(struct kfd_dev *gpu) |
1546 | { | |
1547 | struct kfd_topology_device *dev; | |
1548 | ||
1549 | gpu->use_iommu_v2 = false; | |
1550 | ||
1551 | if (!gpu->device_info->needs_iommu_device) | |
1552 | return; | |
1553 | ||
1554 | down_read(&topology_lock); | |
1555 | ||
1556 | /* Only use IOMMUv2 if there is an APU topology node with no GPU | |
1557 | * assigned yet. This GPU will be assigned to it. | |
1558 | */ | |
1559 | list_for_each_entry(dev, &topology_device_list, list) | |
1560 | if (dev->node_props.cpu_cores_count && | |
1561 | dev->node_props.simd_count && | |
1562 | !dev->gpu) | |
1563 | gpu->use_iommu_v2 = true; | |
1564 | ||
1565 | up_read(&topology_lock); | |
1566 | } | |
1567 | ||
851a645e FK |
1568 | #if defined(CONFIG_DEBUG_FS) |
1569 | ||
1570 | int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) | |
1571 | { | |
1572 | struct kfd_topology_device *dev; | |
1573 | unsigned int i = 0; | |
1574 | int r = 0; | |
1575 | ||
1576 | down_read(&topology_lock); | |
1577 | ||
1578 | list_for_each_entry(dev, &topology_device_list, list) { | |
1579 | if (!dev->gpu) { | |
1580 | i++; | |
1581 | continue; | |
1582 | } | |
1583 | ||
1584 | seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); | |
1585 | r = dqm_debugfs_hqds(m, dev->gpu->dqm); | |
1586 | if (r) | |
1587 | break; | |
1588 | } | |
1589 | ||
1590 | up_read(&topology_lock); | |
1591 | ||
1592 | return r; | |
1593 | } | |
1594 | ||
1595 | int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) | |
1596 | { | |
1597 | struct kfd_topology_device *dev; | |
1598 | unsigned int i = 0; | |
1599 | int r = 0; | |
1600 | ||
1601 | down_read(&topology_lock); | |
1602 | ||
1603 | list_for_each_entry(dev, &topology_device_list, list) { | |
1604 | if (!dev->gpu) { | |
1605 | i++; | |
1606 | continue; | |
1607 | } | |
1608 | ||
1609 | seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); | |
1610 | r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); | |
1611 | if (r) | |
1612 | break; | |
1613 | } | |
1614 | ||
1615 | up_read(&topology_lock); | |
1616 | ||
1617 | return r; | |
1618 | } | |
1619 | ||
1620 | #endif |