Commit | Line | Data |
---|---|---|
174de876 FK |
1 | /* |
2 | * Copyright 2015-2017 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
3a87177e HK |
22 | |
23 | #include <linux/pci.h> | |
174de876 FK |
24 | #include <linux/acpi.h> |
25 | #include "kfd_crat.h" | |
520b8fb7 | 26 | #include "kfd_priv.h" |
174de876 | 27 | #include "kfd_topology.h" |
64d1c3a4 | 28 | #include "kfd_iommu.h" |
174de876 | 29 | |
3a87177e HK |
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor ID are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for next GPU
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 *
 *	Return - the processor ID base reserved for the calling GPU
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	/* Keep the ID unsigned: bit 31 is always set, so it does not fit in
	 * a signed int.
	 */
	unsigned int current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
49 | ||
/* One row of the static, per-ASIC GPU cache description tables */
struct kfd_gpu_cache_info {
	/* Size of the cache, copied verbatim into the CRAT cache entry */
	uint32_t	cache_size;
	/* Level of the cache, copied verbatim into the CRAT cache entry */
	uint32_t	cache_level;
	/* CRAT_CACHE_FLAGS_* bits describing the cache */
	uint32_t	flags;
	/* Number of Compute Units sharing this cache.
	 * A value of 1 means the cache is private to a single CU.
	 */
	uint32_t	num_cu_shared;
};
60 | ||
61 | static struct kfd_gpu_cache_info kaveri_cache_info[] = { | |
62 | { | |
63 | /* TCP L1 Cache per CU */ | |
64 | .cache_size = 16, | |
65 | .cache_level = 1, | |
66 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
67 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
68 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
69 | .num_cu_shared = 1, | |
70 | ||
71 | }, | |
72 | { | |
73 | /* Scalar L1 Instruction Cache (in SQC module) per bank */ | |
74 | .cache_size = 16, | |
75 | .cache_level = 1, | |
76 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
77 | CRAT_CACHE_FLAGS_INST_CACHE | | |
78 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
79 | .num_cu_shared = 2, | |
80 | }, | |
81 | { | |
82 | /* Scalar L1 Data Cache (in SQC module) per bank */ | |
83 | .cache_size = 8, | |
84 | .cache_level = 1, | |
85 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
86 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
87 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
88 | .num_cu_shared = 2, | |
89 | }, | |
90 | ||
91 | /* TODO: Add L2 Cache information */ | |
92 | }; | |
93 | ||
94 | ||
95 | static struct kfd_gpu_cache_info carrizo_cache_info[] = { | |
96 | { | |
97 | /* TCP L1 Cache per CU */ | |
98 | .cache_size = 16, | |
99 | .cache_level = 1, | |
100 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
101 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
102 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
103 | .num_cu_shared = 1, | |
104 | }, | |
105 | { | |
106 | /* Scalar L1 Instruction Cache (in SQC module) per bank */ | |
107 | .cache_size = 8, | |
108 | .cache_level = 1, | |
109 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
110 | CRAT_CACHE_FLAGS_INST_CACHE | | |
111 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
112 | .num_cu_shared = 4, | |
113 | }, | |
114 | { | |
115 | /* Scalar L1 Data Cache (in SQC module) per bank. */ | |
116 | .cache_size = 4, | |
117 | .cache_level = 1, | |
118 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
119 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
120 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
121 | .num_cu_shared = 4, | |
122 | }, | |
123 | ||
124 | /* TODO: Add L2 Cache information */ | |
125 | }; | |
126 | ||
/* The ASICs below reuse an existing cache table instead of carrying their
 * own copy.
 * NOTE: If struct kfd_gpu_cache_info grows more fields in the future, these
 * ASICs may need separate tables.
 */
#define hawaii_cache_info	kaveri_cache_info
#define tonga_cache_info	carrizo_cache_info
#define fiji_cache_info		carrizo_cache_info
#define polaris10_cache_info	carrizo_cache_info
#define polaris11_cache_info	carrizo_cache_info
135 | ||
174de876 FK |
136 | static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, |
137 | struct crat_subtype_computeunit *cu) | |
138 | { | |
139 | dev->node_props.cpu_cores_count = cu->num_cpu_cores; | |
140 | dev->node_props.cpu_core_id_base = cu->processor_id_low; | |
141 | if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) | |
142 | dev->node_props.capability |= HSA_CAP_ATS_PRESENT; | |
143 | ||
42aa8793 | 144 | pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, |
174de876 FK |
145 | cu->processor_id_low); |
146 | } | |
147 | ||
148 | static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, | |
149 | struct crat_subtype_computeunit *cu) | |
150 | { | |
151 | dev->node_props.simd_id_base = cu->processor_id_low; | |
152 | dev->node_props.simd_count = cu->num_simd_cores; | |
153 | dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; | |
154 | dev->node_props.max_waves_per_simd = cu->max_waves_simd; | |
155 | dev->node_props.wave_front_size = cu->wave_front_size; | |
3a87177e | 156 | dev->node_props.array_count = cu->array_count; |
174de876 FK |
157 | dev->node_props.cu_per_simd_array = cu->num_cu_per_array; |
158 | dev->node_props.simd_per_cu = cu->num_simd_per_cu; | |
159 | dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; | |
160 | if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) | |
161 | dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; | |
42aa8793 | 162 | pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); |
174de876 FK |
163 | } |
164 | ||
4f449311 HK |
165 | /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct |
166 | * topology device present in the device_list | |
167 | */ | |
168 | static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, | |
169 | struct list_head *device_list) | |
174de876 FK |
170 | { |
171 | struct kfd_topology_device *dev; | |
174de876 | 172 | |
42aa8793 | 173 | pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", |
174de876 | 174 | cu->proximity_domain, cu->hsa_capability); |
4f449311 HK |
175 | list_for_each_entry(dev, device_list, list) { |
176 | if (cu->proximity_domain == dev->proximity_domain) { | |
174de876 FK |
177 | if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) |
178 | kfd_populated_cu_info_cpu(dev, cu); | |
179 | ||
180 | if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) | |
181 | kfd_populated_cu_info_gpu(dev, cu); | |
182 | break; | |
183 | } | |
174de876 FK |
184 | } |
185 | ||
186 | return 0; | |
187 | } | |
188 | ||
4f449311 HK |
189 | /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct |
190 | * topology device present in the device_list | |
174de876 | 191 | */ |
4f449311 HK |
192 | static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, |
193 | struct list_head *device_list) | |
174de876 FK |
194 | { |
195 | struct kfd_mem_properties *props; | |
196 | struct kfd_topology_device *dev; | |
174de876 | 197 | |
42aa8793 | 198 | pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", |
174de876 | 199 | mem->proximity_domain); |
4f449311 HK |
200 | list_for_each_entry(dev, device_list, list) { |
201 | if (mem->proximity_domain == dev->proximity_domain) { | |
174de876 FK |
202 | props = kfd_alloc_struct(props); |
203 | if (!props) | |
204 | return -ENOMEM; | |
205 | ||
3a87177e HK |
206 | /* We're on GPU node */ |
207 | if (dev->node_props.cpu_cores_count == 0) { | |
208 | /* APU */ | |
209 | if (mem->visibility_type == 0) | |
210 | props->heap_type = | |
211 | HSA_MEM_HEAP_TYPE_FB_PRIVATE; | |
212 | /* dGPU */ | |
213 | else | |
214 | props->heap_type = mem->visibility_type; | |
215 | } else | |
174de876 FK |
216 | props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; |
217 | ||
218 | if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) | |
219 | props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; | |
220 | if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) | |
221 | props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; | |
222 | ||
223 | props->size_in_bytes = | |
224 | ((uint64_t)mem->length_high << 32) + | |
225 | mem->length_low; | |
226 | props->width = mem->width; | |
227 | ||
175b9263 | 228 | dev->node_props.mem_banks_count++; |
174de876 FK |
229 | list_add_tail(&props->list, &dev->mem_props); |
230 | ||
231 | break; | |
232 | } | |
174de876 FK |
233 | } |
234 | ||
235 | return 0; | |
236 | } | |
237 | ||
4f449311 HK |
238 | /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct |
239 | * topology device present in the device_list | |
174de876 | 240 | */ |
4f449311 HK |
241 | static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, |
242 | struct list_head *device_list) | |
174de876 FK |
243 | { |
244 | struct kfd_cache_properties *props; | |
245 | struct kfd_topology_device *dev; | |
246 | uint32_t id; | |
3a87177e | 247 | uint32_t total_num_of_cu; |
174de876 FK |
248 | |
249 | id = cache->processor_id_low; | |
250 | ||
42aa8793 | 251 | pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); |
3a87177e HK |
252 | list_for_each_entry(dev, device_list, list) { |
253 | total_num_of_cu = (dev->node_props.array_count * | |
254 | dev->node_props.cu_per_simd_array); | |
255 | ||
256 | /* Cache infomration in CRAT doesn't have proximity_domain | |
257 | * information as it is associated with a CPU core or GPU | |
258 | * Compute Unit. So map the cache using CPU core Id or SIMD | |
259 | * (GPU) ID. | |
260 | * TODO: This works because currently we can safely assume that | |
261 | * Compute Units are parsed before caches are parsed. In | |
262 | * future, remove this dependency | |
263 | */ | |
264 | if ((id >= dev->node_props.cpu_core_id_base && | |
265 | id <= dev->node_props.cpu_core_id_base + | |
266 | dev->node_props.cpu_cores_count) || | |
267 | (id >= dev->node_props.simd_id_base && | |
268 | id < dev->node_props.simd_id_base + | |
269 | total_num_of_cu)) { | |
174de876 FK |
270 | props = kfd_alloc_struct(props); |
271 | if (!props) | |
272 | return -ENOMEM; | |
273 | ||
274 | props->processor_id_low = id; | |
275 | props->cache_level = cache->cache_level; | |
276 | props->cache_size = cache->cache_size; | |
277 | props->cacheline_size = cache->cache_line_size; | |
278 | props->cachelines_per_tag = cache->lines_per_tag; | |
279 | props->cache_assoc = cache->associativity; | |
280 | props->cache_latency = cache->cache_latency; | |
3a87177e HK |
281 | memcpy(props->sibling_map, cache->sibling_map, |
282 | sizeof(props->sibling_map)); | |
174de876 FK |
283 | |
284 | if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) | |
285 | props->cache_type |= HSA_CACHE_TYPE_DATA; | |
286 | if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) | |
287 | props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; | |
288 | if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) | |
289 | props->cache_type |= HSA_CACHE_TYPE_CPU; | |
290 | if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) | |
291 | props->cache_type |= HSA_CACHE_TYPE_HSACU; | |
292 | ||
293 | dev->cache_count++; | |
294 | dev->node_props.caches_count++; | |
295 | list_add_tail(&props->list, &dev->cache_props); | |
296 | ||
297 | break; | |
298 | } | |
3a87177e | 299 | } |
174de876 FK |
300 | |
301 | return 0; | |
302 | } | |
303 | ||
4f449311 HK |
304 | /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct |
305 | * topology device present in the device_list | |
174de876 | 306 | */ |
4f449311 HK |
307 | static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, |
308 | struct list_head *device_list) | |
174de876 | 309 | { |
3a87177e HK |
310 | struct kfd_iolink_properties *props = NULL, *props2; |
311 | struct kfd_topology_device *dev, *cpu_dev; | |
174de876 FK |
312 | uint32_t id_from; |
313 | uint32_t id_to; | |
314 | ||
315 | id_from = iolink->proximity_domain_from; | |
316 | id_to = iolink->proximity_domain_to; | |
317 | ||
42aa8793 FK |
318 | pr_debug("Found IO link entry in CRAT table with id_from=%d\n", |
319 | id_from); | |
4f449311 HK |
320 | list_for_each_entry(dev, device_list, list) { |
321 | if (id_from == dev->proximity_domain) { | |
174de876 FK |
322 | props = kfd_alloc_struct(props); |
323 | if (!props) | |
324 | return -ENOMEM; | |
325 | ||
326 | props->node_from = id_from; | |
327 | props->node_to = id_to; | |
328 | props->ver_maj = iolink->version_major; | |
329 | props->ver_min = iolink->version_minor; | |
3a87177e | 330 | props->iolink_type = iolink->io_interface_type; |
174de876 | 331 | |
3a87177e HK |
332 | if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) |
333 | props->weight = 20; | |
334 | else | |
335 | props->weight = node_distance(id_from, id_to); | |
174de876 FK |
336 | |
337 | props->min_latency = iolink->minimum_latency; | |
338 | props->max_latency = iolink->maximum_latency; | |
339 | props->min_bandwidth = iolink->minimum_bandwidth_mbs; | |
340 | props->max_bandwidth = iolink->maximum_bandwidth_mbs; | |
341 | props->rec_transfer_size = | |
342 | iolink->recommended_transfer_size; | |
343 | ||
344 | dev->io_link_count++; | |
345 | dev->node_props.io_links_count++; | |
346 | list_add_tail(&props->list, &dev->io_link_props); | |
174de876 FK |
347 | break; |
348 | } | |
174de876 FK |
349 | } |
350 | ||
3a87177e HK |
351 | /* CPU topology is created before GPUs are detected, so CPU->GPU |
352 | * links are not built at that time. If a PCIe type is discovered, it | |
353 | * means a GPU is detected and we are adding GPU->CPU to the topology. | |
354 | * At this time, also add the corresponded CPU->GPU link. | |
355 | */ | |
356 | if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) { | |
357 | cpu_dev = kfd_topology_device_by_proximity_domain(id_to); | |
358 | if (!cpu_dev) | |
359 | return -ENODEV; | |
360 | /* same everything but the other direction */ | |
361 | props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); | |
362 | props2->node_from = id_to; | |
363 | props2->node_to = id_from; | |
364 | props2->kobj = NULL; | |
365 | cpu_dev->io_link_count++; | |
366 | cpu_dev->node_props.io_links_count++; | |
367 | list_add_tail(&props2->list, &cpu_dev->io_link_props); | |
368 | } | |
369 | ||
174de876 FK |
370 | return 0; |
371 | } | |
372 | ||
4f449311 HK |
373 | /* kfd_parse_subtype - parse subtypes and attach it to correct topology device |
374 | * present in the device_list | |
375 | * @sub_type_hdr - subtype section of crat_image | |
376 | * @device_list - list of topology devices present in this crat_image | |
377 | */ | |
378 | static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, | |
379 | struct list_head *device_list) | |
174de876 FK |
380 | { |
381 | struct crat_subtype_computeunit *cu; | |
382 | struct crat_subtype_memory *mem; | |
383 | struct crat_subtype_cache *cache; | |
384 | struct crat_subtype_iolink *iolink; | |
385 | int ret = 0; | |
386 | ||
387 | switch (sub_type_hdr->type) { | |
388 | case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: | |
389 | cu = (struct crat_subtype_computeunit *)sub_type_hdr; | |
4f449311 | 390 | ret = kfd_parse_subtype_cu(cu, device_list); |
174de876 FK |
391 | break; |
392 | case CRAT_SUBTYPE_MEMORY_AFFINITY: | |
393 | mem = (struct crat_subtype_memory *)sub_type_hdr; | |
4f449311 | 394 | ret = kfd_parse_subtype_mem(mem, device_list); |
174de876 FK |
395 | break; |
396 | case CRAT_SUBTYPE_CACHE_AFFINITY: | |
397 | cache = (struct crat_subtype_cache *)sub_type_hdr; | |
4f449311 | 398 | ret = kfd_parse_subtype_cache(cache, device_list); |
174de876 FK |
399 | break; |
400 | case CRAT_SUBTYPE_TLB_AFFINITY: | |
401 | /* | |
402 | * For now, nothing to do here | |
403 | */ | |
42aa8793 | 404 | pr_debug("Found TLB entry in CRAT table (not processing)\n"); |
174de876 FK |
405 | break; |
406 | case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: | |
407 | /* | |
408 | * For now, nothing to do here | |
409 | */ | |
42aa8793 | 410 | pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); |
174de876 FK |
411 | break; |
412 | case CRAT_SUBTYPE_IOLINK_AFFINITY: | |
413 | iolink = (struct crat_subtype_iolink *)sub_type_hdr; | |
4f449311 | 414 | ret = kfd_parse_subtype_iolink(iolink, device_list); |
174de876 FK |
415 | break; |
416 | default: | |
417 | pr_warn("Unknown subtype %d in CRAT\n", | |
418 | sub_type_hdr->type); | |
419 | } | |
420 | ||
421 | return ret; | |
422 | } | |
423 | ||
4f449311 HK |
424 | /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT |
425 | * create a kfd_topology_device and add in to device_list. Also parse | |
426 | * CRAT subtypes and attach it to appropriate kfd_topology_device | |
427 | * @crat_image - input image containing CRAT | |
428 | * @device_list - [OUT] list of kfd_topology_device generated after | |
429 | * parsing crat_image | |
430 | * @proximity_domain - Proximity domain of the first device in the table | |
431 | * | |
432 | * Return - 0 if successful else -ve value | |
433 | */ | |
434 | int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, | |
435 | uint32_t proximity_domain) | |
174de876 | 436 | { |
520b8fb7 | 437 | struct kfd_topology_device *top_dev = NULL; |
174de876 FK |
438 | struct crat_subtype_generic *sub_type_hdr; |
439 | uint16_t node_id; | |
4f449311 | 440 | int ret = 0; |
174de876 FK |
441 | struct crat_header *crat_table = (struct crat_header *)crat_image; |
442 | uint16_t num_nodes; | |
443 | uint32_t image_len; | |
444 | ||
445 | if (!crat_image) | |
446 | return -EINVAL; | |
447 | ||
4f449311 HK |
448 | if (!list_empty(device_list)) { |
449 | pr_warn("Error device list should be empty\n"); | |
450 | return -EINVAL; | |
451 | } | |
452 | ||
174de876 FK |
453 | num_nodes = crat_table->num_domains; |
454 | image_len = crat_table->length; | |
455 | ||
456 | pr_info("Parsing CRAT table with %d nodes\n", num_nodes); | |
457 | ||
458 | for (node_id = 0; node_id < num_nodes; node_id++) { | |
4f449311 HK |
459 | top_dev = kfd_create_topology_device(device_list); |
460 | if (!top_dev) | |
461 | break; | |
462 | top_dev->proximity_domain = proximity_domain++; | |
463 | } | |
464 | ||
465 | if (!top_dev) { | |
466 | ret = -ENOMEM; | |
467 | goto err; | |
174de876 FK |
468 | } |
469 | ||
520b8fb7 FK |
470 | memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); |
471 | memcpy(top_dev->oem_table_id, crat_table->oem_table_id, | |
472 | CRAT_OEMTABLEID_LENGTH); | |
473 | top_dev->oem_revision = crat_table->oem_revision; | |
174de876 FK |
474 | |
475 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); | |
476 | while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < | |
477 | ((char *)crat_image) + image_len) { | |
478 | if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { | |
4f449311 HK |
479 | ret = kfd_parse_subtype(sub_type_hdr, device_list); |
480 | if (ret) | |
481 | break; | |
174de876 FK |
482 | } |
483 | ||
484 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
485 | sub_type_hdr->length); | |
486 | } | |
487 | ||
4f449311 HK |
488 | err: |
489 | if (ret) | |
490 | kfd_release_topology_device_list(device_list); | |
174de876 | 491 | |
4f449311 | 492 | return ret; |
174de876 FK |
493 | } |
494 | ||
3a87177e HK |
495 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ |
496 | static int fill_in_pcache(struct crat_subtype_cache *pcache, | |
497 | struct kfd_gpu_cache_info *pcache_info, | |
498 | struct kfd_cu_info *cu_info, | |
499 | int mem_available, | |
500 | int cu_bitmask, | |
501 | int cache_type, unsigned int cu_processor_id, | |
502 | int cu_block) | |
503 | { | |
504 | unsigned int cu_sibling_map_mask; | |
505 | int first_active_cu; | |
506 | ||
507 | /* First check if enough memory is available */ | |
508 | if (sizeof(struct crat_subtype_cache) > mem_available) | |
509 | return -ENOMEM; | |
510 | ||
511 | cu_sibling_map_mask = cu_bitmask; | |
512 | cu_sibling_map_mask >>= cu_block; | |
513 | cu_sibling_map_mask &= | |
514 | ((1 << pcache_info[cache_type].num_cu_shared) - 1); | |
515 | first_active_cu = ffs(cu_sibling_map_mask); | |
516 | ||
517 | /* CU could be inactive. In case of shared cache find the first active | |
518 | * CU. and incase of non-shared cache check if the CU is inactive. If | |
519 | * inactive active skip it | |
520 | */ | |
521 | if (first_active_cu) { | |
522 | memset(pcache, 0, sizeof(struct crat_subtype_cache)); | |
523 | pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; | |
524 | pcache->length = sizeof(struct crat_subtype_cache); | |
525 | pcache->flags = pcache_info[cache_type].flags; | |
526 | pcache->processor_id_low = cu_processor_id | |
527 | + (first_active_cu - 1); | |
528 | pcache->cache_level = pcache_info[cache_type].cache_level; | |
529 | pcache->cache_size = pcache_info[cache_type].cache_size; | |
530 | ||
531 | /* Sibling map is w.r.t processor_id_low, so shift out | |
532 | * inactive CU | |
533 | */ | |
534 | cu_sibling_map_mask = | |
535 | cu_sibling_map_mask >> (first_active_cu - 1); | |
536 | ||
537 | pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); | |
538 | pcache->sibling_map[1] = | |
539 | (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); | |
540 | pcache->sibling_map[2] = | |
541 | (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); | |
542 | pcache->sibling_map[3] = | |
543 | (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); | |
544 | return 0; | |
545 | } | |
546 | return 1; | |
547 | } | |
548 | ||
549 | /* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info | |
550 | * tables | |
551 | * | |
552 | * @kdev - [IN] GPU device | |
553 | * @gpu_processor_id - [IN] GPU processor ID to which these caches | |
554 | * associate | |
555 | * @available_size - [IN] Amount of memory available in pcache | |
556 | * @cu_info - [IN] Compute Unit info obtained from KGD | |
557 | * @pcache - [OUT] memory into which cache data is to be filled in. | |
558 | * @size_filled - [OUT] amount of data used up in pcache. | |
559 | * @num_of_entries - [OUT] number of caches added | |
560 | */ | |
561 | static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, | |
562 | int gpu_processor_id, | |
563 | int available_size, | |
564 | struct kfd_cu_info *cu_info, | |
565 | struct crat_subtype_cache *pcache, | |
566 | int *size_filled, | |
567 | int *num_of_entries) | |
568 | { | |
569 | struct kfd_gpu_cache_info *pcache_info; | |
570 | int num_of_cache_types = 0; | |
571 | int i, j, k; | |
572 | int ct = 0; | |
573 | int mem_available = available_size; | |
574 | unsigned int cu_processor_id; | |
575 | int ret; | |
576 | ||
577 | switch (kdev->device_info->asic_family) { | |
578 | case CHIP_KAVERI: | |
579 | pcache_info = kaveri_cache_info; | |
580 | num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); | |
581 | break; | |
582 | case CHIP_HAWAII: | |
583 | pcache_info = hawaii_cache_info; | |
584 | num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); | |
585 | break; | |
586 | case CHIP_CARRIZO: | |
587 | pcache_info = carrizo_cache_info; | |
588 | num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); | |
589 | break; | |
590 | case CHIP_TONGA: | |
591 | pcache_info = tonga_cache_info; | |
592 | num_of_cache_types = ARRAY_SIZE(tonga_cache_info); | |
593 | break; | |
594 | case CHIP_FIJI: | |
595 | pcache_info = fiji_cache_info; | |
596 | num_of_cache_types = ARRAY_SIZE(fiji_cache_info); | |
597 | break; | |
598 | case CHIP_POLARIS10: | |
599 | pcache_info = polaris10_cache_info; | |
600 | num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); | |
601 | break; | |
602 | case CHIP_POLARIS11: | |
603 | pcache_info = polaris11_cache_info; | |
604 | num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); | |
605 | break; | |
606 | default: | |
607 | return -EINVAL; | |
608 | } | |
609 | ||
610 | *size_filled = 0; | |
611 | *num_of_entries = 0; | |
612 | ||
613 | /* For each type of cache listed in the kfd_gpu_cache_info table, | |
614 | * go through all available Compute Units. | |
615 | * The [i,j,k] loop will | |
616 | * if kfd_gpu_cache_info.num_cu_shared = 1 | |
617 | * will parse through all available CU | |
618 | * If (kfd_gpu_cache_info.num_cu_shared != 1) | |
619 | * then it will consider only one CU from | |
620 | * the shared unit | |
621 | */ | |
622 | ||
623 | for (ct = 0; ct < num_of_cache_types; ct++) { | |
624 | cu_processor_id = gpu_processor_id; | |
625 | for (i = 0; i < cu_info->num_shader_engines; i++) { | |
626 | for (j = 0; j < cu_info->num_shader_arrays_per_engine; | |
627 | j++) { | |
628 | for (k = 0; k < cu_info->num_cu_per_sh; | |
629 | k += pcache_info[ct].num_cu_shared) { | |
630 | ||
631 | ret = fill_in_pcache(pcache, | |
632 | pcache_info, | |
633 | cu_info, | |
634 | mem_available, | |
635 | cu_info->cu_bitmap[i][j], | |
636 | ct, | |
637 | cu_processor_id, | |
638 | k); | |
639 | ||
640 | if (ret < 0) | |
641 | break; | |
642 | ||
643 | if (!ret) { | |
644 | pcache++; | |
645 | (*num_of_entries)++; | |
646 | mem_available -= | |
647 | sizeof(*pcache); | |
648 | (*size_filled) += | |
649 | sizeof(*pcache); | |
650 | } | |
651 | ||
652 | /* Move to next CU block */ | |
653 | cu_processor_id += | |
654 | pcache_info[ct].num_cu_shared; | |
655 | } | |
656 | } | |
657 | } | |
658 | } | |
659 | ||
660 | pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); | |
661 | ||
662 | return 0; | |
663 | } | |
664 | ||
8e05247d HK |
665 | /* |
666 | * kfd_create_crat_image_acpi - Allocates memory for CRAT image and | |
667 | * copies CRAT from ACPI (if available). | |
668 | * NOTE: Call kfd_destroy_crat_image to free CRAT image memory | |
669 | * | |
670 | * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then | |
671 | * crat_image will be NULL | |
672 | * @size: [OUT] size of crat_image | |
673 | * | |
674 | * Return 0 if successful else return error code | |
675 | */ | |
676 | int kfd_create_crat_image_acpi(void **crat_image, size_t *size) | |
174de876 FK |
677 | { |
678 | struct acpi_table_header *crat_table; | |
679 | acpi_status status; | |
8e05247d | 680 | void *pcrat_image; |
174de876 | 681 | |
8e05247d | 682 | if (!crat_image) |
174de876 FK |
683 | return -EINVAL; |
684 | ||
8e05247d HK |
685 | *crat_image = NULL; |
686 | ||
687 | /* Fetch the CRAT table from ACPI */ | |
174de876 FK |
688 | status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); |
689 | if (status == AE_NOT_FOUND) { | |
690 | pr_warn("CRAT table not found\n"); | |
691 | return -ENODATA; | |
692 | } else if (ACPI_FAILURE(status)) { | |
693 | const char *err = acpi_format_exception(status); | |
694 | ||
695 | pr_err("CRAT table error: %s\n", err); | |
696 | return -EINVAL; | |
697 | } | |
698 | ||
ebcfd1e2 FK |
699 | if (ignore_crat) { |
700 | pr_info("CRAT table disabled by module option\n"); | |
701 | return -ENODATA; | |
702 | } | |
703 | ||
8e05247d HK |
704 | pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); |
705 | if (!pcrat_image) | |
706 | return -ENOMEM; | |
707 | ||
708 | memcpy(pcrat_image, crat_table, crat_table->length); | |
174de876 | 709 | |
8e05247d | 710 | *crat_image = pcrat_image; |
174de876 FK |
711 | *size = crat_table->length; |
712 | ||
713 | return 0; | |
714 | } | |
8e05247d | 715 | |
520b8fb7 FK |
/* Buffer sizes used when building a Virtual CRAT.
 * The amount of memory a Virtual CRAT needs cannot easily be predicted, so
 * fixed sizes are used for the CPU and the GPU image. They are expected to
 * cover all known conditions; the fill code additionally checks the
 * remaining space so the buffer is never overwritten.
 */
#define VCRAT_SIZE_FOR_CPU	(2 * PAGE_SIZE)
#define VCRAT_SIZE_FOR_GPU	(3 * PAGE_SIZE)
724 | ||
725 | /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node | |
726 | * | |
727 | * @numa_node_id: CPU NUMA node id | |
728 | * @avail_size: Available size in the memory | |
729 | * @sub_type_hdr: Memory into which compute info will be filled in | |
730 | * | |
731 | * Return 0 if successful else return -ve value | |
732 | */ | |
733 | static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, | |
734 | int proximity_domain, | |
735 | struct crat_subtype_computeunit *sub_type_hdr) | |
736 | { | |
737 | const struct cpumask *cpumask; | |
738 | ||
739 | *avail_size -= sizeof(struct crat_subtype_computeunit); | |
740 | if (*avail_size < 0) | |
741 | return -ENOMEM; | |
742 | ||
743 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); | |
744 | ||
745 | /* Fill in subtype header data */ | |
746 | sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; | |
747 | sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); | |
748 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
749 | ||
750 | cpumask = cpumask_of_node(numa_node_id); | |
751 | ||
752 | /* Fill in CU data */ | |
753 | sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; | |
754 | sub_type_hdr->proximity_domain = proximity_domain; | |
755 | sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); | |
756 | if (sub_type_hdr->processor_id_low == -1) | |
757 | return -EINVAL; | |
758 | ||
759 | sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); | |
760 | ||
761 | return 0; | |
762 | } | |
763 | ||
764 | /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node | |
765 | * | |
766 | * @numa_node_id: CPU NUMA node id | |
767 | * @avail_size: Available size in the memory | |
768 | * @sub_type_hdr: Memory into which compute info will be filled in | |
769 | * | |
770 | * Return 0 if successful else return -ve value | |
771 | */ | |
772 | static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, | |
773 | int proximity_domain, | |
774 | struct crat_subtype_memory *sub_type_hdr) | |
775 | { | |
776 | uint64_t mem_in_bytes = 0; | |
777 | pg_data_t *pgdat; | |
778 | int zone_type; | |
779 | ||
780 | *avail_size -= sizeof(struct crat_subtype_memory); | |
781 | if (*avail_size < 0) | |
782 | return -ENOMEM; | |
783 | ||
784 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); | |
785 | ||
786 | /* Fill in subtype header data */ | |
787 | sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; | |
788 | sub_type_hdr->length = sizeof(struct crat_subtype_memory); | |
789 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
790 | ||
791 | /* Fill in Memory Subunit data */ | |
792 | ||
793 | /* Unlike si_meminfo, si_meminfo_node is not exported. So | |
794 | * the following lines are duplicated from si_meminfo_node | |
795 | * function | |
796 | */ | |
797 | pgdat = NODE_DATA(numa_node_id); | |
798 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) | |
799 | mem_in_bytes += pgdat->node_zones[zone_type].managed_pages; | |
800 | mem_in_bytes <<= PAGE_SHIFT; | |
801 | ||
802 | sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); | |
803 | sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); | |
804 | sub_type_hdr->proximity_domain = proximity_domain; | |
805 | ||
806 | return 0; | |
807 | } | |
808 | ||
809 | static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, | |
810 | uint32_t *num_entries, | |
811 | struct crat_subtype_iolink *sub_type_hdr) | |
812 | { | |
813 | int nid; | |
814 | struct cpuinfo_x86 *c = &cpu_data(0); | |
815 | uint8_t link_type; | |
816 | ||
817 | if (c->x86_vendor == X86_VENDOR_AMD) | |
818 | link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; | |
819 | else | |
820 | link_type = CRAT_IOLINK_TYPE_QPI_1_1; | |
821 | ||
822 | *num_entries = 0; | |
823 | ||
824 | /* Create IO links from this node to other CPU nodes */ | |
825 | for_each_online_node(nid) { | |
826 | if (nid == numa_node_id) /* node itself */ | |
827 | continue; | |
828 | ||
829 | *avail_size -= sizeof(struct crat_subtype_iolink); | |
830 | if (*avail_size < 0) | |
831 | return -ENOMEM; | |
832 | ||
833 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | |
834 | ||
835 | /* Fill in subtype header data */ | |
836 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | |
837 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | |
838 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
839 | ||
840 | /* Fill in IO link data */ | |
841 | sub_type_hdr->proximity_domain_from = numa_node_id; | |
842 | sub_type_hdr->proximity_domain_to = nid; | |
843 | sub_type_hdr->io_interface_type = link_type; | |
844 | ||
845 | (*num_entries)++; | |
846 | sub_type_hdr++; | |
847 | } | |
848 | ||
849 | return 0; | |
850 | } | |
851 | ||
852 | /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU | |
853 | * | |
854 | * @pcrat_image: Fill in VCRAT for CPU | |
855 | * @size: [IN] allocated size of crat_image. | |
856 | * [OUT] actual size of data filled in crat_image | |
857 | */ | |
858 | static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) | |
859 | { | |
860 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; | |
861 | struct acpi_table_header *acpi_table; | |
862 | acpi_status status; | |
863 | struct crat_subtype_generic *sub_type_hdr; | |
864 | int avail_size = *size; | |
865 | int numa_node_id; | |
866 | uint32_t entries = 0; | |
867 | int ret = 0; | |
868 | ||
869 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) | |
870 | return -EINVAL; | |
871 | ||
872 | /* Fill in CRAT Header. | |
873 | * Modify length and total_entries as subunits are added. | |
874 | */ | |
875 | avail_size -= sizeof(struct crat_header); | |
876 | if (avail_size < 0) | |
877 | return -ENOMEM; | |
878 | ||
879 | memset(crat_table, 0, sizeof(struct crat_header)); | |
880 | memcpy(&crat_table->signature, CRAT_SIGNATURE, | |
881 | sizeof(crat_table->signature)); | |
882 | crat_table->length = sizeof(struct crat_header); | |
883 | ||
884 | status = acpi_get_table("DSDT", 0, &acpi_table); | |
48a44387 | 885 | if (status != AE_OK) |
520b8fb7 FK |
886 | pr_warn("DSDT table not found for OEM information\n"); |
887 | else { | |
888 | crat_table->oem_revision = acpi_table->revision; | |
889 | memcpy(crat_table->oem_id, acpi_table->oem_id, | |
890 | CRAT_OEMID_LENGTH); | |
891 | memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, | |
892 | CRAT_OEMTABLEID_LENGTH); | |
893 | } | |
894 | crat_table->total_entries = 0; | |
895 | crat_table->num_domains = 0; | |
896 | ||
897 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); | |
898 | ||
899 | for_each_online_node(numa_node_id) { | |
900 | if (kfd_numa_node_to_apic_id(numa_node_id) == -1) | |
901 | continue; | |
902 | ||
903 | /* Fill in Subtype: Compute Unit */ | |
904 | ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, | |
905 | crat_table->num_domains, | |
906 | (struct crat_subtype_computeunit *)sub_type_hdr); | |
907 | if (ret < 0) | |
908 | return ret; | |
909 | crat_table->length += sub_type_hdr->length; | |
910 | crat_table->total_entries++; | |
911 | ||
912 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
913 | sub_type_hdr->length); | |
914 | ||
915 | /* Fill in Subtype: Memory */ | |
916 | ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, | |
917 | crat_table->num_domains, | |
918 | (struct crat_subtype_memory *)sub_type_hdr); | |
919 | if (ret < 0) | |
920 | return ret; | |
921 | crat_table->length += sub_type_hdr->length; | |
922 | crat_table->total_entries++; | |
923 | ||
924 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
925 | sub_type_hdr->length); | |
926 | ||
927 | /* Fill in Subtype: IO Link */ | |
928 | ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, | |
929 | &entries, | |
930 | (struct crat_subtype_iolink *)sub_type_hdr); | |
931 | if (ret < 0) | |
932 | return ret; | |
933 | crat_table->length += (sub_type_hdr->length * entries); | |
934 | crat_table->total_entries += entries; | |
935 | ||
936 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
937 | sub_type_hdr->length * entries); | |
938 | ||
939 | crat_table->num_domains++; | |
940 | } | |
941 | ||
942 | /* TODO: Add cache Subtype for CPU. | |
943 | * Currently, CPU cache information is available in function | |
944 | * detect_cache_attributes(cpu) defined in the file | |
945 | * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not | |
946 | * exported and to get the same information the code needs to be | |
947 | * duplicated. | |
948 | */ | |
949 | ||
950 | *size = crat_table->length; | |
951 | pr_info("Virtual CRAT table created for CPU\n"); | |
952 | ||
953 | return 0; | |
954 | } | |
955 | ||
3a87177e HK |
956 | static int kfd_fill_gpu_memory_affinity(int *avail_size, |
957 | struct kfd_dev *kdev, uint8_t type, uint64_t size, | |
958 | struct crat_subtype_memory *sub_type_hdr, | |
959 | uint32_t proximity_domain, | |
960 | const struct kfd_local_mem_info *local_mem_info) | |
961 | { | |
962 | *avail_size -= sizeof(struct crat_subtype_memory); | |
963 | if (*avail_size < 0) | |
964 | return -ENOMEM; | |
965 | ||
966 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); | |
967 | sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; | |
968 | sub_type_hdr->length = sizeof(struct crat_subtype_memory); | |
969 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; | |
970 | ||
971 | sub_type_hdr->proximity_domain = proximity_domain; | |
972 | ||
973 | pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", | |
974 | type, size); | |
975 | ||
976 | sub_type_hdr->length_low = lower_32_bits(size); | |
977 | sub_type_hdr->length_high = upper_32_bits(size); | |
978 | ||
979 | sub_type_hdr->width = local_mem_info->vram_width; | |
980 | sub_type_hdr->visibility_type = type; | |
981 | ||
982 | return 0; | |
983 | } | |
984 | ||
985 | /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU | |
986 | * to its NUMA node | |
987 | * @avail_size: Available size in the memory | |
988 | * @kdev - [IN] GPU device | |
989 | * @sub_type_hdr: Memory into which io link info will be filled in | |
990 | * @proximity_domain - proximity domain of the GPU node | |
991 | * | |
992 | * Return 0 if successful else return -ve value | |
993 | */ | |
994 | static int kfd_fill_gpu_direct_io_link(int *avail_size, | |
995 | struct kfd_dev *kdev, | |
996 | struct crat_subtype_iolink *sub_type_hdr, | |
997 | uint32_t proximity_domain) | |
998 | { | |
999 | *avail_size -= sizeof(struct crat_subtype_iolink); | |
1000 | if (*avail_size < 0) | |
1001 | return -ENOMEM; | |
1002 | ||
1003 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | |
1004 | ||
1005 | /* Fill in subtype header data */ | |
1006 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | |
1007 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | |
1008 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; | |
1009 | ||
1010 | /* Fill in IOLINK subtype. | |
1011 | * TODO: Fill-in other fields of iolink subtype | |
1012 | */ | |
1013 | sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; | |
1014 | sub_type_hdr->proximity_domain_from = proximity_domain; | |
1015 | #ifdef CONFIG_NUMA | |
1016 | if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) | |
1017 | sub_type_hdr->proximity_domain_to = 0; | |
1018 | else | |
1019 | sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; | |
1020 | #else | |
1021 | sub_type_hdr->proximity_domain_to = 0; | |
1022 | #endif | |
1023 | return 0; | |
1024 | } | |
1025 | ||
1026 | /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU | |
1027 | * | |
1028 | * @pcrat_image: Fill in VCRAT for GPU | |
1029 | * @size: [IN] allocated size of crat_image. | |
1030 | * [OUT] actual size of data filled in crat_image | |
1031 | */ | |
1032 | static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |
1033 | size_t *size, struct kfd_dev *kdev, | |
1034 | uint32_t proximity_domain) | |
1035 | { | |
1036 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; | |
1037 | struct crat_subtype_generic *sub_type_hdr; | |
1038 | struct crat_subtype_computeunit *cu; | |
1039 | struct kfd_cu_info cu_info; | |
3a87177e HK |
1040 | int avail_size = *size; |
1041 | uint32_t total_num_of_cu; | |
1042 | int num_of_cache_entries = 0; | |
1043 | int cache_mem_filled = 0; | |
1044 | int ret = 0; | |
3a87177e HK |
1045 | struct kfd_local_mem_info local_mem_info; |
1046 | ||
1047 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) | |
1048 | return -EINVAL; | |
1049 | ||
1050 | /* Fill the CRAT Header. | |
1051 | * Modify length and total_entries as subunits are added. | |
1052 | */ | |
1053 | avail_size -= sizeof(struct crat_header); | |
1054 | if (avail_size < 0) | |
1055 | return -ENOMEM; | |
1056 | ||
1057 | memset(crat_table, 0, sizeof(struct crat_header)); | |
1058 | ||
1059 | memcpy(&crat_table->signature, CRAT_SIGNATURE, | |
1060 | sizeof(crat_table->signature)); | |
1061 | /* Change length as we add more subtypes*/ | |
1062 | crat_table->length = sizeof(struct crat_header); | |
1063 | crat_table->num_domains = 1; | |
1064 | crat_table->total_entries = 0; | |
1065 | ||
1066 | /* Fill in Subtype: Compute Unit | |
1067 | * First fill in the sub type header and then sub type data | |
1068 | */ | |
1069 | avail_size -= sizeof(struct crat_subtype_computeunit); | |
1070 | if (avail_size < 0) | |
1071 | return -ENOMEM; | |
1072 | ||
1073 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); | |
1074 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); | |
1075 | ||
1076 | sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; | |
1077 | sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); | |
1078 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
1079 | ||
1080 | /* Fill CU subtype data */ | |
1081 | cu = (struct crat_subtype_computeunit *)sub_type_hdr; | |
1082 | cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; | |
1083 | cu->proximity_domain = proximity_domain; | |
1084 | ||
1085 | kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info); | |
1086 | cu->num_simd_per_cu = cu_info.simd_per_cu; | |
1087 | cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; | |
1088 | cu->max_waves_simd = cu_info.max_waves_per_simd; | |
1089 | ||
1090 | cu->wave_front_size = cu_info.wave_front_size; | |
1091 | cu->array_count = cu_info.num_shader_arrays_per_engine * | |
1092 | cu_info.num_shader_engines; | |
1093 | total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); | |
1094 | cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); | |
1095 | cu->num_cu_per_array = cu_info.num_cu_per_sh; | |
1096 | cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; | |
1097 | cu->num_banks = cu_info.num_shader_engines; | |
1098 | cu->lds_size_in_kb = cu_info.lds_size; | |
1099 | ||
1100 | cu->hsa_capability = 0; | |
1101 | ||
1102 | /* Check if this node supports IOMMU. During parsing this flag will | |
1103 | * translate to HSA_CAP_ATS_PRESENT | |
1104 | */ | |
64d1c3a4 FK |
1105 | if (!kfd_iommu_check_device(kdev)) |
1106 | cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; | |
3a87177e HK |
1107 | |
1108 | crat_table->length += sub_type_hdr->length; | |
1109 | crat_table->total_entries++; | |
1110 | ||
1111 | /* Fill in Subtype: Memory. Only on systems with large BAR (no | |
1112 | * private FB), report memory as public. On other systems | |
1113 | * report the total FB size (public+private) as a single | |
1114 | * private heap. | |
1115 | */ | |
1116 | kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); | |
1117 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
1118 | sub_type_hdr->length); | |
1119 | ||
374200b1 FK |
1120 | if (debug_largebar) |
1121 | local_mem_info.local_mem_size_private = 0; | |
1122 | ||
3a87177e HK |
1123 | if (local_mem_info.local_mem_size_private == 0) |
1124 | ret = kfd_fill_gpu_memory_affinity(&avail_size, | |
1125 | kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, | |
1126 | local_mem_info.local_mem_size_public, | |
1127 | (struct crat_subtype_memory *)sub_type_hdr, | |
1128 | proximity_domain, | |
1129 | &local_mem_info); | |
1130 | else | |
1131 | ret = kfd_fill_gpu_memory_affinity(&avail_size, | |
1132 | kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, | |
1133 | local_mem_info.local_mem_size_public + | |
1134 | local_mem_info.local_mem_size_private, | |
1135 | (struct crat_subtype_memory *)sub_type_hdr, | |
1136 | proximity_domain, | |
1137 | &local_mem_info); | |
1138 | if (ret < 0) | |
1139 | return ret; | |
1140 | ||
1141 | crat_table->length += sizeof(struct crat_subtype_memory); | |
1142 | crat_table->total_entries++; | |
1143 | ||
1144 | /* TODO: Fill in cache information. This information is NOT readily | |
1145 | * available in KGD | |
1146 | */ | |
1147 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
1148 | sub_type_hdr->length); | |
1149 | ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, | |
1150 | avail_size, | |
1151 | &cu_info, | |
1152 | (struct crat_subtype_cache *)sub_type_hdr, | |
1153 | &cache_mem_filled, | |
1154 | &num_of_cache_entries); | |
1155 | ||
1156 | if (ret < 0) | |
1157 | return ret; | |
1158 | ||
1159 | crat_table->length += cache_mem_filled; | |
1160 | crat_table->total_entries += num_of_cache_entries; | |
1161 | avail_size -= cache_mem_filled; | |
1162 | ||
1163 | /* Fill in Subtype: IO_LINKS | |
1164 | * Only direct links are added here which is Link from GPU to | |
1165 | * to its NUMA node. Indirect links are added by userspace. | |
1166 | */ | |
1167 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
1168 | cache_mem_filled); | |
1169 | ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev, | |
1170 | (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); | |
1171 | ||
1172 | if (ret < 0) | |
1173 | return ret; | |
1174 | ||
1175 | crat_table->length += sub_type_hdr->length; | |
1176 | crat_table->total_entries++; | |
1177 | ||
1178 | *size = crat_table->length; | |
1179 | pr_info("Virtual CRAT table created for GPU\n"); | |
1180 | ||
1181 | return ret; | |
1182 | } | |
1183 | ||
520b8fb7 FK |
1184 | /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and |
1185 | * creates a Virtual CRAT (VCRAT) image | |
1186 | * | |
1187 | * NOTE: Call kfd_destroy_crat_image to free CRAT image memory | |
1188 | * | |
1189 | * @crat_image: VCRAT image created because ACPI does not have a | |
1190 | * CRAT for this device | |
1191 | * @size: [OUT] size of virtual crat_image | |
1192 | * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device | |
1193 | * COMPUTE_UNIT_GPU - Create VCRAT for GPU | |
1194 | * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU | |
1195 | * -- this option is not currently implemented. | |
1196 | * The assumption is that all AMD APUs will have CRAT | |
1197 | * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU | |
1198 | * | |
1199 | * Return 0 if successful else return -ve value | |
1200 | */ | |
1201 | int kfd_create_crat_image_virtual(void **crat_image, size_t *size, | |
1202 | int flags, struct kfd_dev *kdev, | |
1203 | uint32_t proximity_domain) | |
1204 | { | |
1205 | void *pcrat_image = NULL; | |
1206 | int ret = 0; | |
1207 | ||
1208 | if (!crat_image) | |
1209 | return -EINVAL; | |
1210 | ||
1211 | *crat_image = NULL; | |
1212 | ||
1213 | /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and | |
1214 | * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover | |
1215 | * all the current conditions. A check is put not to overwrite beyond | |
1216 | * allocated size | |
1217 | */ | |
1218 | switch (flags) { | |
1219 | case COMPUTE_UNIT_CPU: | |
1220 | pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); | |
1221 | if (!pcrat_image) | |
1222 | return -ENOMEM; | |
1223 | *size = VCRAT_SIZE_FOR_CPU; | |
1224 | ret = kfd_create_vcrat_image_cpu(pcrat_image, size); | |
1225 | break; | |
1226 | case COMPUTE_UNIT_GPU: | |
3a87177e HK |
1227 | if (!kdev) |
1228 | return -EINVAL; | |
1229 | pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); | |
1230 | if (!pcrat_image) | |
1231 | return -ENOMEM; | |
1232 | *size = VCRAT_SIZE_FOR_GPU; | |
1233 | ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, | |
1234 | proximity_domain); | |
520b8fb7 FK |
1235 | break; |
1236 | case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): | |
1237 | /* TODO: */ | |
1238 | ret = -EINVAL; | |
1239 | pr_err("VCRAT not implemented for APU\n"); | |
1240 | break; | |
1241 | default: | |
1242 | ret = -EINVAL; | |
1243 | } | |
1244 | ||
1245 | if (!ret) | |
1246 | *crat_image = pcrat_image; | |
1247 | else | |
1248 | kfree(pcrat_image); | |
1249 | ||
1250 | return ret; | |
1251 | } | |
1252 | ||
1253 | ||
/* kfd_destroy_crat_image - Release a CRAT image
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..);
 *		the pointer is handed straight to kfree()
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kfree(crat_image);
}