Merge tag 'acpi-6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdkfd / kfd_crat.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2015-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23
24 #include <linux/pci.h>
25 #include <linux/acpi.h>
26 #include "kfd_crat.h"
27 #include "kfd_priv.h"
28 #include "kfd_topology.h"
29 #include "kfd_iommu.h"
30 #include "amdgpu.h"
31 #include "amdgpu_amdkfd.h"
32
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor ID are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for next GPU
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 *
 * NOTE(review): not thread-safe; assumes callers are serialized during
 * topology enumeration — confirm locking at call sites.
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	/* Use an unsigned local: the base 0x80001000 has bit 31 set, so it
	 * does not fit in a signed int (the old "int current_id" relied on
	 * implementation-defined conversion), and the function returns
	 * unsigned int anyway.
	 */
	unsigned int current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
52
53
/* Cache topology reported to the CRAT/topology code for Kaveri (GFX7 APU).
 * One entry per cache type: size, level, CRAT flags, and how many CUs
 * share one instance of that cache. cache_size presumably in KiB, matching
 * the convention documented for Vega10+ below — TODO confirm for pre-Vega.
 */
static struct kfd_gpu_cache_info kaveri_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},

	/* TODO: Add L2 Cache information */
};
85
86
/* Cache topology for Carrizo (GFX8 APU); also aliased below for the
 * VI-family dGPUs (Tonga/Fiji/Polaris/VegaM). Same entry layout as the
 * Kaveri table; cache_size presumably in KiB — TODO confirm for pre-Vega.
 */
static struct kfd_gpu_cache_info carrizo_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank. */
		.cache_size = 4,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},

	/* TODO: Add L2 Cache information */
};
118
/* ASICs that share an existing cache layout are aliased rather than
 * duplicated: Hawaii matches Kaveri; the VI-family dGPUs match Carrizo.
 */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info  carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
126
/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In the future, the cache details must be checked and
 * updated for every new ASIC.
 */
133
/* Cache topology for Vega10 (GFX9 dGPU). Per the note above, cache_size
 * is in KiB and the table includes the L2.
 */
static struct kfd_gpu_cache_info vega10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};
172
/* Cache topology for Raven (GFX9 APU); sizes in KiB. Same L1 layout as
 * Vega10, smaller L2 shared across its 11 CUs.
 */
static struct kfd_gpu_cache_info raven_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 11,
	},
};
211
/* Cache topology for Renoir (GFX9 APU); sizes in KiB. Same L1 layout as
 * Raven, L2 shared across 8 CUs.
 */
static struct kfd_gpu_cache_info renoir_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};
250
/* Cache topology for Vega12 (GFX9 dGPU); sizes in KiB. Same L1 layout as
 * Vega10, 2 MiB L2 shared across 5 CUs.
 */
static struct kfd_gpu_cache_info vega12_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 5,
	},
};
289
/* Cache topology for Vega20 (GFX9 dGPU); sizes in KiB. Same L1 layout as
 * Vega10, 8 MiB L2 shared across 16 CUs.
 */
static struct kfd_gpu_cache_info vega20_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};
328
/* Cache topology for Aldebaran (GFX90a dGPU); sizes in KiB. SQC caches
 * are shared by 2 CUs here (vs 3 on Vega), 8 MiB L2 across 14 CUs.
 */
static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 14,
	},
};
367
/* Cache topology for Navi10 (GFX10 dGPU); sizes in KiB. GFX10 adds a
 * per-SA GL1 cache, listed here as an additional level-1 entry.
 */
static struct kfd_gpu_cache_info navi10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};
415
/* Cache topology for Van Gogh (GFX10.3 APU); sizes in KiB. Same layout
 * as Navi10 with a smaller GL1 sharing group and 1 MiB L2.
 */
static struct kfd_gpu_cache_info vangogh_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};
463
/* Cache topology for Navi14 (GFX10 dGPU); sizes in KiB. Same layout as
 * Navi10 with a 12-CU GL1/L2 sharing group and 2 MiB L2.
 */
static struct kfd_gpu_cache_info navi14_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
};
511
/* Cache topology for Sienna Cichlid (GFX10.3 dGPU); sizes in KiB. First
 * table in this file with a level-3 entry (128 MiB Infinity Cache —
 * presumably; TODO confirm the L3 here is the MALL/Infinity Cache).
 */
static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 128*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};
568
/* Cache topology for Navy Flounder (GFX10.3 dGPU); sizes in KiB.
 * Same layout as Sienna Cichlid with 3 MiB L2 and 96 MiB L3.
 */
static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 3072,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 96*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};
625
/* Cache topology for Dimgrey Cavefish (GFX10.3 dGPU); sizes in KiB.
 * Same layout as Sienna Cichlid with 2 MiB L2 and 32 MiB L3, shared
 * across groups of 8 CUs.
 */
static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 32*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};
682
/* Cache topology for Beige Goby (GFX10.3 dGPU); sizes in KiB.
 * Same layout as Dimgrey Cavefish with 1 MiB L2 and 16 MiB L3.
 */
static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 16*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};
739
/* Cache topology for Yellow Carp (GFX10.3 APU); sizes in KiB.
 * No L3 entry; GL1 and 2 MiB L2 shared across 6 CUs.
 */
static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};
787
/* Cache hierarchy reported for GFX10.3.7 parts.
 * cache_size is in KiB; num_cu_shared is the number of CUs that share one
 * instance of the cache.
 */
static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};
835
/* Cache hierarchy reported for GC 10.3.6 parts.
 * cache_size is in KiB; num_cu_shared is the number of CUs that share one
 * instance of the cache.
 */
static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_INST_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};
883
/* Generic placeholder cache table. NOTE(review): presumably used as a
 * fallback for ASICs with no dedicated table — confirm at the call sites
 * in kfd_get_gpu_cache_info(). cache_size is in KiB; num_cu_shared is the
 * number of CUs that share one instance of the cache.
 */
static struct kfd_gpu_cache_info dummy_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};
931
932 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
933                 struct crat_subtype_computeunit *cu)
934 {
935         dev->node_props.cpu_cores_count = cu->num_cpu_cores;
936         dev->node_props.cpu_core_id_base = cu->processor_id_low;
937         if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
938                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
939
940         pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
941                         cu->processor_id_low);
942 }
943
944 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
945                 struct crat_subtype_computeunit *cu)
946 {
947         dev->node_props.simd_id_base = cu->processor_id_low;
948         dev->node_props.simd_count = cu->num_simd_cores;
949         dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
950         dev->node_props.max_waves_per_simd = cu->max_waves_simd;
951         dev->node_props.wave_front_size = cu->wave_front_size;
952         dev->node_props.array_count = cu->array_count;
953         dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
954         dev->node_props.simd_per_cu = cu->num_simd_per_cu;
955         dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
956         if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
957                 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
958         pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
959 }
960
961 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
962  * topology device present in the device_list
963  */
964 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
965                                 struct list_head *device_list)
966 {
967         struct kfd_topology_device *dev;
968
969         pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
970                         cu->proximity_domain, cu->hsa_capability);
971         list_for_each_entry(dev, device_list, list) {
972                 if (cu->proximity_domain == dev->proximity_domain) {
973                         if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
974                                 kfd_populated_cu_info_cpu(dev, cu);
975
976                         if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
977                                 kfd_populated_cu_info_gpu(dev, cu);
978                         break;
979                 }
980         }
981
982         return 0;
983 }
984
985 static struct kfd_mem_properties *
986 find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
987                 struct kfd_topology_device *dev)
988 {
989         struct kfd_mem_properties *props;
990
991         list_for_each_entry(props, &dev->mem_props, list) {
992                 if (props->heap_type == heap_type
993                                 && props->flags == flags
994                                 && props->width == width)
995                         return props;
996         }
997
998         return NULL;
999 }
1000 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1001  * topology device present in the device_list
1002  */
1003 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
1004                                 struct list_head *device_list)
1005 {
1006         struct kfd_mem_properties *props;
1007         struct kfd_topology_device *dev;
1008         uint32_t heap_type;
1009         uint64_t size_in_bytes;
1010         uint32_t flags = 0;
1011         uint32_t width;
1012
1013         pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
1014                         mem->proximity_domain);
1015         list_for_each_entry(dev, device_list, list) {
1016                 if (mem->proximity_domain == dev->proximity_domain) {
1017                         /* We're on GPU node */
1018                         if (dev->node_props.cpu_cores_count == 0) {
1019                                 /* APU */
1020                                 if (mem->visibility_type == 0)
1021                                         heap_type =
1022                                                 HSA_MEM_HEAP_TYPE_FB_PRIVATE;
1023                                 /* dGPU */
1024                                 else
1025                                         heap_type = mem->visibility_type;
1026                         } else
1027                                 heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
1028
1029                         if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
1030                                 flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
1031                         if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
1032                                 flags |= HSA_MEM_FLAGS_NON_VOLATILE;
1033
1034                         size_in_bytes =
1035                                 ((uint64_t)mem->length_high << 32) +
1036                                                         mem->length_low;
1037                         width = mem->width;
1038
1039                         /* Multiple banks of the same type are aggregated into
1040                          * one. User mode doesn't care about multiple physical
1041                          * memory segments. It's managed as a single virtual
1042                          * heap for user mode.
1043                          */
1044                         props = find_subtype_mem(heap_type, flags, width, dev);
1045                         if (props) {
1046                                 props->size_in_bytes += size_in_bytes;
1047                                 break;
1048                         }
1049
1050                         props = kfd_alloc_struct(props);
1051                         if (!props)
1052                                 return -ENOMEM;
1053
1054                         props->heap_type = heap_type;
1055                         props->flags = flags;
1056                         props->size_in_bytes = size_in_bytes;
1057                         props->width = width;
1058
1059                         dev->node_props.mem_banks_count++;
1060                         list_add_tail(&props->list, &dev->mem_props);
1061
1062                         break;
1063                 }
1064         }
1065
1066         return 0;
1067 }
1068
/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
 * topology device present in the device_list
 *	@cache - cache subtype entry from the CRAT image
 *	@device_list - list of topology devices built so far
 *
 *	Return - 0 on success, -ENOMEM if the properties struct cannot be
 *	allocated
 */
static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
			struct list_head *device_list)
{
	struct kfd_cache_properties *props;
	struct kfd_topology_device *dev;
	uint32_t id;
	uint32_t total_num_of_cu;

	id = cache->processor_id_low;

	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
	list_for_each_entry(dev, device_list, list) {
		total_num_of_cu = (dev->node_props.array_count *
					dev->node_props.cu_per_simd_array);

		/* Cache information in CRAT doesn't have proximity_domain
		 * information as it is associated with a CPU core or GPU
		 * Compute Unit. So map the cache using CPU core Id or SIMD
		 * (GPU) ID.
		 * TODO: This works because currently we can safely assume that
		 *  Compute Units are parsed before caches are parsed. In
		 *  future, remove this dependency
		 */
		/* NOTE(review): the CPU range check uses an inclusive upper
		 * bound (<=) while the GPU range uses an exclusive one (<) —
		 * looks like an off-by-one; confirm against the CRAT spec and
		 * real firmware tables before changing.
		 */
		if ((id >= dev->node_props.cpu_core_id_base &&
			id <= dev->node_props.cpu_core_id_base +
				dev->node_props.cpu_cores_count) ||
			(id >= dev->node_props.simd_id_base &&
			id < dev->node_props.simd_id_base +
				total_num_of_cu)) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->processor_id_low = id;
			props->cache_level = cache->cache_level;
			props->cache_size = cache->cache_size;
			props->cacheline_size = cache->cache_line_size;
			props->cachelines_per_tag = cache->lines_per_tag;
			props->cache_assoc = cache->associativity;
			props->cache_latency = cache->cache_latency;

			memcpy(props->sibling_map, cache->sibling_map,
					CRAT_SIBLINGMAP_SIZE);

			/* set the sibling_map_size as 32 for CRAT from ACPI */
			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

			/* Translate CRAT cache-type flags to HSA cache types */
			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_DATA;
			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_CPU;
			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_HSACU;

			dev->node_props.caches_count++;
			list_add_tail(&props->list, &dev->cache_props);

			break;
		}
	}

	return 0;
}
1137
/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
 * topology device present in the device_list
 *	@iolink - IO link subtype entry from the CRAT image
 *	@device_list - list of topology devices built so far
 *
 *	Return - 0 on success, -ENOMEM on allocation failure, -ENODEV when a
 *	bidirectional link references a destination node that does not exist
 */
static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
					struct list_head *device_list)
{
	struct kfd_iolink_properties *props = NULL, *props2;
	struct kfd_topology_device *dev, *to_dev;
	uint32_t id_from;
	uint32_t id_to;

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
			id_from, id_to);
	list_for_each_entry(dev, device_list, list) {
		if (id_from == dev->proximity_domain) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->node_from = id_from;
			props->node_to = id_to;
			props->ver_maj = iolink->version_major;
			props->ver_min = iolink->version_minor;
			props->iolink_type = iolink->io_interface_type;

			/* Link weight: fixed for PCIe, CRAT-supplied for
			 * xGMI, NUMA distance for everything else (CPU-CPU)
			 */
			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
				props->weight = 20;
			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
				props->weight = iolink->weight_xgmi;
			else
				props->weight = node_distance(id_from, id_to);

			props->min_latency = iolink->minimum_latency;
			props->max_latency = iolink->maximum_latency;
			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
			props->rec_transfer_size =
					iolink->recommended_transfer_size;

			dev->node_props.io_links_count++;
			list_add_tail(&props->list, &dev->io_link_props);
			break;
		}
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponded CPU->GPU link if GPU
	 * is large bar.
	 * For xGMI, we only added the link with one direction in the crat
	 * table, add corresponded reversed direction link now.
	 */
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		/* _no_lock variant: NOTE(review) presumably the caller
		 * already holds the topology lock here — verify at call
		 * sites before touching this.
		 */
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}

	return 0;
}
1212
1213 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214  * present in the device_list
1215  *      @sub_type_hdr - subtype section of crat_image
1216  *      @device_list - list of topology devices present in this crat_image
1217  */
1218 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
1219                                 struct list_head *device_list)
1220 {
1221         struct crat_subtype_computeunit *cu;
1222         struct crat_subtype_memory *mem;
1223         struct crat_subtype_cache *cache;
1224         struct crat_subtype_iolink *iolink;
1225         int ret = 0;
1226
1227         switch (sub_type_hdr->type) {
1228         case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
1229                 cu = (struct crat_subtype_computeunit *)sub_type_hdr;
1230                 ret = kfd_parse_subtype_cu(cu, device_list);
1231                 break;
1232         case CRAT_SUBTYPE_MEMORY_AFFINITY:
1233                 mem = (struct crat_subtype_memory *)sub_type_hdr;
1234                 ret = kfd_parse_subtype_mem(mem, device_list);
1235                 break;
1236         case CRAT_SUBTYPE_CACHE_AFFINITY:
1237                 cache = (struct crat_subtype_cache *)sub_type_hdr;
1238                 ret = kfd_parse_subtype_cache(cache, device_list);
1239                 break;
1240         case CRAT_SUBTYPE_TLB_AFFINITY:
1241                 /*
1242                  * For now, nothing to do here
1243                  */
1244                 pr_debug("Found TLB entry in CRAT table (not processing)\n");
1245                 break;
1246         case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
1247                 /*
1248                  * For now, nothing to do here
1249                  */
1250                 pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
1251                 break;
1252         case CRAT_SUBTYPE_IOLINK_AFFINITY:
1253                 iolink = (struct crat_subtype_iolink *)sub_type_hdr;
1254                 ret = kfd_parse_subtype_iolink(iolink, device_list);
1255                 break;
1256         default:
1257                 pr_warn("Unknown subtype %d in CRAT\n",
1258                                 sub_type_hdr->type);
1259         }
1260
1261         return ret;
1262 }
1263
/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add in to device_list. Also parse
 * CRAT subtypes and attach it to appropriate kfd_topology_device
 *	@crat_image - input image containing CRAT
 *	@device_list - [OUT] list of kfd_topology_device generated after
 *		       parsing crat_image
 *	@proximity_domain - Proximity domain of the first device in the table
 *
 *	On any failure the partially-built @device_list is released before
 *	returning.
 *
 *	Return - 0 if successful else -ve value
 */
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
			 uint32_t proximity_domain)
{
	struct kfd_topology_device *top_dev = NULL;
	struct crat_subtype_generic *sub_type_hdr;
	uint16_t node_id;
	int ret = 0;
	struct crat_header *crat_table = (struct crat_header *)crat_image;
	uint16_t num_nodes;
	uint32_t image_len;

	if (!crat_image)
		return -EINVAL;

	if (!list_empty(device_list)) {
		pr_warn("Error device list should be empty\n");
		return -EINVAL;
	}

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);

	/* Pre-create one topology device per CRAT domain with consecutive
	 * proximity domains starting at @proximity_domain
	 */
	for (node_id = 0; node_id < num_nodes; node_id++) {
		top_dev = kfd_create_topology_device(device_list);
		if (!top_dev)
			break;
		top_dev->proximity_domain = proximity_domain++;
	}

	/* NULL here means either num_nodes was 0 or a creation failed */
	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}

	/* OEM info from the CRAT header goes on the last-created device.
	 * NOTE(review): only the last device gets it — confirm whether the
	 * other nodes are expected to carry OEM info too.
	 */
	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
			CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	/* Walk the variable-length subtype sections that follow the header,
	 * stopping when the next generic header would run past image_len
	 */
	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
			((char *)crat_image) + image_len) {
		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
			ret = kfd_parse_subtype(sub_type_hdr, device_list);
			if (ret)
				break;
		}

		/* Advance by the subtype's own length field */
		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);
	}

err:
	if (ret)
		kfd_release_topology_device_list(device_list);

	return ret;
}
1334
1335
1336 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
1337                                                    struct kfd_gpu_cache_info *pcache_info)
1338 {
1339         struct amdgpu_device *adev = kdev->adev;
1340         int i = 0;
1341
1342         /* TCP L1 Cache per CU */
1343         if (adev->gfx.config.gc_tcp_l1_size) {
1344                 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
1345                 pcache_info[i].cache_level = 1;
1346                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1347                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1348                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1349                 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
1350                 i++;
1351         }
1352         /* Scalar L1 Instruction Cache per SQC */
1353         if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
1354                 pcache_info[i].cache_size =
1355                         adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
1356                 pcache_info[i].cache_level = 1;
1357                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1358                                         CRAT_CACHE_FLAGS_INST_CACHE |
1359                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1360                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1361                 i++;
1362         }
1363         /* Scalar L1 Data Cache per SQC */
1364         if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
1365                 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
1366                 pcache_info[i].cache_level = 1;
1367                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1368                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1369                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1370                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1371                 i++;
1372         }
1373         /* GL1 Data Cache per SA */
1374         if (adev->gfx.config.gc_gl1c_per_sa &&
1375             adev->gfx.config.gc_gl1c_size_per_instance) {
1376                 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
1377                         adev->gfx.config.gc_gl1c_size_per_instance;
1378                 pcache_info[i].cache_level = 1;
1379                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1380                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1381                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1382                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1383                 i++;
1384         }
1385         /* L2 Data Cache per GPU (Total Tex Cache) */
1386         if (adev->gfx.config.gc_gl2c_per_gpu) {
1387                 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
1388                 pcache_info[i].cache_level = 2;
1389                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1390                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1391                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1392                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1393                 i++;
1394         }
1395         /* L3 Data Cache per GPU */
1396         if (adev->gmc.mall_size) {
1397                 pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
1398                 pcache_info[i].cache_level = 3;
1399                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1400                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1401                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1402                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1403                 i++;
1404         }
1405         return i;
1406 }
1407
/* kfd_get_gpu_cache_info - Look up the GPU cache description table for a node.
 *
 *	@kdev: KFD node to query
 *	@pcache_info: [OUT] pointed at the static cache-info table matching
 *		the ASIC. For GC 11.x parts there is no static table: the
 *		entries are derived from the gfx config instead, and
 *		*pcache_info is used as the destination buffer (assumes the
 *		caller pre-pointed it at writable storage — NOTE(review):
 *		confirm against callers).
 *
 *	Return the number of valid entries in *pcache_info.
 */
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
	int num_of_cache_types = 0;

	/* Pre-GFX9 ASICs are identified by asic_type; everything newer is
	 * matched on the GC IP version in the default branch below.
	 */
	switch (kdev->adev->asic_type) {
	case CHIP_KAVERI:
		*pcache_info = kaveri_cache_info;
		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
		break;
	case CHIP_HAWAII:
		*pcache_info = hawaii_cache_info;
		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
		break;
	case CHIP_CARRIZO:
		*pcache_info = carrizo_cache_info;
		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
		break;
	case CHIP_TONGA:
		*pcache_info = tonga_cache_info;
		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
		break;
	case CHIP_FIJI:
		*pcache_info = fiji_cache_info;
		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
		break;
	case CHIP_POLARIS10:
		*pcache_info = polaris10_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
		break;
	case CHIP_POLARIS11:
		*pcache_info = polaris11_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
		break;
	case CHIP_POLARIS12:
		*pcache_info = polaris12_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
		break;
	case CHIP_VEGAM:
		*pcache_info = vegam_cache_info;
		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
		break;
	default:
		/* GFX9 and newer: dispatch on the GC IP version instead of
		 * the (generic) asic_type.
		 */
		switch (KFD_GC_VERSION(kdev)) {
		case IP_VERSION(9, 0, 1):
			*pcache_info = vega10_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
			break;
		case IP_VERSION(9, 2, 1):
			*pcache_info = vega12_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
			break;
		case IP_VERSION(9, 4, 0):
		case IP_VERSION(9, 4, 1):
			*pcache_info = vega20_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
			break;
		case IP_VERSION(9, 4, 2):
		case IP_VERSION(9, 4, 3):
			*pcache_info = aldebaran_cache_info;
			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
			break;
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			*pcache_info = raven_cache_info;
			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
			break;
		case IP_VERSION(9, 3, 0):
			*pcache_info = renoir_cache_info;
			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
			break;
		case IP_VERSION(10, 1, 10):
		case IP_VERSION(10, 1, 2):
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			*pcache_info = navi10_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
			break;
		case IP_VERSION(10, 1, 1):
			*pcache_info = navi14_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
			break;
		case IP_VERSION(10, 3, 0):
			*pcache_info = sienna_cichlid_cache_info;
			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
			break;
		case IP_VERSION(10, 3, 2):
			*pcache_info = navy_flounder_cache_info;
			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
			break;
		case IP_VERSION(10, 3, 4):
			*pcache_info = dimgrey_cavefish_cache_info;
			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
			break;
		case IP_VERSION(10, 3, 1):
			*pcache_info = vangogh_cache_info;
			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
			break;
		case IP_VERSION(10, 3, 5):
			*pcache_info = beige_goby_cache_info;
			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
			break;
		case IP_VERSION(10, 3, 3):
			*pcache_info = yellow_carp_cache_info;
			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
			break;
		case IP_VERSION(10, 3, 6):
			*pcache_info = gc_10_3_6_cache_info;
			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
			break;
		case IP_VERSION(10, 3, 7):
			*pcache_info = gfx1037_cache_info;
			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
			break;
		case IP_VERSION(11, 0, 0):
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 2):
		case IP_VERSION(11, 0, 3):
		case IP_VERSION(11, 0, 4):
			/* No static table for GC 11.x: build the entries from
			 * the gfx config into the buffer *pcache_info points at.
			 */
			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
			break;
		default:
			/* Unknown ASIC: report a placeholder so topology setup
			 * can proceed; real data must be added later.
			 */
			*pcache_info = dummy_cache_info;
			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
			pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
			break;
		}
	}
	return num_of_cache_types;
}
1538
/* kfd_ignore_crat - Decide whether the ACPI CRAT table should be ignored.
 *
 *	The original body checked the ignore_crat module parameter and then
 *	unconditionally set the result to true anyway, so the parameter check
 *	was dead code. Keep the observable behavior (always ignore CRAT) but
 *	drop the redundant branch and local.
 *
 *	Return true always: the ACPI CRAT table is never used.
 */
static bool kfd_ignore_crat(void)
{
	return true;
}
1550
1551 /*
1552  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
1553  * copies CRAT from ACPI (if available).
1554  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
1555  *
1556  *      @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
1557  *                   crat_image will be NULL
1558  *      @size: [OUT] size of crat_image
1559  *
1560  *      Return 0 if successful else return error code
1561  */
1562 int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
1563 {
1564         struct acpi_table_header *crat_table;
1565         acpi_status status;
1566         void *pcrat_image;
1567         int rc = 0;
1568
1569         if (!crat_image)
1570                 return -EINVAL;
1571
1572         *crat_image = NULL;
1573
1574         if (kfd_ignore_crat()) {
1575                 pr_info("CRAT table disabled by module option\n");
1576                 return -ENODATA;
1577         }
1578
1579         /* Fetch the CRAT table from ACPI */
1580         status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
1581         if (status == AE_NOT_FOUND) {
1582                 pr_info("CRAT table not found\n");
1583                 return -ENODATA;
1584         } else if (ACPI_FAILURE(status)) {
1585                 const char *err = acpi_format_exception(status);
1586
1587                 pr_err("CRAT table error: %s\n", err);
1588                 return -EINVAL;
1589         }
1590
1591         pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
1592         if (!pcrat_image) {
1593                 rc = -ENOMEM;
1594                 goto out;
1595         }
1596
1597         memcpy(pcrat_image, crat_table, crat_table->length);
1598         *crat_image = pcrat_image;
1599         *size = crat_table->length;
1600 out:
1601         acpi_put_table(crat_table);
1602         return rc;
1603 }
1604
1605 /* Memory required to create Virtual CRAT.
1606  * Since there is no easy way to predict the amount of memory required, the
1607  * following amount is allocated for GPU Virtual CRAT. This is
1608  * expected to cover all known conditions. But to be safe additional check
1609  * is put in the code to ensure we don't overwrite.
1610  */
1611 #define VCRAT_SIZE_FOR_GPU      (4 * PAGE_SIZE)
1612
/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory; decremented by the
 *		     subtype size before writing
 *	@proximity_domain: CRAT proximity domain assigned to this node
 *	@sub_type_hdr: Memory into which compute info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
				int proximity_domain,
				struct crat_subtype_computeunit *sub_type_hdr)
{
	const struct cpumask *cpumask;

	/* Reserve space for this subtype; bail out if the image is full */
	*avail_size -= sizeof(struct crat_subtype_computeunit);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	cpumask = cpumask_of_node(numa_node_id);

	/* Fill in CU data */
	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
	sub_type_hdr->proximity_domain = proximity_domain;
	/* APIC id of the node's first CPU; -1 means no CPU on this node */
	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
	if (sub_type_hdr->processor_id_low == -1)
		return -EINVAL;

	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

	return 0;
}
1651
/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory; decremented by the
 *		     subtype size before writing
 *	@proximity_domain: CRAT proximity domain assigned to this node
 *	@sub_type_hdr: Memory into which memory info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
			int proximity_domain,
			struct crat_subtype_memory *sub_type_hdr)
{
	uint64_t mem_in_bytes = 0;
	pg_data_t *pgdat;
	int zone_type;

	/* Reserve space for this subtype; bail out if the image is full */
	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */

	/* Unlike si_meminfo, si_meminfo_node is not exported. So
	 * the following lines are duplicated from si_meminfo_node
	 * function
	 */
	pgdat = NODE_DATA(numa_node_id);
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	/* Split the 64-bit byte count across the 32-bit low/high fields */
	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
	sub_type_hdr->proximity_domain = proximity_domain;

	return 0;
}
1696
1697 #ifdef CONFIG_X86_64
/* kfd_fill_iolink_info_for_cpu - Create one IO link subtype from the given
 * CPU NUMA node to every other online NUMA node.
 *
 *	@numa_node_id: source CPU NUMA node id
 *	@avail_size: Available size in the memory; decremented per entry
 *	@num_entries: [OUT] number of iolink subtypes written
 *	@sub_type_hdr: start of the array of iolink subtypes to fill in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
				uint32_t *num_entries,
				struct crat_subtype_iolink *sub_type_hdr)
{
	int nid;
	struct cpuinfo_x86 *c = &cpu_data(0);
	uint8_t link_type;

	/* Pick the inter-socket link type based on the CPU vendor */
	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;

	*num_entries = 0;

	/* Create IO links from this node to other CPU nodes */
	for_each_online_node(nid) {
		if (nid == numa_node_id) /* node itself */
			continue;

		/* Reserve space for this entry; bail out if image is full */
		*avail_size -= sizeof(struct crat_subtype_iolink);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

		/* Fill in subtype header data */
		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* Fill in IO link data */
		sub_type_hdr->proximity_domain_from = numa_node_id;
		sub_type_hdr->proximity_domain_to = nid;
		sub_type_hdr->io_interface_type = link_type;

		(*num_entries)++;
		sub_type_hdr++;
	}

	return 0;
}
1740 #endif
1741
/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 *
 *	@pcrat_image: Fill in VCRAT for CPU
 *	@size:  [IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 *
 *	Builds a CRAT header followed by, for each online NUMA node that has
 *	a CPU: a compute-unit subtype, a memory subtype, and (x86-64 only)
 *	IO link subtypes to all other nodes. Subtypes are appended in place
 *	immediately after the header.
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
	struct acpi_table_header *acpi_table;
	acpi_status status;
	struct crat_subtype_generic *sub_type_hdr;
	int avail_size = *size;
	int numa_node_id;
#ifdef CONFIG_X86_64
	uint32_t entries = 0;
#endif
	int ret = 0;

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memset(crat_table, 0, sizeof(struct crat_header));
	memcpy(&crat_table->signature, CRAT_SIGNATURE,
			sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	/* Copy OEM identification from the DSDT, if present; a missing DSDT
	 * is not fatal — the OEM fields just stay zeroed.
	 */
	status = acpi_get_table("DSDT", 0, &acpi_table);
	if (status != AE_OK)
		pr_warn("DSDT table not found for OEM information\n");
	else {
		crat_table->oem_revision = acpi_table->revision;
		memcpy(crat_table->oem_id, acpi_table->oem_id,
				CRAT_OEMID_LENGTH);
		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
				CRAT_OEMTABLEID_LENGTH);
		acpi_put_table(acpi_table);
	}
	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	/* Subtypes are written immediately after the header */
	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);

	for_each_online_node(numa_node_id) {
		/* Skip NUMA nodes with no CPU (e.g. GPU-only nodes) */
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		/* Advance past the subtype just written */
		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
#ifdef CONFIG_X86_64
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
				&entries,
				(struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		/* entries may be 0 on a single-node system */
		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}
#else
		pr_info("IO link not available for non x86 platforms\n");
#endif

		crat_table->num_domains++;
	}

	/* TODO: Add cache Subtype for CPU.
	 * Currently, CPU cache information is available in function
	 * detect_cache_attributes(cpu) defined in the file
	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
	 * exported and to get the same information the code needs to be
	 * duplicated.
	 */

	*size = crat_table->length;
	pr_info("Virtual CRAT table created for CPU\n");

	return 0;
}
1855
1856 static int kfd_fill_gpu_memory_affinity(int *avail_size,
1857                 struct kfd_node *kdev, uint8_t type, uint64_t size,
1858                 struct crat_subtype_memory *sub_type_hdr,
1859                 uint32_t proximity_domain,
1860                 const struct kfd_local_mem_info *local_mem_info)
1861 {
1862         *avail_size -= sizeof(struct crat_subtype_memory);
1863         if (*avail_size < 0)
1864                 return -ENOMEM;
1865
1866         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1867         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1868         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1869         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1870
1871         sub_type_hdr->proximity_domain = proximity_domain;
1872
1873         pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
1874                         type, size);
1875
1876         sub_type_hdr->length_low = lower_32_bits(size);
1877         sub_type_hdr->length_high = upper_32_bits(size);
1878
1879         sub_type_hdr->width = local_mem_info->vram_width;
1880         sub_type_hdr->visibility_type = type;
1881
1882         return 0;
1883 }
1884
1885 #ifdef CONFIG_ACPI_NUMA
/* kfd_find_numa_node_in_srat - Bind a GPU device to a NUMA node by walking
 * the ACPI SRAT table.
 *
 *	@kdev: GPU device to look up, matched by PCI segment/bus/dev/fn
 *
 *	Scans SRAT generic-affinity entries for a device handle matching
 *	this GPU's PCI id, and sets the device's NUMA node from the entry's
 *	proximity domain. CPU-affinity entries are scanned alongside to
 *	track the largest proximity domain, used to sanity-check the result.
 *	No-op if SRAT is absent or no match is found.
 */
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, subtable_len;
	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
			pci_dev_id(kdev->adev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;

	/* Parse all entries looking for a match. */
	sub_header = (struct acpi_subtable_header *)
			((unsigned long)table_header +
			sizeof(struct acpi_table_srat));
	subtable_len = sub_header->length;

	while (((unsigned long)sub_header) + subtable_len  < table_end) {
		/*
		 * If length is 0, break from this loop to avoid
		 * infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			/* NOTE(review): reads 4 bytes through a u32 cast of
			 * the proximity_domain_hi byte array and shifts by 8;
			 * relies on struct layout/endianness — confirm
			 * against the SRAT spec before touching.
			 */
			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
					cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			/* device_handle holds the PCI segment (first 2 bytes)
			 * and BDF (next 2 bytes) for PCI device handles
			 */
			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
					*((u16 *)(&gpu->device_handle[2]));
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		/* Advance to the next variable-length subtable */
		sub_header = (struct acpi_subtable_header *)
				((unsigned long)sub_header + subtable_len);
		subtable_len = sub_header->length;
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
1969 #endif
1970
1971 #define KFD_CRAT_INTRA_SOCKET_WEIGHT    13
1972 #define KFD_CRAT_XGMI_WEIGHT            15
1973
/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
 * to its NUMA node
 *	@avail_size: Available size in the memory; decremented here
 *	@kdev - [IN] GPU device
 *	@sub_type_hdr: Memory into which io link info will be filled in
 *	@proximity_domain - proximity domain of the GPU node
 *
 *	The link is reported as XGMI for CPU-connected (or APU-package)
 *	parts and as PCIe otherwise, with bandwidth/weight set accordingly.
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
			struct kfd_node *kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain)
{
	/* Reserve space for this subtype; bail out if the image is full */
	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
	/* Large-BAR systems let the CPU access all of VRAM, so the link
	 * can be reported bidirectional even over PCIe
	 */
	if (kfd_dev_is_large_bar(kdev))
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype.
	 * TODO: Fill-in other fields of iolink subtype
	 */
	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
	     AMDGPU_PKG_TYPE_APU)) {
		/* ext_cpu: CPU reached over an external xGMI link (as
		 * opposed to an intra-package link on GC 9.4.3 APUs)
		 */
		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
							KFD_CRAT_INTRA_SOCKET_WEIGHT;
		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
							kdev->adev, NULL, true) : mem_bw;

		/*
		 * with host gpu xgmi link, host can access gpu memory whether
		 * or not pcie bar type is large, so always create bidirectional
		 * io link.
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
	} else {
		/* Plain PCIe link: report the negotiated min/max bandwidth */
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
		sub_type_hdr->minimum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
		sub_type_hdr->maximum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	/* Device not yet bound to a NUMA node: try to find one in SRAT */
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
	    num_possible_nodes() > 1)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
	sub_type_hdr->proximity_domain_to = 0;
#endif
	return 0;
}
2049
/* kfd_fill_gpu_xgmi_link_to_gpu - Fill in an XGMI io link from one GPU
 * node to a peer GPU node.
 *
 *	@avail_size: Available size in the memory; decremented here
 *	@kdev: source GPU node
 *	@peer_kdev: destination GPU node
 *	@sub_type_hdr: Memory into which io link info will be filled in
 *	@proximity_domain_from: proximity domain of the source node
 *	@proximity_domain_to: proximity domain of the destination node
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
			struct kfd_node *kdev,
			struct kfd_node *peer_kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain_from,
			uint32_t proximity_domain_to)
{
	/* Single-partition devices can query the PSP TA for real hop count
	 * and bandwidth; multi-partition devices use fixed estimates below
	 */
	bool use_ta_info = kdev->kfd->num_nodes == 1;

	/* Reserve space for this subtype; bail out if the image is full */
	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
	sub_type_hdr->proximity_domain_from = proximity_domain_from;
	sub_type_hdr->proximity_domain_to = proximity_domain_to;

	if (use_ta_info) {
		/* Weight scales with the number of xGMI hops to the peer */
		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
		sub_type_hdr->maximum_bandwidth_mbs =
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
							peer_kdev->adev, false);
		/* Min bandwidth only meaningful when a max was reported */
		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
	} else {
		/* Partitions on the same device are one hop apart; peers on
		 * another device go through two intra-socket links plus an
		 * xGMI link
		 */
		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
		int mem_bw = 819200;

		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
		sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
	}

	return 0;
}
2095
/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2097  *
2098  *      @pcrat_image: Fill in VCRAT for GPU
2099  *      @size:  [IN] allocated size of crat_image.
2100  *              [OUT] actual size of data filled in crat_image
2101  */
2102 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
2103                                       size_t *size, struct kfd_node *kdev,
2104                                       uint32_t proximity_domain)
2105 {
2106         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2107         struct crat_subtype_generic *sub_type_hdr;
2108         struct kfd_local_mem_info local_mem_info;
2109         struct kfd_topology_device *peer_dev;
2110         struct crat_subtype_computeunit *cu;
2111         struct kfd_cu_info cu_info;
2112         int avail_size = *size;
2113         uint32_t total_num_of_cu;
2114         uint32_t nid = 0;
2115         int ret = 0;
2116
2117         if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
2118                 return -EINVAL;
2119
2120         /* Fill the CRAT Header.
2121          * Modify length and total_entries as subunits are added.
2122          */
2123         avail_size -= sizeof(struct crat_header);
2124         if (avail_size < 0)
2125                 return -ENOMEM;
2126
2127         memset(crat_table, 0, sizeof(struct crat_header));
2128
2129         memcpy(&crat_table->signature, CRAT_SIGNATURE,
2130                         sizeof(crat_table->signature));
2131         /* Change length as we add more subtypes*/
2132         crat_table->length = sizeof(struct crat_header);
2133         crat_table->num_domains = 1;
2134         crat_table->total_entries = 0;
2135
2136         /* Fill in Subtype: Compute Unit
2137          * First fill in the sub type header and then sub type data
2138          */
2139         avail_size -= sizeof(struct crat_subtype_computeunit);
2140         if (avail_size < 0)
2141                 return -ENOMEM;
2142
2143         sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
2144         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
2145
2146         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
2147         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
2148         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
2149
2150         /* Fill CU subtype data */
2151         cu = (struct crat_subtype_computeunit *)sub_type_hdr;
2152         cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
2153         cu->proximity_domain = proximity_domain;
2154
2155         amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2156         cu->num_simd_per_cu = cu_info.simd_per_cu;
2157         cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
2158         cu->max_waves_simd = cu_info.max_waves_per_simd;
2159
2160         cu->wave_front_size = cu_info.wave_front_size;
2161         cu->array_count = cu_info.num_shader_arrays_per_engine *
2162                 cu_info.num_shader_engines;
2163         total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2164         cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2165         cu->num_cu_per_array = cu_info.num_cu_per_sh;
2166         cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2167         cu->num_banks = cu_info.num_shader_engines;
2168         cu->lds_size_in_kb = cu_info.lds_size;
2169
2170         cu->hsa_capability = 0;
2171
2172         /* Check if this node supports IOMMU. During parsing this flag will
2173          * translate to HSA_CAP_ATS_PRESENT
2174          */
2175         if (!kfd_iommu_check_device(kdev->kfd))
2176                 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
2177
2178         crat_table->length += sub_type_hdr->length;
2179         crat_table->total_entries++;
2180
2181         /* Fill in Subtype: Memory. Only on systems with large BAR (no
2182          * private FB), report memory as public. On other systems
2183          * report the total FB size (public+private) as a single
2184          * private heap.
2185          */
2186         local_mem_info = kdev->local_mem_info;
2187         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2188                         sub_type_hdr->length);
2189
2190         if (debug_largebar)
2191                 local_mem_info.local_mem_size_private = 0;
2192
2193         if (local_mem_info.local_mem_size_private == 0)
2194                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2195                                 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
2196                                 local_mem_info.local_mem_size_public,
2197                                 (struct crat_subtype_memory *)sub_type_hdr,
2198                                 proximity_domain,
2199                                 &local_mem_info);
2200         else
2201                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2202                                 kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
2203                                 local_mem_info.local_mem_size_public +
2204                                 local_mem_info.local_mem_size_private,
2205                                 (struct crat_subtype_memory *)sub_type_hdr,
2206                                 proximity_domain,
2207                                 &local_mem_info);
2208         if (ret < 0)
2209                 return ret;
2210
2211         crat_table->length += sizeof(struct crat_subtype_memory);
2212         crat_table->total_entries++;
2213
2214         /* Fill in Subtype: IO_LINKS
2215          *  Only direct links are added here which is Link from GPU to
2216          *  its NUMA node. Indirect links are added by userspace.
2217          */
2218         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2219                 sub_type_hdr->length);
2220         ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
2221                 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
2222
2223         if (ret < 0)
2224                 return ret;
2225
2226         crat_table->length += sub_type_hdr->length;
2227         crat_table->total_entries++;
2228
2229
2230         /* Fill in Subtype: IO_LINKS
2231          * Direct links from GPU to other GPUs through xGMI.
2232          * We will loop GPUs that already be processed (with lower value
2233          * of proximity_domain), add the link for the GPUs with same
2234          * hive id (from this GPU to other GPU) . The reversed iolink
2235          * (from other GPU to this GPU) will be added
2236          * in kfd_parse_subtype_iolink.
2237          */
2238         if (kdev->kfd->hive_id) {
2239                 for (nid = 0; nid < proximity_domain; ++nid) {
2240                         peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2241                         if (!peer_dev->gpu)
2242                                 continue;
2243                         if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2244                                 continue;
2245                         sub_type_hdr = (typeof(sub_type_hdr))(
2246                                 (char *)sub_type_hdr +
2247                                 sizeof(struct crat_subtype_iolink));
2248                         ret = kfd_fill_gpu_xgmi_link_to_gpu(
2249                                 &avail_size, kdev, peer_dev->gpu,
2250                                 (struct crat_subtype_iolink *)sub_type_hdr,
2251                                 proximity_domain, nid);
2252                         if (ret < 0)
2253                                 return ret;
2254                         crat_table->length += sub_type_hdr->length;
2255                         crat_table->total_entries++;
2256                 }
2257         }
2258         *size = crat_table->length;
2259         pr_info("Virtual CRAT table created for GPU\n");
2260
2261         return ret;
2262 }
2263
2264 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2265  *              creates a Virtual CRAT (VCRAT) image
2266  *
2267  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2268  *
2269  *      @crat_image: VCRAT image created because ACPI does not have a
2270  *                   CRAT for this device
2271  *      @size: [OUT] size of virtual crat_image
2272  *      @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2273  *              COMPUTE_UNIT_GPU - Create VCRAT for GPU
2274  *              (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2275  *                      -- this option is not currently implemented.
2276  *                      The assumption is that all AMD APUs will have CRAT
2277  *      @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2278  *
2279  *      Return 0 if successful else return -ve value
2280  */
2281 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
2282                                   int flags, struct kfd_node *kdev,
2283                                   uint32_t proximity_domain)
2284 {
2285         void *pcrat_image = NULL;
2286         int ret = 0, num_nodes;
2287         size_t dyn_size;
2288
2289         if (!crat_image)
2290                 return -EINVAL;
2291
2292         *crat_image = NULL;
2293
2294         /* Allocate the CPU Virtual CRAT size based on the number of online
2295          * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
2296          * This should cover all the current conditions. A check is put not
2297          * to overwrite beyond allocated size for GPUs
2298          */
2299         switch (flags) {
2300         case COMPUTE_UNIT_CPU:
2301                 num_nodes = num_online_nodes();
2302                 dyn_size = sizeof(struct crat_header) +
2303                         num_nodes * (sizeof(struct crat_subtype_computeunit) +
2304                         sizeof(struct crat_subtype_memory) +
2305                         (num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2306                 pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2307                 if (!pcrat_image)
2308                         return -ENOMEM;
2309                 *size = dyn_size;
2310                 pr_debug("CRAT size is %ld", dyn_size);
2311                 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2312                 break;
2313         case COMPUTE_UNIT_GPU:
2314                 if (!kdev)
2315                         return -EINVAL;
2316                 pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
2317                 if (!pcrat_image)
2318                         return -ENOMEM;
2319                 *size = VCRAT_SIZE_FOR_GPU;
2320                 ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
2321                                                  proximity_domain);
2322                 break;
2323         case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2324                 /* TODO: */
2325                 ret = -EINVAL;
2326                 pr_err("VCRAT not implemented for APU\n");
2327                 break;
2328         default:
2329                 ret = -EINVAL;
2330         }
2331
2332         if (!ret)
2333                 *crat_image = pcrat_image;
2334         else
2335                 kvfree(pcrat_image);
2336
2337         return ret;
2338 }
2339
2340
/* kfd_destroy_crat_image - Frees a CRAT image previously allocated by
 *	kfd_create_crat_image_acpi() or kfd_create_crat_image_virtual()
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..);
 *		     may be NULL, in which case this is a no-op
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kvfree(crat_image);
}