ASoC: Merge v6.5-rc2
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdkfd / kfd_crat.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2015-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23
24 #include <linux/pci.h>
25 #include <linux/acpi.h>
26 #include "kfd_crat.h"
27 #include "kfd_priv.h"
28 #include "kfd_topology.h"
29 #include "kfd_iommu.h"
30 #include "amdgpu.h"
31 #include "amdgpu_amdkfd.h"
32
33 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
34  * GPU processor ID are expressed with Bit[31]=1.
35  * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
36  * used in the CRAT.
37  */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for next GPU
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	/* Keep the snapshot unsigned: IDs start at 0x80001000, which does
	 * not fit in a signed int (the previous int local relied on
	 * implementation-defined conversion behavior).
	 */
	uint32_t current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
52
53
54 static struct kfd_gpu_cache_info kaveri_cache_info[] = {
55         {
56                 /* TCP L1 Cache per CU */
57                 .cache_size = 16,
58                 .cache_level = 1,
59                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
60                                 CRAT_CACHE_FLAGS_DATA_CACHE |
61                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
62                 .num_cu_shared = 1,
63         },
64         {
65                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
66                 .cache_size = 16,
67                 .cache_level = 1,
68                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
69                                 CRAT_CACHE_FLAGS_INST_CACHE |
70                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
71                 .num_cu_shared = 2,
72         },
73         {
74                 /* Scalar L1 Data Cache (in SQC module) per bank */
75                 .cache_size = 8,
76                 .cache_level = 1,
77                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
78                                 CRAT_CACHE_FLAGS_DATA_CACHE |
79                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
80                 .num_cu_shared = 2,
81         },
82
83         /* TODO: Add L2 Cache information */
84 };
85
86
87 static struct kfd_gpu_cache_info carrizo_cache_info[] = {
88         {
89                 /* TCP L1 Cache per CU */
90                 .cache_size = 16,
91                 .cache_level = 1,
92                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
93                                 CRAT_CACHE_FLAGS_DATA_CACHE |
94                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
95                 .num_cu_shared = 1,
96         },
97         {
98                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
99                 .cache_size = 8,
100                 .cache_level = 1,
101                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
102                                 CRAT_CACHE_FLAGS_INST_CACHE |
103                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
104                 .num_cu_shared = 4,
105         },
106         {
107                 /* Scalar L1 Data Cache (in SQC module) per bank. */
108                 .cache_size = 4,
109                 .cache_level = 1,
110                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
111                                 CRAT_CACHE_FLAGS_DATA_CACHE |
112                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
113                 .num_cu_shared = 4,
114         },
115
116         /* TODO: Add L2 Cache information */
117 };
118
/* These ASICs reuse the cache topology of the tables defined above. */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info  carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
126
/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * Going forward, the cache details must be checked
 * and updated for every new ASIC.
 */
133
134 static struct kfd_gpu_cache_info vega10_cache_info[] = {
135         {
136                 /* TCP L1 Cache per CU */
137                 .cache_size = 16,
138                 .cache_level = 1,
139                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
140                                 CRAT_CACHE_FLAGS_DATA_CACHE |
141                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
142                 .num_cu_shared = 1,
143         },
144         {
145                 /* Scalar L1 Instruction Cache per SQC */
146                 .cache_size = 32,
147                 .cache_level = 1,
148                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
149                                 CRAT_CACHE_FLAGS_INST_CACHE |
150                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
151                 .num_cu_shared = 3,
152         },
153         {
154                 /* Scalar L1 Data Cache per SQC */
155                 .cache_size = 16,
156                 .cache_level = 1,
157                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
158                                 CRAT_CACHE_FLAGS_DATA_CACHE |
159                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
160                 .num_cu_shared = 3,
161         },
162         {
163                 /* L2 Data Cache per GPU (Total Tex Cache) */
164                 .cache_size = 4096,
165                 .cache_level = 2,
166                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
167                                 CRAT_CACHE_FLAGS_DATA_CACHE |
168                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
169                 .num_cu_shared = 16,
170         },
171 };
172
173 static struct kfd_gpu_cache_info raven_cache_info[] = {
174         {
175                 /* TCP L1 Cache per CU */
176                 .cache_size = 16,
177                 .cache_level = 1,
178                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
179                                 CRAT_CACHE_FLAGS_DATA_CACHE |
180                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
181                 .num_cu_shared = 1,
182         },
183         {
184                 /* Scalar L1 Instruction Cache per SQC */
185                 .cache_size = 32,
186                 .cache_level = 1,
187                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
188                                 CRAT_CACHE_FLAGS_INST_CACHE |
189                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
190                 .num_cu_shared = 3,
191         },
192         {
193                 /* Scalar L1 Data Cache per SQC */
194                 .cache_size = 16,
195                 .cache_level = 1,
196                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
197                                 CRAT_CACHE_FLAGS_DATA_CACHE |
198                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
199                 .num_cu_shared = 3,
200         },
201         {
202                 /* L2 Data Cache per GPU (Total Tex Cache) */
203                 .cache_size = 1024,
204                 .cache_level = 2,
205                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
206                                 CRAT_CACHE_FLAGS_DATA_CACHE |
207                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
208                 .num_cu_shared = 11,
209         },
210 };
211
212 static struct kfd_gpu_cache_info renoir_cache_info[] = {
213         {
214                 /* TCP L1 Cache per CU */
215                 .cache_size = 16,
216                 .cache_level = 1,
217                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
218                                 CRAT_CACHE_FLAGS_DATA_CACHE |
219                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
220                 .num_cu_shared = 1,
221         },
222         {
223                 /* Scalar L1 Instruction Cache per SQC */
224                 .cache_size = 32,
225                 .cache_level = 1,
226                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
227                                 CRAT_CACHE_FLAGS_INST_CACHE |
228                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
229                 .num_cu_shared = 3,
230         },
231         {
232                 /* Scalar L1 Data Cache per SQC */
233                 .cache_size = 16,
234                 .cache_level = 1,
235                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
236                                 CRAT_CACHE_FLAGS_DATA_CACHE |
237                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
238                 .num_cu_shared = 3,
239         },
240         {
241                 /* L2 Data Cache per GPU (Total Tex Cache) */
242                 .cache_size = 1024,
243                 .cache_level = 2,
244                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
245                                 CRAT_CACHE_FLAGS_DATA_CACHE |
246                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
247                 .num_cu_shared = 8,
248         },
249 };
250
251 static struct kfd_gpu_cache_info vega12_cache_info[] = {
252         {
253                 /* TCP L1 Cache per CU */
254                 .cache_size = 16,
255                 .cache_level = 1,
256                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
257                                 CRAT_CACHE_FLAGS_DATA_CACHE |
258                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
259                 .num_cu_shared = 1,
260         },
261         {
262                 /* Scalar L1 Instruction Cache per SQC */
263                 .cache_size = 32,
264                 .cache_level = 1,
265                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
266                                 CRAT_CACHE_FLAGS_INST_CACHE |
267                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
268                 .num_cu_shared = 3,
269         },
270         {
271                 /* Scalar L1 Data Cache per SQC */
272                 .cache_size = 16,
273                 .cache_level = 1,
274                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
275                                 CRAT_CACHE_FLAGS_DATA_CACHE |
276                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
277                 .num_cu_shared = 3,
278         },
279         {
280                 /* L2 Data Cache per GPU (Total Tex Cache) */
281                 .cache_size = 2048,
282                 .cache_level = 2,
283                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
284                                 CRAT_CACHE_FLAGS_DATA_CACHE |
285                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
286                 .num_cu_shared = 5,
287         },
288 };
289
290 static struct kfd_gpu_cache_info vega20_cache_info[] = {
291         {
292                 /* TCP L1 Cache per CU */
293                 .cache_size = 16,
294                 .cache_level = 1,
295                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
296                                 CRAT_CACHE_FLAGS_DATA_CACHE |
297                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
298                 .num_cu_shared = 1,
299         },
300         {
301                 /* Scalar L1 Instruction Cache per SQC */
302                 .cache_size = 32,
303                 .cache_level = 1,
304                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
305                                 CRAT_CACHE_FLAGS_INST_CACHE |
306                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
307                 .num_cu_shared = 3,
308         },
309         {
310                 /* Scalar L1 Data Cache per SQC */
311                 .cache_size = 16,
312                 .cache_level = 1,
313                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
314                                 CRAT_CACHE_FLAGS_DATA_CACHE |
315                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
316                 .num_cu_shared = 3,
317         },
318         {
319                 /* L2 Data Cache per GPU (Total Tex Cache) */
320                 .cache_size = 8192,
321                 .cache_level = 2,
322                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
323                                 CRAT_CACHE_FLAGS_DATA_CACHE |
324                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
325                 .num_cu_shared = 16,
326         },
327 };
328
329 static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
330         {
331                 /* TCP L1 Cache per CU */
332                 .cache_size = 16,
333                 .cache_level = 1,
334                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
335                                 CRAT_CACHE_FLAGS_DATA_CACHE |
336                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
337                 .num_cu_shared = 1,
338         },
339         {
340                 /* Scalar L1 Instruction Cache per SQC */
341                 .cache_size = 32,
342                 .cache_level = 1,
343                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
344                                 CRAT_CACHE_FLAGS_INST_CACHE |
345                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
346                 .num_cu_shared = 2,
347         },
348         {
349                 /* Scalar L1 Data Cache per SQC */
350                 .cache_size = 16,
351                 .cache_level = 1,
352                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
353                                 CRAT_CACHE_FLAGS_DATA_CACHE |
354                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
355                 .num_cu_shared = 2,
356         },
357         {
358                 /* L2 Data Cache per GPU (Total Tex Cache) */
359                 .cache_size = 8192,
360                 .cache_level = 2,
361                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
362                                 CRAT_CACHE_FLAGS_DATA_CACHE |
363                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
364                 .num_cu_shared = 14,
365         },
366 };
367
368 static struct kfd_gpu_cache_info navi10_cache_info[] = {
369         {
370                 /* TCP L1 Cache per CU */
371                 .cache_size = 16,
372                 .cache_level = 1,
373                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
374                                 CRAT_CACHE_FLAGS_DATA_CACHE |
375                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
376                 .num_cu_shared = 1,
377         },
378         {
379                 /* Scalar L1 Instruction Cache per SQC */
380                 .cache_size = 32,
381                 .cache_level = 1,
382                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
383                                 CRAT_CACHE_FLAGS_INST_CACHE |
384                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
385                 .num_cu_shared = 2,
386         },
387         {
388                 /* Scalar L1 Data Cache per SQC */
389                 .cache_size = 16,
390                 .cache_level = 1,
391                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
392                                 CRAT_CACHE_FLAGS_DATA_CACHE |
393                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
394                 .num_cu_shared = 2,
395         },
396         {
397                 /* GL1 Data Cache per SA */
398                 .cache_size = 128,
399                 .cache_level = 1,
400                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
401                                 CRAT_CACHE_FLAGS_DATA_CACHE |
402                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
403                 .num_cu_shared = 10,
404         },
405         {
406                 /* L2 Data Cache per GPU (Total Tex Cache) */
407                 .cache_size = 4096,
408                 .cache_level = 2,
409                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
410                                 CRAT_CACHE_FLAGS_DATA_CACHE |
411                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
412                 .num_cu_shared = 10,
413         },
414 };
415
416 static struct kfd_gpu_cache_info vangogh_cache_info[] = {
417         {
418                 /* TCP L1 Cache per CU */
419                 .cache_size = 16,
420                 .cache_level = 1,
421                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
422                                 CRAT_CACHE_FLAGS_DATA_CACHE |
423                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
424                 .num_cu_shared = 1,
425         },
426         {
427                 /* Scalar L1 Instruction Cache per SQC */
428                 .cache_size = 32,
429                 .cache_level = 1,
430                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
431                                 CRAT_CACHE_FLAGS_INST_CACHE |
432                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
433                 .num_cu_shared = 2,
434         },
435         {
436                 /* Scalar L1 Data Cache per SQC */
437                 .cache_size = 16,
438                 .cache_level = 1,
439                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
440                                 CRAT_CACHE_FLAGS_DATA_CACHE |
441                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
442                 .num_cu_shared = 2,
443         },
444         {
445                 /* GL1 Data Cache per SA */
446                 .cache_size = 128,
447                 .cache_level = 1,
448                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
449                                 CRAT_CACHE_FLAGS_DATA_CACHE |
450                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
451                 .num_cu_shared = 8,
452         },
453         {
454                 /* L2 Data Cache per GPU (Total Tex Cache) */
455                 .cache_size = 1024,
456                 .cache_level = 2,
457                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
458                                 CRAT_CACHE_FLAGS_DATA_CACHE |
459                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
460                 .num_cu_shared = 8,
461         },
462 };
463
464 static struct kfd_gpu_cache_info navi14_cache_info[] = {
465         {
466                 /* TCP L1 Cache per CU */
467                 .cache_size = 16,
468                 .cache_level = 1,
469                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
470                                 CRAT_CACHE_FLAGS_DATA_CACHE |
471                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
472                 .num_cu_shared = 1,
473         },
474         {
475                 /* Scalar L1 Instruction Cache per SQC */
476                 .cache_size = 32,
477                 .cache_level = 1,
478                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
479                                 CRAT_CACHE_FLAGS_INST_CACHE |
480                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
481                 .num_cu_shared = 2,
482         },
483         {
484                 /* Scalar L1 Data Cache per SQC */
485                 .cache_size = 16,
486                 .cache_level = 1,
487                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
488                                 CRAT_CACHE_FLAGS_DATA_CACHE |
489                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
490                 .num_cu_shared = 2,
491         },
492         {
493                 /* GL1 Data Cache per SA */
494                 .cache_size = 128,
495                 .cache_level = 1,
496                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
497                                 CRAT_CACHE_FLAGS_DATA_CACHE |
498                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
499                 .num_cu_shared = 12,
500         },
501         {
502                 /* L2 Data Cache per GPU (Total Tex Cache) */
503                 .cache_size = 2048,
504                 .cache_level = 2,
505                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
506                                 CRAT_CACHE_FLAGS_DATA_CACHE |
507                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
508                 .num_cu_shared = 12,
509         },
510 };
511
512 static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
513         {
514                 /* TCP L1 Cache per CU */
515                 .cache_size = 16,
516                 .cache_level = 1,
517                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
518                                 CRAT_CACHE_FLAGS_DATA_CACHE |
519                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
520                 .num_cu_shared = 1,
521         },
522         {
523                 /* Scalar L1 Instruction Cache per SQC */
524                 .cache_size = 32,
525                 .cache_level = 1,
526                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
527                                 CRAT_CACHE_FLAGS_INST_CACHE |
528                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
529                 .num_cu_shared = 2,
530         },
531         {
532                 /* Scalar L1 Data Cache per SQC */
533                 .cache_size = 16,
534                 .cache_level = 1,
535                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
536                                 CRAT_CACHE_FLAGS_DATA_CACHE |
537                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
538                 .num_cu_shared = 2,
539         },
540         {
541                 /* GL1 Data Cache per SA */
542                 .cache_size = 128,
543                 .cache_level = 1,
544                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
545                                 CRAT_CACHE_FLAGS_DATA_CACHE |
546                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
547                 .num_cu_shared = 10,
548         },
549         {
550                 /* L2 Data Cache per GPU (Total Tex Cache) */
551                 .cache_size = 4096,
552                 .cache_level = 2,
553                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
554                                 CRAT_CACHE_FLAGS_DATA_CACHE |
555                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
556                 .num_cu_shared = 10,
557         },
558         {
559                 /* L3 Data Cache per GPU */
560                 .cache_size = 128*1024,
561                 .cache_level = 3,
562                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
563                                 CRAT_CACHE_FLAGS_DATA_CACHE |
564                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
565                 .num_cu_shared = 10,
566         },
567 };
568
569 static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
570         {
571                 /* TCP L1 Cache per CU */
572                 .cache_size = 16,
573                 .cache_level = 1,
574                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
575                                 CRAT_CACHE_FLAGS_DATA_CACHE |
576                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
577                 .num_cu_shared = 1,
578         },
579         {
580                 /* Scalar L1 Instruction Cache per SQC */
581                 .cache_size = 32,
582                 .cache_level = 1,
583                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
584                                 CRAT_CACHE_FLAGS_INST_CACHE |
585                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
586                 .num_cu_shared = 2,
587         },
588         {
589                 /* Scalar L1 Data Cache per SQC */
590                 .cache_size = 16,
591                 .cache_level = 1,
592                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
593                                 CRAT_CACHE_FLAGS_DATA_CACHE |
594                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
595                 .num_cu_shared = 2,
596         },
597         {
598                 /* GL1 Data Cache per SA */
599                 .cache_size = 128,
600                 .cache_level = 1,
601                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
602                                 CRAT_CACHE_FLAGS_DATA_CACHE |
603                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
604                 .num_cu_shared = 10,
605         },
606         {
607                 /* L2 Data Cache per GPU (Total Tex Cache) */
608                 .cache_size = 3072,
609                 .cache_level = 2,
610                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
611                                 CRAT_CACHE_FLAGS_DATA_CACHE |
612                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
613                 .num_cu_shared = 10,
614         },
615         {
616                 /* L3 Data Cache per GPU */
617                 .cache_size = 96*1024,
618                 .cache_level = 3,
619                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
620                                 CRAT_CACHE_FLAGS_DATA_CACHE |
621                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
622                 .num_cu_shared = 10,
623         },
624 };
625
626 static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
627         {
628                 /* TCP L1 Cache per CU */
629                 .cache_size = 16,
630                 .cache_level = 1,
631                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
632                                 CRAT_CACHE_FLAGS_DATA_CACHE |
633                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
634                 .num_cu_shared = 1,
635         },
636         {
637                 /* Scalar L1 Instruction Cache per SQC */
638                 .cache_size = 32,
639                 .cache_level = 1,
640                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
641                                 CRAT_CACHE_FLAGS_INST_CACHE |
642                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
643                 .num_cu_shared = 2,
644         },
645         {
646                 /* Scalar L1 Data Cache per SQC */
647                 .cache_size = 16,
648                 .cache_level = 1,
649                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
650                                 CRAT_CACHE_FLAGS_DATA_CACHE |
651                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
652                 .num_cu_shared = 2,
653         },
654         {
655                 /* GL1 Data Cache per SA */
656                 .cache_size = 128,
657                 .cache_level = 1,
658                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
659                                 CRAT_CACHE_FLAGS_DATA_CACHE |
660                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
661                 .num_cu_shared = 8,
662         },
663         {
664                 /* L2 Data Cache per GPU (Total Tex Cache) */
665                 .cache_size = 2048,
666                 .cache_level = 2,
667                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
668                                 CRAT_CACHE_FLAGS_DATA_CACHE |
669                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
670                 .num_cu_shared = 8,
671         },
672         {
673                 /* L3 Data Cache per GPU */
674                 .cache_size = 32*1024,
675                 .cache_level = 3,
676                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
677                                 CRAT_CACHE_FLAGS_DATA_CACHE |
678                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
679                 .num_cu_shared = 8,
680         },
681 };
682
683 static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
684         {
685                 /* TCP L1 Cache per CU */
686                 .cache_size = 16,
687                 .cache_level = 1,
688                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
689                                 CRAT_CACHE_FLAGS_DATA_CACHE |
690                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
691                 .num_cu_shared = 1,
692         },
693         {
694                 /* Scalar L1 Instruction Cache per SQC */
695                 .cache_size = 32,
696                 .cache_level = 1,
697                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
698                                 CRAT_CACHE_FLAGS_INST_CACHE |
699                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
700                 .num_cu_shared = 2,
701         },
702         {
703                 /* Scalar L1 Data Cache per SQC */
704                 .cache_size = 16,
705                 .cache_level = 1,
706                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
707                                 CRAT_CACHE_FLAGS_DATA_CACHE |
708                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
709                 .num_cu_shared = 2,
710         },
711         {
712                 /* GL1 Data Cache per SA */
713                 .cache_size = 128,
714                 .cache_level = 1,
715                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
716                                 CRAT_CACHE_FLAGS_DATA_CACHE |
717                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
718                 .num_cu_shared = 8,
719         },
720         {
721                 /* L2 Data Cache per GPU (Total Tex Cache) */
722                 .cache_size = 1024,
723                 .cache_level = 2,
724                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
725                                 CRAT_CACHE_FLAGS_DATA_CACHE |
726                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
727                 .num_cu_shared = 8,
728         },
729         {
730                 /* L3 Data Cache per GPU */
731                 .cache_size = 16*1024,
732                 .cache_level = 3,
733                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
734                                 CRAT_CACHE_FLAGS_DATA_CACHE |
735                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
736                 .num_cu_shared = 8,
737         },
738 };
739
/* Cache hierarchy table for Yellow Carp APUs.
 * num_cu_shared = CUs sharing one instance of the given cache;
 * cache_size is presumably in KiB — consistent with the /1024 scaling used in
 * kfd_fill_gpu_cache_info_from_gfx_config(); confirm against HSA topology docs.
 */
static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};
787
/* Cache hierarchy table for GFX10.3.7 GPUs.
 * num_cu_shared = CUs sharing one instance of the given cache.
 */
static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};
835
/* Cache hierarchy table for GC 10.3.6 GPUs.
 * num_cu_shared = CUs sharing one instance of the given cache.
 */
static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_INST_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};
883
/* Generic placeholder cache table — presumably used when no ASIC-specific
 * table or discovered gfx config is available; verify against the callers of
 * kfd_get_gpu_cache_info().
 */
static struct kfd_gpu_cache_info dummy_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};
931
932 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
933                 struct crat_subtype_computeunit *cu)
934 {
935         dev->node_props.cpu_cores_count = cu->num_cpu_cores;
936         dev->node_props.cpu_core_id_base = cu->processor_id_low;
937         if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
938                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
939
940         pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
941                         cu->processor_id_low);
942 }
943
944 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
945                 struct crat_subtype_computeunit *cu)
946 {
947         dev->node_props.simd_id_base = cu->processor_id_low;
948         dev->node_props.simd_count = cu->num_simd_cores;
949         dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
950         dev->node_props.max_waves_per_simd = cu->max_waves_simd;
951         dev->node_props.wave_front_size = cu->wave_front_size;
952         dev->node_props.array_count = cu->array_count;
953         dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
954         dev->node_props.simd_per_cu = cu->num_simd_per_cu;
955         dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
956         if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
957                 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
958         pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
959 }
960
961 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
962  * topology device present in the device_list
963  */
964 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
965                                 struct list_head *device_list)
966 {
967         struct kfd_topology_device *dev;
968
969         pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
970                         cu->proximity_domain, cu->hsa_capability);
971         list_for_each_entry(dev, device_list, list) {
972                 if (cu->proximity_domain == dev->proximity_domain) {
973                         if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
974                                 kfd_populated_cu_info_cpu(dev, cu);
975
976                         if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
977                                 kfd_populated_cu_info_gpu(dev, cu);
978                         break;
979                 }
980         }
981
982         return 0;
983 }
984
985 static struct kfd_mem_properties *
986 find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
987                 struct kfd_topology_device *dev)
988 {
989         struct kfd_mem_properties *props;
990
991         list_for_each_entry(props, &dev->mem_props, list) {
992                 if (props->heap_type == heap_type
993                                 && props->flags == flags
994                                 && props->width == width)
995                         return props;
996         }
997
998         return NULL;
999 }
1000 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1001  * topology device present in the device_list
1002  */
1003 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
1004                                 struct list_head *device_list)
1005 {
1006         struct kfd_mem_properties *props;
1007         struct kfd_topology_device *dev;
1008         uint32_t heap_type;
1009         uint64_t size_in_bytes;
1010         uint32_t flags = 0;
1011         uint32_t width;
1012
1013         pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
1014                         mem->proximity_domain);
1015         list_for_each_entry(dev, device_list, list) {
1016                 if (mem->proximity_domain == dev->proximity_domain) {
1017                         /* We're on GPU node */
1018                         if (dev->node_props.cpu_cores_count == 0) {
1019                                 /* APU */
1020                                 if (mem->visibility_type == 0)
1021                                         heap_type =
1022                                                 HSA_MEM_HEAP_TYPE_FB_PRIVATE;
1023                                 /* dGPU */
1024                                 else
1025                                         heap_type = mem->visibility_type;
1026                         } else
1027                                 heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
1028
1029                         if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
1030                                 flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
1031                         if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
1032                                 flags |= HSA_MEM_FLAGS_NON_VOLATILE;
1033
1034                         size_in_bytes =
1035                                 ((uint64_t)mem->length_high << 32) +
1036                                                         mem->length_low;
1037                         width = mem->width;
1038
1039                         /* Multiple banks of the same type are aggregated into
1040                          * one. User mode doesn't care about multiple physical
1041                          * memory segments. It's managed as a single virtual
1042                          * heap for user mode.
1043                          */
1044                         props = find_subtype_mem(heap_type, flags, width, dev);
1045                         if (props) {
1046                                 props->size_in_bytes += size_in_bytes;
1047                                 break;
1048                         }
1049
1050                         props = kfd_alloc_struct(props);
1051                         if (!props)
1052                                 return -ENOMEM;
1053
1054                         props->heap_type = heap_type;
1055                         props->flags = flags;
1056                         props->size_in_bytes = size_in_bytes;
1057                         props->width = width;
1058
1059                         dev->node_props.mem_banks_count++;
1060                         list_add_tail(&props->list, &dev->mem_props);
1061
1062                         break;
1063                 }
1064         }
1065
1066         return 0;
1067 }
1068
/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
			struct list_head *device_list)
{
	struct kfd_cache_properties *props;
	struct kfd_topology_device *dev;
	uint32_t id;
	uint32_t total_num_of_cu;

	id = cache->processor_id_low;

	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
	list_for_each_entry(dev, device_list, list) {
		total_num_of_cu = (dev->node_props.array_count *
					dev->node_props.cu_per_simd_array);

		/* Cache information in CRAT doesn't have proximity_domain
		 * information as it is associated with a CPU core or GPU
		 * Compute Unit. So map the cache using CPU core Id or SIMD
		 * (GPU) ID.
		 * TODO: This works because currently we can safely assume that
		 *  Compute Units are parsed before caches are parsed. In
		 *  future, remove this dependency
		 */
		/* NOTE(review): the CPU range uses an inclusive upper bound
		 * ('<=') while the SIMD range is exclusive ('<') — confirm
		 * whether the extra ID on the CPU side is intentional.
		 */
		if ((id >= dev->node_props.cpu_core_id_base &&
			id <= dev->node_props.cpu_core_id_base +
				dev->node_props.cpu_cores_count) ||
			(id >= dev->node_props.simd_id_base &&
			id < dev->node_props.simd_id_base +
				total_num_of_cu)) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->processor_id_low = id;
			props->cache_level = cache->cache_level;
			props->cache_size = cache->cache_size;
			props->cacheline_size = cache->cache_line_size;
			props->cachelines_per_tag = cache->lines_per_tag;
			props->cache_assoc = cache->associativity;
			props->cache_latency = cache->cache_latency;

			memcpy(props->sibling_map, cache->sibling_map,
					CRAT_SIBLINGMAP_SIZE);

			/* set the sibling_map_size as 32 for CRAT from ACPI */
			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

			/* Translate CRAT cache-type flags into HSA cache
			 * types; a cache may carry several of these bits.
			 */
			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_DATA;
			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_CPU;
			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_HSACU;

			dev->node_props.caches_count++;
			list_add_tail(&props->list, &dev->cache_props);

			break;
		}
	}

	return 0;
}
1137
/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
 * topology device present in the device_list
 */
static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
					struct list_head *device_list)
{
	struct kfd_iolink_properties *props = NULL, *props2;
	struct kfd_topology_device *dev, *to_dev;
	uint32_t id_from;
	uint32_t id_to;

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
			id_from, id_to);
	/* Attach the forward link to the source node's io_link_props list */
	list_for_each_entry(dev, device_list, list) {
		if (id_from == dev->proximity_domain) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->node_from = id_from;
			props->node_to = id_to;
			props->ver_maj = iolink->version_major;
			props->ver_min = iolink->version_minor;
			props->iolink_type = iolink->io_interface_type;

			/* Weight: fixed 20 for PCIe, CRAT-supplied for xGMI,
			 * otherwise the NUMA node distance between domains
			 */
			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
				props->weight = 20;
			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
				props->weight = iolink->weight_xgmi;
			else
				props->weight = node_distance(id_from, id_to);

			props->min_latency = iolink->minimum_latency;
			props->max_latency = iolink->maximum_latency;
			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
			props->rec_transfer_size =
					iolink->recommended_transfer_size;

			dev->node_props.io_links_count++;
			list_add_tail(&props->list, &dev->io_link_props);
			break;
		}
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponded CPU->GPU link if GPU
	 * is large bar.
	 * For xGMI, we only added the link with one direction in the crat
	 * table, add corresponded reversed direction link now.
	 */
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		/* the duplicate must not share the original link's kobject */
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}

	return 0;
}
1212
1213 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214  * present in the device_list
1215  *      @sub_type_hdr - subtype section of crat_image
1216  *      @device_list - list of topology devices present in this crat_image
1217  */
1218 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
1219                                 struct list_head *device_list)
1220 {
1221         struct crat_subtype_computeunit *cu;
1222         struct crat_subtype_memory *mem;
1223         struct crat_subtype_cache *cache;
1224         struct crat_subtype_iolink *iolink;
1225         int ret = 0;
1226
1227         switch (sub_type_hdr->type) {
1228         case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
1229                 cu = (struct crat_subtype_computeunit *)sub_type_hdr;
1230                 ret = kfd_parse_subtype_cu(cu, device_list);
1231                 break;
1232         case CRAT_SUBTYPE_MEMORY_AFFINITY:
1233                 mem = (struct crat_subtype_memory *)sub_type_hdr;
1234                 ret = kfd_parse_subtype_mem(mem, device_list);
1235                 break;
1236         case CRAT_SUBTYPE_CACHE_AFFINITY:
1237                 cache = (struct crat_subtype_cache *)sub_type_hdr;
1238                 ret = kfd_parse_subtype_cache(cache, device_list);
1239                 break;
1240         case CRAT_SUBTYPE_TLB_AFFINITY:
1241                 /*
1242                  * For now, nothing to do here
1243                  */
1244                 pr_debug("Found TLB entry in CRAT table (not processing)\n");
1245                 break;
1246         case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
1247                 /*
1248                  * For now, nothing to do here
1249                  */
1250                 pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
1251                 break;
1252         case CRAT_SUBTYPE_IOLINK_AFFINITY:
1253                 iolink = (struct crat_subtype_iolink *)sub_type_hdr;
1254                 ret = kfd_parse_subtype_iolink(iolink, device_list);
1255                 break;
1256         default:
1257                 pr_warn("Unknown subtype %d in CRAT\n",
1258                                 sub_type_hdr->type);
1259         }
1260
1261         return ret;
1262 }
1263
/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add in to device_list. Also parse
 * CRAT subtypes and attach it to appropriate kfd_topology_device
 *	@crat_image - input image containing CRAT
 *	@device_list - [OUT] list of kfd_topology_device generated after
 *		       parsing crat_image
 *	@proximity_domain - Proximity domain of the first device in the table
 *
 *	Return - 0 if successful else -ve value
 */
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
			 uint32_t proximity_domain)
{
	struct kfd_topology_device *top_dev = NULL;
	struct crat_subtype_generic *sub_type_hdr;
	uint16_t node_id;
	int ret = 0;
	struct crat_header *crat_table = (struct crat_header *)crat_image;
	uint16_t num_nodes;
	uint32_t image_len;

	if (!crat_image)
		return -EINVAL;

	if (!list_empty(device_list)) {
		pr_warn("Error device list should be empty\n");
		return -EINVAL;
	}

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);

	/* One topology device per CRAT domain, numbered consecutively
	 * starting at @proximity_domain
	 */
	for (node_id = 0; node_id < num_nodes; node_id++) {
		top_dev = kfd_create_topology_device(device_list);
		if (!top_dev)
			break;
		top_dev->proximity_domain = proximity_domain++;
	}

	/* top_dev is NULL only if the loop above failed (or num_nodes == 0) */
	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}

	/* OEM identification is taken from the table header and stored on
	 * the last created device
	 */
	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
			CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	/* Walk the variable-length subtype records that follow the header,
	 * staying within the image length reported by the table itself
	 */
	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
			((char *)crat_image) + image_len) {
		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
			ret = kfd_parse_subtype(sub_type_hdr, device_list);
			if (ret)
				break;
		}

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);
	}

err:
	/* On any failure, release every device created above */
	if (ret)
		kfd_release_topology_device_list(device_list);

	return ret;
}
1334
1335
1336 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
1337                                                    struct kfd_gpu_cache_info *pcache_info)
1338 {
1339         struct amdgpu_device *adev = kdev->adev;
1340         int i = 0;
1341
1342         /* TCP L1 Cache per CU */
1343         if (adev->gfx.config.gc_tcp_l1_size) {
1344                 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
1345                 pcache_info[i].cache_level = 1;
1346                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1347                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1348                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1349                 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
1350                 i++;
1351         }
1352         /* Scalar L1 Instruction Cache per SQC */
1353         if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
1354                 pcache_info[i].cache_size =
1355                         adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
1356                 pcache_info[i].cache_level = 1;
1357                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1358                                         CRAT_CACHE_FLAGS_INST_CACHE |
1359                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1360                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1361                 i++;
1362         }
1363         /* Scalar L1 Data Cache per SQC */
1364         if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
1365                 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
1366                 pcache_info[i].cache_level = 1;
1367                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1368                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1369                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1370                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1371                 i++;
1372         }
1373         /* GL1 Data Cache per SA */
1374         if (adev->gfx.config.gc_gl1c_per_sa &&
1375             adev->gfx.config.gc_gl1c_size_per_instance) {
1376                 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
1377                         adev->gfx.config.gc_gl1c_size_per_instance;
1378                 pcache_info[i].cache_level = 1;
1379                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1380                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1381                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1382                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1383                 i++;
1384         }
1385         /* L2 Data Cache per GPU (Total Tex Cache) */
1386         if (adev->gfx.config.gc_gl2c_per_gpu) {
1387                 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
1388                 pcache_info[i].cache_level = 2;
1389                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1390                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1391                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1392                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1393                 i++;
1394         }
1395         /* L3 Data Cache per GPU */
1396         if (adev->gmc.mall_size) {
1397                 pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
1398                 pcache_info[i].cache_level = 3;
1399                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1400                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1401                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1402                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1403                 i++;
1404         }
1405         return i;
1406 }
1407
/* kfd_get_gpu_cache_info - Select the cache-topology table for a GPU node.
 *
 *	@kdev: KFD node whose ASIC type / GC IP version selects the table
 *	@pcache_info: [IN/OUT] on return points at the matching static table.
 *		NOTE: for GFX 11.0.x there is no static table; instead the
 *		buffer that *pcache_info ALREADY points to (supplied by the
 *		caller) is filled from the gfx config, so callers must pass
 *		a pointer to a writable kfd_gpu_cache_info array.
 *
 *	Return the number of cache types described in *pcache_info.
 */
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
	int num_of_cache_types = 0;

	/* Pre-GFX9 ASICs are identified by asic_type; newer ones fall
	 * through to the GC IP-version switch in the default branch.
	 */
	switch (kdev->adev->asic_type) {
	case CHIP_KAVERI:
		*pcache_info = kaveri_cache_info;
		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
		break;
	case CHIP_HAWAII:
		*pcache_info = hawaii_cache_info;
		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
		break;
	case CHIP_CARRIZO:
		*pcache_info = carrizo_cache_info;
		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
		break;
	case CHIP_TONGA:
		*pcache_info = tonga_cache_info;
		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
		break;
	case CHIP_FIJI:
		*pcache_info = fiji_cache_info;
		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
		break;
	case CHIP_POLARIS10:
		*pcache_info = polaris10_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
		break;
	case CHIP_POLARIS11:
		*pcache_info = polaris11_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
		break;
	case CHIP_POLARIS12:
		*pcache_info = polaris12_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
		break;
	case CHIP_VEGAM:
		*pcache_info = vegam_cache_info;
		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
		break;
	default:
		/* GFX9 and newer: dispatch on the GC IP version instead. */
		switch (KFD_GC_VERSION(kdev)) {
		case IP_VERSION(9, 0, 1):
			*pcache_info = vega10_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
			break;
		case IP_VERSION(9, 2, 1):
			*pcache_info = vega12_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
			break;
		case IP_VERSION(9, 4, 0):
		case IP_VERSION(9, 4, 1):
			*pcache_info = vega20_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
			break;
		case IP_VERSION(9, 4, 2):
		case IP_VERSION(9, 4, 3):
			*pcache_info = aldebaran_cache_info;
			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
			break;
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			*pcache_info = raven_cache_info;
			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
			break;
		case IP_VERSION(9, 3, 0):
			*pcache_info = renoir_cache_info;
			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
			break;
		case IP_VERSION(10, 1, 10):
		case IP_VERSION(10, 1, 2):
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			*pcache_info = navi10_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
			break;
		case IP_VERSION(10, 1, 1):
			*pcache_info = navi14_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
			break;
		case IP_VERSION(10, 3, 0):
			*pcache_info = sienna_cichlid_cache_info;
			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
			break;
		case IP_VERSION(10, 3, 2):
			*pcache_info = navy_flounder_cache_info;
			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
			break;
		case IP_VERSION(10, 3, 4):
			*pcache_info = dimgrey_cavefish_cache_info;
			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
			break;
		case IP_VERSION(10, 3, 1):
			*pcache_info = vangogh_cache_info;
			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
			break;
		case IP_VERSION(10, 3, 5):
			*pcache_info = beige_goby_cache_info;
			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
			break;
		case IP_VERSION(10, 3, 3):
			*pcache_info = yellow_carp_cache_info;
			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
			break;
		case IP_VERSION(10, 3, 6):
			*pcache_info = gc_10_3_6_cache_info;
			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
			break;
		case IP_VERSION(10, 3, 7):
			*pcache_info = gfx1037_cache_info;
			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
			break;
		case IP_VERSION(11, 0, 0):
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 2):
		case IP_VERSION(11, 0, 3):
		case IP_VERSION(11, 0, 4):
			/* No static table for GFX11: fill the caller-provided
			 * buffer (*pcache_info is NOT reassigned here).
			 */
			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
			break;
		default:
			/* Unknown ASIC: fall back to a placeholder table so
			 * topology enumeration can still proceed.
			 */
			*pcache_info = dummy_cache_info;
			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
			pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
			break;
		}
	}
	return num_of_cache_types;
}
1538
1539 static bool kfd_ignore_crat(void)
1540 {
1541         bool ret;
1542
1543         if (ignore_crat)
1544                 return true;
1545
1546 #ifndef KFD_SUPPORT_IOMMU_V2
1547         ret = true;
1548 #else
1549         ret = false;
1550 #endif
1551
1552         return ret;
1553 }
1554
1555 /*
1556  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
1557  * copies CRAT from ACPI (if available).
1558  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
1559  *
1560  *      @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
1561  *                   crat_image will be NULL
1562  *      @size: [OUT] size of crat_image
1563  *
1564  *      Return 0 if successful else return error code
1565  */
1566 int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
1567 {
1568         struct acpi_table_header *crat_table;
1569         acpi_status status;
1570         void *pcrat_image;
1571         int rc = 0;
1572
1573         if (!crat_image)
1574                 return -EINVAL;
1575
1576         *crat_image = NULL;
1577
1578         if (kfd_ignore_crat()) {
1579                 pr_info("CRAT table disabled by module option\n");
1580                 return -ENODATA;
1581         }
1582
1583         /* Fetch the CRAT table from ACPI */
1584         status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
1585         if (status == AE_NOT_FOUND) {
1586                 pr_info("CRAT table not found\n");
1587                 return -ENODATA;
1588         } else if (ACPI_FAILURE(status)) {
1589                 const char *err = acpi_format_exception(status);
1590
1591                 pr_err("CRAT table error: %s\n", err);
1592                 return -EINVAL;
1593         }
1594
1595         pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
1596         if (!pcrat_image) {
1597                 rc = -ENOMEM;
1598                 goto out;
1599         }
1600
1601         memcpy(pcrat_image, crat_table, crat_table->length);
1602         *crat_image = pcrat_image;
1603         *size = crat_table->length;
1604 out:
1605         acpi_put_table(crat_table);
1606         return rc;
1607 }
1608
1609 /* Memory required to create Virtual CRAT.
1610  * Since there is no easy way to predict the amount of memory required, the
1611  * following amount is allocated for GPU Virtual CRAT. This is
1612  * expected to cover all known conditions. But to be safe additional check
1613  * is put in the code to ensure we don't overwrite.
1614  */
1615 #define VCRAT_SIZE_FOR_GPU      (4 * PAGE_SIZE)
1616
1617 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1618  *
1619  *      @numa_node_id: CPU NUMA node id
1620  *      @avail_size: Available size in the memory
1621  *      @sub_type_hdr: Memory into which compute info will be filled in
1622  *
1623  *      Return 0 if successful else return -ve value
1624  */
1625 static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
1626                                 int proximity_domain,
1627                                 struct crat_subtype_computeunit *sub_type_hdr)
1628 {
1629         const struct cpumask *cpumask;
1630
1631         *avail_size -= sizeof(struct crat_subtype_computeunit);
1632         if (*avail_size < 0)
1633                 return -ENOMEM;
1634
1635         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
1636
1637         /* Fill in subtype header data */
1638         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
1639         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
1640         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1641
1642         cpumask = cpumask_of_node(numa_node_id);
1643
1644         /* Fill in CU data */
1645         sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
1646         sub_type_hdr->proximity_domain = proximity_domain;
1647         sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
1648         if (sub_type_hdr->processor_id_low == -1)
1649                 return -EINVAL;
1650
1651         sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
1652
1653         return 0;
1654 }
1655
1656 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1657  *
1658  *      @numa_node_id: CPU NUMA node id
1659  *      @avail_size: Available size in the memory
1660  *      @sub_type_hdr: Memory into which compute info will be filled in
1661  *
1662  *      Return 0 if successful else return -ve value
1663  */
1664 static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
1665                         int proximity_domain,
1666                         struct crat_subtype_memory *sub_type_hdr)
1667 {
1668         uint64_t mem_in_bytes = 0;
1669         pg_data_t *pgdat;
1670         int zone_type;
1671
1672         *avail_size -= sizeof(struct crat_subtype_memory);
1673         if (*avail_size < 0)
1674                 return -ENOMEM;
1675
1676         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1677
1678         /* Fill in subtype header data */
1679         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1680         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1681         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1682
1683         /* Fill in Memory Subunit data */
1684
1685         /* Unlike si_meminfo, si_meminfo_node is not exported. So
1686          * the following lines are duplicated from si_meminfo_node
1687          * function
1688          */
1689         pgdat = NODE_DATA(numa_node_id);
1690         for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
1691                 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
1692         mem_in_bytes <<= PAGE_SHIFT;
1693
1694         sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
1695         sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
1696         sub_type_hdr->proximity_domain = proximity_domain;
1697
1698         return 0;
1699 }
1700
#ifdef CONFIG_X86_64
/* kfd_fill_iolink_info_for_cpu - Emit one CRAT IO-link entry from the given
 * NUMA node to every other online node.
 *
 *	@numa_node_id: source CPU NUMA node id
 *	@avail_size: remaining room in the VCRAT buffer
 *	@num_entries: [OUT] number of iolink entries written
 *	@sub_type_hdr: start of the entry array to fill
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
				uint32_t *num_entries,
				struct crat_subtype_iolink *sub_type_hdr)
{
	struct cpuinfo_x86 *cpu0 = &cpu_data(0);
	uint8_t iface;
	int peer;

	/* AMD CPUs interconnect via HyperTransport, others via QPI. */
	if (cpu0->x86_vendor == X86_VENDOR_AMD)
		iface = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		iface = CRAT_IOLINK_TYPE_QPI_1_1;

	*num_entries = 0;

	/* Create IO links from this node to other CPU nodes */
	for_each_online_node(peer) {
		if (peer == numa_node_id) /* node itself */
			continue;

		*avail_size -= sizeof(*sub_type_hdr);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(sub_type_hdr, 0, sizeof(*sub_type_hdr));

		/* Subtype header */
		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		sub_type_hdr->length = sizeof(*sub_type_hdr);
		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* IO-link payload */
		sub_type_hdr->proximity_domain_from = numa_node_id;
		sub_type_hdr->proximity_domain_to = peer;
		sub_type_hdr->io_interface_type = iface;

		(*num_entries)++;
		sub_type_hdr++;
	}

	return 0;
}
#endif
1745
/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 *
 *	@pcrat_image: Fill in VCRAT for CPU
 *	@size:  [IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 *
 *	Builds the table in place: header first, then for each online NUMA
 *	node a compute-unit entry, a memory entry and (x86-64 only) one
 *	iolink entry per peer node, advancing sub_type_hdr past each
 *	entry just written.
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
	struct acpi_table_header *acpi_table;
	acpi_status status;
	struct crat_subtype_generic *sub_type_hdr;
	int avail_size = *size;
	int numa_node_id;
#ifdef CONFIG_X86_64
	uint32_t entries = 0;
#endif
	int ret = 0;

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memset(crat_table, 0, sizeof(struct crat_header));
	memcpy(&crat_table->signature, CRAT_SIGNATURE,
			sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	/* Borrow the OEM identification from the DSDT; the virtual CRAT
	 * has no OEM of its own.  Failure here is non-fatal.
	 */
	status = acpi_get_table("DSDT", 0, &acpi_table);
	if (status != AE_OK)
		pr_warn("DSDT table not found for OEM information\n");
	else {
		crat_table->oem_revision = acpi_table->revision;
		memcpy(crat_table->oem_id, acpi_table->oem_id,
				CRAT_OEMID_LENGTH);
		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
				CRAT_OEMTABLEID_LENGTH);
		acpi_put_table(acpi_table);
	}
	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	/* Subtype entries start immediately after the header. */
	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);

	for_each_online_node(numa_node_id) {
		/* Skip nodes with no APIC id (no CPU attached). */
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		/* Advance past the entry just written. */
		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
#ifdef CONFIG_X86_64
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
				&entries,
				(struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		/* One fixed-size iolink entry was written per peer node. */
		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}
#else
		pr_info("IO link not available for non x86 platforms\n");
#endif

		crat_table->num_domains++;
	}

	/* TODO: Add cache Subtype for CPU.
	 * Currently, CPU cache information is available in function
	 * detect_cache_attributes(cpu) defined in the file
	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
	 * exported and to get the same information the code needs to be
	 * duplicated.
	 */

	*size = crat_table->length;
	pr_info("Virtual CRAT table created for CPU\n");

	return 0;
}
1859
1860 static int kfd_fill_gpu_memory_affinity(int *avail_size,
1861                 struct kfd_node *kdev, uint8_t type, uint64_t size,
1862                 struct crat_subtype_memory *sub_type_hdr,
1863                 uint32_t proximity_domain,
1864                 const struct kfd_local_mem_info *local_mem_info)
1865 {
1866         *avail_size -= sizeof(struct crat_subtype_memory);
1867         if (*avail_size < 0)
1868                 return -ENOMEM;
1869
1870         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1871         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1872         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1873         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1874
1875         sub_type_hdr->proximity_domain = proximity_domain;
1876
1877         pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
1878                         type, size);
1879
1880         sub_type_hdr->length_low = lower_32_bits(size);
1881         sub_type_hdr->length_high = upper_32_bits(size);
1882
1883         sub_type_hdr->width = local_mem_info->vram_width;
1884         sub_type_hdr->visibility_type = type;
1885
1886         return 0;
1887 }
1888
#ifdef CONFIG_ACPI_NUMA
/* kfd_find_numa_node_in_srat - Walk the ACPI SRAT looking for a generic-
 * affinity entry whose device handle matches this GPU's PCI domain/BDF,
 * and bind the PCI device to the NUMA node of that entry.
 *
 * Used when firmware did not set a NUMA node on the PCI device itself.
 * On no match (or any SRAT error) the device node is left unchanged.
 */
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, subtable_len;
	/* Key to match: PCI segment in the high 16 bits, BDF in the low. */
	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
			pci_dev_id(kdev->adev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;

	/* Parse all entries looking for a match. Subtables start right
	 * after the fixed SRAT header; each carries its own length.
	 */
	sub_header = (struct acpi_subtable_header *)
			((unsigned long)table_header +
			sizeof(struct acpi_table_srat));
	subtable_len = sub_header->length;

	while (((unsigned long)sub_header) + subtable_len  < table_end) {
		/*
		 * If length is 0, break from this loop to avoid
		 * infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			/* Track the largest proximity domain seen, used
			 * below to sanity-check the GPU's domain.
			 * NOTE(review): decodes proximity_domain_hi via a
			 * u32 load over the 3-byte array — presumably relies
			 * on SRAT field layout; confirm against the spec.
			 */
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
					cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			/* device_handle holds the PCI segment (bytes 0-1)
			 * and BDF (bytes 2-3) for PCI device handles.
			 */
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
					*((u16 *)(&gpu->device_handle[2]));
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		/* Advance to the next subtable. */
		sub_header = (struct acpi_subtable_header *)
				((unsigned long)sub_header + subtable_len);
		subtable_len = sub_header->length;
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case: an out-of-range node from
	 * firmware is clamped to node 0.
	 */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
1974
1975 #define KFD_CRAT_INTRA_SOCKET_WEIGHT    13
1976 #define KFD_CRAT_XGMI_WEIGHT            15
1977
/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
 * to its NUMA node
 *	@avail_size: Available size in the memory
 *	@kdev - [IN] GPU device
 *	@sub_type_hdr: Memory into which io link info will be filled in
 *	@proximity_domain - proximity domain of the GPU node
 *
 *	The link is reported as XGMI for CPU-connected XGMI parts and for
 *	GFX 9.4.3 APU packages, and as PCIe otherwise.
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
			struct kfd_node *kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain)
{
	/* Reserve room for this entry in the VCRAT buffer. */
	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
	/* A large BAR lets the CPU reach GPU memory, so the PCIe link can
	 * be reported as bidirectional.
	 */
	if (kfd_dev_is_large_bar(kdev))
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype.
	 * TODO: Fill-in other fields of iolink subtype
	 */
	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
	     AMDGPU_PKG_TYPE_APU)) {
		/* ext_cpu: XGMI to an external CPU (weight/bandwidth come
		 * from the XGMI query); otherwise this is the 9.4.3 APU
		 * case, which uses the intra-socket weight and a fixed
		 * memory bandwidth estimate.
		 */
		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
							KFD_CRAT_INTRA_SOCKET_WEIGHT;
		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
							kdev->adev, NULL, true) : mem_bw;

		/*
		 * with host gpu xgmi link, host can access gpu memory whether
		 * or not pcie bar type is large, so always create bidirectional
		 * io link.
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->weight_xgmi = weight;
		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
	} else {
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
		sub_type_hdr->minimum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
		sub_type_hdr->maximum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	/* If firmware did not bind the device to a NUMA node, try to find
	 * one in the SRAT (only meaningful on multi-node systems).
	 */
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
	    num_possible_nodes() > 1)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
	sub_type_hdr->proximity_domain_to = 0;
#endif
	return 0;
}
2053
2054 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
2055                         struct kfd_node *kdev,
2056                         struct kfd_node *peer_kdev,
2057                         struct crat_subtype_iolink *sub_type_hdr,
2058                         uint32_t proximity_domain_from,
2059                         uint32_t proximity_domain_to)
2060 {
2061         bool use_ta_info = kdev->kfd->num_nodes == 1;
2062
2063         *avail_size -= sizeof(struct crat_subtype_iolink);
2064         if (*avail_size < 0)
2065                 return -ENOMEM;
2066
2067         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
2068
2069         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2070         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2071         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
2072                                CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2073
2074         sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2075         sub_type_hdr->proximity_domain_from = proximity_domain_from;
2076         sub_type_hdr->proximity_domain_to = proximity_domain_to;
2077
2078         if (use_ta_info) {
2079                 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
2080                         amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
2081                 sub_type_hdr->maximum_bandwidth_mbs =
2082                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
2083                                                         peer_kdev->adev, false);
2084                 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
2085                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
2086         } else {
2087                 bool is_single_hop = kdev->kfd == peer_kdev->kfd;
2088                 int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
2089                         (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
2090                 int mem_bw = 819200;
2091
2092                 sub_type_hdr->weight_xgmi = weight;
2093                 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2094                 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2095         }
2096
2097         return 0;
2098 }
2099
/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2101  *
2102  *      @pcrat_image: Fill in VCRAT for GPU
2103  *      @size:  [IN] allocated size of crat_image.
2104  *              [OUT] actual size of data filled in crat_image
2105  */
2106 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
2107                                       size_t *size, struct kfd_node *kdev,
2108                                       uint32_t proximity_domain)
2109 {
2110         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2111         struct crat_subtype_generic *sub_type_hdr;
2112         struct kfd_local_mem_info local_mem_info;
2113         struct kfd_topology_device *peer_dev;
2114         struct crat_subtype_computeunit *cu;
2115         struct kfd_cu_info cu_info;
2116         int avail_size = *size;
2117         uint32_t total_num_of_cu;
2118         uint32_t nid = 0;
2119         int ret = 0;
2120
2121         if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
2122                 return -EINVAL;
2123
2124         /* Fill the CRAT Header.
2125          * Modify length and total_entries as subunits are added.
2126          */
2127         avail_size -= sizeof(struct crat_header);
2128         if (avail_size < 0)
2129                 return -ENOMEM;
2130
2131         memset(crat_table, 0, sizeof(struct crat_header));
2132
2133         memcpy(&crat_table->signature, CRAT_SIGNATURE,
2134                         sizeof(crat_table->signature));
2135         /* Change length as we add more subtypes*/
2136         crat_table->length = sizeof(struct crat_header);
2137         crat_table->num_domains = 1;
2138         crat_table->total_entries = 0;
2139
2140         /* Fill in Subtype: Compute Unit
2141          * First fill in the sub type header and then sub type data
2142          */
2143         avail_size -= sizeof(struct crat_subtype_computeunit);
2144         if (avail_size < 0)
2145                 return -ENOMEM;
2146
2147         sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
2148         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
2149
2150         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
2151         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
2152         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
2153
2154         /* Fill CU subtype data */
2155         cu = (struct crat_subtype_computeunit *)sub_type_hdr;
2156         cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
2157         cu->proximity_domain = proximity_domain;
2158
2159         amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2160         cu->num_simd_per_cu = cu_info.simd_per_cu;
2161         cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
2162         cu->max_waves_simd = cu_info.max_waves_per_simd;
2163
2164         cu->wave_front_size = cu_info.wave_front_size;
2165         cu->array_count = cu_info.num_shader_arrays_per_engine *
2166                 cu_info.num_shader_engines;
2167         total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2168         cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2169         cu->num_cu_per_array = cu_info.num_cu_per_sh;
2170         cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2171         cu->num_banks = cu_info.num_shader_engines;
2172         cu->lds_size_in_kb = cu_info.lds_size;
2173
2174         cu->hsa_capability = 0;
2175
2176         /* Check if this node supports IOMMU. During parsing this flag will
2177          * translate to HSA_CAP_ATS_PRESENT
2178          */
2179         if (!kfd_iommu_check_device(kdev->kfd))
2180                 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
2181
2182         crat_table->length += sub_type_hdr->length;
2183         crat_table->total_entries++;
2184
2185         /* Fill in Subtype: Memory. Only on systems with large BAR (no
2186          * private FB), report memory as public. On other systems
2187          * report the total FB size (public+private) as a single
2188          * private heap.
2189          */
2190         local_mem_info = kdev->local_mem_info;
2191         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2192                         sub_type_hdr->length);
2193
2194         if (debug_largebar)
2195                 local_mem_info.local_mem_size_private = 0;
2196
2197         if (local_mem_info.local_mem_size_private == 0)
2198                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2199                                 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
2200                                 local_mem_info.local_mem_size_public,
2201                                 (struct crat_subtype_memory *)sub_type_hdr,
2202                                 proximity_domain,
2203                                 &local_mem_info);
2204         else
2205                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2206                                 kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
2207                                 local_mem_info.local_mem_size_public +
2208                                 local_mem_info.local_mem_size_private,
2209                                 (struct crat_subtype_memory *)sub_type_hdr,
2210                                 proximity_domain,
2211                                 &local_mem_info);
2212         if (ret < 0)
2213                 return ret;
2214
2215         crat_table->length += sizeof(struct crat_subtype_memory);
2216         crat_table->total_entries++;
2217
2218         /* Fill in Subtype: IO_LINKS
2219          *  Only direct links are added here which is Link from GPU to
2220          *  its NUMA node. Indirect links are added by userspace.
2221          */
2222         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2223                 sub_type_hdr->length);
2224         ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
2225                 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
2226
2227         if (ret < 0)
2228                 return ret;
2229
2230         crat_table->length += sub_type_hdr->length;
2231         crat_table->total_entries++;
2232
2233
2234         /* Fill in Subtype: IO_LINKS
2235          * Direct links from GPU to other GPUs through xGMI.
2236          * We will loop GPUs that already be processed (with lower value
2237          * of proximity_domain), add the link for the GPUs with same
2238          * hive id (from this GPU to other GPU) . The reversed iolink
2239          * (from other GPU to this GPU) will be added
2240          * in kfd_parse_subtype_iolink.
2241          */
2242         if (kdev->kfd->hive_id) {
2243                 for (nid = 0; nid < proximity_domain; ++nid) {
2244                         peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2245                         if (!peer_dev->gpu)
2246                                 continue;
2247                         if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2248                                 continue;
2249                         sub_type_hdr = (typeof(sub_type_hdr))(
2250                                 (char *)sub_type_hdr +
2251                                 sizeof(struct crat_subtype_iolink));
2252                         ret = kfd_fill_gpu_xgmi_link_to_gpu(
2253                                 &avail_size, kdev, peer_dev->gpu,
2254                                 (struct crat_subtype_iolink *)sub_type_hdr,
2255                                 proximity_domain, nid);
2256                         if (ret < 0)
2257                                 return ret;
2258                         crat_table->length += sub_type_hdr->length;
2259                         crat_table->total_entries++;
2260                 }
2261         }
2262         *size = crat_table->length;
2263         pr_info("Virtual CRAT table created for GPU\n");
2264
2265         return ret;
2266 }
2267
2268 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2269  *              creates a Virtual CRAT (VCRAT) image
2270  *
2271  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2272  *
2273  *      @crat_image: VCRAT image created because ACPI does not have a
2274  *                   CRAT for this device
2275  *      @size: [OUT] size of virtual crat_image
2276  *      @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2277  *              COMPUTE_UNIT_GPU - Create VCRAT for GPU
2278  *              (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2279  *                      -- this option is not currently implemented.
2280  *                      The assumption is that all AMD APUs will have CRAT
2281  *      @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2282  *
2283  *      Return 0 if successful else return -ve value
2284  */
2285 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
2286                                   int flags, struct kfd_node *kdev,
2287                                   uint32_t proximity_domain)
2288 {
2289         void *pcrat_image = NULL;
2290         int ret = 0, num_nodes;
2291         size_t dyn_size;
2292
2293         if (!crat_image)
2294                 return -EINVAL;
2295
2296         *crat_image = NULL;
2297
2298         /* Allocate the CPU Virtual CRAT size based on the number of online
2299          * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
2300          * This should cover all the current conditions. A check is put not
2301          * to overwrite beyond allocated size for GPUs
2302          */
2303         switch (flags) {
2304         case COMPUTE_UNIT_CPU:
2305                 num_nodes = num_online_nodes();
2306                 dyn_size = sizeof(struct crat_header) +
2307                         num_nodes * (sizeof(struct crat_subtype_computeunit) +
2308                         sizeof(struct crat_subtype_memory) +
2309                         (num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2310                 pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2311                 if (!pcrat_image)
2312                         return -ENOMEM;
2313                 *size = dyn_size;
2314                 pr_debug("CRAT size is %ld", dyn_size);
2315                 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2316                 break;
2317         case COMPUTE_UNIT_GPU:
2318                 if (!kdev)
2319                         return -EINVAL;
2320                 pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
2321                 if (!pcrat_image)
2322                         return -ENOMEM;
2323                 *size = VCRAT_SIZE_FOR_GPU;
2324                 ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
2325                                                  proximity_domain);
2326                 break;
2327         case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2328                 /* TODO: */
2329                 ret = -EINVAL;
2330                 pr_err("VCRAT not implemented for APU\n");
2331                 break;
2332         default:
2333                 ret = -EINVAL;
2334         }
2335
2336         if (!ret)
2337                 *crat_image = pcrat_image;
2338         else
2339                 kvfree(pcrat_image);
2340
2341         return ret;
2342 }
2343
2344
/* kfd_destroy_crat_image - Free a CRAT image previously allocated by one of
 *	the kfd_create_crat_image_xxx() functions (e.g.
 *	kfd_create_crat_image_virtual()).
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 *		     NULL is tolerated (kvfree(NULL) is a no-op).
 */
void kfd_destroy_crat_image(void *crat_image)
{
	/* kvfree() matches the kvmalloc() used at allocation time and
	 * handles either a kmalloc'd or vmalloc'd buffer.
	 */
	kvfree(crat_image);
}