drivers/gpu/drm/amd/amdkfd/kfd_crat.c

   1 // SPDX-License-Identifier: GPL-2.0 OR MIT
   2 /*
   3  * Copyright 2015-2022 Advanced Micro Devices, Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include <linux/pci.h>
  25 #include <linux/acpi.h>
  26 #include "kfd_crat.h"
  27 #include "kfd_priv.h"
  28 #include "kfd_topology.h"
  29 #include "kfd_iommu.h"
  30 #include "amdgpu.h"
  31 #include "amdgpu_amdkfd.h"
  32
  33 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  34  * GPU processor ID are expressed with Bit[31]=1.
  35  * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
  36  * used in the CRAT.
  37  */
  38 static uint32_t gpu_processor_id_low = 0x80001000;
  39
  40 /* Return the next available gpu_processor_id and increment it for next GPU
  41  *      @total_cu_count - Total CUs present in the GPU including ones
  42  *                        masked off
  43  */
  44 static inline unsigned int get_and_inc_gpu_processor_id(
  45                                 unsigned int total_cu_count)
  46 {
  47         int current_id = gpu_processor_id_low;
  48
  49         gpu_processor_id_low += total_cu_count;
  50         return current_id;
  51 }
  52
  53
  54 static struct kfd_gpu_cache_info kaveri_cache_info[] = {
  55         {
  56                 /* TCP L1 Cache per CU */
  57                 .cache_size = 16,
  58                 .cache_level = 1,
  59                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  60                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  61                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  62                 .num_cu_shared = 1,
  63         },
  64         {
  65                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
  66                 .cache_size = 16,
  67                 .cache_level = 1,
  68                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  69                                 CRAT_CACHE_FLAGS_INST_CACHE |
  70                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  71                 .num_cu_shared = 2,
  72         },
  73         {
  74                 /* Scalar L1 Data Cache (in SQC module) per bank */
  75                 .cache_size = 8,
  76                 .cache_level = 1,
  77                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  78                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  79                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  80                 .num_cu_shared = 2,
  81         },
  82
  83         /* TODO: Add L2 Cache information */
  84 };
  85
  86
  87 static struct kfd_gpu_cache_info carrizo_cache_info[] = {
  88         {
  89                 /* TCP L1 Cache per CU */
  90                 .cache_size = 16,
  91                 .cache_level = 1,
  92                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  93                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  94                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  95                 .num_cu_shared = 1,
  96         },
  97         {
  98                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
  99                 .cache_size = 8,
 100                 .cache_level = 1,
 101                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 102                                 CRAT_CACHE_FLAGS_INST_CACHE |
 103                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 104                 .num_cu_shared = 4,
 105         },
 106         {
 107                 /* Scalar L1 Data Cache (in SQC module) per bank. */
 108                 .cache_size = 4,
 109                 .cache_level = 1,
 110                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 111                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 112                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 113                 .num_cu_shared = 4,
 114         },
 115
 116         /* TODO: Add L2 Cache information */
 117 };
 118
 119 #define hawaii_cache_info kaveri_cache_info
 120 #define tonga_cache_info carrizo_cache_info
 121 #define fiji_cache_info  carrizo_cache_info
 122 #define polaris10_cache_info carrizo_cache_info
 123 #define polaris11_cache_info carrizo_cache_info
 124 #define polaris12_cache_info carrizo_cache_info
 125 #define vegam_cache_info carrizo_cache_info
 126
/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In the future, the cache details must be checked and
 * updated for every new ASIC.
 */
 133
 134 static struct kfd_gpu_cache_info vega10_cache_info[] = {
 135         {
 136                 /* TCP L1 Cache per CU */
 137                 .cache_size = 16,
 138                 .cache_level = 1,
 139                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 140                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 141                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 142                 .num_cu_shared = 1,
 143         },
 144         {
 145                 /* Scalar L1 Instruction Cache per SQC */
 146                 .cache_size = 32,
 147                 .cache_level = 1,
 148                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 149                                 CRAT_CACHE_FLAGS_INST_CACHE |
 150                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 151                 .num_cu_shared = 3,
 152         },
 153         {
 154                 /* Scalar L1 Data Cache per SQC */
 155                 .cache_size = 16,
 156                 .cache_level = 1,
 157                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 158                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 159                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 160                 .num_cu_shared = 3,
 161         },
 162         {
 163                 /* L2 Data Cache per GPU (Total Tex Cache) */
 164                 .cache_size = 4096,
 165                 .cache_level = 2,
 166                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 167                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 168                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 169                 .num_cu_shared = 16,
 170         },
 171 };
 172
 173 static struct kfd_gpu_cache_info raven_cache_info[] = {
 174         {
 175                 /* TCP L1 Cache per CU */
 176                 .cache_size = 16,
 177                 .cache_level = 1,
 178                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 179                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 180                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 181                 .num_cu_shared = 1,
 182         },
 183         {
 184                 /* Scalar L1 Instruction Cache per SQC */
 185                 .cache_size = 32,
 186                 .cache_level = 1,
 187                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 188                                 CRAT_CACHE_FLAGS_INST_CACHE |
 189                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 190                 .num_cu_shared = 3,
 191         },
 192         {
 193                 /* Scalar L1 Data Cache per SQC */
 194                 .cache_size = 16,
 195                 .cache_level = 1,
 196                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 197                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 198                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 199                 .num_cu_shared = 3,
 200         },
 201         {
 202                 /* L2 Data Cache per GPU (Total Tex Cache) */
 203                 .cache_size = 1024,
 204                 .cache_level = 2,
 205                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 206                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 207                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 208                 .num_cu_shared = 11,
 209         },
 210 };
 211
 212 static struct kfd_gpu_cache_info renoir_cache_info[] = {
 213         {
 214                 /* TCP L1 Cache per CU */
 215                 .cache_size = 16,
 216                 .cache_level = 1,
 217                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 218                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 219                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 220                 .num_cu_shared = 1,
 221         },
 222         {
 223                 /* Scalar L1 Instruction Cache per SQC */
 224                 .cache_size = 32,
 225                 .cache_level = 1,
 226                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 227                                 CRAT_CACHE_FLAGS_INST_CACHE |
 228                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 229                 .num_cu_shared = 3,
 230         },
 231         {
 232                 /* Scalar L1 Data Cache per SQC */
 233                 .cache_size = 16,
 234                 .cache_level = 1,
 235                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 236                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 237                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 238                 .num_cu_shared = 3,
 239         },
 240         {
 241                 /* L2 Data Cache per GPU (Total Tex Cache) */
 242                 .cache_size = 1024,
 243                 .cache_level = 2,
 244                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 245                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 246                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 247                 .num_cu_shared = 8,
 248         },
 249 };
 250
 251 static struct kfd_gpu_cache_info vega12_cache_info[] = {
 252         {
 253                 /* TCP L1 Cache per CU */
 254                 .cache_size = 16,
 255                 .cache_level = 1,
 256                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 257                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 258                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 259                 .num_cu_shared = 1,
 260         },
 261         {
 262                 /* Scalar L1 Instruction Cache per SQC */
 263                 .cache_size = 32,
 264                 .cache_level = 1,
 265                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 266                                 CRAT_CACHE_FLAGS_INST_CACHE |
 267                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 268                 .num_cu_shared = 3,
 269         },
 270         {
 271                 /* Scalar L1 Data Cache per SQC */
 272                 .cache_size = 16,
 273                 .cache_level = 1,
 274                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 275                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 276                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 277                 .num_cu_shared = 3,
 278         },
 279         {
 280                 /* L2 Data Cache per GPU (Total Tex Cache) */
 281                 .cache_size = 2048,
 282                 .cache_level = 2,
 283                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 284                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 285                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 286                 .num_cu_shared = 5,
 287         },
 288 };
 289
 290 static struct kfd_gpu_cache_info vega20_cache_info[] = {
 291         {
 292                 /* TCP L1 Cache per CU */
 293                 .cache_size = 16,
 294                 .cache_level = 1,
 295                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 296                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 297                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 298                 .num_cu_shared = 1,
 299         },
 300         {
 301                 /* Scalar L1 Instruction Cache per SQC */
 302                 .cache_size = 32,
 303                 .cache_level = 1,
 304                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 305                                 CRAT_CACHE_FLAGS_INST_CACHE |
 306                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 307                 .num_cu_shared = 3,
 308         },
 309         {
 310                 /* Scalar L1 Data Cache per SQC */
 311                 .cache_size = 16,
 312                 .cache_level = 1,
 313                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 314                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 315                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 316                 .num_cu_shared = 3,
 317         },
 318         {
 319                 /* L2 Data Cache per GPU (Total Tex Cache) */
 320                 .cache_size = 8192,
 321                 .cache_level = 2,
 322                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 323                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 324                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 325                 .num_cu_shared = 16,
 326         },
 327 };
 328
 329 static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
 330         {
 331                 /* TCP L1 Cache per CU */
 332                 .cache_size = 16,
 333                 .cache_level = 1,
 334                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 335                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 336                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 337                 .num_cu_shared = 1,
 338         },
 339         {
 340                 /* Scalar L1 Instruction Cache per SQC */
 341                 .cache_size = 32,
 342                 .cache_level = 1,
 343                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 344                                 CRAT_CACHE_FLAGS_INST_CACHE |
 345                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 346                 .num_cu_shared = 2,
 347         },
 348         {
 349                 /* Scalar L1 Data Cache per SQC */
 350                 .cache_size = 16,
 351                 .cache_level = 1,
 352                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 353                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 354                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 355                 .num_cu_shared = 2,
 356         },
 357         {
 358                 /* L2 Data Cache per GPU (Total Tex Cache) */
 359                 .cache_size = 8192,
 360                 .cache_level = 2,
 361                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 362                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 363                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 364                 .num_cu_shared = 14,
 365         },
 366 };
 367
 368 static struct kfd_gpu_cache_info navi10_cache_info[] = {
 369         {
 370                 /* TCP L1 Cache per CU */
 371                 .cache_size = 16,
 372                 .cache_level = 1,
 373                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 374                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 375                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 376                 .num_cu_shared = 1,
 377         },
 378         {
 379                 /* Scalar L1 Instruction Cache per SQC */
 380                 .cache_size = 32,
 381                 .cache_level = 1,
 382                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 383                                 CRAT_CACHE_FLAGS_INST_CACHE |
 384                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 385                 .num_cu_shared = 2,
 386         },
 387         {
 388                 /* Scalar L1 Data Cache per SQC */
 389                 .cache_size = 16,
 390                 .cache_level = 1,
 391                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 392                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 393                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 394                 .num_cu_shared = 2,
 395         },
 396         {
 397                 /* GL1 Data Cache per SA */
 398                 .cache_size = 128,
 399                 .cache_level = 1,
 400                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 401                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 402                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 403                 .num_cu_shared = 10,
 404         },
 405         {
 406                 /* L2 Data Cache per GPU (Total Tex Cache) */
 407                 .cache_size = 4096,
 408                 .cache_level = 2,
 409                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 410                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 411                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 412                 .num_cu_shared = 10,
 413         },
 414 };
 415
 416 static struct kfd_gpu_cache_info vangogh_cache_info[] = {
 417         {
 418                 /* TCP L1 Cache per CU */
 419                 .cache_size = 16,
 420                 .cache_level = 1,
 421                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 422                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 423                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 424                 .num_cu_shared = 1,
 425         },
 426         {
 427                 /* Scalar L1 Instruction Cache per SQC */
 428                 .cache_size = 32,
 429                 .cache_level = 1,
 430                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 431                                 CRAT_CACHE_FLAGS_INST_CACHE |
 432                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 433                 .num_cu_shared = 2,
 434         },
 435         {
 436                 /* Scalar L1 Data Cache per SQC */
 437                 .cache_size = 16,
 438                 .cache_level = 1,
 439                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 440                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 441                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 442                 .num_cu_shared = 2,
 443         },
 444         {
 445                 /* GL1 Data Cache per SA */
 446                 .cache_size = 128,
 447                 .cache_level = 1,
 448                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 449                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 450                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 451                 .num_cu_shared = 8,
 452         },
 453         {
 454                 /* L2 Data Cache per GPU (Total Tex Cache) */
 455                 .cache_size = 1024,
 456                 .cache_level = 2,
 457                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 458                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 459                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 460                 .num_cu_shared = 8,
 461         },
 462 };
 463
 464 static struct kfd_gpu_cache_info navi14_cache_info[] = {
 465         {
 466                 /* TCP L1 Cache per CU */
 467                 .cache_size = 16,
 468                 .cache_level = 1,
 469                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 470                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 471                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 472                 .num_cu_shared = 1,
 473         },
 474         {
 475                 /* Scalar L1 Instruction Cache per SQC */
 476                 .cache_size = 32,
 477                 .cache_level = 1,
 478                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 479                                 CRAT_CACHE_FLAGS_INST_CACHE |
 480                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 481                 .num_cu_shared = 2,
 482         },
 483         {
 484                 /* Scalar L1 Data Cache per SQC */
 485                 .cache_size = 16,
 486                 .cache_level = 1,
 487                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 488                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 489                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 490                 .num_cu_shared = 2,
 491         },
 492         {
 493                 /* GL1 Data Cache per SA */
 494                 .cache_size = 128,
 495                 .cache_level = 1,
 496                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 497                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 498                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 499                 .num_cu_shared = 12,
 500         },
 501         {
 502                 /* L2 Data Cache per GPU (Total Tex Cache) */
 503                 .cache_size = 2048,
 504                 .cache_level = 2,
 505                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 506                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 507                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 508                 .num_cu_shared = 12,
 509         },
 510 };
 511
 512 static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
 513         {
 514                 /* TCP L1 Cache per CU */
 515                 .cache_size = 16,
 516                 .cache_level = 1,
 517                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 518                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 519                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 520                 .num_cu_shared = 1,
 521         },
 522         {
 523                 /* Scalar L1 Instruction Cache per SQC */
 524                 .cache_size = 32,
 525                 .cache_level = 1,
 526                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 527                                 CRAT_CACHE_FLAGS_INST_CACHE |
 528                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 529                 .num_cu_shared = 2,
 530         },
 531         {
 532                 /* Scalar L1 Data Cache per SQC */
 533                 .cache_size = 16,
 534                 .cache_level = 1,
 535                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 536                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 537                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 538                 .num_cu_shared = 2,
 539         },
 540         {
 541                 /* GL1 Data Cache per SA */
 542                 .cache_size = 128,
 543                 .cache_level = 1,
 544                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 545                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 546                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 547                 .num_cu_shared = 10,
 548         },
 549         {
 550                 /* L2 Data Cache per GPU (Total Tex Cache) */
 551                 .cache_size = 4096,
 552                 .cache_level = 2,
 553                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 554                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 555                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 556                 .num_cu_shared = 10,
 557         },
 558         {
 559                 /* L3 Data Cache per GPU */
 560                 .cache_size = 128*1024,
 561                 .cache_level = 3,
 562                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 563                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 564                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 565                 .num_cu_shared = 10,
 566         },
 567 };
 568
 569 static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
 570         {
 571                 /* TCP L1 Cache per CU */
 572                 .cache_size = 16,
 573                 .cache_level = 1,
 574                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 575                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 576                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 577                 .num_cu_shared = 1,
 578         },
 579         {
 580                 /* Scalar L1 Instruction Cache per SQC */
 581                 .cache_size = 32,
 582                 .cache_level = 1,
 583                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 584                                 CRAT_CACHE_FLAGS_INST_CACHE |
 585                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 586                 .num_cu_shared = 2,
 587         },
 588         {
 589                 /* Scalar L1 Data Cache per SQC */
 590                 .cache_size = 16,
 591                 .cache_level = 1,
 592                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 593                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 594                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 595                 .num_cu_shared = 2,
 596         },
 597         {
 598                 /* GL1 Data Cache per SA */
 599                 .cache_size = 128,
 600                 .cache_level = 1,
 601                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 602                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 603                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 604                 .num_cu_shared = 10,
 605         },
 606         {
 607                 /* L2 Data Cache per GPU (Total Tex Cache) */
 608                 .cache_size = 3072,
 609                 .cache_level = 2,
 610                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 611                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 612                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 613                 .num_cu_shared = 10,
 614         },
 615         {
 616                 /* L3 Data Cache per GPU */
 617                 .cache_size = 96*1024,
 618                 .cache_level = 3,
 619                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 620                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 621                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 622                 .num_cu_shared = 10,
 623         },
 624 };
 625
 626 static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
 627         {
 628                 /* TCP L1 Cache per CU */
 629                 .cache_size = 16,
 630                 .cache_level = 1,
 631                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 632                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 633                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 634                 .num_cu_shared = 1,
 635         },
 636         {
 637                 /* Scalar L1 Instruction Cache per SQC */
 638                 .cache_size = 32,
 639                 .cache_level = 1,
 640                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 641                                 CRAT_CACHE_FLAGS_INST_CACHE |
 642                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 643                 .num_cu_shared = 2,
 644         },
 645         {
 646                 /* Scalar L1 Data Cache per SQC */
 647                 .cache_size = 16,
 648                 .cache_level = 1,
 649                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 650                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 651                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 652                 .num_cu_shared = 2,
 653         },
 654         {
 655                 /* GL1 Data Cache per SA */
 656                 .cache_size = 128,
 657                 .cache_level = 1,
 658                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 659                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 660                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 661                 .num_cu_shared = 8,
 662         },
 663         {
 664                 /* L2 Data Cache per GPU (Total Tex Cache) */
 665                 .cache_size = 2048,
 666                 .cache_level = 2,
 667                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 668                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 669                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 670                 .num_cu_shared = 8,
 671         },
 672         {
 673                 /* L3 Data Cache per GPU */
 674                 .cache_size = 32*1024,
 675                 .cache_level = 3,
 676                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 677                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 678                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 679                 .num_cu_shared = 8,
 680         },
 681 };
 682
 683 static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
 684         {
 685                 /* TCP L1 Cache per CU */
 686                 .cache_size = 16,
 687                 .cache_level = 1,
 688                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 689                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 690                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 691                 .num_cu_shared = 1,
 692         },
 693         {
 694                 /* Scalar L1 Instruction Cache per SQC */
 695                 .cache_size = 32,
 696                 .cache_level = 1,
 697                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 698                                 CRAT_CACHE_FLAGS_INST_CACHE |
 699                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 700                 .num_cu_shared = 2,
 701         },
 702         {
 703                 /* Scalar L1 Data Cache per SQC */
 704                 .cache_size = 16,
 705                 .cache_level = 1,
 706                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 707                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 708                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 709                 .num_cu_shared = 2,
 710         },
 711         {
 712                 /* GL1 Data Cache per SA */
 713                 .cache_size = 128,
 714                 .cache_level = 1,
 715                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 716                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 717                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 718                 .num_cu_shared = 8,
 719         },
 720         {
 721                 /* L2 Data Cache per GPU (Total Tex Cache) */
 722                 .cache_size = 1024,
 723                 .cache_level = 2,
 724                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 725                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 726                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 727                 .num_cu_shared = 8,
 728         },
 729         {
 730                 /* L3 Data Cache per GPU */
 731                 .cache_size = 16*1024,
 732                 .cache_level = 3,
 733                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 734                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 735                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 736                 .num_cu_shared = 8,
 737         },
 738 };
 739
 740 static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
 741         {
 742                 /* TCP L1 Cache per CU */
 743                 .cache_size = 16,
 744                 .cache_level = 1,
 745                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 746                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 747                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 748                 .num_cu_shared = 1,
 749         },
 750         {
 751                 /* Scalar L1 Instruction Cache per SQC */
 752                 .cache_size = 32,
 753                 .cache_level = 1,
 754                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 755                                 CRAT_CACHE_FLAGS_INST_CACHE |
 756                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 757                 .num_cu_shared = 2,
 758         },
 759         {
 760                 /* Scalar L1 Data Cache per SQC */
 761                 .cache_size = 16,
 762                 .cache_level = 1,
 763                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 764                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 765                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 766                 .num_cu_shared = 2,
 767         },
 768         {
 769                 /* GL1 Data Cache per SA */
 770                 .cache_size = 128,
 771                 .cache_level = 1,
 772                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 773                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 774                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 775                 .num_cu_shared = 6,
 776         },
 777         {
 778                 /* L2 Data Cache per GPU (Total Tex Cache) */
 779                 .cache_size = 2048,
 780                 .cache_level = 2,
 781                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 782                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 783                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 784                 .num_cu_shared = 6,
 785         },
 786 };
 787
 788 static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
 789         {
 790                 /* TCP L1 Cache per CU */
 791                 .cache_size = 16,
 792                 .cache_level = 1,
 793                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 794                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 795                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 796                 .num_cu_shared = 1,
 797         },
 798         {
 799                 /* Scalar L1 Instruction Cache per SQC */
 800                 .cache_size = 32,
 801                 .cache_level = 1,
 802                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 803                                 CRAT_CACHE_FLAGS_INST_CACHE |
 804                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 805                 .num_cu_shared = 2,
 806         },
 807         {
 808                 /* Scalar L1 Data Cache per SQC */
 809                 .cache_size = 16,
 810                 .cache_level = 1,
 811                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 812                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 813                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 814                 .num_cu_shared = 2,
 815         },
 816         {
 817                 /* GL1 Data Cache per SA */
 818                 .cache_size = 128,
 819                 .cache_level = 1,
 820                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 821                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 822                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 823                 .num_cu_shared = 2,
 824         },
 825         {
 826                 /* L2 Data Cache per GPU (Total Tex Cache) */
 827                 .cache_size = 256,
 828                 .cache_level = 2,
 829                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 830                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 831                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 832                 .num_cu_shared = 2,
 833         },
 834 };
 835
 836 static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
 837         {
 838                 /* TCP L1 Cache per CU */
 839                 .cache_size = 16,
 840                 .cache_level = 1,
 841                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 842                           CRAT_CACHE_FLAGS_DATA_CACHE |
 843                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 844                 .num_cu_shared = 1,
 845         },
 846         {
 847                 /* Scalar L1 Instruction Cache per SQC */
 848                 .cache_size = 32,
 849                 .cache_level = 1,
 850                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 851                           CRAT_CACHE_FLAGS_INST_CACHE |
 852                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 853                 .num_cu_shared = 2,
 854         },
 855         {
 856                 /* Scalar L1 Data Cache per SQC */
 857                 .cache_size = 16,
 858                 .cache_level = 1,
 859                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 860                           CRAT_CACHE_FLAGS_DATA_CACHE |
 861                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 862                 .num_cu_shared = 2,
 863         },
 864         {
 865                 /* GL1 Data Cache per SA */
 866                 .cache_size = 128,
 867                 .cache_level = 1,
 868                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 869                           CRAT_CACHE_FLAGS_DATA_CACHE |
 870                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 871                 .num_cu_shared = 2,
 872         },
 873         {
 874                 /* L2 Data Cache per GPU (Total Tex Cache) */
 875                 .cache_size = 256,
 876                 .cache_level = 2,
 877                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 878                           CRAT_CACHE_FLAGS_DATA_CACHE |
 879                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 880                 .num_cu_shared = 2,
 881         },
 882 };
 883
 884 static struct kfd_gpu_cache_info dummy_cache_info[] = {
 885         {
 886                 /* TCP L1 Cache per CU */
 887                 .cache_size = 16,
 888                 .cache_level = 1,
 889                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 890                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 891                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 892                 .num_cu_shared = 1,
 893         },
 894         {
 895                 /* Scalar L1 Instruction Cache per SQC */
 896                 .cache_size = 32,
 897                 .cache_level = 1,
 898                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 899                                 CRAT_CACHE_FLAGS_INST_CACHE |
 900                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 901                 .num_cu_shared = 2,
 902         },
 903         {
 904                 /* Scalar L1 Data Cache per SQC */
 905                 .cache_size = 16,
 906                 .cache_level = 1,
 907                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 908                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 909                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 910                 .num_cu_shared = 2,
 911         },
 912         {
 913                 /* GL1 Data Cache per SA */
 914                 .cache_size = 128,
 915                 .cache_level = 1,
 916                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 917                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 918                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 919                 .num_cu_shared = 6,
 920         },
 921         {
 922                 /* L2 Data Cache per GPU (Total Tex Cache) */
 923                 .cache_size = 2048,
 924                 .cache_level = 2,
 925                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 926                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 927                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 928                 .num_cu_shared = 6,
 929         },
 930 };
 931
 932 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 933                 struct crat_subtype_computeunit *cu)
 934 {
 935         dev->node_props.cpu_cores_count = cu->num_cpu_cores;
 936         dev->node_props.cpu_core_id_base = cu->processor_id_low;
 937         if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
 938                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
 939
 940         pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
 941                         cu->processor_id_low);
 942 }
 943
 944 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
 945                 struct crat_subtype_computeunit *cu)
 946 {
 947         dev->node_props.simd_id_base = cu->processor_id_low;
 948         dev->node_props.simd_count = cu->num_simd_cores;
 949         dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
 950         dev->node_props.max_waves_per_simd = cu->max_waves_simd;
 951         dev->node_props.wave_front_size = cu->wave_front_size;
 952         dev->node_props.array_count = cu->array_count;
 953         dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
 954         dev->node_props.simd_per_cu = cu->num_simd_per_cu;
 955         dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
 956         if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
 957                 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
 958         pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
 959 }
 960
 961 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
 962  * topology device present in the device_list
 963  */
 964 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
 965                                 struct list_head *device_list)
 966 {
 967         struct kfd_topology_device *dev;
 968
 969         pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
 970                         cu->proximity_domain, cu->hsa_capability);
 971         list_for_each_entry(dev, device_list, list) {
 972                 if (cu->proximity_domain == dev->proximity_domain) {
 973                         if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
 974                                 kfd_populated_cu_info_cpu(dev, cu);
 975
 976                         if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
 977                                 kfd_populated_cu_info_gpu(dev, cu);
 978                         break;
 979                 }
 980         }
 981
 982         return 0;
 983 }
 984
 985 static struct kfd_mem_properties *
 986 find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
 987                 struct kfd_topology_device *dev)
 988 {
 989         struct kfd_mem_properties *props;
 990
 991         list_for_each_entry(props, &dev->mem_props, list) {
 992                 if (props->heap_type == heap_type
 993                                 && props->flags == flags
 994                                 && props->width == width)
 995                         return props;
 996         }
 997
 998         return NULL;
 999 }
1000 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1001  * topology device present in the device_list
1002  */
1003 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
1004                                 struct list_head *device_list)
1005 {
1006         struct kfd_mem_properties *props;
1007         struct kfd_topology_device *dev;
1008         uint32_t heap_type;
1009         uint64_t size_in_bytes;
1010         uint32_t flags = 0;
1011         uint32_t width;
1012
1013         pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
1014                         mem->proximity_domain);
1015         list_for_each_entry(dev, device_list, list) {
1016                 if (mem->proximity_domain == dev->proximity_domain) {
1017                         /* We're on GPU node */
1018                         if (dev->node_props.cpu_cores_count == 0) {
1019                                 /* APU */
1020                                 if (mem->visibility_type == 0)
1021                                         heap_type =
1022                                                 HSA_MEM_HEAP_TYPE_FB_PRIVATE;
1023                                 /* dGPU */
1024                                 else
1025                                         heap_type = mem->visibility_type;
1026                         } else
1027                                 heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
1028
1029                         if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
1030                                 flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
1031                         if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
1032                                 flags |= HSA_MEM_FLAGS_NON_VOLATILE;
1033
1034                         size_in_bytes =
1035                                 ((uint64_t)mem->length_high << 32) +
1036                                                         mem->length_low;
1037                         width = mem->width;
1038
1039                         /* Multiple banks of the same type are aggregated into
1040                          * one. User mode doesn't care about multiple physical
1041                          * memory segments. It's managed as a single virtual
1042                          * heap for user mode.
1043                          */
1044                         props = find_subtype_mem(heap_type, flags, width, dev);
1045                         if (props) {
1046                                 props->size_in_bytes += size_in_bytes;
1047                                 break;
1048                         }
1049
1050                         props = kfd_alloc_struct(props);
1051                         if (!props)
1052                                 return -ENOMEM;
1053
1054                         props->heap_type = heap_type;
1055                         props->flags = flags;
1056                         props->size_in_bytes = size_in_bytes;
1057                         props->width = width;
1058
1059                         dev->node_props.mem_banks_count++;
1060                         list_add_tail(&props->list, &dev->mem_props);
1061
1062                         break;
1063                 }
1064         }
1065
1066         return 0;
1067 }
1068
1069 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1070  * topology device present in the device_list
1071  */
1072 static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
1073                         struct list_head *device_list)
1074 {
1075         struct kfd_cache_properties *props;
1076         struct kfd_topology_device *dev;
1077         uint32_t id;
1078         uint32_t total_num_of_cu;
1079
1080         id = cache->processor_id_low;
1081
1082         pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
1083         list_for_each_entry(dev, device_list, list) {
1084                 total_num_of_cu = (dev->node_props.array_count *
1085                                         dev->node_props.cu_per_simd_array);
1086
1087                 /* Cache infomration in CRAT doesn't have proximity_domain
1088                  * information as it is associated with a CPU core or GPU
1089                  * Compute Unit. So map the cache using CPU core Id or SIMD
1090                  * (GPU) ID.
1091                  * TODO: This works because currently we can safely assume that
1092                  *  Compute Units are parsed before caches are parsed. In
1093                  *  future, remove this dependency
1094                  */
1095                 if ((id >= dev->node_props.cpu_core_id_base &&
1096                         id <= dev->node_props.cpu_core_id_base +
1097                                 dev->node_props.cpu_cores_count) ||
1098                         (id >= dev->node_props.simd_id_base &&
1099                         id < dev->node_props.simd_id_base +
1100                                 total_num_of_cu)) {
1101                         props = kfd_alloc_struct(props);
1102                         if (!props)
1103                                 return -ENOMEM;
1104
1105                         props->processor_id_low = id;
1106                         props->cache_level = cache->cache_level;
1107                         props->cache_size = cache->cache_size;
1108                         props->cacheline_size = cache->cache_line_size;
1109                         props->cachelines_per_tag = cache->lines_per_tag;
1110                         props->cache_assoc = cache->associativity;
1111                         props->cache_latency = cache->cache_latency;
1112
1113                         memcpy(props->sibling_map, cache->sibling_map,
1114                                         CRAT_SIBLINGMAP_SIZE);
1115
1116                         /* set the sibling_map_size as 32 for CRAT from ACPI */
1117                         props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
1118
1119                         if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
1120                                 props->cache_type |= HSA_CACHE_TYPE_DATA;
1121                         if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
1122                                 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
1123                         if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
1124                                 props->cache_type |= HSA_CACHE_TYPE_CPU;
1125                         if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
1126                                 props->cache_type |= HSA_CACHE_TYPE_HSACU;
1127
1128                         dev->node_props.caches_count++;
1129                         list_add_tail(&props->list, &dev->cache_props);
1130
1131                         break;
1132                 }
1133         }
1134
1135         return 0;
1136 }
1137
1138 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1139  * topology device present in the device_list
1140  */
1141 static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
1142                                         struct list_head *device_list)
1143 {
1144         struct kfd_iolink_properties *props = NULL, *props2;
1145         struct kfd_topology_device *dev, *to_dev;
1146         uint32_t id_from;
1147         uint32_t id_to;
1148
1149         id_from = iolink->proximity_domain_from;
1150         id_to = iolink->proximity_domain_to;
1151
1152         pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
1153                         id_from, id_to);
1154         list_for_each_entry(dev, device_list, list) {
1155                 if (id_from == dev->proximity_domain) {
1156                         props = kfd_alloc_struct(props);
1157                         if (!props)
1158                                 return -ENOMEM;
1159
1160                         props->node_from = id_from;
1161                         props->node_to = id_to;
1162                         props->ver_maj = iolink->version_major;
1163                         props->ver_min = iolink->version_minor;
1164                         props->iolink_type = iolink->io_interface_type;
1165
1166                         if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
1167                                 props->weight = 20;
1168                         else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
1169                                 props->weight = iolink->weight_xgmi;
1170                         else
1171                                 props->weight = node_distance(id_from, id_to);
1172
1173                         props->min_latency = iolink->minimum_latency;
1174                         props->max_latency = iolink->maximum_latency;
1175                         props->min_bandwidth = iolink->minimum_bandwidth_mbs;
1176                         props->max_bandwidth = iolink->maximum_bandwidth_mbs;
1177                         props->rec_transfer_size =
1178                                         iolink->recommended_transfer_size;
1179
1180                         dev->node_props.io_links_count++;
1181                         list_add_tail(&props->list, &dev->io_link_props);
1182                         break;
1183                 }
1184         }
1185
1186         /* CPU topology is created before GPUs are detected, so CPU->GPU
1187          * links are not built at that time. If a PCIe type is discovered, it
1188          * means a GPU is detected and we are adding GPU->CPU to the topology.
1189          * At this time, also add the corresponded CPU->GPU link if GPU
1190          * is large bar.
1191          * For xGMI, we only added the link with one direction in the crat
1192          * table, add corresponded reversed direction link now.
1193          */
1194         if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
1195                 to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
1196                 if (!to_dev)
1197                         return -ENODEV;
1198                 /* same everything but the other direction */
1199                 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
1200                 if (!props2)
1201                         return -ENOMEM;
1202
1203                 props2->node_from = id_to;
1204                 props2->node_to = id_from;
1205                 props2->kobj = NULL;
1206                 to_dev->node_props.io_links_count++;
1207                 list_add_tail(&props2->list, &to_dev->io_link_props);
1208         }
1209
1210         return 0;
1211 }
1212
1213 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214  * present in the device_list
1215  *      @sub_type_hdr - subtype section of crat_image
1216  *      @device_list - list of topology devices present in this crat_image
1217  */
1218 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
1219                                 struct list_head *device_list)
1220 {
1221         struct crat_subtype_computeunit *cu;
1222         struct crat_subtype_memory *mem;
1223         struct crat_subtype_cache *cache;
1224         struct crat_subtype_iolink *iolink;
1225         int ret = 0;
1226
1227         switch (sub_type_hdr->type) {
1228         case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
1229                 cu = (struct crat_subtype_computeunit *)sub_type_hdr;
1230                 ret = kfd_parse_subtype_cu(cu, device_list);
1231                 break;
1232         case CRAT_SUBTYPE_MEMORY_AFFINITY:
1233                 mem = (struct crat_subtype_memory *)sub_type_hdr;
1234                 ret = kfd_parse_subtype_mem(mem, device_list);
1235                 break;
1236         case CRAT_SUBTYPE_CACHE_AFFINITY:
1237                 cache = (struct crat_subtype_cache *)sub_type_hdr;
1238                 ret = kfd_parse_subtype_cache(cache, device_list);
1239                 break;
1240         case CRAT_SUBTYPE_TLB_AFFINITY:
1241                 /*
1242                  * For now, nothing to do here
1243                  */
1244                 pr_debug("Found TLB entry in CRAT table (not processing)\n");
1245                 break;
1246         case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
1247                 /*
1248                  * For now, nothing to do here
1249                  */
1250                 pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
1251                 break;
1252         case CRAT_SUBTYPE_IOLINK_AFFINITY:
1253                 iolink = (struct crat_subtype_iolink *)sub_type_hdr;
1254                 ret = kfd_parse_subtype_iolink(iolink, device_list);
1255                 break;
1256         default:
1257                 pr_warn("Unknown subtype %d in CRAT\n",
1258                                 sub_type_hdr->type);
1259         }
1260
1261         return ret;
1262 }
1263
1264 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1265  * create a kfd_topology_device and add in to device_list. Also parse
1266  * CRAT subtypes and attach it to appropriate kfd_topology_device
1267  *      @crat_image - input image containing CRAT
1268  *      @device_list - [OUT] list of kfd_topology_device generated after
1269  *                     parsing crat_image
1270  *      @proximity_domain - Proximity domain of the first device in the table
1271  *
1272  *      Return - 0 if successful else -ve value
1273  */
1274 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
1275                          uint32_t proximity_domain)
1276 {
1277         struct kfd_topology_device *top_dev = NULL;
1278         struct crat_subtype_generic *sub_type_hdr;
1279         uint16_t node_id;
1280         int ret = 0;
1281         struct crat_header *crat_table = (struct crat_header *)crat_image;
1282         uint16_t num_nodes;
1283         uint32_t image_len;
1284
1285         if (!crat_image)
1286                 return -EINVAL;
1287
1288         if (!list_empty(device_list)) {
1289                 pr_warn("Error device list should be empty\n");
1290                 return -EINVAL;
1291         }
1292
1293         num_nodes = crat_table->num_domains;
1294         image_len = crat_table->length;
1295
1296         pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
1297
1298         for (node_id = 0; node_id < num_nodes; node_id++) {
1299                 top_dev = kfd_create_topology_device(device_list);
1300                 if (!top_dev)
1301                         break;
1302                 top_dev->proximity_domain = proximity_domain++;
1303         }
1304
1305         if (!top_dev) {
1306                 ret = -ENOMEM;
1307                 goto err;
1308         }
1309
1310         memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
1311         memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
1312                         CRAT_OEMTABLEID_LENGTH);
1313         top_dev->oem_revision = crat_table->oem_revision;
1314
1315         sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1316         while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
1317                         ((char *)crat_image) + image_len) {
1318                 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
1319                         ret = kfd_parse_subtype(sub_type_hdr, device_list);
1320                         if (ret)
1321                                 break;
1322                 }
1323
1324                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1325                                 sub_type_hdr->length);
1326         }
1327
1328 err:
1329         if (ret)
1330                 kfd_release_topology_device_list(device_list);
1331
1332         return ret;
1333 }
1334
1335
1336 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
1337                                                    struct kfd_gpu_cache_info *pcache_info)
1338 {
1339         struct amdgpu_device *adev = kdev->adev;
1340         int i = 0;
1341
1342         /* TCP L1 Cache per CU */
1343         if (adev->gfx.config.gc_tcp_l1_size) {
1344                 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
1345                 pcache_info[i].cache_level = 1;
1346                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1347                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1348                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1349                 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
1350                 i++;
1351         }
1352         /* Scalar L1 Instruction Cache per SQC */
1353         if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
1354                 pcache_info[i].cache_size =
1355                         adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
1356                 pcache_info[i].cache_level = 1;
1357                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1358                                         CRAT_CACHE_FLAGS_INST_CACHE |
1359                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1360                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1361                 i++;
1362         }
1363         /* Scalar L1 Data Cache per SQC */
1364         if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
1365                 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
1366                 pcache_info[i].cache_level = 1;
1367                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1368                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1369                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1370                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1371                 i++;
1372         }
1373         /* GL1 Data Cache per SA */
1374         if (adev->gfx.config.gc_gl1c_per_sa &&
1375             adev->gfx.config.gc_gl1c_size_per_instance) {
1376                 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
1377                         adev->gfx.config.gc_gl1c_size_per_instance;
1378                 pcache_info[i].cache_level = 1;
1379                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1380                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1381                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1382                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1383                 i++;
1384         }
1385         /* L2 Data Cache per GPU (Total Tex Cache) */
1386         if (adev->gfx.config.gc_gl2c_per_gpu) {
1387                 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
1388                 pcache_info[i].cache_level = 2;
1389                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1390                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1391                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1392                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1393                 i++;
1394         }
1395         /* L3 Data Cache per GPU */
1396         if (adev->gmc.mall_size) {
1397                 pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
1398                 pcache_info[i].cache_level = 3;
1399                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1400                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1401                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1402                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1403                 i++;
1404         }
1405         return i;
1406 }
1407
1408 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
1409 {
1410         int num_of_cache_types = 0;
1411
1412         switch (kdev->adev->asic_type) {
1413         case CHIP_KAVERI:
1414                 *pcache_info = kaveri_cache_info;
1415                 num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
1416                 break;
1417         case CHIP_HAWAII:
1418                 *pcache_info = hawaii_cache_info;
1419                 num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
1420                 break;
1421         case CHIP_CARRIZO:
1422                 *pcache_info = carrizo_cache_info;
1423                 num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
1424                 break;
1425         case CHIP_TONGA:
1426                 *pcache_info = tonga_cache_info;
1427                 num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
1428                 break;
1429         case CHIP_FIJI:
1430                 *pcache_info = fiji_cache_info;
1431                 num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
1432                 break;
1433         case CHIP_POLARIS10:
1434                 *pcache_info = polaris10_cache_info;
1435                 num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
1436                 break;
1437         case CHIP_POLARIS11:
1438                 *pcache_info = polaris11_cache_info;
1439                 num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
1440                 break;
1441         case CHIP_POLARIS12:
1442                 *pcache_info = polaris12_cache_info;
1443                 num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
1444                 break;
1445         case CHIP_VEGAM:
1446                 *pcache_info = vegam_cache_info;
1447                 num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
1448                 break;
1449         default:
1450                 switch (KFD_GC_VERSION(kdev)) {
1451                 case IP_VERSION(9, 0, 1):
1452                         *pcache_info = vega10_cache_info;
1453                         num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
1454                         break;
1455                 case IP_VERSION(9, 2, 1):
1456                         *pcache_info = vega12_cache_info;
1457                         num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
1458                         break;
1459                 case IP_VERSION(9, 4, 0):
1460                 case IP_VERSION(9, 4, 1):
1461                         *pcache_info = vega20_cache_info;
1462                         num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
1463                         break;
1464                 case IP_VERSION(9, 4, 2):
1465                 case IP_VERSION(9, 4, 3):
1466                         *pcache_info = aldebaran_cache_info;
1467                         num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
1468                         break;
1469                 case IP_VERSION(9, 1, 0):
1470                 case IP_VERSION(9, 2, 2):
1471                         *pcache_info = raven_cache_info;
1472                         num_of_cache_types = ARRAY_SIZE(raven_cache_info);
1473                         break;
1474                 case IP_VERSION(9, 3, 0):
1475                         *pcache_info = renoir_cache_info;
1476                         num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
1477                         break;
1478                 case IP_VERSION(10, 1, 10):
1479                 case IP_VERSION(10, 1, 2):
1480                 case IP_VERSION(10, 1, 3):
1481                 case IP_VERSION(10, 1, 4):
1482                         *pcache_info = navi10_cache_info;
1483                         num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
1484                         break;
1485                 case IP_VERSION(10, 1, 1):
1486                         *pcache_info = navi14_cache_info;
1487                         num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
1488                         break;
1489                 case IP_VERSION(10, 3, 0):
1490                         *pcache_info = sienna_cichlid_cache_info;
1491                         num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
1492                         break;
1493                 case IP_VERSION(10, 3, 2):
1494                         *pcache_info = navy_flounder_cache_info;
1495                         num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
1496                         break;
1497                 case IP_VERSION(10, 3, 4):
1498                         *pcache_info = dimgrey_cavefish_cache_info;
1499                         num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
1500                         break;
1501                 case IP_VERSION(10, 3, 1):
1502                         *pcache_info = vangogh_cache_info;
1503                         num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
1504                         break;
1505                 case IP_VERSION(10, 3, 5):
1506                         *pcache_info = beige_goby_cache_info;
1507                         num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
1508                         break;
1509                 case IP_VERSION(10, 3, 3):
1510                         *pcache_info = yellow_carp_cache_info;
1511                         num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
1512                         break;
1513                 case IP_VERSION(10, 3, 6):
1514                         *pcache_info = gc_10_3_6_cache_info;
1515                         num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
1516                         break;
1517                 case IP_VERSION(10, 3, 7):
1518                         *pcache_info = gfx1037_cache_info;
1519                         num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
1520                         break;
1521                 case IP_VERSION(11, 0, 0):
1522                 case IP_VERSION(11, 0, 1):
1523                 case IP_VERSION(11, 0, 2):
1524                 case IP_VERSION(11, 0, 3):
1525                 case IP_VERSION(11, 0, 4):
1526                         num_of_cache_types =
1527                                 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
1528                         break;
1529                 default:
1530                         *pcache_info = dummy_cache_info;
1531                         num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
1532                         pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
1533                         break;
1534                 }
1535         }
1536         return num_of_cache_types;
1537 }
1538
1539 static bool kfd_ignore_crat(void)
1540 {
1541         bool ret;
1542
1543         if (ignore_crat)
1544                 return true;
1545
1546         ret = true;
1547
1548         return ret;
1549 }
1550
1551 /*
1552  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
1553  * copies CRAT from ACPI (if available).
1554  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
1555  *
1556  *      @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
1557  *                   crat_image will be NULL
1558  *      @size: [OUT] size of crat_image
1559  *
1560  *      Return 0 if successful else return error code
1561  */
1562 int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
1563 {
1564         struct acpi_table_header *crat_table;
1565         acpi_status status;
1566         void *pcrat_image;
1567         int rc = 0;
1568
1569         if (!crat_image)
1570                 return -EINVAL;
1571
1572         *crat_image = NULL;
1573
1574         if (kfd_ignore_crat()) {
1575                 pr_info("CRAT table disabled by module option\n");
1576                 return -ENODATA;
1577         }
1578
1579         /* Fetch the CRAT table from ACPI */
1580         status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
1581         if (status == AE_NOT_FOUND) {
1582                 pr_info("CRAT table not found\n");
1583                 return -ENODATA;
1584         } else if (ACPI_FAILURE(status)) {
1585                 const char *err = acpi_format_exception(status);
1586
1587                 pr_err("CRAT table error: %s\n", err);
1588                 return -EINVAL;
1589         }
1590
1591         pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
1592         if (!pcrat_image) {
1593                 rc = -ENOMEM;
1594                 goto out;
1595         }
1596
1597         memcpy(pcrat_image, crat_table, crat_table->length);
1598         *crat_image = pcrat_image;
1599         *size = crat_table->length;
1600 out:
1601         acpi_put_table(crat_table);
1602         return rc;
1603 }
1604
1605 /* Memory required to create Virtual CRAT.
1606  * Since there is no easy way to predict the amount of memory required, the
1607  * following amount is allocated for GPU Virtual CRAT. This is
1608  * expected to cover all known conditions. But to be safe additional check
1609  * is put in the code to ensure we don't overwrite.
1610  */
1611 #define VCRAT_SIZE_FOR_GPU      (4 * PAGE_SIZE)
1612
1613 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1614  *
1615  *      @numa_node_id: CPU NUMA node id
1616  *      @avail_size: Available size in the memory
1617  *      @sub_type_hdr: Memory into which compute info will be filled in
1618  *
1619  *      Return 0 if successful else return -ve value
1620  */
1621 static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
1622                                 int proximity_domain,
1623                                 struct crat_subtype_computeunit *sub_type_hdr)
1624 {
1625         const struct cpumask *cpumask;
1626
1627         *avail_size -= sizeof(struct crat_subtype_computeunit);
1628         if (*avail_size < 0)
1629                 return -ENOMEM;
1630
1631         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
1632
1633         /* Fill in subtype header data */
1634         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
1635         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
1636         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1637
1638         cpumask = cpumask_of_node(numa_node_id);
1639
1640         /* Fill in CU data */
1641         sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
1642         sub_type_hdr->proximity_domain = proximity_domain;
1643         sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
1644         if (sub_type_hdr->processor_id_low == -1)
1645                 return -EINVAL;
1646
1647         sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
1648
1649         return 0;
1650 }
1651
1652 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1653  *
1654  *      @numa_node_id: CPU NUMA node id
1655  *      @avail_size: Available size in the memory
1656  *      @sub_type_hdr: Memory into which compute info will be filled in
1657  *
1658  *      Return 0 if successful else return -ve value
1659  */
1660 static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
1661                         int proximity_domain,
1662                         struct crat_subtype_memory *sub_type_hdr)
1663 {
1664         uint64_t mem_in_bytes = 0;
1665         pg_data_t *pgdat;
1666         int zone_type;
1667
1668         *avail_size -= sizeof(struct crat_subtype_memory);
1669         if (*avail_size < 0)
1670                 return -ENOMEM;
1671
1672         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1673
1674         /* Fill in subtype header data */
1675         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1676         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1677         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1678
1679         /* Fill in Memory Subunit data */
1680
1681         /* Unlike si_meminfo, si_meminfo_node is not exported. So
1682          * the following lines are duplicated from si_meminfo_node
1683          * function
1684          */
1685         pgdat = NODE_DATA(numa_node_id);
1686         for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
1687                 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
1688         mem_in_bytes <<= PAGE_SHIFT;
1689
1690         sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
1691         sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
1692         sub_type_hdr->proximity_domain = proximity_domain;
1693
1694         return 0;
1695 }
1696
#ifdef CONFIG_X86_64
/* kfd_fill_iolink_info_for_cpu - Emit IO link subtypes from one CPU NUMA
 * node to every other online NUMA node
 *
 *	@numa_node_id: CPU NUMA node id the links originate from
 *	@avail_size: [IN/OUT] bytes still free in the image; reduced by the
 *		size of one iolink subtype per link
 *	@num_entries: [OUT] number of links written so far; reset to 0 on
 *		entry and incremented after each link
 *	@sub_type_hdr: Memory into which consecutive iolink entries are
 *		filled in
 *
 *	Return 0 if successful else return -ENOMEM when a link does not fit
 */
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
				uint32_t *num_entries,
				struct crat_subtype_iolink *sub_type_hdr)
{
	struct cpuinfo_x86 *boot_cpu = &cpu_data(0);
	struct crat_subtype_iolink *link = sub_type_hdr;
	/* The interface type is chosen from the vendor of CPU 0. */
	uint8_t link_type = (boot_cpu->x86_vendor == X86_VENDOR_AMD) ?
				CRAT_IOLINK_TYPE_HYPERTRANSPORT :
				CRAT_IOLINK_TYPE_QPI_1_1;
	int peer;

	*num_entries = 0;

	for_each_online_node(peer) {
		/* No link from the node to itself */
		if (peer == numa_node_id)
			continue;

		*avail_size -= sizeof(*link);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(link, 0, sizeof(*link));

		/* Subtype header */
		link->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		link->length = sizeof(*link);
		link->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* Link endpoints and interface */
		link->proximity_domain_from = numa_node_id;
		link->proximity_domain_to = peer;
		link->io_interface_type = link_type;

		(*num_entries)++;
		link++;
	}

	return 0;
}
#endif
1741
1742 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1743  *
1744  *      @pcrat_image: Fill in VCRAT for CPU
1745  *      @size:  [IN] allocated size of crat_image.
1746  *              [OUT] actual size of data filled in crat_image
1747  */
1748 static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
1749 {
1750         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
1751         struct acpi_table_header *acpi_table;
1752         acpi_status status;
1753         struct crat_subtype_generic *sub_type_hdr;
1754         int avail_size = *size;
1755         int numa_node_id;
1756 #ifdef CONFIG_X86_64
1757         uint32_t entries = 0;
1758 #endif
1759         int ret = 0;
1760
1761         if (!pcrat_image)
1762                 return -EINVAL;
1763
1764         /* Fill in CRAT Header.
1765          * Modify length and total_entries as subunits are added.
1766          */
1767         avail_size -= sizeof(struct crat_header);
1768         if (avail_size < 0)
1769                 return -ENOMEM;
1770
1771         memset(crat_table, 0, sizeof(struct crat_header));
1772         memcpy(&crat_table->signature, CRAT_SIGNATURE,
1773                         sizeof(crat_table->signature));
1774         crat_table->length = sizeof(struct crat_header);
1775
1776         status = acpi_get_table("DSDT", 0, &acpi_table);
1777         if (status != AE_OK)
1778                 pr_warn("DSDT table not found for OEM information\n");
1779         else {
1780                 crat_table->oem_revision = acpi_table->revision;
1781                 memcpy(crat_table->oem_id, acpi_table->oem_id,
1782                                 CRAT_OEMID_LENGTH);
1783                 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
1784                                 CRAT_OEMTABLEID_LENGTH);
1785                 acpi_put_table(acpi_table);
1786         }
1787         crat_table->total_entries = 0;
1788         crat_table->num_domains = 0;
1789
1790         sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1791
1792         for_each_online_node(numa_node_id) {
1793                 if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
1794                         continue;
1795
1796                 /* Fill in Subtype: Compute Unit */
1797                 ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
1798                         crat_table->num_domains,
1799                         (struct crat_subtype_computeunit *)sub_type_hdr);
1800                 if (ret < 0)
1801                         return ret;
1802                 crat_table->length += sub_type_hdr->length;
1803                 crat_table->total_entries++;
1804
1805                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1806                         sub_type_hdr->length);
1807
1808                 /* Fill in Subtype: Memory */
1809                 ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
1810                         crat_table->num_domains,
1811                         (struct crat_subtype_memory *)sub_type_hdr);
1812                 if (ret < 0)
1813                         return ret;
1814                 crat_table->length += sub_type_hdr->length;
1815                 crat_table->total_entries++;
1816
1817                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1818                         sub_type_hdr->length);
1819
1820                 /* Fill in Subtype: IO Link */
1821 #ifdef CONFIG_X86_64
1822                 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
1823                                 &entries,
1824                                 (struct crat_subtype_iolink *)sub_type_hdr);
1825                 if (ret < 0)
1826                         return ret;
1827
1828                 if (entries) {
1829                         crat_table->length += (sub_type_hdr->length * entries);
1830                         crat_table->total_entries += entries;
1831
1832                         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1833                                         sub_type_hdr->length * entries);
1834                 }
1835 #else
1836                 pr_info("IO link not available for non x86 platforms\n");
1837 #endif
1838
1839                 crat_table->num_domains++;
1840         }
1841
1842         /* TODO: Add cache Subtype for CPU.
1843          * Currently, CPU cache information is available in function
1844          * detect_cache_attributes(cpu) defined in the file
1845          * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
1846          * exported and to get the same information the code needs to be
1847          * duplicated.
1848          */
1849
1850         *size = crat_table->length;
1851         pr_info("Virtual CRAT table created for CPU\n");
1852
1853         return 0;
1854 }
1855
1856 static int kfd_fill_gpu_memory_affinity(int *avail_size,
1857                 struct kfd_node *kdev, uint8_t type, uint64_t size,
1858                 struct crat_subtype_memory *sub_type_hdr,
1859                 uint32_t proximity_domain,
1860                 const struct kfd_local_mem_info *local_mem_info)
1861 {
1862         *avail_size -= sizeof(struct crat_subtype_memory);
1863         if (*avail_size < 0)
1864                 return -ENOMEM;
1865
1866         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1867         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1868         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1869         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1870
1871         sub_type_hdr->proximity_domain = proximity_domain;
1872
1873         pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
1874                         type, size);
1875
1876         sub_type_hdr->length_low = lower_32_bits(size);
1877         sub_type_hdr->length_high = upper_32_bits(size);
1878
1879         sub_type_hdr->width = local_mem_info->vram_width;
1880         sub_type_hdr->visibility_type = type;
1881
1882         return 0;
1883 }
1884
#ifdef CONFIG_ACPI_NUMA
/* kfd_find_numa_node_in_srat - Look up the GPU's NUMA node in the ACPI SRAT
 *
 *	@kdev: GPU device whose PCI device gets its NUMA node assigned
 *
 *	Walks the SRAT subtables. CPU affinity entries are only used to
 *	track the largest proximity domain seen so far; a generic affinity
 *	entry whose device handle equals (PCI domain << 16 | PCI device id)
 *	of the GPU provides the proximity domain that is translated with
 *	pxm_to_node(). When a node is found it is applied to the PCI device
 *	with set_dev_node(); otherwise the device is left unchanged.
 */
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, subtable_len;
	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
			pci_dev_id(kdev->adev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;

	/* Parse all entries looking for a match. */
	sub_header = (struct acpi_subtable_header *)
			((unsigned long)table_header +
			sizeof(struct acpi_table_srat));

	/*
	 * Only read a subtable header that lies completely inside the
	 * table, and only parse a subtable that fits completely. An entry
	 * that ends exactly at table_end is the valid last entry and must
	 * still be examined.
	 */
	while ((unsigned long)sub_header + sizeof(*sub_header) <= table_end) {
		subtable_len = sub_header->length;

		/*
		 * If length is 0, break from this loop to avoid
		 * infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		/* Entry claims to extend past the table: stop parsing. */
		if ((unsigned long)sub_header + subtable_len > table_end)
			break;

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
					cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
					*((u16 *)(&gpu->device_handle[2]));
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		sub_header = (struct acpi_subtable_header *)
				((unsigned long)sub_header + subtable_len);
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
1970
1971 #define KFD_CRAT_INTRA_SOCKET_WEIGHT    13
1972 #define KFD_CRAT_XGMI_WEIGHT            15
1973
1974 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
1975  * to its NUMA node
1976  *      @avail_size: Available size in the memory
1977  *      @kdev - [IN] GPU device
1978  *      @sub_type_hdr: Memory into which io link info will be filled in
1979  *      @proximity_domain - proximity domain of the GPU node
1980  *
1981  *      Return 0 if successful else return -ve value
1982  */
1983 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
1984                         struct kfd_node *kdev,
1985                         struct crat_subtype_iolink *sub_type_hdr,
1986                         uint32_t proximity_domain)
1987 {
1988         *avail_size -= sizeof(struct crat_subtype_iolink);
1989         if (*avail_size < 0)
1990                 return -ENOMEM;
1991
1992         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
1993
1994         /* Fill in subtype header data */
1995         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
1996         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
1997         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1998         if (kfd_dev_is_large_bar(kdev))
1999                 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2000
2001         /* Fill in IOLINK subtype.
2002          * TODO: Fill-in other fields of iolink subtype
2003          */
2004         if (kdev->adev->gmc.xgmi.connected_to_cpu ||
2005             (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
2006              kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
2007              AMDGPU_PKG_TYPE_APU)) {
2008                 bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
2009                 int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
2010                                                         KFD_CRAT_INTRA_SOCKET_WEIGHT;
2011                 uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
2012                                                         kdev->adev, NULL, true) : mem_bw;
2013
2014                 /*
2015                  * with host gpu xgmi link, host can access gpu memory whether
2016                  * or not pcie bar type is large, so always create bidirectional
2017                  * io link.
2018                  */
2019                 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2020                 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2021                 sub_type_hdr->weight_xgmi = weight;
2022                 sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
2023                 sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
2024         } else {
2025                 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
2026                 sub_type_hdr->minimum_bandwidth_mbs =
2027                                 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
2028                 sub_type_hdr->maximum_bandwidth_mbs =
2029                                 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
2030         }
2031
2032         sub_type_hdr->proximity_domain_from = proximity_domain;
2033
2034 #ifdef CONFIG_ACPI_NUMA
2035         if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
2036             num_possible_nodes() > 1)
2037                 kfd_find_numa_node_in_srat(kdev);
2038 #endif
2039 #ifdef CONFIG_NUMA
2040         if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
2041                 sub_type_hdr->proximity_domain_to = 0;
2042         else
2043                 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
2044 #else
2045         sub_type_hdr->proximity_domain_to = 0;
2046 #endif
2047         return 0;
2048 }
2049
2050 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
2051                         struct kfd_node *kdev,
2052                         struct kfd_node *peer_kdev,
2053                         struct crat_subtype_iolink *sub_type_hdr,
2054                         uint32_t proximity_domain_from,
2055                         uint32_t proximity_domain_to)
2056 {
2057         bool use_ta_info = kdev->kfd->num_nodes == 1;
2058
2059         *avail_size -= sizeof(struct crat_subtype_iolink);
2060         if (*avail_size < 0)
2061                 return -ENOMEM;
2062
2063         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
2064
2065         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2066         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2067         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
2068                                CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2069
2070         sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2071         sub_type_hdr->proximity_domain_from = proximity_domain_from;
2072         sub_type_hdr->proximity_domain_to = proximity_domain_to;
2073
2074         if (use_ta_info) {
2075                 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
2076                         amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
2077                 sub_type_hdr->maximum_bandwidth_mbs =
2078                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
2079                                                         peer_kdev->adev, false);
2080                 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
2081                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
2082         } else {
2083                 bool is_single_hop = kdev->kfd == peer_kdev->kfd;
2084                 int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
2085                         (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
2086                 int mem_bw = 819200;
2087
2088                 sub_type_hdr->weight_xgmi = weight;
2089                 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2090                 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2091         }
2092
2093         return 0;
2094 }
2095
2096 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2097  *
2098  *      @pcrat_image: Fill in VCRAT for GPU
2099  *      @size:  [IN] allocated size of crat_image.
2100  *              [OUT] actual size of data filled in crat_image
2101  */
2102 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
2103                                       size_t *size, struct kfd_node *kdev,
2104                                       uint32_t proximity_domain)
2105 {
2106         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2107         struct crat_subtype_generic *sub_type_hdr;
2108         struct kfd_local_mem_info local_mem_info;
2109         struct kfd_topology_device *peer_dev;
2110         struct crat_subtype_computeunit *cu;
2111         struct kfd_cu_info cu_info;
2112         int avail_size = *size;
2113         uint32_t total_num_of_cu;
2114         uint32_t nid = 0;
2115         int ret = 0;
2116
2117         if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
2118                 return -EINVAL;
2119
2120         /* Fill the CRAT Header.
2121          * Modify length and total_entries as subunits are added.
2122          */
2123         avail_size -= sizeof(struct crat_header);
2124         if (avail_size < 0)
2125                 return -ENOMEM;
2126
2127         memset(crat_table, 0, sizeof(struct crat_header));
2128
2129         memcpy(&crat_table->signature, CRAT_SIGNATURE,
2130                         sizeof(crat_table->signature));
2131         /* Change length as we add more subtypes*/
2132         crat_table->length = sizeof(struct crat_header);
2133         crat_table->num_domains = 1;
2134         crat_table->total_entries = 0;
2135
2136         /* Fill in Subtype: Compute Unit
2137          * First fill in the sub type header and then sub type data
2138          */
2139         avail_size -= sizeof(struct crat_subtype_computeunit);
2140         if (avail_size < 0)
2141                 return -ENOMEM;
2142
2143         sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
2144         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
2145
2146         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
2147         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
2148         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
2149
2150         /* Fill CU subtype data */
2151         cu = (struct crat_subtype_computeunit *)sub_type_hdr;
2152         cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
2153         cu->proximity_domain = proximity_domain;
2154
2155         amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2156         cu->num_simd_per_cu = cu_info.simd_per_cu;
2157         cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
2158         cu->max_waves_simd = cu_info.max_waves_per_simd;
2159
2160         cu->wave_front_size = cu_info.wave_front_size;
2161         cu->array_count = cu_info.num_shader_arrays_per_engine *
2162                 cu_info.num_shader_engines;
2163         total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2164         cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2165         cu->num_cu_per_array = cu_info.num_cu_per_sh;
2166         cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2167         cu->num_banks = cu_info.num_shader_engines;
2168         cu->lds_size_in_kb = cu_info.lds_size;
2169
2170         cu->hsa_capability = 0;
2171
2172         /* Check if this node supports IOMMU. During parsing this flag will
2173          * translate to HSA_CAP_ATS_PRESENT
2174          */
2175         if (!kfd_iommu_check_device(kdev->kfd))
2176                 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
2177
2178         crat_table->length += sub_type_hdr->length;
2179         crat_table->total_entries++;
2180
2181         /* Fill in Subtype: Memory. Only on systems with large BAR (no
2182          * private FB), report memory as public. On other systems
2183          * report the total FB size (public+private) as a single
2184          * private heap.
2185          */
2186         local_mem_info = kdev->local_mem_info;
2187         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2188                         sub_type_hdr->length);
2189
2190         if (debug_largebar)
2191                 local_mem_info.local_mem_size_private = 0;
2192
2193         if (local_mem_info.local_mem_size_private == 0)
2194                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2195                                 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
2196                                 local_mem_info.local_mem_size_public,
2197                                 (struct crat_subtype_memory *)sub_type_hdr,
2198                                 proximity_domain,
2199                                 &local_mem_info);
2200         else
2201                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2202                                 kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
2203                                 local_mem_info.local_mem_size_public +
2204                                 local_mem_info.local_mem_size_private,
2205                                 (struct crat_subtype_memory *)sub_type_hdr,
2206                                 proximity_domain,
2207                                 &local_mem_info);
2208         if (ret < 0)
2209                 return ret;
2210
2211         crat_table->length += sizeof(struct crat_subtype_memory);
2212         crat_table->total_entries++;
2213
2214         /* Fill in Subtype: IO_LINKS
2215          *  Only direct links are added here which is Link from GPU to
2216          *  its NUMA node. Indirect links are added by userspace.
2217          */
2218         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2219                 sub_type_hdr->length);
2220         ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
2221                 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
2222
2223         if (ret < 0)
2224                 return ret;
2225
2226         crat_table->length += sub_type_hdr->length;
2227         crat_table->total_entries++;
2228
2229
2230         /* Fill in Subtype: IO_LINKS
2231          * Direct links from GPU to other GPUs through xGMI.
2232          * We will loop GPUs that already be processed (with lower value
2233          * of proximity_domain), add the link for the GPUs with same
2234          * hive id (from this GPU to other GPU) . The reversed iolink
2235          * (from other GPU to this GPU) will be added
2236          * in kfd_parse_subtype_iolink.
2237          */
2238         if (kdev->kfd->hive_id) {
2239                 for (nid = 0; nid < proximity_domain; ++nid) {
2240                         peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2241                         if (!peer_dev->gpu)
2242                                 continue;
2243                         if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2244                                 continue;
2245                         sub_type_hdr = (typeof(sub_type_hdr))(
2246                                 (char *)sub_type_hdr +
2247                                 sizeof(struct crat_subtype_iolink));
2248                         ret = kfd_fill_gpu_xgmi_link_to_gpu(
2249                                 &avail_size, kdev, peer_dev->gpu,
2250                                 (struct crat_subtype_iolink *)sub_type_hdr,
2251                                 proximity_domain, nid);
2252                         if (ret < 0)
2253                                 return ret;
2254                         crat_table->length += sub_type_hdr->length;
2255                         crat_table->total_entries++;
2256                 }
2257         }
2258         *size = crat_table->length;
2259         pr_info("Virtual CRAT table created for GPU\n");
2260
2261         return ret;
2262 }
2263
2264 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2265  *              creates a Virtual CRAT (VCRAT) image
2266  *
2267  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2268  *
2269  *      @crat_image: VCRAT image created because ACPI does not have a
2270  *                   CRAT for this device
2271  *      @size: [OUT] size of virtual crat_image
2272  *      @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2273  *              COMPUTE_UNIT_GPU - Create VCRAT for GPU
2274  *              (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2275  *                      -- this option is not currently implemented.
2276  *                      The assumption is that all AMD APUs will have CRAT
2277  *      @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2278  *
2279  *      Return 0 if successful else return -ve value
2280  */
2281 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
2282                                   int flags, struct kfd_node *kdev,
2283                                   uint32_t proximity_domain)
2284 {
2285         void *pcrat_image = NULL;
2286         int ret = 0, num_nodes;
2287         size_t dyn_size;
2288
2289         if (!crat_image)
2290                 return -EINVAL;
2291
2292         *crat_image = NULL;
2293
2294         /* Allocate the CPU Virtual CRAT size based on the number of online
2295          * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
2296          * This should cover all the current conditions. A check is put not
2297          * to overwrite beyond allocated size for GPUs
2298          */
2299         switch (flags) {
2300         case COMPUTE_UNIT_CPU:
2301                 num_nodes = num_online_nodes();
2302                 dyn_size = sizeof(struct crat_header) +
2303                         num_nodes * (sizeof(struct crat_subtype_computeunit) +
2304                         sizeof(struct crat_subtype_memory) +
2305                         (num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2306                 pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2307                 if (!pcrat_image)
2308                         return -ENOMEM;
2309                 *size = dyn_size;
2310                 pr_debug("CRAT size is %ld", dyn_size);
2311                 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2312                 break;
2313         case COMPUTE_UNIT_GPU:
2314                 if (!kdev)
2315                         return -EINVAL;
2316                 pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
2317                 if (!pcrat_image)
2318                         return -ENOMEM;
2319                 *size = VCRAT_SIZE_FOR_GPU;
2320                 ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
2321                                                  proximity_domain);
2322                 break;
2323         case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2324                 /* TODO: */
2325                 ret = -EINVAL;
2326                 pr_err("VCRAT not implemented for APU\n");
2327                 break;
2328         default:
2329                 ret = -EINVAL;
2330         }
2331
2332         if (!ret)
2333                 *crat_image = pcrat_image;
2334         else
2335                 kvfree(pcrat_image);
2336
2337         return ret;
2338 }
2339
2340
/* kfd_destroy_crat_image - Release a CRAT image buffer
 *
 *      @crat_image: [IN] - image pointer previously handed out by one of the
 *                   kfd_create_crat_image_xxx(..) functions
 *
 *      The pointer is passed straight to kvfree(); the caller must not use
 *      it afterwards.
 */
void kfd_destroy_crat_image(void *crat_image)
{
        kvfree(crat_image);
}