drivers/gpu/drm/amd/amdkfd/kfd_crat.c

   1 // SPDX-License-Identifier: GPL-2.0 OR MIT
   2 /*
   3  * Copyright 2015-2022 Advanced Micro Devices, Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include <linux/pci.h>
  25 #include <linux/acpi.h>
  26 #include "kfd_crat.h"
  27 #include "kfd_priv.h"
  28 #include "kfd_topology.h"
  29 #include "kfd_iommu.h"
  30 #include "amdgpu.h"
  31 #include "amdgpu_amdkfd.h"
  32
  33 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  34  * GPU processor ID are expressed with Bit[31]=1.
  35  * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
  36  * used in the CRAT.
  37  */
  38 static uint32_t gpu_processor_id_low = 0x80001000;
  39
  40 /* Return the next available gpu_processor_id and increment it for next GPU
  41  *      @total_cu_count - Total CUs present in the GPU including ones
  42  *                        masked off
  43  */
  44 static inline unsigned int get_and_inc_gpu_processor_id(
  45                                 unsigned int total_cu_count)
  46 {
  47         int current_id = gpu_processor_id_low;
  48
  49         gpu_processor_id_low += total_cu_count;
  50         return current_id;
  51 }
  52
  53
  54 static struct kfd_gpu_cache_info kaveri_cache_info[] = {
  55         {
  56                 /* TCP L1 Cache per CU */
  57                 .cache_size = 16,
  58                 .cache_level = 1,
  59                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  60                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  61                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  62                 .num_cu_shared = 1,
  63         },
  64         {
  65                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
  66                 .cache_size = 16,
  67                 .cache_level = 1,
  68                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  69                                 CRAT_CACHE_FLAGS_INST_CACHE |
  70                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  71                 .num_cu_shared = 2,
  72         },
  73         {
  74                 /* Scalar L1 Data Cache (in SQC module) per bank */
  75                 .cache_size = 8,
  76                 .cache_level = 1,
  77                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  78                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  79                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  80                 .num_cu_shared = 2,
  81         },
  82
  83         /* TODO: Add L2 Cache information */
  84 };
  85
  86
  87 static struct kfd_gpu_cache_info carrizo_cache_info[] = {
  88         {
  89                 /* TCP L1 Cache per CU */
  90                 .cache_size = 16,
  91                 .cache_level = 1,
  92                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
  93                                 CRAT_CACHE_FLAGS_DATA_CACHE |
  94                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
  95                 .num_cu_shared = 1,
  96         },
  97         {
  98                 /* Scalar L1 Instruction Cache (in SQC module) per bank */
  99                 .cache_size = 8,
 100                 .cache_level = 1,
 101                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 102                                 CRAT_CACHE_FLAGS_INST_CACHE |
 103                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 104                 .num_cu_shared = 4,
 105         },
 106         {
 107                 /* Scalar L1 Data Cache (in SQC module) per bank. */
 108                 .cache_size = 4,
 109                 .cache_level = 1,
 110                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 111                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 112                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 113                 .num_cu_shared = 4,
 114         },
 115
 116         /* TODO: Add L2 Cache information */
 117 };
 118
 119 #define hawaii_cache_info kaveri_cache_info
 120 #define tonga_cache_info carrizo_cache_info
 121 #define fiji_cache_info  carrizo_cache_info
 122 #define polaris10_cache_info carrizo_cache_info
 123 #define polaris11_cache_info carrizo_cache_info
 124 #define polaris12_cache_info carrizo_cache_info
 125 #define vegam_cache_info carrizo_cache_info
 126
/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In the future, the cache details must be checked and
 * updated for every new ASIC.
 */
 133
 134 static struct kfd_gpu_cache_info vega10_cache_info[] = {
 135         {
 136                 /* TCP L1 Cache per CU */
 137                 .cache_size = 16,
 138                 .cache_level = 1,
 139                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 140                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 141                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 142                 .num_cu_shared = 1,
 143         },
 144         {
 145                 /* Scalar L1 Instruction Cache per SQC */
 146                 .cache_size = 32,
 147                 .cache_level = 1,
 148                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 149                                 CRAT_CACHE_FLAGS_INST_CACHE |
 150                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 151                 .num_cu_shared = 3,
 152         },
 153         {
 154                 /* Scalar L1 Data Cache per SQC */
 155                 .cache_size = 16,
 156                 .cache_level = 1,
 157                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 158                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 159                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 160                 .num_cu_shared = 3,
 161         },
 162         {
 163                 /* L2 Data Cache per GPU (Total Tex Cache) */
 164                 .cache_size = 4096,
 165                 .cache_level = 2,
 166                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 167                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 168                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 169                 .num_cu_shared = 16,
 170         },
 171 };
 172
 173 static struct kfd_gpu_cache_info raven_cache_info[] = {
 174         {
 175                 /* TCP L1 Cache per CU */
 176                 .cache_size = 16,
 177                 .cache_level = 1,
 178                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 179                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 180                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 181                 .num_cu_shared = 1,
 182         },
 183         {
 184                 /* Scalar L1 Instruction Cache per SQC */
 185                 .cache_size = 32,
 186                 .cache_level = 1,
 187                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 188                                 CRAT_CACHE_FLAGS_INST_CACHE |
 189                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 190                 .num_cu_shared = 3,
 191         },
 192         {
 193                 /* Scalar L1 Data Cache per SQC */
 194                 .cache_size = 16,
 195                 .cache_level = 1,
 196                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 197                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 198                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 199                 .num_cu_shared = 3,
 200         },
 201         {
 202                 /* L2 Data Cache per GPU (Total Tex Cache) */
 203                 .cache_size = 1024,
 204                 .cache_level = 2,
 205                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 206                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 207                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 208                 .num_cu_shared = 11,
 209         },
 210 };
 211
 212 static struct kfd_gpu_cache_info renoir_cache_info[] = {
 213         {
 214                 /* TCP L1 Cache per CU */
 215                 .cache_size = 16,
 216                 .cache_level = 1,
 217                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 218                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 219                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 220                 .num_cu_shared = 1,
 221         },
 222         {
 223                 /* Scalar L1 Instruction Cache per SQC */
 224                 .cache_size = 32,
 225                 .cache_level = 1,
 226                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 227                                 CRAT_CACHE_FLAGS_INST_CACHE |
 228                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 229                 .num_cu_shared = 3,
 230         },
 231         {
 232                 /* Scalar L1 Data Cache per SQC */
 233                 .cache_size = 16,
 234                 .cache_level = 1,
 235                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 236                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 237                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 238                 .num_cu_shared = 3,
 239         },
 240         {
 241                 /* L2 Data Cache per GPU (Total Tex Cache) */
 242                 .cache_size = 1024,
 243                 .cache_level = 2,
 244                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 245                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 246                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 247                 .num_cu_shared = 8,
 248         },
 249 };
 250
 251 static struct kfd_gpu_cache_info vega12_cache_info[] = {
 252         {
 253                 /* TCP L1 Cache per CU */
 254                 .cache_size = 16,
 255                 .cache_level = 1,
 256                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 257                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 258                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 259                 .num_cu_shared = 1,
 260         },
 261         {
 262                 /* Scalar L1 Instruction Cache per SQC */
 263                 .cache_size = 32,
 264                 .cache_level = 1,
 265                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 266                                 CRAT_CACHE_FLAGS_INST_CACHE |
 267                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 268                 .num_cu_shared = 3,
 269         },
 270         {
 271                 /* Scalar L1 Data Cache per SQC */
 272                 .cache_size = 16,
 273                 .cache_level = 1,
 274                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 275                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 276                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 277                 .num_cu_shared = 3,
 278         },
 279         {
 280                 /* L2 Data Cache per GPU (Total Tex Cache) */
 281                 .cache_size = 2048,
 282                 .cache_level = 2,
 283                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 284                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 285                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 286                 .num_cu_shared = 5,
 287         },
 288 };
 289
 290 static struct kfd_gpu_cache_info vega20_cache_info[] = {
 291         {
 292                 /* TCP L1 Cache per CU */
 293                 .cache_size = 16,
 294                 .cache_level = 1,
 295                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 296                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 297                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 298                 .num_cu_shared = 1,
 299         },
 300         {
 301                 /* Scalar L1 Instruction Cache per SQC */
 302                 .cache_size = 32,
 303                 .cache_level = 1,
 304                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 305                                 CRAT_CACHE_FLAGS_INST_CACHE |
 306                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 307                 .num_cu_shared = 3,
 308         },
 309         {
 310                 /* Scalar L1 Data Cache per SQC */
 311                 .cache_size = 16,
 312                 .cache_level = 1,
 313                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 314                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 315                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 316                 .num_cu_shared = 3,
 317         },
 318         {
 319                 /* L2 Data Cache per GPU (Total Tex Cache) */
 320                 .cache_size = 8192,
 321                 .cache_level = 2,
 322                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 323                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 324                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 325                 .num_cu_shared = 16,
 326         },
 327 };
 328
 329 static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
 330         {
 331                 /* TCP L1 Cache per CU */
 332                 .cache_size = 16,
 333                 .cache_level = 1,
 334                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 335                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 336                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 337                 .num_cu_shared = 1,
 338         },
 339         {
 340                 /* Scalar L1 Instruction Cache per SQC */
 341                 .cache_size = 32,
 342                 .cache_level = 1,
 343                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 344                                 CRAT_CACHE_FLAGS_INST_CACHE |
 345                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 346                 .num_cu_shared = 2,
 347         },
 348         {
 349                 /* Scalar L1 Data Cache per SQC */
 350                 .cache_size = 16,
 351                 .cache_level = 1,
 352                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 353                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 354                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 355                 .num_cu_shared = 2,
 356         },
 357         {
 358                 /* L2 Data Cache per GPU (Total Tex Cache) */
 359                 .cache_size = 8192,
 360                 .cache_level = 2,
 361                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 362                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 363                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 364                 .num_cu_shared = 14,
 365         },
 366 };
 367
 368 static struct kfd_gpu_cache_info navi10_cache_info[] = {
 369         {
 370                 /* TCP L1 Cache per CU */
 371                 .cache_size = 16,
 372                 .cache_level = 1,
 373                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 374                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 375                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 376                 .num_cu_shared = 1,
 377         },
 378         {
 379                 /* Scalar L1 Instruction Cache per SQC */
 380                 .cache_size = 32,
 381                 .cache_level = 1,
 382                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 383                                 CRAT_CACHE_FLAGS_INST_CACHE |
 384                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 385                 .num_cu_shared = 2,
 386         },
 387         {
 388                 /* Scalar L1 Data Cache per SQC */
 389                 .cache_size = 16,
 390                 .cache_level = 1,
 391                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 392                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 393                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 394                 .num_cu_shared = 2,
 395         },
 396         {
 397                 /* GL1 Data Cache per SA */
 398                 .cache_size = 128,
 399                 .cache_level = 1,
 400                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 401                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 402                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 403                 .num_cu_shared = 10,
 404         },
 405         {
 406                 /* L2 Data Cache per GPU (Total Tex Cache) */
 407                 .cache_size = 4096,
 408                 .cache_level = 2,
 409                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 410                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 411                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 412                 .num_cu_shared = 10,
 413         },
 414 };
 415
 416 static struct kfd_gpu_cache_info vangogh_cache_info[] = {
 417         {
 418                 /* TCP L1 Cache per CU */
 419                 .cache_size = 16,
 420                 .cache_level = 1,
 421                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 422                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 423                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 424                 .num_cu_shared = 1,
 425         },
 426         {
 427                 /* Scalar L1 Instruction Cache per SQC */
 428                 .cache_size = 32,
 429                 .cache_level = 1,
 430                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 431                                 CRAT_CACHE_FLAGS_INST_CACHE |
 432                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 433                 .num_cu_shared = 2,
 434         },
 435         {
 436                 /* Scalar L1 Data Cache per SQC */
 437                 .cache_size = 16,
 438                 .cache_level = 1,
 439                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 440                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 441                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 442                 .num_cu_shared = 2,
 443         },
 444         {
 445                 /* GL1 Data Cache per SA */
 446                 .cache_size = 128,
 447                 .cache_level = 1,
 448                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 449                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 450                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 451                 .num_cu_shared = 8,
 452         },
 453         {
 454                 /* L2 Data Cache per GPU (Total Tex Cache) */
 455                 .cache_size = 1024,
 456                 .cache_level = 2,
 457                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 458                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 459                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 460                 .num_cu_shared = 8,
 461         },
 462 };
 463
 464 static struct kfd_gpu_cache_info navi14_cache_info[] = {
 465         {
 466                 /* TCP L1 Cache per CU */
 467                 .cache_size = 16,
 468                 .cache_level = 1,
 469                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 470                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 471                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 472                 .num_cu_shared = 1,
 473         },
 474         {
 475                 /* Scalar L1 Instruction Cache per SQC */
 476                 .cache_size = 32,
 477                 .cache_level = 1,
 478                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 479                                 CRAT_CACHE_FLAGS_INST_CACHE |
 480                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 481                 .num_cu_shared = 2,
 482         },
 483         {
 484                 /* Scalar L1 Data Cache per SQC */
 485                 .cache_size = 16,
 486                 .cache_level = 1,
 487                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 488                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 489                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 490                 .num_cu_shared = 2,
 491         },
 492         {
 493                 /* GL1 Data Cache per SA */
 494                 .cache_size = 128,
 495                 .cache_level = 1,
 496                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 497                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 498                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 499                 .num_cu_shared = 12,
 500         },
 501         {
 502                 /* L2 Data Cache per GPU (Total Tex Cache) */
 503                 .cache_size = 2048,
 504                 .cache_level = 2,
 505                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 506                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 507                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 508                 .num_cu_shared = 12,
 509         },
 510 };
 511
 512 static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
 513         {
 514                 /* TCP L1 Cache per CU */
 515                 .cache_size = 16,
 516                 .cache_level = 1,
 517                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 518                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 519                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 520                 .num_cu_shared = 1,
 521         },
 522         {
 523                 /* Scalar L1 Instruction Cache per SQC */
 524                 .cache_size = 32,
 525                 .cache_level = 1,
 526                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 527                                 CRAT_CACHE_FLAGS_INST_CACHE |
 528                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 529                 .num_cu_shared = 2,
 530         },
 531         {
 532                 /* Scalar L1 Data Cache per SQC */
 533                 .cache_size = 16,
 534                 .cache_level = 1,
 535                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 536                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 537                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 538                 .num_cu_shared = 2,
 539         },
 540         {
 541                 /* GL1 Data Cache per SA */
 542                 .cache_size = 128,
 543                 .cache_level = 1,
 544                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 545                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 546                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 547                 .num_cu_shared = 10,
 548         },
 549         {
 550                 /* L2 Data Cache per GPU (Total Tex Cache) */
 551                 .cache_size = 4096,
 552                 .cache_level = 2,
 553                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 554                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 555                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 556                 .num_cu_shared = 10,
 557         },
 558         {
 559                 /* L3 Data Cache per GPU */
 560                 .cache_size = 128*1024,
 561                 .cache_level = 3,
 562                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 563                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 564                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 565                 .num_cu_shared = 10,
 566         },
 567 };
 568
 569 static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
 570         {
 571                 /* TCP L1 Cache per CU */
 572                 .cache_size = 16,
 573                 .cache_level = 1,
 574                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 575                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 576                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 577                 .num_cu_shared = 1,
 578         },
 579         {
 580                 /* Scalar L1 Instruction Cache per SQC */
 581                 .cache_size = 32,
 582                 .cache_level = 1,
 583                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 584                                 CRAT_CACHE_FLAGS_INST_CACHE |
 585                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 586                 .num_cu_shared = 2,
 587         },
 588         {
 589                 /* Scalar L1 Data Cache per SQC */
 590                 .cache_size = 16,
 591                 .cache_level = 1,
 592                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 593                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 594                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 595                 .num_cu_shared = 2,
 596         },
 597         {
 598                 /* GL1 Data Cache per SA */
 599                 .cache_size = 128,
 600                 .cache_level = 1,
 601                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 602                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 603                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 604                 .num_cu_shared = 10,
 605         },
 606         {
 607                 /* L2 Data Cache per GPU (Total Tex Cache) */
 608                 .cache_size = 3072,
 609                 .cache_level = 2,
 610                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 611                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 612                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 613                 .num_cu_shared = 10,
 614         },
 615         {
 616                 /* L3 Data Cache per GPU */
 617                 .cache_size = 96*1024,
 618                 .cache_level = 3,
 619                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 620                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 621                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 622                 .num_cu_shared = 10,
 623         },
 624 };
 625
 626 static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
 627         {
 628                 /* TCP L1 Cache per CU */
 629                 .cache_size = 16,
 630                 .cache_level = 1,
 631                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 632                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 633                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 634                 .num_cu_shared = 1,
 635         },
 636         {
 637                 /* Scalar L1 Instruction Cache per SQC */
 638                 .cache_size = 32,
 639                 .cache_level = 1,
 640                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 641                                 CRAT_CACHE_FLAGS_INST_CACHE |
 642                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 643                 .num_cu_shared = 2,
 644         },
 645         {
 646                 /* Scalar L1 Data Cache per SQC */
 647                 .cache_size = 16,
 648                 .cache_level = 1,
 649                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 650                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 651                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 652                 .num_cu_shared = 2,
 653         },
 654         {
 655                 /* GL1 Data Cache per SA */
 656                 .cache_size = 128,
 657                 .cache_level = 1,
 658                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 659                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 660                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 661                 .num_cu_shared = 8,
 662         },
 663         {
 664                 /* L2 Data Cache per GPU (Total Tex Cache) */
 665                 .cache_size = 2048,
 666                 .cache_level = 2,
 667                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 668                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 669                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 670                 .num_cu_shared = 8,
 671         },
 672         {
 673                 /* L3 Data Cache per GPU */
 674                 .cache_size = 32*1024,
 675                 .cache_level = 3,
 676                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 677                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 678                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 679                 .num_cu_shared = 8,
 680         },
 681 };
 682
 683 static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
 684         {
 685                 /* TCP L1 Cache per CU */
 686                 .cache_size = 16,
 687                 .cache_level = 1,
 688                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 689                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 690                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 691                 .num_cu_shared = 1,
 692         },
 693         {
 694                 /* Scalar L1 Instruction Cache per SQC */
 695                 .cache_size = 32,
 696                 .cache_level = 1,
 697                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 698                                 CRAT_CACHE_FLAGS_INST_CACHE |
 699                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 700                 .num_cu_shared = 2,
 701         },
 702         {
 703                 /* Scalar L1 Data Cache per SQC */
 704                 .cache_size = 16,
 705                 .cache_level = 1,
 706                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 707                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 708                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 709                 .num_cu_shared = 2,
 710         },
 711         {
 712                 /* GL1 Data Cache per SA */
 713                 .cache_size = 128,
 714                 .cache_level = 1,
 715                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 716                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 717                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 718                 .num_cu_shared = 8,
 719         },
 720         {
 721                 /* L2 Data Cache per GPU (Total Tex Cache) */
 722                 .cache_size = 1024,
 723                 .cache_level = 2,
 724                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 725                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 726                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 727                 .num_cu_shared = 8,
 728         },
 729         {
 730                 /* L3 Data Cache per GPU */
 731                 .cache_size = 16*1024,
 732                 .cache_level = 3,
 733                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 734                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 735                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 736                 .num_cu_shared = 8,
 737         },
 738 };
 739
 740 static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
 741         {
 742                 /* TCP L1 Cache per CU */
 743                 .cache_size = 16,
 744                 .cache_level = 1,
 745                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 746                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 747                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 748                 .num_cu_shared = 1,
 749         },
 750         {
 751                 /* Scalar L1 Instruction Cache per SQC */
 752                 .cache_size = 32,
 753                 .cache_level = 1,
 754                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 755                                 CRAT_CACHE_FLAGS_INST_CACHE |
 756                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 757                 .num_cu_shared = 2,
 758         },
 759         {
 760                 /* Scalar L1 Data Cache per SQC */
 761                 .cache_size = 16,
 762                 .cache_level = 1,
 763                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 764                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 765                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 766                 .num_cu_shared = 2,
 767         },
 768         {
 769                 /* GL1 Data Cache per SA */
 770                 .cache_size = 128,
 771                 .cache_level = 1,
 772                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 773                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 774                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 775                 .num_cu_shared = 6,
 776         },
 777         {
 778                 /* L2 Data Cache per GPU (Total Tex Cache) */
 779                 .cache_size = 2048,
 780                 .cache_level = 2,
 781                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 782                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 783                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 784                 .num_cu_shared = 6,
 785         },
 786 };
 787
 788 static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
 789         {
 790                 /* TCP L1 Cache per CU */
 791                 .cache_size = 16,
 792                 .cache_level = 1,
 793                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 794                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 795                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 796                 .num_cu_shared = 1,
 797         },
 798         {
 799                 /* Scalar L1 Instruction Cache per SQC */
 800                 .cache_size = 32,
 801                 .cache_level = 1,
 802                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 803                                 CRAT_CACHE_FLAGS_INST_CACHE |
 804                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 805                 .num_cu_shared = 2,
 806         },
 807         {
 808                 /* Scalar L1 Data Cache per SQC */
 809                 .cache_size = 16,
 810                 .cache_level = 1,
 811                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 812                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 813                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 814                 .num_cu_shared = 2,
 815         },
 816         {
 817                 /* GL1 Data Cache per SA */
 818                 .cache_size = 128,
 819                 .cache_level = 1,
 820                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 821                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 822                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 823                 .num_cu_shared = 2,
 824         },
 825         {
 826                 /* L2 Data Cache per GPU (Total Tex Cache) */
 827                 .cache_size = 256,
 828                 .cache_level = 2,
 829                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 830                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 831                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 832                 .num_cu_shared = 2,
 833         },
 834 };
 835
 836 static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
 837         {
 838                 /* TCP L1 Cache per CU */
 839                 .cache_size = 16,
 840                 .cache_level = 1,
 841                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 842                           CRAT_CACHE_FLAGS_DATA_CACHE |
 843                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 844                 .num_cu_shared = 1,
 845         },
 846         {
 847                 /* Scalar L1 Instruction Cache per SQC */
 848                 .cache_size = 32,
 849                 .cache_level = 1,
 850                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 851                           CRAT_CACHE_FLAGS_INST_CACHE |
 852                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 853                 .num_cu_shared = 2,
 854         },
 855         {
 856                 /* Scalar L1 Data Cache per SQC */
 857                 .cache_size = 16,
 858                 .cache_level = 1,
 859                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 860                           CRAT_CACHE_FLAGS_DATA_CACHE |
 861                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 862                 .num_cu_shared = 2,
 863         },
 864         {
 865                 /* GL1 Data Cache per SA */
 866                 .cache_size = 128,
 867                 .cache_level = 1,
 868                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 869                           CRAT_CACHE_FLAGS_DATA_CACHE |
 870                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 871                 .num_cu_shared = 2,
 872         },
 873         {
 874                 /* L2 Data Cache per GPU (Total Tex Cache) */
 875                 .cache_size = 256,
 876                 .cache_level = 2,
 877                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 878                           CRAT_CACHE_FLAGS_DATA_CACHE |
 879                           CRAT_CACHE_FLAGS_SIMD_CACHE),
 880                 .num_cu_shared = 2,
 881         },
 882 };
 883
 884 static struct kfd_gpu_cache_info dummy_cache_info[] = {
 885         {
 886                 /* TCP L1 Cache per CU */
 887                 .cache_size = 16,
 888                 .cache_level = 1,
 889                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 890                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 891                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 892                 .num_cu_shared = 1,
 893         },
 894         {
 895                 /* Scalar L1 Instruction Cache per SQC */
 896                 .cache_size = 32,
 897                 .cache_level = 1,
 898                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 899                                 CRAT_CACHE_FLAGS_INST_CACHE |
 900                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 901                 .num_cu_shared = 2,
 902         },
 903         {
 904                 /* Scalar L1 Data Cache per SQC */
 905                 .cache_size = 16,
 906                 .cache_level = 1,
 907                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 908                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 909                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 910                 .num_cu_shared = 2,
 911         },
 912         {
 913                 /* GL1 Data Cache per SA */
 914                 .cache_size = 128,
 915                 .cache_level = 1,
 916                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 917                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 918                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 919                 .num_cu_shared = 6,
 920         },
 921         {
 922                 /* L2 Data Cache per GPU (Total Tex Cache) */
 923                 .cache_size = 2048,
 924                 .cache_level = 2,
 925                 .flags = (CRAT_CACHE_FLAGS_ENABLED |
 926                                 CRAT_CACHE_FLAGS_DATA_CACHE |
 927                                 CRAT_CACHE_FLAGS_SIMD_CACHE),
 928                 .num_cu_shared = 6,
 929         },
 930 };
 931
 932 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 933                 struct crat_subtype_computeunit *cu)
 934 {
 935         dev->node_props.cpu_cores_count = cu->num_cpu_cores;
 936         dev->node_props.cpu_core_id_base = cu->processor_id_low;
 937         if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
 938                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
 939
 940         pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
 941                         cu->processor_id_low);
 942 }
 943
 944 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
 945                 struct crat_subtype_computeunit *cu)
 946 {
 947         dev->node_props.simd_id_base = cu->processor_id_low;
 948         dev->node_props.simd_count = cu->num_simd_cores;
 949         dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
 950         dev->node_props.max_waves_per_simd = cu->max_waves_simd;
 951         dev->node_props.wave_front_size = cu->wave_front_size;
 952         dev->node_props.array_count = cu->array_count;
 953         dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
 954         dev->node_props.simd_per_cu = cu->num_simd_per_cu;
 955         dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
 956         if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
 957                 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
 958         pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
 959 }
 960
 961 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
 962  * topology device present in the device_list
 963  */
 964 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
 965                                 struct list_head *device_list)
 966 {
 967         struct kfd_topology_device *dev;
 968
 969         pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
 970                         cu->proximity_domain, cu->hsa_capability);
 971         list_for_each_entry(dev, device_list, list) {
 972                 if (cu->proximity_domain == dev->proximity_domain) {
 973                         if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
 974                                 kfd_populated_cu_info_cpu(dev, cu);
 975
 976                         if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
 977                                 kfd_populated_cu_info_gpu(dev, cu);
 978                         break;
 979                 }
 980         }
 981
 982         return 0;
 983 }
 984
 985 static struct kfd_mem_properties *
 986 find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
 987                 struct kfd_topology_device *dev)
 988 {
 989         struct kfd_mem_properties *props;
 990
 991         list_for_each_entry(props, &dev->mem_props, list) {
 992                 if (props->heap_type == heap_type
 993                                 && props->flags == flags
 994                                 && props->width == width)
 995                         return props;
 996         }
 997
 998         return NULL;
 999 }
1000 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1001  * topology device present in the device_list
1002  */
1003 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
1004                                 struct list_head *device_list)
1005 {
1006         struct kfd_mem_properties *props;
1007         struct kfd_topology_device *dev;
1008         uint32_t heap_type;
1009         uint64_t size_in_bytes;
1010         uint32_t flags = 0;
1011         uint32_t width;
1012
1013         pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
1014                         mem->proximity_domain);
1015         list_for_each_entry(dev, device_list, list) {
1016                 if (mem->proximity_domain == dev->proximity_domain) {
1017                         /* We're on GPU node */
1018                         if (dev->node_props.cpu_cores_count == 0) {
1019                                 /* APU */
1020                                 if (mem->visibility_type == 0)
1021                                         heap_type =
1022                                                 HSA_MEM_HEAP_TYPE_FB_PRIVATE;
1023                                 /* dGPU */
1024                                 else
1025                                         heap_type = mem->visibility_type;
1026                         } else
1027                                 heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
1028
1029                         if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
1030                                 flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
1031                         if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
1032                                 flags |= HSA_MEM_FLAGS_NON_VOLATILE;
1033
1034                         size_in_bytes =
1035                                 ((uint64_t)mem->length_high << 32) +
1036                                                         mem->length_low;
1037                         width = mem->width;
1038
1039                         /* Multiple banks of the same type are aggregated into
1040                          * one. User mode doesn't care about multiple physical
1041                          * memory segments. It's managed as a single virtual
1042                          * heap for user mode.
1043                          */
1044                         props = find_subtype_mem(heap_type, flags, width, dev);
1045                         if (props) {
1046                                 props->size_in_bytes += size_in_bytes;
1047                                 break;
1048                         }
1049
1050                         props = kfd_alloc_struct(props);
1051                         if (!props)
1052                                 return -ENOMEM;
1053
1054                         props->heap_type = heap_type;
1055                         props->flags = flags;
1056                         props->size_in_bytes = size_in_bytes;
1057                         props->width = width;
1058
1059                         dev->node_props.mem_banks_count++;
1060                         list_add_tail(&props->list, &dev->mem_props);
1061
1062                         break;
1063                 }
1064         }
1065
1066         return 0;
1067 }
1068
1069 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1070  * topology device present in the device_list
1071  */
1072 static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
1073                         struct list_head *device_list)
1074 {
1075         struct kfd_cache_properties *props;
1076         struct kfd_topology_device *dev;
1077         uint32_t id;
1078         uint32_t total_num_of_cu;
1079
1080         id = cache->processor_id_low;
1081
1082         pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
1083         list_for_each_entry(dev, device_list, list) {
1084                 total_num_of_cu = (dev->node_props.array_count *
1085                                         dev->node_props.cu_per_simd_array);
1086
1087                 /* Cache infomration in CRAT doesn't have proximity_domain
1088                  * information as it is associated with a CPU core or GPU
1089                  * Compute Unit. So map the cache using CPU core Id or SIMD
1090                  * (GPU) ID.
1091                  * TODO: This works because currently we can safely assume that
1092                  *  Compute Units are parsed before caches are parsed. In
1093                  *  future, remove this dependency
1094                  */
1095                 if ((id >= dev->node_props.cpu_core_id_base &&
1096                         id <= dev->node_props.cpu_core_id_base +
1097                                 dev->node_props.cpu_cores_count) ||
1098                         (id >= dev->node_props.simd_id_base &&
1099                         id < dev->node_props.simd_id_base +
1100                                 total_num_of_cu)) {
1101                         props = kfd_alloc_struct(props);
1102                         if (!props)
1103                                 return -ENOMEM;
1104
1105                         props->processor_id_low = id;
1106                         props->cache_level = cache->cache_level;
1107                         props->cache_size = cache->cache_size;
1108                         props->cacheline_size = cache->cache_line_size;
1109                         props->cachelines_per_tag = cache->lines_per_tag;
1110                         props->cache_assoc = cache->associativity;
1111                         props->cache_latency = cache->cache_latency;
1112
1113                         memcpy(props->sibling_map, cache->sibling_map,
1114                                         CRAT_SIBLINGMAP_SIZE);
1115
1116                         /* set the sibling_map_size as 32 for CRAT from ACPI */
1117                         props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
1118
1119                         if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
1120                                 props->cache_type |= HSA_CACHE_TYPE_DATA;
1121                         if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
1122                                 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
1123                         if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
1124                                 props->cache_type |= HSA_CACHE_TYPE_CPU;
1125                         if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
1126                                 props->cache_type |= HSA_CACHE_TYPE_HSACU;
1127
1128                         dev->node_props.caches_count++;
1129                         list_add_tail(&props->list, &dev->cache_props);
1130
1131                         break;
1132                 }
1133         }
1134
1135         return 0;
1136 }
1137
1138 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1139  * topology device present in the device_list
1140  */
1141 static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
1142                                         struct list_head *device_list)
1143 {
1144         struct kfd_iolink_properties *props = NULL, *props2;
1145         struct kfd_topology_device *dev, *to_dev;
1146         uint32_t id_from;
1147         uint32_t id_to;
1148
1149         id_from = iolink->proximity_domain_from;
1150         id_to = iolink->proximity_domain_to;
1151
1152         pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
1153                         id_from, id_to);
1154         list_for_each_entry(dev, device_list, list) {
1155                 if (id_from == dev->proximity_domain) {
1156                         props = kfd_alloc_struct(props);
1157                         if (!props)
1158                                 return -ENOMEM;
1159
1160                         props->node_from = id_from;
1161                         props->node_to = id_to;
1162                         props->ver_maj = iolink->version_major;
1163                         props->ver_min = iolink->version_minor;
1164                         props->iolink_type = iolink->io_interface_type;
1165
1166                         if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
1167                                 props->weight = 20;
1168                         else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
1169                                 props->weight = iolink->weight_xgmi;
1170                         else
1171                                 props->weight = node_distance(id_from, id_to);
1172
1173                         props->min_latency = iolink->minimum_latency;
1174                         props->max_latency = iolink->maximum_latency;
1175                         props->min_bandwidth = iolink->minimum_bandwidth_mbs;
1176                         props->max_bandwidth = iolink->maximum_bandwidth_mbs;
1177                         props->rec_transfer_size =
1178                                         iolink->recommended_transfer_size;
1179
1180                         dev->node_props.io_links_count++;
1181                         list_add_tail(&props->list, &dev->io_link_props);
1182                         break;
1183                 }
1184         }
1185
1186         /* CPU topology is created before GPUs are detected, so CPU->GPU
1187          * links are not built at that time. If a PCIe type is discovered, it
1188          * means a GPU is detected and we are adding GPU->CPU to the topology.
1189          * At this time, also add the corresponded CPU->GPU link if GPU
1190          * is large bar.
1191          * For xGMI, we only added the link with one direction in the crat
1192          * table, add corresponded reversed direction link now.
1193          */
1194         if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
1195                 to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
1196                 if (!to_dev)
1197                         return -ENODEV;
1198                 /* same everything but the other direction */
1199                 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
1200                 if (!props2)
1201                         return -ENOMEM;
1202
1203                 props2->node_from = id_to;
1204                 props2->node_to = id_from;
1205                 props2->kobj = NULL;
1206                 to_dev->node_props.io_links_count++;
1207                 list_add_tail(&props2->list, &to_dev->io_link_props);
1208         }
1209
1210         return 0;
1211 }
1212
1213 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214  * present in the device_list
1215  *      @sub_type_hdr - subtype section of crat_image
1216  *      @device_list - list of topology devices present in this crat_image
1217  */
1218 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
1219                                 struct list_head *device_list)
1220 {
1221         struct crat_subtype_computeunit *cu;
1222         struct crat_subtype_memory *mem;
1223         struct crat_subtype_cache *cache;
1224         struct crat_subtype_iolink *iolink;
1225         int ret = 0;
1226
1227         switch (sub_type_hdr->type) {
1228         case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
1229                 cu = (struct crat_subtype_computeunit *)sub_type_hdr;
1230                 ret = kfd_parse_subtype_cu(cu, device_list);
1231                 break;
1232         case CRAT_SUBTYPE_MEMORY_AFFINITY:
1233                 mem = (struct crat_subtype_memory *)sub_type_hdr;
1234                 ret = kfd_parse_subtype_mem(mem, device_list);
1235                 break;
1236         case CRAT_SUBTYPE_CACHE_AFFINITY:
1237                 cache = (struct crat_subtype_cache *)sub_type_hdr;
1238                 ret = kfd_parse_subtype_cache(cache, device_list);
1239                 break;
1240         case CRAT_SUBTYPE_TLB_AFFINITY:
1241                 /*
1242                  * For now, nothing to do here
1243                  */
1244                 pr_debug("Found TLB entry in CRAT table (not processing)\n");
1245                 break;
1246         case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
1247                 /*
1248                  * For now, nothing to do here
1249                  */
1250                 pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
1251                 break;
1252         case CRAT_SUBTYPE_IOLINK_AFFINITY:
1253                 iolink = (struct crat_subtype_iolink *)sub_type_hdr;
1254                 ret = kfd_parse_subtype_iolink(iolink, device_list);
1255                 break;
1256         default:
1257                 pr_warn("Unknown subtype %d in CRAT\n",
1258                                 sub_type_hdr->type);
1259         }
1260
1261         return ret;
1262 }
1263
1264 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1265  * create a kfd_topology_device and add in to device_list. Also parse
1266  * CRAT subtypes and attach it to appropriate kfd_topology_device
1267  *      @crat_image - input image containing CRAT
1268  *      @device_list - [OUT] list of kfd_topology_device generated after
1269  *                     parsing crat_image
1270  *      @proximity_domain - Proximity domain of the first device in the table
1271  *
1272  *      Return - 0 if successful else -ve value
1273  */
1274 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
1275                          uint32_t proximity_domain)
1276 {
1277         struct kfd_topology_device *top_dev = NULL;
1278         struct crat_subtype_generic *sub_type_hdr;
1279         uint16_t node_id;
1280         int ret = 0;
1281         struct crat_header *crat_table = (struct crat_header *)crat_image;
1282         uint16_t num_nodes;
1283         uint32_t image_len;
1284
1285         if (!crat_image)
1286                 return -EINVAL;
1287
1288         if (!list_empty(device_list)) {
1289                 pr_warn("Error device list should be empty\n");
1290                 return -EINVAL;
1291         }
1292
1293         num_nodes = crat_table->num_domains;
1294         image_len = crat_table->length;
1295
1296         pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
1297
1298         for (node_id = 0; node_id < num_nodes; node_id++) {
1299                 top_dev = kfd_create_topology_device(device_list);
1300                 if (!top_dev)
1301                         break;
1302                 top_dev->proximity_domain = proximity_domain++;
1303         }
1304
1305         if (!top_dev) {
1306                 ret = -ENOMEM;
1307                 goto err;
1308         }
1309
1310         memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
1311         memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
1312                         CRAT_OEMTABLEID_LENGTH);
1313         top_dev->oem_revision = crat_table->oem_revision;
1314
1315         sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1316         while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
1317                         ((char *)crat_image) + image_len) {
1318                 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
1319                         ret = kfd_parse_subtype(sub_type_hdr, device_list);
1320                         if (ret)
1321                                 break;
1322                 }
1323
1324                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1325                                 sub_type_hdr->length);
1326         }
1327
1328 err:
1329         if (ret)
1330                 kfd_release_topology_device_list(device_list);
1331
1332         return ret;
1333 }
1334
1335
1336 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
1337                                                    struct kfd_gpu_cache_info *pcache_info)
1338 {
1339         struct amdgpu_device *adev = kdev->adev;
1340         int i = 0;
1341
1342         /* TCP L1 Cache per CU */
1343         if (adev->gfx.config.gc_tcp_l1_size) {
1344                 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
1345                 pcache_info[i].cache_level = 1;
1346                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1347                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1348                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1349                 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
1350                 i++;
1351         }
1352         /* Scalar L1 Instruction Cache per SQC */
1353         if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
1354                 pcache_info[i].cache_size =
1355                         adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
1356                 pcache_info[i].cache_level = 1;
1357                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1358                                         CRAT_CACHE_FLAGS_INST_CACHE |
1359                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1360                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1361                 i++;
1362         }
1363         /* Scalar L1 Data Cache per SQC */
1364         if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
1365                 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
1366                 pcache_info[i].cache_level = 1;
1367                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1368                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1369                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1370                 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
1371                 i++;
1372         }
1373         /* GL1 Data Cache per SA */
1374         if (adev->gfx.config.gc_gl1c_per_sa &&
1375             adev->gfx.config.gc_gl1c_size_per_instance) {
1376                 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
1377                         adev->gfx.config.gc_gl1c_size_per_instance;
1378                 pcache_info[i].cache_level = 1;
1379                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1380                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1381                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1382                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1383                 i++;
1384         }
1385         /* L2 Data Cache per GPU (Total Tex Cache) */
1386         if (adev->gfx.config.gc_gl2c_per_gpu) {
1387                 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
1388                 pcache_info[i].cache_level = 2;
1389                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1390                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1391                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1392                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1393                 i++;
1394         }
1395         /* L3 Data Cache per GPU */
1396         if (adev->gmc.mall_size) {
1397                 pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
1398                 pcache_info[i].cache_level = 3;
1399                 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
1400                                         CRAT_CACHE_FLAGS_DATA_CACHE |
1401                                         CRAT_CACHE_FLAGS_SIMD_CACHE);
1402                 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1403                 i++;
1404         }
1405         return i;
1406 }
1407
1408 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
1409 {
1410         int num_of_cache_types = 0;
1411
1412         switch (kdev->adev->asic_type) {
1413         case CHIP_KAVERI:
1414                 *pcache_info = kaveri_cache_info;
1415                 num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
1416                 break;
1417         case CHIP_HAWAII:
1418                 *pcache_info = hawaii_cache_info;
1419                 num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
1420                 break;
1421         case CHIP_CARRIZO:
1422                 *pcache_info = carrizo_cache_info;
1423                 num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
1424                 break;
1425         case CHIP_TONGA:
1426                 *pcache_info = tonga_cache_info;
1427                 num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
1428                 break;
1429         case CHIP_FIJI:
1430                 *pcache_info = fiji_cache_info;
1431                 num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
1432                 break;
1433         case CHIP_POLARIS10:
1434                 *pcache_info = polaris10_cache_info;
1435                 num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
1436                 break;
1437         case CHIP_POLARIS11:
1438                 *pcache_info = polaris11_cache_info;
1439                 num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
1440                 break;
1441         case CHIP_POLARIS12:
1442                 *pcache_info = polaris12_cache_info;
1443                 num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
1444                 break;
1445         case CHIP_VEGAM:
1446                 *pcache_info = vegam_cache_info;
1447                 num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
1448                 break;
1449         default:
1450                 switch (KFD_GC_VERSION(kdev)) {
1451                 case IP_VERSION(9, 0, 1):
1452                         *pcache_info = vega10_cache_info;
1453                         num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
1454                         break;
1455                 case IP_VERSION(9, 2, 1):
1456                         *pcache_info = vega12_cache_info;
1457                         num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
1458                         break;
1459                 case IP_VERSION(9, 4, 0):
1460                 case IP_VERSION(9, 4, 1):
1461                         *pcache_info = vega20_cache_info;
1462                         num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
1463                         break;
1464                 case IP_VERSION(9, 4, 2):
1465                 case IP_VERSION(9, 4, 3):
1466                         *pcache_info = aldebaran_cache_info;
1467                         num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
1468                         break;
1469                 case IP_VERSION(9, 1, 0):
1470                 case IP_VERSION(9, 2, 2):
1471                         *pcache_info = raven_cache_info;
1472                         num_of_cache_types = ARRAY_SIZE(raven_cache_info);
1473                         break;
1474                 case IP_VERSION(9, 3, 0):
1475                         *pcache_info = renoir_cache_info;
1476                         num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
1477                         break;
1478                 case IP_VERSION(10, 1, 10):
1479                 case IP_VERSION(10, 1, 2):
1480                 case IP_VERSION(10, 1, 3):
1481                 case IP_VERSION(10, 1, 4):
1482                         *pcache_info = navi10_cache_info;
1483                         num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
1484                         break;
1485                 case IP_VERSION(10, 1, 1):
1486                         *pcache_info = navi14_cache_info;
1487                         num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
1488                         break;
1489                 case IP_VERSION(10, 3, 0):
1490                         *pcache_info = sienna_cichlid_cache_info;
1491                         num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
1492                         break;
1493                 case IP_VERSION(10, 3, 2):
1494                         *pcache_info = navy_flounder_cache_info;
1495                         num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
1496                         break;
1497                 case IP_VERSION(10, 3, 4):
1498                         *pcache_info = dimgrey_cavefish_cache_info;
1499                         num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
1500                         break;
1501                 case IP_VERSION(10, 3, 1):
1502                         *pcache_info = vangogh_cache_info;
1503                         num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
1504                         break;
1505                 case IP_VERSION(10, 3, 5):
1506                         *pcache_info = beige_goby_cache_info;
1507                         num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
1508                         break;
1509                 case IP_VERSION(10, 3, 3):
1510                         *pcache_info = yellow_carp_cache_info;
1511                         num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
1512                         break;
1513                 case IP_VERSION(10, 3, 6):
1514                         *pcache_info = gc_10_3_6_cache_info;
1515                         num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
1516                         break;
1517                 case IP_VERSION(10, 3, 7):
1518                         *pcache_info = gfx1037_cache_info;
1519                         num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
1520                         break;
1521                 case IP_VERSION(11, 0, 0):
1522                 case IP_VERSION(11, 0, 1):
1523                 case IP_VERSION(11, 0, 2):
1524                 case IP_VERSION(11, 0, 3):
1525                 case IP_VERSION(11, 0, 4):
1526                         num_of_cache_types =
1527                                 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
1528                         break;
1529                 default:
1530                         *pcache_info = dummy_cache_info;
1531                         num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
1532                         pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
1533                         break;
1534                 }
1535         }
1536         return num_of_cache_types;
1537 }
1538
1539 static bool kfd_ignore_crat(void)
1540 {
1541         bool ret;
1542
1543         if (ignore_crat)
1544                 return true;
1545
1546 #ifndef KFD_SUPPORT_IOMMU_V2
1547         ret = true;
1548 #else
1549         ret = false;
1550 #endif
1551
1552         return ret;
1553 }
1554
1555 /*
1556  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
1557  * copies CRAT from ACPI (if available).
1558  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
1559  *
1560  *      @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
1561  *                   crat_image will be NULL
1562  *      @size: [OUT] size of crat_image
1563  *
1564  *      Return 0 if successful else return error code
1565  */
1566 int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
1567 {
1568         struct acpi_table_header *crat_table;
1569         acpi_status status;
1570         void *pcrat_image;
1571         int rc = 0;
1572
1573         if (!crat_image)
1574                 return -EINVAL;
1575
1576         *crat_image = NULL;
1577
1578         if (kfd_ignore_crat()) {
1579                 pr_info("CRAT table disabled by module option\n");
1580                 return -ENODATA;
1581         }
1582
1583         /* Fetch the CRAT table from ACPI */
1584         status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
1585         if (status == AE_NOT_FOUND) {
1586                 pr_info("CRAT table not found\n");
1587                 return -ENODATA;
1588         } else if (ACPI_FAILURE(status)) {
1589                 const char *err = acpi_format_exception(status);
1590
1591                 pr_err("CRAT table error: %s\n", err);
1592                 return -EINVAL;
1593         }
1594
1595         pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
1596         if (!pcrat_image) {
1597                 rc = -ENOMEM;
1598                 goto out;
1599         }
1600
1601         memcpy(pcrat_image, crat_table, crat_table->length);
1602         *crat_image = pcrat_image;
1603         *size = crat_table->length;
1604 out:
1605         acpi_put_table(crat_table);
1606         return rc;
1607 }
1608
/* Memory required to create Virtual CRAT.
 * Since there is no easy way to predict the amount of memory required, the
 * following fixed amount is allocated for the GPU Virtual CRAT. It is
 * expected to cover all known configurations. To be safe, the code that
 * fills the image also checks the remaining space so that it never writes
 * past the end of the allocation.
 */
#define VCRAT_SIZE_FOR_GPU      (4 * PAGE_SIZE)
1616
1617 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1618  *
1619  *      @numa_node_id: CPU NUMA node id
1620  *      @avail_size: Available size in the memory
1621  *      @sub_type_hdr: Memory into which compute info will be filled in
1622  *
1623  *      Return 0 if successful else return -ve value
1624  */
1625 static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
1626                                 int proximity_domain,
1627                                 struct crat_subtype_computeunit *sub_type_hdr)
1628 {
1629         const struct cpumask *cpumask;
1630
1631         *avail_size -= sizeof(struct crat_subtype_computeunit);
1632         if (*avail_size < 0)
1633                 return -ENOMEM;
1634
1635         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
1636
1637         /* Fill in subtype header data */
1638         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
1639         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
1640         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1641
1642         cpumask = cpumask_of_node(numa_node_id);
1643
1644         /* Fill in CU data */
1645         sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
1646         sub_type_hdr->proximity_domain = proximity_domain;
1647         sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
1648         if (sub_type_hdr->processor_id_low == -1)
1649                 return -EINVAL;
1650
1651         sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
1652
1653         return 0;
1654 }
1655
1656 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1657  *
1658  *      @numa_node_id: CPU NUMA node id
1659  *      @avail_size: Available size in the memory
1660  *      @sub_type_hdr: Memory into which compute info will be filled in
1661  *
1662  *      Return 0 if successful else return -ve value
1663  */
1664 static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
1665                         int proximity_domain,
1666                         struct crat_subtype_memory *sub_type_hdr)
1667 {
1668         uint64_t mem_in_bytes = 0;
1669         pg_data_t *pgdat;
1670         int zone_type;
1671
1672         *avail_size -= sizeof(struct crat_subtype_memory);
1673         if (*avail_size < 0)
1674                 return -ENOMEM;
1675
1676         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1677
1678         /* Fill in subtype header data */
1679         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1680         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1681         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1682
1683         /* Fill in Memory Subunit data */
1684
1685         /* Unlike si_meminfo, si_meminfo_node is not exported. So
1686          * the following lines are duplicated from si_meminfo_node
1687          * function
1688          */
1689         pgdat = NODE_DATA(numa_node_id);
1690         for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
1691                 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
1692         mem_in_bytes <<= PAGE_SHIFT;
1693
1694         sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
1695         sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
1696         sub_type_hdr->proximity_domain = proximity_domain;
1697
1698         return 0;
1699 }
1700
#ifdef CONFIG_X86_64
/* kfd_fill_iolink_info_for_cpu - Write io links from a CPU node to its peers
 *
 *      @numa_node_id: CPU NUMA node id the links start from
 *      @avail_size: [IN/OUT] bytes left in the image; reduced by the size
 *              of one io link entry per peer before the space check
 *      @num_entries: [OUT] number of io link entries written
 *      @sub_type_hdr: Memory into which consecutive io link entries are
 *              filled in, one per other online node
 *
 *      Return 0 if successful, -ENOMEM if an entry does not fit
 */
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
                                uint32_t *num_entries,
                                struct crat_subtype_iolink *sub_type_hdr)
{
        struct crat_subtype_iolink *link = sub_type_hdr;
        /* The vendor of CPU 0 decides the interface type of every link. */
        const uint8_t link_type =
                cpu_data(0).x86_vendor == X86_VENDOR_AMD ?
                        CRAT_IOLINK_TYPE_HYPERTRANSPORT :
                        CRAT_IOLINK_TYPE_QPI_1_1;
        int peer;

        *num_entries = 0;

        /* Create IO links from this node to other CPU nodes */
        for_each_online_node(peer) {
                /* No link from the node to itself */
                if (peer == numa_node_id)
                        continue;

                *avail_size -= sizeof(*link);
                if (*avail_size < 0)
                        return -ENOMEM;

                memset(link, 0, sizeof(*link));

                /* Subtype header */
                link->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
                link->length = sizeof(*link);
                link->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

                /* IO link data */
                link->proximity_domain_from = numa_node_id;
                link->proximity_domain_to = peer;
                link->io_interface_type = link_type;

                link++;
                (*num_entries)++;
        }

        return 0;
}
#endif
1745
1746 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1747  *
1748  *      @pcrat_image: Fill in VCRAT for CPU
1749  *      @size:  [IN] allocated size of crat_image.
1750  *              [OUT] actual size of data filled in crat_image
1751  */
1752 static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
1753 {
1754         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
1755         struct acpi_table_header *acpi_table;
1756         acpi_status status;
1757         struct crat_subtype_generic *sub_type_hdr;
1758         int avail_size = *size;
1759         int numa_node_id;
1760 #ifdef CONFIG_X86_64
1761         uint32_t entries = 0;
1762 #endif
1763         int ret = 0;
1764
1765         if (!pcrat_image)
1766                 return -EINVAL;
1767
1768         /* Fill in CRAT Header.
1769          * Modify length and total_entries as subunits are added.
1770          */
1771         avail_size -= sizeof(struct crat_header);
1772         if (avail_size < 0)
1773                 return -ENOMEM;
1774
1775         memset(crat_table, 0, sizeof(struct crat_header));
1776         memcpy(&crat_table->signature, CRAT_SIGNATURE,
1777                         sizeof(crat_table->signature));
1778         crat_table->length = sizeof(struct crat_header);
1779
1780         status = acpi_get_table("DSDT", 0, &acpi_table);
1781         if (status != AE_OK)
1782                 pr_warn("DSDT table not found for OEM information\n");
1783         else {
1784                 crat_table->oem_revision = acpi_table->revision;
1785                 memcpy(crat_table->oem_id, acpi_table->oem_id,
1786                                 CRAT_OEMID_LENGTH);
1787                 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
1788                                 CRAT_OEMTABLEID_LENGTH);
1789                 acpi_put_table(acpi_table);
1790         }
1791         crat_table->total_entries = 0;
1792         crat_table->num_domains = 0;
1793
1794         sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1795
1796         for_each_online_node(numa_node_id) {
1797                 if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
1798                         continue;
1799
1800                 /* Fill in Subtype: Compute Unit */
1801                 ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
1802                         crat_table->num_domains,
1803                         (struct crat_subtype_computeunit *)sub_type_hdr);
1804                 if (ret < 0)
1805                         return ret;
1806                 crat_table->length += sub_type_hdr->length;
1807                 crat_table->total_entries++;
1808
1809                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1810                         sub_type_hdr->length);
1811
1812                 /* Fill in Subtype: Memory */
1813                 ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
1814                         crat_table->num_domains,
1815                         (struct crat_subtype_memory *)sub_type_hdr);
1816                 if (ret < 0)
1817                         return ret;
1818                 crat_table->length += sub_type_hdr->length;
1819                 crat_table->total_entries++;
1820
1821                 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1822                         sub_type_hdr->length);
1823
1824                 /* Fill in Subtype: IO Link */
1825 #ifdef CONFIG_X86_64
1826                 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
1827                                 &entries,
1828                                 (struct crat_subtype_iolink *)sub_type_hdr);
1829                 if (ret < 0)
1830                         return ret;
1831
1832                 if (entries) {
1833                         crat_table->length += (sub_type_hdr->length * entries);
1834                         crat_table->total_entries += entries;
1835
1836                         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1837                                         sub_type_hdr->length * entries);
1838                 }
1839 #else
1840                 pr_info("IO link not available for non x86 platforms\n");
1841 #endif
1842
1843                 crat_table->num_domains++;
1844         }
1845
1846         /* TODO: Add cache Subtype for CPU.
1847          * Currently, CPU cache information is available in function
1848          * detect_cache_attributes(cpu) defined in the file
1849          * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
1850          * exported and to get the same information the code needs to be
1851          * duplicated.
1852          */
1853
1854         *size = crat_table->length;
1855         pr_info("Virtual CRAT table created for CPU\n");
1856
1857         return 0;
1858 }
1859
1860 static int kfd_fill_gpu_memory_affinity(int *avail_size,
1861                 struct kfd_node *kdev, uint8_t type, uint64_t size,
1862                 struct crat_subtype_memory *sub_type_hdr,
1863                 uint32_t proximity_domain,
1864                 const struct kfd_local_mem_info *local_mem_info)
1865 {
1866         *avail_size -= sizeof(struct crat_subtype_memory);
1867         if (*avail_size < 0)
1868                 return -ENOMEM;
1869
1870         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1871         sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1872         sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1873         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1874
1875         sub_type_hdr->proximity_domain = proximity_domain;
1876
1877         pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
1878                         type, size);
1879
1880         sub_type_hdr->length_low = lower_32_bits(size);
1881         sub_type_hdr->length_high = upper_32_bits(size);
1882
1883         sub_type_hdr->width = local_mem_info->vram_width;
1884         sub_type_hdr->visibility_type = type;
1885
1886         return 0;
1887 }
1888
#ifdef CONFIG_ACPI_NUMA
/* kfd_find_numa_node_in_srat - Set the GPU's NUMA node from the ACPI SRAT
 *
 *      @kdev: GPU node whose PCI device is looked up
 *
 *      Walks the SRAT subtables looking for a generic affinity entry whose
 *      device handle equals the GPU's PCI domain and device id. While
 *      walking, the largest proximity domain of the CPU affinity entries
 *      seen so far is recorded. When a match is found its proximity domain
 *      is translated with pxm_to_node(); a result that is negative or
 *      larger than the node of the largest CPU proximity domain is replaced
 *      by node 0. The device node is only changed when a match was found.
 */
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
        struct acpi_table_header *table_header = NULL;
        struct acpi_subtable_header *sub_header = NULL;
        unsigned long table_end, subtable_len;
        u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
                        pci_dev_id(kdev->adev->pdev);
        u32 bdf;
        acpi_status status;
        struct acpi_srat_cpu_affinity *cpu;
        struct acpi_srat_generic_affinity *gpu;
        int pxm = 0, max_pxm = 0;
        int numa_node = NUMA_NO_NODE;
        bool found = false;

        /* Fetch the SRAT table from ACPI */
        status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
        if (status == AE_NOT_FOUND) {
                pr_warn("SRAT table not found\n");
                return;
        } else if (ACPI_FAILURE(status)) {
                const char *err = acpi_format_exception(status);
                pr_err("SRAT table error: %s\n", err);
                return;
        }

        /* One past the last byte of the table */
        table_end = (unsigned long)table_header + table_header->length;

        /* Parse all entries looking for a match. */
        sub_header = (struct acpi_subtable_header *)
                        ((unsigned long)table_header +
                        sizeof(struct acpi_table_srat));

        /* Only read a subtable header that lies completely inside the table */
        while ((unsigned long)sub_header + sizeof(*sub_header) <= table_end) {
                subtable_len = sub_header->length;

                /*
                 * If length is 0, break from this loop to avoid
                 * infinite loop.
                 */
                if (subtable_len == 0) {
                        pr_err("SRAT invalid zero length\n");
                        break;
                }

                /*
                 * A subtable may end exactly at table_end (the last entry
                 * does), but must not extend beyond it.
                 */
                if ((unsigned long)sub_header + subtable_len > table_end)
                        break;

                switch (sub_header->type) {
                case ACPI_SRAT_TYPE_CPU_AFFINITY:
                        /* Skip entries too short to hold the structure */
                        if (subtable_len < sizeof(*cpu))
                                break;
                        cpu = (struct acpi_srat_cpu_affinity *)sub_header;
                        /*
                         * Assemble the proximity domain byte by byte
                         * (_lo is bits 0-7, _hi[] bits 8-31) instead of
                         * reading _hi[] through a u32 pointer, which is
                         * unaligned, endian dependent and reads one byte
                         * past the array.
                         */
                        pxm = (u32)cpu->proximity_domain_hi[2] << 24 |
                              cpu->proximity_domain_hi[1] << 16 |
                              cpu->proximity_domain_hi[0] << 8 |
                              cpu->proximity_domain_lo;
                        if (pxm > max_pxm)
                                max_pxm = pxm;
                        break;
                case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
                        /* Skip entries too short to hold the structure */
                        if (subtable_len < sizeof(*gpu))
                                break;
                        gpu = (struct acpi_srat_generic_affinity *)sub_header;
                        /*
                         * Handle bytes 0-1 go to the upper and bytes 2-3
                         * to the lower half, matching the layout of pci_id.
                         * Both halves are little endian and widened to u32
                         * before shifting.
                         */
                        bdf = (u32)(gpu->device_handle[1] << 8 |
                                    gpu->device_handle[0]) << 16 |
                              (u32)(gpu->device_handle[3] << 8 |
                                    gpu->device_handle[2]);
                        if (bdf == pci_id) {
                                found = true;
                                numa_node = pxm_to_node(gpu->proximity_domain);
                        }
                        break;
                default:
                        break;
                }

                if (found)
                        break;

                sub_header = (struct acpi_subtable_header *)
                                ((unsigned long)sub_header + subtable_len);
        }

        acpi_put_table(table_header);

        /* Workaround bad cpu-gpu binding case */
        if (found && (numa_node < 0 ||
                        numa_node > pxm_to_node(max_pxm)))
                numa_node = 0;

        if (numa_node != NUMA_NO_NODE)
                set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
1974
/* Base values for the weight_xgmi field of the io link entries built by
 * kfd_fill_gpu_direct_io_link_to_cpu() and kfd_fill_gpu_xgmi_link_to_gpu().
 */
#define KFD_CRAT_INTRA_SOCKET_WEIGHT    13
#define KFD_CRAT_XGMI_WEIGHT            15
1977
1978 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
1979  * to its NUMA node
1980  *      @avail_size: Available size in the memory
1981  *      @kdev - [IN] GPU device
1982  *      @sub_type_hdr: Memory into which io link info will be filled in
1983  *      @proximity_domain - proximity domain of the GPU node
1984  *
1985  *      Return 0 if successful else return -ve value
1986  */
1987 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
1988                         struct kfd_node *kdev,
1989                         struct crat_subtype_iolink *sub_type_hdr,
1990                         uint32_t proximity_domain)
1991 {
1992         *avail_size -= sizeof(struct crat_subtype_iolink);
1993         if (*avail_size < 0)
1994                 return -ENOMEM;
1995
1996         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
1997
1998         /* Fill in subtype header data */
1999         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2000         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2001         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
2002         if (kfd_dev_is_large_bar(kdev))
2003                 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2004
2005         /* Fill in IOLINK subtype.
2006          * TODO: Fill-in other fields of iolink subtype
2007          */
2008         if (kdev->adev->gmc.xgmi.connected_to_cpu ||
2009             (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
2010              kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
2011              AMDGPU_PKG_TYPE_APU)) {
2012                 bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
2013                 int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
2014                                                         KFD_CRAT_INTRA_SOCKET_WEIGHT;
2015                 uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
2016                                                         kdev->adev, NULL, true) : mem_bw;
2017
2018                 /*
2019                  * with host gpu xgmi link, host can access gpu memory whether
2020                  * or not pcie bar type is large, so always create bidirectional
2021                  * io link.
2022                  */
2023                 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2024                 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2025                 sub_type_hdr->weight_xgmi = weight;
2026                 sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
2027                 sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
2028         } else {
2029                 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
2030                 sub_type_hdr->minimum_bandwidth_mbs =
2031                                 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
2032                 sub_type_hdr->maximum_bandwidth_mbs =
2033                                 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
2034         }
2035
2036         sub_type_hdr->proximity_domain_from = proximity_domain;
2037
2038 #ifdef CONFIG_ACPI_NUMA
2039         if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
2040             num_possible_nodes() > 1)
2041                 kfd_find_numa_node_in_srat(kdev);
2042 #endif
2043 #ifdef CONFIG_NUMA
2044         if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
2045                 sub_type_hdr->proximity_domain_to = 0;
2046         else
2047                 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
2048 #else
2049         sub_type_hdr->proximity_domain_to = 0;
2050 #endif
2051         return 0;
2052 }
2053
2054 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
2055                         struct kfd_node *kdev,
2056                         struct kfd_node *peer_kdev,
2057                         struct crat_subtype_iolink *sub_type_hdr,
2058                         uint32_t proximity_domain_from,
2059                         uint32_t proximity_domain_to)
2060 {
2061         bool use_ta_info = kdev->kfd->num_nodes == 1;
2062
2063         *avail_size -= sizeof(struct crat_subtype_iolink);
2064         if (*avail_size < 0)
2065                 return -ENOMEM;
2066
2067         memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
2068
2069         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2070         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2071         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
2072                                CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2073
2074         sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2075         sub_type_hdr->proximity_domain_from = proximity_domain_from;
2076         sub_type_hdr->proximity_domain_to = proximity_domain_to;
2077
2078         if (use_ta_info) {
2079                 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
2080                         amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
2081                 sub_type_hdr->maximum_bandwidth_mbs =
2082                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
2083                                                         peer_kdev->adev, false);
2084                 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
2085                         amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
2086         } else {
2087                 bool is_single_hop = kdev->kfd == peer_kdev->kfd;
2088                 int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
2089                         (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
2090                 int mem_bw = 819200;
2091
2092                 sub_type_hdr->weight_xgmi = weight;
2093                 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2094                 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2095         }
2096
2097         return 0;
2098 }
2099
2100 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
2101  *
2102  *      @pcrat_image: Fill in VCRAT for GPU
2103  *      @size:  [IN] allocated size of crat_image.
2104  *              [OUT] actual size of data filled in crat_image
2105  */
2106 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
2107                                       size_t *size, struct kfd_node *kdev,
2108                                       uint32_t proximity_domain)
2109 {
2110         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2111         struct crat_subtype_generic *sub_type_hdr;
2112         struct kfd_local_mem_info local_mem_info;
2113         struct kfd_topology_device *peer_dev;
2114         struct crat_subtype_computeunit *cu;
2115         struct kfd_cu_info cu_info;
2116         int avail_size = *size;
2117         uint32_t total_num_of_cu;
2118         uint32_t nid = 0;
2119         int ret = 0;
2120
2121         if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
2122                 return -EINVAL;
2123
2124         /* Fill the CRAT Header.
2125          * Modify length and total_entries as subunits are added.
2126          */
2127         avail_size -= sizeof(struct crat_header);
2128         if (avail_size < 0)
2129                 return -ENOMEM;
2130
2131         memset(crat_table, 0, sizeof(struct crat_header));
2132
2133         memcpy(&crat_table->signature, CRAT_SIGNATURE,
2134                         sizeof(crat_table->signature));
2135         /* Change length as we add more subtypes*/
2136         crat_table->length = sizeof(struct crat_header);
2137         crat_table->num_domains = 1;
2138         crat_table->total_entries = 0;
2139
2140         /* Fill in Subtype: Compute Unit
2141          * First fill in the sub type header and then sub type data
2142          */
2143         avail_size -= sizeof(struct crat_subtype_computeunit);
2144         if (avail_size < 0)
2145                 return -ENOMEM;
2146
2147         sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
2148         memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
2149
2150         sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
2151         sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
2152         sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
2153
2154         /* Fill CU subtype data */
2155         cu = (struct crat_subtype_computeunit *)sub_type_hdr;
2156         cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
2157         cu->proximity_domain = proximity_domain;
2158
2159         amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2160         cu->num_simd_per_cu = cu_info.simd_per_cu;
2161         cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
2162         cu->max_waves_simd = cu_info.max_waves_per_simd;
2163
2164         cu->wave_front_size = cu_info.wave_front_size;
2165         cu->array_count = cu_info.num_shader_arrays_per_engine *
2166                 cu_info.num_shader_engines;
2167         total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2168         cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2169         cu->num_cu_per_array = cu_info.num_cu_per_sh;
2170         cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2171         cu->num_banks = cu_info.num_shader_engines;
2172         cu->lds_size_in_kb = cu_info.lds_size;
2173
2174         cu->hsa_capability = 0;
2175
2176         /* Check if this node supports IOMMU. During parsing this flag will
2177          * translate to HSA_CAP_ATS_PRESENT
2178          */
2179         if (!kfd_iommu_check_device(kdev->kfd))
2180                 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
2181
2182         crat_table->length += sub_type_hdr->length;
2183         crat_table->total_entries++;
2184
2185         /* Fill in Subtype: Memory. Only on systems with large BAR (no
2186          * private FB), report memory as public. On other systems
2187          * report the total FB size (public+private) as a single
2188          * private heap.
2189          */
2190         local_mem_info = kdev->local_mem_info;
2191         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2192                         sub_type_hdr->length);
2193
2194         if (debug_largebar)
2195                 local_mem_info.local_mem_size_private = 0;
2196
2197         if (local_mem_info.local_mem_size_private == 0)
2198                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2199                                 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
2200                                 local_mem_info.local_mem_size_public,
2201                                 (struct crat_subtype_memory *)sub_type_hdr,
2202                                 proximity_domain,
2203                                 &local_mem_info);
2204         else
2205                 ret = kfd_fill_gpu_memory_affinity(&avail_size,
2206                                 kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
2207                                 local_mem_info.local_mem_size_public +
2208                                 local_mem_info.local_mem_size_private,
2209                                 (struct crat_subtype_memory *)sub_type_hdr,
2210                                 proximity_domain,
2211                                 &local_mem_info);
2212         if (ret < 0)
2213                 return ret;
2214
2215         crat_table->length += sizeof(struct crat_subtype_memory);
2216         crat_table->total_entries++;
2217
2218         /* Fill in Subtype: IO_LINKS
2219          *  Only direct links are added here which is Link from GPU to
2220          *  its NUMA node. Indirect links are added by userspace.
2221          */
2222         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2223                 sub_type_hdr->length);
2224         ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
2225                 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
2226
2227         if (ret < 0)
2228                 return ret;
2229
2230         crat_table->length += sub_type_hdr->length;
2231         crat_table->total_entries++;
2232
2233
2234         /* Fill in Subtype: IO_LINKS
2235          * Direct links from GPU to other GPUs through xGMI.
2236          * We will loop GPUs that already be processed (with lower value
2237          * of proximity_domain), add the link for the GPUs with same
2238          * hive id (from this GPU to other GPU) . The reversed iolink
2239          * (from other GPU to this GPU) will be added
2240          * in kfd_parse_subtype_iolink.
2241          */
2242         if (kdev->kfd->hive_id) {
2243                 for (nid = 0; nid < proximity_domain; ++nid) {
2244                         peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2245                         if (!peer_dev->gpu)
2246                                 continue;
2247                         if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2248                                 continue;
2249                         sub_type_hdr = (typeof(sub_type_hdr))(
2250                                 (char *)sub_type_hdr +
2251                                 sizeof(struct crat_subtype_iolink));
2252                         ret = kfd_fill_gpu_xgmi_link_to_gpu(
2253                                 &avail_size, kdev, peer_dev->gpu,
2254                                 (struct crat_subtype_iolink *)sub_type_hdr,
2255                                 proximity_domain, nid);
2256                         if (ret < 0)
2257                                 return ret;
2258                         crat_table->length += sub_type_hdr->length;
2259                         crat_table->total_entries++;
2260                 }
2261         }
2262         *size = crat_table->length;
2263         pr_info("Virtual CRAT table created for GPU\n");
2264
2265         return ret;
2266 }
2267
2268 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2269  *              creates a Virtual CRAT (VCRAT) image
2270  *
2271  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2272  *
2273  *      @crat_image: VCRAT image created because ACPI does not have a
2274  *                   CRAT for this device
2275  *      @size: [OUT] size of virtual crat_image
2276  *      @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2277  *              COMPUTE_UNIT_GPU - Create VCRAT for GPU
2278  *              (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2279  *                      -- this option is not currently implemented.
2280  *                      The assumption is that all AMD APUs will have CRAT
2281  *      @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2282  *
2283  *      Return 0 if successful else return -ve value
2284  */
2285 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
2286                                   int flags, struct kfd_node *kdev,
2287                                   uint32_t proximity_domain)
2288 {
2289         void *pcrat_image = NULL;
2290         int ret = 0, num_nodes;
2291         size_t dyn_size;
2292
2293         if (!crat_image)
2294                 return -EINVAL;
2295
2296         *crat_image = NULL;
2297
2298         /* Allocate the CPU Virtual CRAT size based on the number of online
2299          * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
2300          * This should cover all the current conditions. A check is put not
2301          * to overwrite beyond allocated size for GPUs
2302          */
2303         switch (flags) {
2304         case COMPUTE_UNIT_CPU:
2305                 num_nodes = num_online_nodes();
2306                 dyn_size = sizeof(struct crat_header) +
2307                         num_nodes * (sizeof(struct crat_subtype_computeunit) +
2308                         sizeof(struct crat_subtype_memory) +
2309                         (num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2310                 pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2311                 if (!pcrat_image)
2312                         return -ENOMEM;
2313                 *size = dyn_size;
2314                 pr_debug("CRAT size is %ld", dyn_size);
2315                 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2316                 break;
2317         case COMPUTE_UNIT_GPU:
2318                 if (!kdev)
2319                         return -EINVAL;
2320                 pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
2321                 if (!pcrat_image)
2322                         return -ENOMEM;
2323                 *size = VCRAT_SIZE_FOR_GPU;
2324                 ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
2325                                                  proximity_domain);
2326                 break;
2327         case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2328                 /* TODO: */
2329                 ret = -EINVAL;
2330                 pr_err("VCRAT not implemented for APU\n");
2331                 break;
2332         default:
2333                 ret = -EINVAL;
2334         }
2335
2336         if (!ret)
2337                 *crat_image = pcrat_image;
2338         else
2339                 kvfree(pcrat_image);
2340
2341         return ret;
2342 }
2343
2344
/* kfd_destroy_crat_image - Release the memory of a CRAT image
 *
 *	@crat_image: [IN] image pointer obtained from
 *		     kfd_create_crat_image_xxx(..); it is passed unchanged
 *		     to kvfree()
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kvfree(crat_image);
}