drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

   1 /*
   2  * Copyright 2008 Advanced Micro Devices, Inc.
   3  * Copyright 2008 Red Hat Inc.
   4  * Copyright 2009 Jerome Glisse.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors: Dave Airlie
  25  *          Alex Deucher
  26  *          Jerome Glisse
  27  */
  28 #include <linux/power_supply.h>
  29 #include <linux/kthread.h>
  30 #include <linux/module.h>
  31 #include <linux/console.h>
  32 #include <linux/slab.h>
  33 #include <linux/iommu.h>
  34 #include <linux/pci.h>
  35 #include <linux/devcoredump.h>
  36 #include <generated/utsrelease.h>
  37 #include <linux/pci-p2pdma.h>
  38 #include <linux/apple-gmux.h>
  39
  40 #include <drm/drm_aperture.h>
  41 #include <drm/drm_atomic_helper.h>
  42 #include <drm/drm_crtc_helper.h>
  43 #include <drm/drm_fb_helper.h>
  44 #include <drm/drm_probe_helper.h>
  45 #include <drm/amdgpu_drm.h>
  46 #include <linux/vgaarb.h>
  47 #include <linux/vga_switcheroo.h>
  48 #include <linux/efi.h>
  49 #include "amdgpu.h"
  50 #include "amdgpu_trace.h"
  51 #include "amdgpu_i2c.h"
  52 #include "atom.h"
  53 #include "amdgpu_atombios.h"
  54 #include "amdgpu_atomfirmware.h"
  55 #include "amd_pcie.h"
  56 #ifdef CONFIG_DRM_AMDGPU_SI
  57 #include "si.h"
  58 #endif
  59 #ifdef CONFIG_DRM_AMDGPU_CIK
  60 #include "cik.h"
  61 #endif
  62 #include "vi.h"
  63 #include "soc15.h"
  64 #include "nv.h"
  65 #include "bif/bif_4_1_d.h"
  66 #include <linux/firmware.h>
  67 #include "amdgpu_vf_error.h"
  68
  69 #include "amdgpu_amdkfd.h"
  70 #include "amdgpu_pm.h"
  71
  72 #include "amdgpu_xgmi.h"
  73 #include "amdgpu_ras.h"
  74 #include "amdgpu_pmu.h"
  75 #include "amdgpu_fru_eeprom.h"
  76 #include "amdgpu_reset.h"
  77
  78 #include <linux/suspend.h>
  79 #include <drm/task_barrier.h>
  80 #include <linux/pm_runtime.h>
  81
  82 #include <drm/drm_drv.h>
  83
  84 #if IS_ENABLED(CONFIG_X86)
  85 #include <asm/intel-family.h>
  86 #endif
  87
  88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
  94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  95
  96 #define AMDGPU_RESUME_MS                2000
  97 #define AMDGPU_MAX_RETRY_LIMIT          2
  98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
  99
 100 static const struct drm_driver amdgpu_kms_driver;
 101
 102 const char *amdgpu_asic_name[] = {
 103         "TAHITI",
 104         "PITCAIRN",
 105         "VERDE",
 106         "OLAND",
 107         "HAINAN",
 108         "BONAIRE",
 109         "KAVERI",
 110         "KABINI",
 111         "HAWAII",
 112         "MULLINS",
 113         "TOPAZ",
 114         "TONGA",
 115         "FIJI",
 116         "CARRIZO",
 117         "STONEY",
 118         "POLARIS10",
 119         "POLARIS11",
 120         "POLARIS12",
 121         "VEGAM",
 122         "VEGA10",
 123         "VEGA12",
 124         "VEGA20",
 125         "RAVEN",
 126         "ARCTURUS",
 127         "RENOIR",
 128         "ALDEBARAN",
 129         "NAVI10",
 130         "CYAN_SKILLFISH",
 131         "NAVI14",
 132         "NAVI12",
 133         "SIENNA_CICHLID",
 134         "NAVY_FLOUNDER",
 135         "VANGOGH",
 136         "DIMGREY_CAVEFISH",
 137         "BEIGE_GOBY",
 138         "YELLOW_CARP",
 139         "IP DISCOVERY",
 140         "LAST",
 141 };
 142
 143 /**
 144  * DOC: pcie_replay_count
 145  *
 146  * The amdgpu driver provides a sysfs API for reporting the total number
 147  * of PCIe replays (NAKs)
 148  * The file pcie_replay_count is used for this and returns the total
 149  * number of replays as a sum of the NAKs generated and NAKs received
 150  */
 151
 152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 153                 struct device_attribute *attr, char *buf)
 154 {
 155         struct drm_device *ddev = dev_get_drvdata(dev);
 156         struct amdgpu_device *adev = drm_to_adev(ddev);
 157         uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 158
 159         return sysfs_emit(buf, "%llu\n", cnt);
 160 }
 161
 162 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 163                 amdgpu_device_get_pcie_replay_count, NULL);
 164
 165 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 166
 167 /**
 168  * DOC: product_name
 169  *
 170  * The amdgpu driver provides a sysfs API for reporting the product name
 171  * for the device
 172  * The file product_name is used for this and returns the product name
 173  * as returned from the FRU.
 174  * NOTE: This is only available for certain server cards
 175  */
 176
 177 static ssize_t amdgpu_device_get_product_name(struct device *dev,
 178                 struct device_attribute *attr, char *buf)
 179 {
 180         struct drm_device *ddev = dev_get_drvdata(dev);
 181         struct amdgpu_device *adev = drm_to_adev(ddev);
 182
 183         return sysfs_emit(buf, "%s\n", adev->product_name);
 184 }
 185
 186 static DEVICE_ATTR(product_name, S_IRUGO,
 187                 amdgpu_device_get_product_name, NULL);
 188
 189 /**
 190  * DOC: product_number
 191  *
 192  * The amdgpu driver provides a sysfs API for reporting the part number
 193  * for the device
 194  * The file product_number is used for this and returns the part number
 195  * as returned from the FRU.
 196  * NOTE: This is only available for certain server cards
 197  */
 198
 199 static ssize_t amdgpu_device_get_product_number(struct device *dev,
 200                 struct device_attribute *attr, char *buf)
 201 {
 202         struct drm_device *ddev = dev_get_drvdata(dev);
 203         struct amdgpu_device *adev = drm_to_adev(ddev);
 204
 205         return sysfs_emit(buf, "%s\n", adev->product_number);
 206 }
 207
 208 static DEVICE_ATTR(product_number, S_IRUGO,
 209                 amdgpu_device_get_product_number, NULL);
 210
 211 /**
 212  * DOC: serial_number
 213  *
 214  * The amdgpu driver provides a sysfs API for reporting the serial number
 215  * for the device
 216  * The file serial_number is used for this and returns the serial number
 217  * as returned from the FRU.
 218  * NOTE: This is only available for certain server cards
 219  */
 220
 221 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
 222                 struct device_attribute *attr, char *buf)
 223 {
 224         struct drm_device *ddev = dev_get_drvdata(dev);
 225         struct amdgpu_device *adev = drm_to_adev(ddev);
 226
 227         return sysfs_emit(buf, "%s\n", adev->serial);
 228 }
 229
 230 static DEVICE_ATTR(serial_number, S_IRUGO,
 231                 amdgpu_device_get_serial_number, NULL);
 232
 233 /**
 234  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 235  *
 236  * @dev: drm_device pointer
 237  *
 238  * Returns true if the device is a dGPU with ATPX power control,
 239  * otherwise return false.
 240  */
 241 bool amdgpu_device_supports_px(struct drm_device *dev)
 242 {
 243         struct amdgpu_device *adev = drm_to_adev(dev);
 244
 245         if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
 246                 return true;
 247         return false;
 248 }
 249
 250 /**
 251  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 252  *
 253  * @dev: drm_device pointer
 254  *
 255  * Returns true if the device is a dGPU with ACPI power control,
 256  * otherwise return false.
 257  */
 258 bool amdgpu_device_supports_boco(struct drm_device *dev)
 259 {
 260         struct amdgpu_device *adev = drm_to_adev(dev);
 261
 262         if (adev->has_pr3 ||
 263             ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
 264                 return true;
 265         return false;
 266 }
 267
 268 /**
 269  * amdgpu_device_supports_baco - Does the device support BACO
 270  *
 271  * @dev: drm_device pointer
 272  *
 273  * Returns true if the device supporte BACO,
 274  * otherwise return false.
 275  */
 276 bool amdgpu_device_supports_baco(struct drm_device *dev)
 277 {
 278         struct amdgpu_device *adev = drm_to_adev(dev);
 279
 280         return amdgpu_asic_supports_baco(adev);
 281 }
 282
 283 /**
 284  * amdgpu_device_supports_smart_shift - Is the device dGPU with
 285  * smart shift support
 286  *
 287  * @dev: drm_device pointer
 288  *
 289  * Returns true if the device is a dGPU with Smart Shift support,
 290  * otherwise returns false.
 291  */
 292 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
 293 {
 294         return (amdgpu_device_supports_boco(dev) &&
 295                 amdgpu_acpi_is_power_shift_control_supported());
 296 }
 297
 298 /*
 299  * VRAM access helper functions
 300  */
 301
 302 /**
 303  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 304  *
 305  * @adev: amdgpu_device pointer
 306  * @pos: offset of the buffer in vram
 307  * @buf: virtual address of the buffer in system memory
 308  * @size: read/write size, sizeof(@buf) must > @size
 309  * @write: true - write to vram, otherwise - read from vram
 310  */
 311 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
 312                              void *buf, size_t size, bool write)
 313 {
 314         unsigned long flags;
 315         uint32_t hi = ~0, tmp = 0;
 316         uint32_t *data = buf;
 317         uint64_t last;
 318         int idx;
 319
 320         if (!drm_dev_enter(adev_to_drm(adev), &idx))
 321                 return;
 322
 323         BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
 324
 325         spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 326         for (last = pos + size; pos < last; pos += 4) {
 327                 tmp = pos >> 31;
 328
 329                 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
 330                 if (tmp != hi) {
 331                         WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
 332                         hi = tmp;
 333                 }
 334                 if (write)
 335                         WREG32_NO_KIQ(mmMM_DATA, *data++);
 336                 else
 337                         *data++ = RREG32_NO_KIQ(mmMM_DATA);
 338         }
 339
 340         spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 341         drm_dev_exit(idx);
 342 }
 343
 344 /**
 345  * amdgpu_device_aper_access - access vram by vram aperature
 346  *
 347  * @adev: amdgpu_device pointer
 348  * @pos: offset of the buffer in vram
 349  * @buf: virtual address of the buffer in system memory
 350  * @size: read/write size, sizeof(@buf) must > @size
 351  * @write: true - write to vram, otherwise - read from vram
 352  *
 353  * The return value means how many bytes have been transferred.
 354  */
 355 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
 356                                  void *buf, size_t size, bool write)
 357 {
 358 #ifdef CONFIG_64BIT
 359         void __iomem *addr;
 360         size_t count = 0;
 361         uint64_t last;
 362
 363         if (!adev->mman.aper_base_kaddr)
 364                 return 0;
 365
 366         last = min(pos + size, adev->gmc.visible_vram_size);
 367         if (last > pos) {
 368                 addr = adev->mman.aper_base_kaddr + pos;
 369                 count = last - pos;
 370
 371                 if (write) {
 372                         memcpy_toio(addr, buf, count);
 373                         mb();
 374                         amdgpu_device_flush_hdp(adev, NULL);
 375                 } else {
 376                         amdgpu_device_invalidate_hdp(adev, NULL);
 377                         mb();
 378                         memcpy_fromio(buf, addr, count);
 379                 }
 380
 381         }
 382
 383         return count;
 384 #else
 385         return 0;
 386 #endif
 387 }
 388
 389 /**
 390  * amdgpu_device_vram_access - read/write a buffer in vram
 391  *
 392  * @adev: amdgpu_device pointer
 393  * @pos: offset of the buffer in vram
 394  * @buf: virtual address of the buffer in system memory
 395  * @size: read/write size, sizeof(@buf) must > @size
 396  * @write: true - write to vram, otherwise - read from vram
 397  */
 398 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 399                                void *buf, size_t size, bool write)
 400 {
 401         size_t count;
 402
 403         /* try to using vram apreature to access vram first */
 404         count = amdgpu_device_aper_access(adev, pos, buf, size, write);
 405         size -= count;
 406         if (size) {
 407                 /* using MM to access rest vram */
 408                 pos += count;
 409                 buf += count;
 410                 amdgpu_device_mm_access(adev, pos, buf, size, write);
 411         }
 412 }
 413
 414 /*
 415  * register access helper functions.
 416  */
 417
 418 /* Check if hw access should be skipped because of hotplug or device error */
 419 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
 420 {
 421         if (adev->no_hw_access)
 422                 return true;
 423
 424 #ifdef CONFIG_LOCKDEP
 425         /*
 426          * This is a bit complicated to understand, so worth a comment. What we assert
 427          * here is that the GPU reset is not running on another thread in parallel.
 428          *
 429          * For this we trylock the read side of the reset semaphore, if that succeeds
 430          * we know that the reset is not running in paralell.
 431          *
 432          * If the trylock fails we assert that we are either already holding the read
 433          * side of the lock or are the reset thread itself and hold the write side of
 434          * the lock.
 435          */
 436         if (in_task()) {
 437                 if (down_read_trylock(&adev->reset_domain->sem))
 438                         up_read(&adev->reset_domain->sem);
 439                 else
 440                         lockdep_assert_held(&adev->reset_domain->sem);
 441         }
 442 #endif
 443         return false;
 444 }
 445
 446 /**
 447  * amdgpu_device_rreg - read a memory mapped IO or indirect register
 448  *
 449  * @adev: amdgpu_device pointer
 450  * @reg: dword aligned register offset
 451  * @acc_flags: access flags which require special behavior
 452  *
 453  * Returns the 32 bit value from the offset specified.
 454  */
 455 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 456                             uint32_t reg, uint32_t acc_flags)
 457 {
 458         uint32_t ret;
 459
 460         if (amdgpu_device_skip_hw_access(adev))
 461                 return 0;
 462
 463         if ((reg * 4) < adev->rmmio_size) {
 464                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 465                     amdgpu_sriov_runtime(adev) &&
 466                     down_read_trylock(&adev->reset_domain->sem)) {
 467                         ret = amdgpu_kiq_rreg(adev, reg);
 468                         up_read(&adev->reset_domain->sem);
 469                 } else {
 470                         ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 471                 }
 472         } else {
 473                 ret = adev->pcie_rreg(adev, reg * 4);
 474         }
 475
 476         trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
 477
 478         return ret;
 479 }
 480
/*
 * MMIO register read helper with byte granularity.
 * @offset: byte offset from the start of the MMIO region
 */
 486
 487 /**
 488  * amdgpu_mm_rreg8 - read a memory mapped IO register
 489  *
 490  * @adev: amdgpu_device pointer
 491  * @offset: byte aligned register offset
 492  *
 493  * Returns the 8 bit value from the offset specified.
 494  */
 495 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
 496 {
 497         if (amdgpu_device_skip_hw_access(adev))
 498                 return 0;
 499
 500         if (offset < adev->rmmio_size)
 501                 return (readb(adev->rmmio + offset));
 502         BUG();
 503 }
 504
/*
 * MMIO register write helper with byte granularity.
 * @offset: byte offset from the start of the MMIO region
 * @value: the value to be written to the register
 */
 511 /**
 512  * amdgpu_mm_wreg8 - read a memory mapped IO register
 513  *
 514  * @adev: amdgpu_device pointer
 515  * @offset: byte aligned register offset
 516  * @value: 8 bit value to write
 517  *
 518  * Writes the value specified to the offset specified.
 519  */
 520 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 521 {
 522         if (amdgpu_device_skip_hw_access(adev))
 523                 return;
 524
 525         if (offset < adev->rmmio_size)
 526                 writeb(value, adev->rmmio + offset);
 527         else
 528                 BUG();
 529 }
 530
 531 /**
 532  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 533  *
 534  * @adev: amdgpu_device pointer
 535  * @reg: dword aligned register offset
 536  * @v: 32 bit value to write to the register
 537  * @acc_flags: access flags which require special behavior
 538  *
 539  * Writes the value specified to the offset specified.
 540  */
 541 void amdgpu_device_wreg(struct amdgpu_device *adev,
 542                         uint32_t reg, uint32_t v,
 543                         uint32_t acc_flags)
 544 {
 545         if (amdgpu_device_skip_hw_access(adev))
 546                 return;
 547
 548         if ((reg * 4) < adev->rmmio_size) {
 549                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 550                     amdgpu_sriov_runtime(adev) &&
 551                     down_read_trylock(&adev->reset_domain->sem)) {
 552                         amdgpu_kiq_wreg(adev, reg, v);
 553                         up_read(&adev->reset_domain->sem);
 554                 } else {
 555                         writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 556                 }
 557         } else {
 558                 adev->pcie_wreg(adev, reg * 4, v);
 559         }
 560
 561         trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
 562 }
 563
 564 /**
 565  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
 566  *
 567  * @adev: amdgpu_device pointer
 568  * @reg: mmio/rlc register
 569  * @v: value to write
 570  *
 571  * this function is invoked only for the debugfs register access
 572  */
 573 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 574                              uint32_t reg, uint32_t v)
 575 {
 576         if (amdgpu_device_skip_hw_access(adev))
 577                 return;
 578
 579         if (amdgpu_sriov_fullaccess(adev) &&
 580             adev->gfx.rlc.funcs &&
 581             adev->gfx.rlc.funcs->is_rlcg_access_range) {
 582                 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
 583                         return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
 584         } else if ((reg * 4) >= adev->rmmio_size) {
 585                 adev->pcie_wreg(adev, reg * 4, v);
 586         } else {
 587                 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 588         }
 589 }
 590
 591 /**
 592  * amdgpu_mm_rdoorbell - read a doorbell dword
 593  *
 594  * @adev: amdgpu_device pointer
 595  * @index: doorbell index
 596  *
 597  * Returns the value in the doorbell aperture at the
 598  * requested doorbell index (CIK).
 599  */
 600 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 601 {
 602         if (amdgpu_device_skip_hw_access(adev))
 603                 return 0;
 604
 605         if (index < adev->doorbell.num_doorbells) {
 606                 return readl(adev->doorbell.ptr + index);
 607         } else {
 608                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 609                 return 0;
 610         }
 611 }
 612
 613 /**
 614  * amdgpu_mm_wdoorbell - write a doorbell dword
 615  *
 616  * @adev: amdgpu_device pointer
 617  * @index: doorbell index
 618  * @v: value to write
 619  *
 620  * Writes @v to the doorbell aperture at the
 621  * requested doorbell index (CIK).
 622  */
 623 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 624 {
 625         if (amdgpu_device_skip_hw_access(adev))
 626                 return;
 627
 628         if (index < adev->doorbell.num_doorbells) {
 629                 writel(v, adev->doorbell.ptr + index);
 630         } else {
 631                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 632         }
 633 }
 634
 635 /**
 636  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 637  *
 638  * @adev: amdgpu_device pointer
 639  * @index: doorbell index
 640  *
 641  * Returns the value in the doorbell aperture at the
 642  * requested doorbell index (VEGA10+).
 643  */
 644 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 645 {
 646         if (amdgpu_device_skip_hw_access(adev))
 647                 return 0;
 648
 649         if (index < adev->doorbell.num_doorbells) {
 650                 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
 651         } else {
 652                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 653                 return 0;
 654         }
 655 }
 656
 657 /**
 658  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 659  *
 660  * @adev: amdgpu_device pointer
 661  * @index: doorbell index
 662  * @v: value to write
 663  *
 664  * Writes @v to the doorbell aperture at the
 665  * requested doorbell index (VEGA10+).
 666  */
 667 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 668 {
 669         if (amdgpu_device_skip_hw_access(adev))
 670                 return;
 671
 672         if (index < adev->doorbell.num_doorbells) {
 673                 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
 674         } else {
 675                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 676         }
 677 }
 678
 679 /**
 680  * amdgpu_device_indirect_rreg - read an indirect register
 681  *
 682  * @adev: amdgpu_device pointer
 683  * @reg_addr: indirect register address to read from
 684  *
 685  * Returns the value of indirect register @reg_addr
 686  */
 687 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
 688                                 u32 reg_addr)
 689 {
 690         unsigned long flags, pcie_index, pcie_data;
 691         void __iomem *pcie_index_offset;
 692         void __iomem *pcie_data_offset;
 693         u32 r;
 694
 695         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 696         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 697
 698         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 699         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 700         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 701
 702         writel(reg_addr, pcie_index_offset);
 703         readl(pcie_index_offset);
 704         r = readl(pcie_data_offset);
 705         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 706
 707         return r;
 708 }
 709
 710 /**
 711  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 712  *
 713  * @adev: amdgpu_device pointer
 714  * @reg_addr: indirect register address to read from
 715  *
 716  * Returns the value of indirect register @reg_addr
 717  */
 718 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
 719                                   u32 reg_addr)
 720 {
 721         unsigned long flags, pcie_index, pcie_data;
 722         void __iomem *pcie_index_offset;
 723         void __iomem *pcie_data_offset;
 724         u64 r;
 725
 726         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 727         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 728
 729         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 730         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 731         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 732
 733         /* read low 32 bits */
 734         writel(reg_addr, pcie_index_offset);
 735         readl(pcie_index_offset);
 736         r = readl(pcie_data_offset);
 737         /* read high 32 bits */
 738         writel(reg_addr + 4, pcie_index_offset);
 739         readl(pcie_index_offset);
 740         r |= ((u64)readl(pcie_data_offset) << 32);
 741         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 742
 743         return r;
 744 }
 745
 746 /**
 747  * amdgpu_device_indirect_wreg - write an indirect register address
 748  *
 749  * @adev: amdgpu_device pointer
 750  * @pcie_index: mmio register offset
 751  * @pcie_data: mmio register offset
 752  * @reg_addr: indirect register offset
 753  * @reg_data: indirect register data
 754  *
 755  */
 756 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
 757                                  u32 reg_addr, u32 reg_data)
 758 {
 759         unsigned long flags, pcie_index, pcie_data;
 760         void __iomem *pcie_index_offset;
 761         void __iomem *pcie_data_offset;
 762
 763         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 764         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 765
 766         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 767         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 768         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 769
 770         writel(reg_addr, pcie_index_offset);
 771         readl(pcie_index_offset);
 772         writel(reg_data, pcie_data_offset);
 773         readl(pcie_data_offset);
 774         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 775 }
 776
 777 /**
 778  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 779  *
 780  * @adev: amdgpu_device pointer
 781  * @pcie_index: mmio register offset
 782  * @pcie_data: mmio register offset
 783  * @reg_addr: indirect register offset
 784  * @reg_data: indirect register data
 785  *
 786  */
 787 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 788                                    u32 reg_addr, u64 reg_data)
 789 {
 790         unsigned long flags, pcie_index, pcie_data;
 791         void __iomem *pcie_index_offset;
 792         void __iomem *pcie_data_offset;
 793
 794         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 795         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 796
 797         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 798         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 799         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 800
 801         /* write low 32 bits */
 802         writel(reg_addr, pcie_index_offset);
 803         readl(pcie_index_offset);
 804         writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
 805         readl(pcie_data_offset);
 806         /* write high 32 bits */
 807         writel(reg_addr + 4, pcie_index_offset);
 808         readl(pcie_index_offset);
 809         writel((u32)(reg_data >> 32), pcie_data_offset);
 810         readl(pcie_data_offset);
 811         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 812 }
 813
 814 /**
 815  * amdgpu_device_get_rev_id - query device rev_id
 816  *
 817  * @adev: amdgpu_device pointer
 818  *
 819  * Return device rev_id
 820  */
 821 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
 822 {
 823         return adev->nbio.funcs->get_rev_id(adev);
 824 }
 825
 826 /**
 827  * amdgpu_invalid_rreg - dummy reg read function
 828  *
 829  * @adev: amdgpu_device pointer
 830  * @reg: offset of register
 831  *
 832  * Dummy register read function.  Used for register blocks
 833  * that certain asics don't have (all asics).
 834  * Returns the value in the register.
 835  */
 836 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 837 {
 838         DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
 839         BUG();
 840         return 0;
 841 }
 842
 843 /**
 844  * amdgpu_invalid_wreg - dummy reg write function
 845  *
 846  * @adev: amdgpu_device pointer
 847  * @reg: offset of register
 848  * @v: value to write to the register
 849  *
 850  * Dummy register read function.  Used for register blocks
 851  * that certain asics don't have (all asics).
 852  */
 853 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 854 {
 855         DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
 856                   reg, v);
 857         BUG();
 858 }
 859
 860 /**
 861  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 862  *
 863  * @adev: amdgpu_device pointer
 864  * @reg: offset of register
 865  *
 866  * Dummy register read function.  Used for register blocks
 867  * that certain asics don't have (all asics).
 868  * Returns the value in the register.
 869  */
 870 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
 871 {
 872         DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
 873         BUG();
 874         return 0;
 875 }
 876
 877 /**
 878  * amdgpu_invalid_wreg64 - dummy reg write function
 879  *
 880  * @adev: amdgpu_device pointer
 881  * @reg: offset of register
 882  * @v: value to write to the register
 883  *
 884  * Dummy register read function.  Used for register blocks
 885  * that certain asics don't have (all asics).
 886  */
 887 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
 888 {
 889         DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
 890                   reg, v);
 891         BUG();
 892 }
 893
 894 /**
 895  * amdgpu_block_invalid_rreg - dummy reg read function
 896  *
 897  * @adev: amdgpu_device pointer
 898  * @block: offset of instance
 899  * @reg: offset of register
 900  *
 901  * Dummy register read function.  Used for register blocks
 902  * that certain asics don't have (all asics).
 903  * Returns the value in the register.
 904  */
 905 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 906                                           uint32_t block, uint32_t reg)
 907 {
 908         DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
 909                   reg, block);
 910         BUG();
 911         return 0;
 912 }
 913
 914 /**
 915  * amdgpu_block_invalid_wreg - dummy reg write function
 916  *
 917  * @adev: amdgpu_device pointer
 918  * @block: offset of instance
 919  * @reg: offset of register
 920  * @v: value to write to the register
 921  *
 922  * Dummy register read function.  Used for register blocks
 923  * that certain asics don't have (all asics).
 924  */
 925 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 926                                       uint32_t block,
 927                                       uint32_t reg, uint32_t v)
 928 {
 929         DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
 930                   reg, block, v);
 931         BUG();
 932 }
 933
 934 /**
 935  * amdgpu_device_asic_init - Wrapper for atom asic_init
 936  *
 937  * @adev: amdgpu_device pointer
 938  *
 939  * Does any asic specific work and then calls atom asic init.
 940  */
 941 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 942 {
 943         amdgpu_asic_pre_asic_init(adev);
 944
 945         if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
 946                 return amdgpu_atomfirmware_asic_init(adev, true);
 947         else
 948                 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
 949 }
 950
 951 /**
 952  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 953  *
 954  * @adev: amdgpu_device pointer
 955  *
 956  * Allocates a scratch page of VRAM for use by various things in the
 957  * driver.
 958  */
 959 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
 960 {
 961         return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
 962                                        AMDGPU_GEM_DOMAIN_VRAM |
 963                                        AMDGPU_GEM_DOMAIN_GTT,
 964                                        &adev->mem_scratch.robj,
 965                                        &adev->mem_scratch.gpu_addr,
 966                                        (void **)&adev->mem_scratch.ptr);
 967 }
 968
 969 /**
 970  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 971  *
 972  * @adev: amdgpu_device pointer
 973  *
 974  * Frees the VRAM scratch page.
 975  */
 976 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
 977 {
 978         amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
 979 }
 980
 981 /**
 982  * amdgpu_device_program_register_sequence - program an array of registers.
 983  *
 984  * @adev: amdgpu_device pointer
 985  * @registers: pointer to the register array
 986  * @array_size: size of the register array
 987  *
 988  * Programs an array or registers with and and or masks.
 989  * This is a helper for setting golden registers.
 990  */
 991 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 992                                              const u32 *registers,
 993                                              const u32 array_size)
 994 {
 995         u32 tmp, reg, and_mask, or_mask;
 996         int i;
 997
 998         if (array_size % 3)
 999                 return;
1000
1001         for (i = 0; i < array_size; i +=3) {
1002                 reg = registers[i + 0];
1003                 and_mask = registers[i + 1];
1004                 or_mask = registers[i + 2];
1005
1006                 if (and_mask == 0xffffffff) {
1007                         tmp = or_mask;
1008                 } else {
1009                         tmp = RREG32(reg);
1010                         tmp &= ~and_mask;
1011                         if (adev->family >= AMDGPU_FAMILY_AI)
1012                                 tmp |= (or_mask & and_mask);
1013                         else
1014                                 tmp |= or_mask;
1015                 }
1016                 WREG32(reg, tmp);
1017         }
1018 }
1019
1020 /**
1021  * amdgpu_device_pci_config_reset - reset the GPU
1022  *
1023  * @adev: amdgpu_device pointer
1024  *
1025  * Resets the GPU using the pci config reset sequence.
1026  * Only applicable to asics prior to vega10.
1027  */
1028 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1029 {
1030         pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1031 }
1032
1033 /**
1034  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1035  *
1036  * @adev: amdgpu_device pointer
1037  *
1038  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1039  */
1040 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1041 {
1042         return pci_reset_function(adev->pdev);
1043 }
1044
1045 /*
1046  * GPU doorbell aperture helpers function.
1047  */
1048 /**
1049  * amdgpu_device_doorbell_init - Init doorbell driver information.
1050  *
1051  * @adev: amdgpu_device pointer
1052  *
1053  * Init doorbell driver information (CIK)
1054  * Returns 0 on success, error on failure.
1055  */
1056 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1057 {
1058
1059         /* No doorbell on SI hardware generation */
1060         if (adev->asic_type < CHIP_BONAIRE) {
1061                 adev->doorbell.base = 0;
1062                 adev->doorbell.size = 0;
1063                 adev->doorbell.num_doorbells = 0;
1064                 adev->doorbell.ptr = NULL;
1065                 return 0;
1066         }
1067
1068         if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1069                 return -EINVAL;
1070
1071         amdgpu_asic_init_doorbell_index(adev);
1072
1073         /* doorbell bar mapping */
1074         adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1075         adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1076
1077         if (adev->enable_mes) {
1078                 adev->doorbell.num_doorbells =
1079                         adev->doorbell.size / sizeof(u32);
1080         } else {
1081                 adev->doorbell.num_doorbells =
1082                         min_t(u32, adev->doorbell.size / sizeof(u32),
1083                               adev->doorbell_index.max_assignment+1);
1084                 if (adev->doorbell.num_doorbells == 0)
1085                         return -EINVAL;
1086
1087                 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1088                  * paging queue doorbell use the second page. The
1089                  * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1090                  * doorbells are in the first page. So with paging queue enabled,
1091                  * the max num_doorbells should + 1 page (0x400 in dword)
1092                  */
1093                 if (adev->asic_type >= CHIP_VEGA10)
1094                         adev->doorbell.num_doorbells += 0x400;
1095         }
1096
1097         adev->doorbell.ptr = ioremap(adev->doorbell.base,
1098                                      adev->doorbell.num_doorbells *
1099                                      sizeof(u32));
1100         if (adev->doorbell.ptr == NULL)
1101                 return -ENOMEM;
1102
1103         return 0;
1104 }
1105
1106 /**
1107  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Tear down doorbell driver information (CIK)
1112  */
1113 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1114 {
1115         iounmap(adev->doorbell.ptr);
1116         adev->doorbell.ptr = NULL;
1117 }
1118
1119
1120
1121 /*
1122  * amdgpu_device_wb_*()
1123  * Writeback is the method by which the GPU updates special pages in memory
1124  * with the status of certain GPU events (fences, ring pointers,etc.).
1125  */
1126
1127 /**
1128  * amdgpu_device_wb_fini - Disable Writeback and free memory
1129  *
1130  * @adev: amdgpu_device pointer
1131  *
1132  * Disables Writeback and frees the Writeback memory (all asics).
1133  * Used at driver shutdown.
1134  */
1135 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1136 {
1137         if (adev->wb.wb_obj) {
1138                 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1139                                       &adev->wb.gpu_addr,
1140                                       (void **)&adev->wb.wb);
1141                 adev->wb.wb_obj = NULL;
1142         }
1143 }
1144
1145 /**
1146  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1147  *
1148  * @adev: amdgpu_device pointer
1149  *
1150  * Initializes writeback and allocates writeback memory (all asics).
1151  * Used at driver startup.
1152  * Returns 0 on success or an -error on failure.
1153  */
1154 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1155 {
1156         int r;
1157
1158         if (adev->wb.wb_obj == NULL) {
1159                 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1160                 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1161                                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1162                                             &adev->wb.wb_obj, &adev->wb.gpu_addr,
1163                                             (void **)&adev->wb.wb);
1164                 if (r) {
1165                         dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1166                         return r;
1167                 }
1168
1169                 adev->wb.num_wb = AMDGPU_MAX_WB;
1170                 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1171
1172                 /* clear wb memory */
1173                 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1174         }
1175
1176         return 0;
1177 }
1178
1179 /**
1180  * amdgpu_device_wb_get - Allocate a wb entry
1181  *
1182  * @adev: amdgpu_device pointer
1183  * @wb: wb index
1184  *
1185  * Allocate a wb slot for use by the driver (all asics).
1186  * Returns 0 on success or -EINVAL on failure.
1187  */
1188 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1189 {
1190         unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1191
1192         if (offset < adev->wb.num_wb) {
1193                 __set_bit(offset, adev->wb.used);
1194                 *wb = offset << 3; /* convert to dw offset */
1195                 return 0;
1196         } else {
1197                 return -EINVAL;
1198         }
1199 }
1200
1201 /**
1202  * amdgpu_device_wb_free - Free a wb entry
1203  *
1204  * @adev: amdgpu_device pointer
1205  * @wb: wb index
1206  *
1207  * Free a wb slot allocated for use by the driver (all asics)
1208  */
1209 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1210 {
1211         wb >>= 3;
1212         if (wb < adev->wb.num_wb)
1213                 __clear_bit(wb, adev->wb.used);
1214 }
1215
1216 /**
1217  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1218  *
1219  * @adev: amdgpu_device pointer
1220  *
1221  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1222  * to fail, but if any of the BARs is not accessible after the size we abort
1223  * driver loading by returning -ENODEV.
1224  */
1225 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1226 {
1227         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1228         struct pci_bus *root;
1229         struct resource *res;
1230         unsigned i;
1231         u16 cmd;
1232         int r;
1233
1234         /* Bypass for VF */
1235         if (amdgpu_sriov_vf(adev))
1236                 return 0;
1237
1238         /* skip if the bios has already enabled large BAR */
1239         if (adev->gmc.real_vram_size &&
1240             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1241                 return 0;
1242
1243         /* Check if the root BUS has 64bit memory resources */
1244         root = adev->pdev->bus;
1245         while (root->parent)
1246                 root = root->parent;
1247
1248         pci_bus_for_each_resource(root, res, i) {
1249                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1250                     res->start > 0x100000000ull)
1251                         break;
1252         }
1253
1254         /* Trying to resize is pointless without a root hub window above 4GB */
1255         if (!res)
1256                 return 0;
1257
1258         /* Limit the BAR size to what is available */
1259         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1260                         rbar_size);
1261
1262         /* Disable memory decoding while we change the BAR addresses and size */
1263         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1264         pci_write_config_word(adev->pdev, PCI_COMMAND,
1265                               cmd & ~PCI_COMMAND_MEMORY);
1266
1267         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1268         amdgpu_device_doorbell_fini(adev);
1269         if (adev->asic_type >= CHIP_BONAIRE)
1270                 pci_release_resource(adev->pdev, 2);
1271
1272         pci_release_resource(adev->pdev, 0);
1273
1274         r = pci_resize_resource(adev->pdev, 0, rbar_size);
1275         if (r == -ENOSPC)
1276                 DRM_INFO("Not enough PCI address space for a large BAR.");
1277         else if (r && r != -ENOTSUPP)
1278                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1279
1280         pci_assign_unassigned_bus_resources(adev->pdev->bus);
1281
1282         /* When the doorbell or fb BAR isn't available we have no chance of
1283          * using the device.
1284          */
1285         r = amdgpu_device_doorbell_init(adev);
1286         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1287                 return -ENODEV;
1288
1289         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1290
1291         return 0;
1292 }
1293
1294 /*
1295  * GPU helpers function.
1296  */
1297 /**
1298  * amdgpu_device_need_post - check if the hw need post or not
1299  *
1300  * @adev: amdgpu_device pointer
1301  *
1302  * Check if the asic has been initialized (all asics) at driver startup
1303  * or post is needed if  hw reset is performed.
1304  * Returns true if need or false if not.
1305  */
1306 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1307 {
1308         uint32_t reg;
1309
1310         if (amdgpu_sriov_vf(adev))
1311                 return false;
1312
1313         if (amdgpu_passthrough(adev)) {
1314                 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1315                  * some old smc fw still need driver do vPost otherwise gpu hang, while
1316                  * those smc fw version above 22.15 doesn't have this flaw, so we force
1317                  * vpost executed for smc version below 22.15
1318                  */
1319                 if (adev->asic_type == CHIP_FIJI) {
1320                         int err;
1321                         uint32_t fw_ver;
1322                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1323                         /* force vPost if error occured */
1324                         if (err)
1325                                 return true;
1326
1327                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1328                         if (fw_ver < 0x00160e00)
1329                                 return true;
1330                 }
1331         }
1332
1333         /* Don't post if we need to reset whole hive on init */
1334         if (adev->gmc.xgmi.pending_reset)
1335                 return false;
1336
1337         if (adev->has_hw_reset) {
1338                 adev->has_hw_reset = false;
1339                 return true;
1340         }
1341
1342         /* bios scratch used on CIK+ */
1343         if (adev->asic_type >= CHIP_BONAIRE)
1344                 return amdgpu_atombios_scratch_need_asic_init(adev);
1345
1346         /* check MEM_SIZE for older asics */
1347         reg = amdgpu_asic_get_config_memsize(adev);
1348
1349         if ((reg != 0) && (reg != 0xffffffff))
1350                 return false;
1351
1352         return true;
1353 }
1354
1355 /**
1356  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1357  *
1358  * @adev: amdgpu_device pointer
1359  *
1360  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1361  * be set for this device.
1362  *
1363  * Returns true if it should be used or false if not.
1364  */
1365 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1366 {
1367         switch (amdgpu_aspm) {
1368         case -1:
1369                 break;
1370         case 0:
1371                 return false;
1372         case 1:
1373                 return true;
1374         default:
1375                 return false;
1376         }
1377         return pcie_aspm_enabled(adev->pdev);
1378 }
1379
1380 bool amdgpu_device_aspm_support_quirk(void)
1381 {
1382 #if IS_ENABLED(CONFIG_X86)
1383         struct cpuinfo_x86 *c = &cpu_data(0);
1384
1385         return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1386 #else
1387         return true;
1388 #endif
1389 }
1390
1391 /* if we get transitioned to only one device, take VGA back */
1392 /**
1393  * amdgpu_device_vga_set_decode - enable/disable vga decode
1394  *
1395  * @pdev: PCI device pointer
1396  * @state: enable/disable vga decode
1397  *
1398  * Enable/disable vga decode (all asics).
1399  * Returns VGA resource flags.
1400  */
1401 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1402                 bool state)
1403 {
1404         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1405         amdgpu_asic_set_vga_state(adev, state);
1406         if (state)
1407                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409         else
1410                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 }
1412
1413 /**
1414  * amdgpu_device_check_block_size - validate the vm block size
1415  *
1416  * @adev: amdgpu_device pointer
1417  *
1418  * Validates the vm block size specified via module parameter.
1419  * The vm block size defines number of bits in page table versus page directory,
1420  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1421  * page table and the remaining bits are in the page directory.
1422  */
1423 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1424 {
1425         /* defines number of bits in page table versus page directory,
1426          * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427          * page table and the remaining bits are in the page directory */
1428         if (amdgpu_vm_block_size == -1)
1429                 return;
1430
1431         if (amdgpu_vm_block_size < 9) {
1432                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1433                          amdgpu_vm_block_size);
1434                 amdgpu_vm_block_size = -1;
1435         }
1436 }
1437
1438 /**
1439  * amdgpu_device_check_vm_size - validate the vm size
1440  *
1441  * @adev: amdgpu_device pointer
1442  *
1443  * Validates the vm size in GB specified via module parameter.
1444  * The VM size is the size of the GPU virtual memory space in GB.
1445  */
1446 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1447 {
1448         /* no need to check the default value */
1449         if (amdgpu_vm_size == -1)
1450                 return;
1451
1452         if (amdgpu_vm_size < 1) {
1453                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1454                          amdgpu_vm_size);
1455                 amdgpu_vm_size = -1;
1456         }
1457 }
1458
1459 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1460 {
1461         struct sysinfo si;
1462         bool is_os_64 = (sizeof(void *) == 8);
1463         uint64_t total_memory;
1464         uint64_t dram_size_seven_GB = 0x1B8000000;
1465         uint64_t dram_size_three_GB = 0xB8000000;
1466
1467         if (amdgpu_smu_memory_pool_size == 0)
1468                 return;
1469
1470         if (!is_os_64) {
1471                 DRM_WARN("Not 64-bit OS, feature not supported\n");
1472                 goto def_value;
1473         }
1474         si_meminfo(&si);
1475         total_memory = (uint64_t)si.totalram * si.mem_unit;
1476
1477         if ((amdgpu_smu_memory_pool_size == 1) ||
1478                 (amdgpu_smu_memory_pool_size == 2)) {
1479                 if (total_memory < dram_size_three_GB)
1480                         goto def_value1;
1481         } else if ((amdgpu_smu_memory_pool_size == 4) ||
1482                 (amdgpu_smu_memory_pool_size == 8)) {
1483                 if (total_memory < dram_size_seven_GB)
1484                         goto def_value1;
1485         } else {
1486                 DRM_WARN("Smu memory pool size not supported\n");
1487                 goto def_value;
1488         }
1489         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1490
1491         return;
1492
1493 def_value1:
1494         DRM_WARN("No enough system memory\n");
1495 def_value:
1496         adev->pm.smu_prv_buffer_size = 0;
1497 }
1498
1499 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1500 {
1501         if (!(adev->flags & AMD_IS_APU) ||
1502             adev->asic_type < CHIP_RAVEN)
1503                 return 0;
1504
1505         switch (adev->asic_type) {
1506         case CHIP_RAVEN:
1507                 if (adev->pdev->device == 0x15dd)
1508                         adev->apu_flags |= AMD_APU_IS_RAVEN;
1509                 if (adev->pdev->device == 0x15d8)
1510                         adev->apu_flags |= AMD_APU_IS_PICASSO;
1511                 break;
1512         case CHIP_RENOIR:
1513                 if ((adev->pdev->device == 0x1636) ||
1514                     (adev->pdev->device == 0x164c))
1515                         adev->apu_flags |= AMD_APU_IS_RENOIR;
1516                 else
1517                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1518                 break;
1519         case CHIP_VANGOGH:
1520                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1521                 break;
1522         case CHIP_YELLOW_CARP:
1523                 break;
1524         case CHIP_CYAN_SKILLFISH:
1525                 if ((adev->pdev->device == 0x13FE) ||
1526                     (adev->pdev->device == 0x143F))
1527                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1528                 break;
1529         default:
1530                 break;
1531         }
1532
1533         return 0;
1534 }
1535
1536 /**
1537  * amdgpu_device_check_arguments - validate module params
1538  *
1539  * @adev: amdgpu_device pointer
1540  *
1541  * Validates certain module parameters and updates
1542  * the associated values used by the driver (all asics).
1543  */
1544 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1545 {
1546         if (amdgpu_sched_jobs < 4) {
1547                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548                          amdgpu_sched_jobs);
1549                 amdgpu_sched_jobs = 4;
1550         } else if (!is_power_of_2(amdgpu_sched_jobs)){
1551                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552                          amdgpu_sched_jobs);
1553                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554         }
1555
1556         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1557                 /* gart size must be greater or equal to 32M */
1558                 dev_warn(adev->dev, "gart size (%d) too small\n",
1559                          amdgpu_gart_size);
1560                 amdgpu_gart_size = -1;
1561         }
1562
1563         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1564                 /* gtt size must be greater or equal to 32M */
1565                 dev_warn(adev->dev, "gtt size (%d) too small\n",
1566                                  amdgpu_gtt_size);
1567                 amdgpu_gtt_size = -1;
1568         }
1569
1570         /* valid range is between 4 and 9 inclusive */
1571         if (amdgpu_vm_fragment_size != -1 &&
1572             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573                 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574                 amdgpu_vm_fragment_size = -1;
1575         }
1576
1577         if (amdgpu_sched_hw_submission < 2) {
1578                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579                          amdgpu_sched_hw_submission);
1580                 amdgpu_sched_hw_submission = 2;
1581         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583                          amdgpu_sched_hw_submission);
1584                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585         }
1586
1587         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589                 amdgpu_reset_method = -1;
1590         }
1591
1592         amdgpu_device_check_smu_prv_buffer_size(adev);
1593
1594         amdgpu_device_check_vm_size(adev);
1595
1596         amdgpu_device_check_block_size(adev);
1597
1598         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1599
1600         return 0;
1601 }
1602
1603 /**
1604  * amdgpu_switcheroo_set_state - set switcheroo state
1605  *
1606  * @pdev: pci dev pointer
1607  * @state: vga_switcheroo state
1608  *
1609  * Callback for the switcheroo driver.  Suspends or resumes
1610  * the asics before or after it is powered up using ACPI methods.
1611  */
1612 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1613                                         enum vga_switcheroo_state state)
1614 {
1615         struct drm_device *dev = pci_get_drvdata(pdev);
1616         int r;
1617
1618         if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1619                 return;
1620
1621         if (state == VGA_SWITCHEROO_ON) {
1622                 pr_info("switched on\n");
1623                 /* don't suspend or resume card normally */
1624                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1625
1626                 pci_set_power_state(pdev, PCI_D0);
1627                 amdgpu_device_load_pci_state(pdev);
1628                 r = pci_enable_device(pdev);
1629                 if (r)
1630                         DRM_WARN("pci_enable_device failed (%d)\n", r);
1631                 amdgpu_device_resume(dev, true);
1632
1633                 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1634         } else {
1635                 pr_info("switched off\n");
1636                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1637                 amdgpu_device_suspend(dev, true);
1638                 amdgpu_device_cache_pci_state(pdev);
1639                 /* Shut down the device */
1640                 pci_disable_device(pdev);
1641                 pci_set_power_state(pdev, PCI_D3cold);
1642                 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1643         }
1644 }
1645
1646 /**
1647  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1648  *
1649  * @pdev: pci dev pointer
1650  *
1651  * Callback for the switcheroo driver.  Check of the switcheroo
1652  * state can be changed.
1653  * Returns true if the state can be changed, false if not.
1654  */
1655 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1656 {
1657         struct drm_device *dev = pci_get_drvdata(pdev);
1658
1659         /*
1660         * FIXME: open_count is protected by drm_global_mutex but that would lead to
1661         * locking inversion with the driver load path. And the access here is
1662         * completely racy anyway. So don't bother with locking for now.
1663         */
1664         return atomic_read(&dev->open_count) == 0;
1665 }
1666
1667 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1668         .set_gpu_state = amdgpu_switcheroo_set_state,
1669         .reprobe = NULL,
1670         .can_switch = amdgpu_switcheroo_can_switch,
1671 };
1672
1673 /**
1674  * amdgpu_device_ip_set_clockgating_state - set the CG state
1675  *
1676  * @dev: amdgpu_device pointer
1677  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1678  * @state: clockgating state (gate or ungate)
1679  *
1680  * Sets the requested clockgating state for all instances of
1681  * the hardware IP specified.
1682  * Returns the error code from the last instance.
1683  */
1684 int amdgpu_device_ip_set_clockgating_state(void *dev,
1685                                            enum amd_ip_block_type block_type,
1686                                            enum amd_clockgating_state state)
1687 {
1688         struct amdgpu_device *adev = dev;
1689         int i, r = 0;
1690
1691         for (i = 0; i < adev->num_ip_blocks; i++) {
1692                 if (!adev->ip_blocks[i].status.valid)
1693                         continue;
1694                 if (adev->ip_blocks[i].version->type != block_type)
1695                         continue;
1696                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1697                         continue;
1698                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1699                         (void *)adev, state);
1700                 if (r)
1701                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1702                                   adev->ip_blocks[i].version->funcs->name, r);
1703         }
1704         return r;
1705 }
1706
1707 /**
1708  * amdgpu_device_ip_set_powergating_state - set the PG state
1709  *
1710  * @dev: amdgpu_device pointer
1711  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1712  * @state: powergating state (gate or ungate)
1713  *
1714  * Sets the requested powergating state for all instances of
1715  * the hardware IP specified.
1716  * Returns the error code from the last instance.
1717  */
1718 int amdgpu_device_ip_set_powergating_state(void *dev,
1719                                            enum amd_ip_block_type block_type,
1720                                            enum amd_powergating_state state)
1721 {
1722         struct amdgpu_device *adev = dev;
1723         int i, r = 0;
1724
1725         for (i = 0; i < adev->num_ip_blocks; i++) {
1726                 if (!adev->ip_blocks[i].status.valid)
1727                         continue;
1728                 if (adev->ip_blocks[i].version->type != block_type)
1729                         continue;
1730                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1731                         continue;
1732                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1733                         (void *)adev, state);
1734                 if (r)
1735                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1736                                   adev->ip_blocks[i].version->funcs->name, r);
1737         }
1738         return r;
1739 }
1740
1741 /**
1742  * amdgpu_device_ip_get_clockgating_state - get the CG state
1743  *
1744  * @adev: amdgpu_device pointer
1745  * @flags: clockgating feature flags
1746  *
1747  * Walks the list of IPs on the device and updates the clockgating
1748  * flags for each IP.
1749  * Updates @flags with the feature flags for each hardware IP where
1750  * clockgating is enabled.
1751  */
1752 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1753                                             u64 *flags)
1754 {
1755         int i;
1756
1757         for (i = 0; i < adev->num_ip_blocks; i++) {
1758                 if (!adev->ip_blocks[i].status.valid)
1759                         continue;
1760                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1761                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1762         }
1763 }
1764
1765 /**
1766  * amdgpu_device_ip_wait_for_idle - wait for idle
1767  *
1768  * @adev: amdgpu_device pointer
1769  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1770  *
1771  * Waits for the request hardware IP to be idle.
1772  * Returns 0 for success or a negative error code on failure.
1773  */
1774 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1775                                    enum amd_ip_block_type block_type)
1776 {
1777         int i, r;
1778
1779         for (i = 0; i < adev->num_ip_blocks; i++) {
1780                 if (!adev->ip_blocks[i].status.valid)
1781                         continue;
1782                 if (adev->ip_blocks[i].version->type == block_type) {
1783                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1784                         if (r)
1785                                 return r;
1786                         break;
1787                 }
1788         }
1789         return 0;
1790
1791 }
1792
1793 /**
1794  * amdgpu_device_ip_is_idle - is the hardware IP idle
1795  *
1796  * @adev: amdgpu_device pointer
1797  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1798  *
1799  * Check if the hardware IP is idle or not.
1800  * Returns true if it the IP is idle, false if not.
1801  */
1802 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1803                               enum amd_ip_block_type block_type)
1804 {
1805         int i;
1806
1807         for (i = 0; i < adev->num_ip_blocks; i++) {
1808                 if (!adev->ip_blocks[i].status.valid)
1809                         continue;
1810                 if (adev->ip_blocks[i].version->type == block_type)
1811                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1812         }
1813         return true;
1814
1815 }
1816
1817 /**
1818  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1819  *
1820  * @adev: amdgpu_device pointer
1821  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1822  *
1823  * Returns a pointer to the hardware IP block structure
1824  * if it exists for the asic, otherwise NULL.
1825  */
1826 struct amdgpu_ip_block *
1827 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1828                               enum amd_ip_block_type type)
1829 {
1830         int i;
1831
1832         for (i = 0; i < adev->num_ip_blocks; i++)
1833                 if (adev->ip_blocks[i].version->type == type)
1834                         return &adev->ip_blocks[i];
1835
1836         return NULL;
1837 }
1838
1839 /**
1840  * amdgpu_device_ip_block_version_cmp
1841  *
1842  * @adev: amdgpu_device pointer
1843  * @type: enum amd_ip_block_type
1844  * @major: major version
1845  * @minor: minor version
1846  *
1847  * return 0 if equal or greater
1848  * return 1 if smaller or the ip_block doesn't exist
1849  */
1850 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1851                                        enum amd_ip_block_type type,
1852                                        u32 major, u32 minor)
1853 {
1854         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1855
1856         if (ip_block && ((ip_block->version->major > major) ||
1857                         ((ip_block->version->major == major) &&
1858                         (ip_block->version->minor >= minor))))
1859                 return 0;
1860
1861         return 1;
1862 }
1863
1864 /**
1865  * amdgpu_device_ip_block_add
1866  *
1867  * @adev: amdgpu_device pointer
1868  * @ip_block_version: pointer to the IP to add
1869  *
1870  * Adds the IP block driver information to the collection of IPs
1871  * on the asic.
1872  */
1873 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1874                                const struct amdgpu_ip_block_version *ip_block_version)
1875 {
1876         if (!ip_block_version)
1877                 return -EINVAL;
1878
1879         switch (ip_block_version->type) {
1880         case AMD_IP_BLOCK_TYPE_VCN:
1881                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1882                         return 0;
1883                 break;
1884         case AMD_IP_BLOCK_TYPE_JPEG:
1885                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1886                         return 0;
1887                 break;
1888         default:
1889                 break;
1890         }
1891
1892         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1893                   ip_block_version->funcs->name);
1894
1895         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1896
1897         return 0;
1898 }
1899
1900 /**
1901  * amdgpu_device_enable_virtual_display - enable virtual display feature
1902  *
1903  * @adev: amdgpu_device pointer
1904  *
1905  * Enabled the virtual display feature if the user has enabled it via
1906  * the module parameter virtual_display.  This feature provides a virtual
1907  * display hardware on headless boards or in virtualized environments.
1908  * This function parses and validates the configuration string specified by
1909  * the user and configues the virtual display configuration (number of
1910  * virtual connectors, crtcs, etc.) specified.
1911  */
1912 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1913 {
1914         adev->enable_virtual_display = false;
1915
1916         if (amdgpu_virtual_display) {
1917                 const char *pci_address_name = pci_name(adev->pdev);
1918                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1919
1920                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1921                 pciaddstr_tmp = pciaddstr;
1922                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1923                         pciaddname = strsep(&pciaddname_tmp, ",");
1924                         if (!strcmp("all", pciaddname)
1925                             || !strcmp(pci_address_name, pciaddname)) {
1926                                 long num_crtc;
1927                                 int res = -1;
1928
1929                                 adev->enable_virtual_display = true;
1930
1931                                 if (pciaddname_tmp)
1932                                         res = kstrtol(pciaddname_tmp, 10,
1933                                                       &num_crtc);
1934
1935                                 if (!res) {
1936                                         if (num_crtc < 1)
1937                                                 num_crtc = 1;
1938                                         if (num_crtc > 6)
1939                                                 num_crtc = 6;
1940                                         adev->mode_info.num_crtc = num_crtc;
1941                                 } else {
1942                                         adev->mode_info.num_crtc = 1;
1943                                 }
1944                                 break;
1945                         }
1946                 }
1947
1948                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1949                          amdgpu_virtual_display, pci_address_name,
1950                          adev->enable_virtual_display, adev->mode_info.num_crtc);
1951
1952                 kfree(pciaddstr);
1953         }
1954 }
1955
1956 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1957 {
1958         if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1959                 adev->mode_info.num_crtc = 1;
1960                 adev->enable_virtual_display = true;
1961                 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1962                          adev->enable_virtual_display, adev->mode_info.num_crtc);
1963         }
1964 }
1965
1966 /**
1967  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1968  *
1969  * @adev: amdgpu_device pointer
1970  *
1971  * Parses the asic configuration parameters specified in the gpu info
1972  * firmware and makes them availale to the driver for use in configuring
1973  * the asic.
1974  * Returns 0 on success, -EINVAL on failure.
1975  */
1976 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1977 {
1978         const char *chip_name;
1979         char fw_name[40];
1980         int err;
1981         const struct gpu_info_firmware_header_v1_0 *hdr;
1982
1983         adev->firmware.gpu_info_fw = NULL;
1984
1985         if (adev->mman.discovery_bin) {
1986                 /*
1987                  * FIXME: The bounding box is still needed by Navi12, so
1988                  * temporarily read it from gpu_info firmware. Should be dropped
1989                  * when DAL no longer needs it.
1990                  */
1991                 if (adev->asic_type != CHIP_NAVI12)
1992                         return 0;
1993         }
1994
1995         switch (adev->asic_type) {
1996         default:
1997                 return 0;
1998         case CHIP_VEGA10:
1999                 chip_name = "vega10";
2000                 break;
2001         case CHIP_VEGA12:
2002                 chip_name = "vega12";
2003                 break;
2004         case CHIP_RAVEN:
2005                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2006                         chip_name = "raven2";
2007                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2008                         chip_name = "picasso";
2009                 else
2010                         chip_name = "raven";
2011                 break;
2012         case CHIP_ARCTURUS:
2013                 chip_name = "arcturus";
2014                 break;
2015         case CHIP_NAVI12:
2016                 chip_name = "navi12";
2017                 break;
2018         }
2019
2020         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2021         err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2022         if (err) {
2023                 dev_err(adev->dev,
2024                         "Failed to get gpu_info firmware \"%s\"\n",
2025                         fw_name);
2026                 goto out;
2027         }
2028
2029         hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2030         amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2031
2032         switch (hdr->version_major) {
2033         case 1:
2034         {
2035                 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2036                         (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2037                                                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2038
2039                 /*
2040                  * Should be droped when DAL no longer needs it.
2041                  */
2042                 if (adev->asic_type == CHIP_NAVI12)
2043                         goto parse_soc_bounding_box;
2044
2045                 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2046                 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2047                 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2048                 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2049                 adev->gfx.config.max_texture_channel_caches =
2050                         le32_to_cpu(gpu_info_fw->gc_num_tccs);
2051                 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2052                 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2053                 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2054                 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2055                 adev->gfx.config.double_offchip_lds_buf =
2056                         le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2057                 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2058                 adev->gfx.cu_info.max_waves_per_simd =
2059                         le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2060                 adev->gfx.cu_info.max_scratch_slots_per_cu =
2061                         le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2062                 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2063                 if (hdr->version_minor >= 1) {
2064                         const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2065                                 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2066                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067                         adev->gfx.config.num_sc_per_sh =
2068                                 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2069                         adev->gfx.config.num_packer_per_sc =
2070                                 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2071                 }
2072
2073 parse_soc_bounding_box:
2074                 /*
2075                  * soc bounding box info is not integrated in disocovery table,
2076                  * we always need to parse it from gpu info firmware if needed.
2077                  */
2078                 if (hdr->version_minor == 2) {
2079                         const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2080                                 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2081                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2082                         adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2083                 }
2084                 break;
2085         }
2086         default:
2087                 dev_err(adev->dev,
2088                         "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2089                 err = -EINVAL;
2090                 goto out;
2091         }
2092 out:
2093         return err;
2094 }
2095
2096 /**
2097  * amdgpu_device_ip_early_init - run early init for hardware IPs
2098  *
2099  * @adev: amdgpu_device pointer
2100  *
2101  * Early initialization pass for hardware IPs.  The hardware IPs that make
2102  * up each asic are discovered each IP's early_init callback is run.  This
2103  * is the first stage in initializing the asic.
2104  * Returns 0 on success, negative error code on failure.
2105  */
2106 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2107 {
2108         struct drm_device *dev = adev_to_drm(adev);
2109         struct pci_dev *parent;
2110         int i, r;
2111         bool total;
2112
2113         amdgpu_device_enable_virtual_display(adev);
2114
2115         if (amdgpu_sriov_vf(adev)) {
2116                 r = amdgpu_virt_request_full_gpu(adev, true);
2117                 if (r)
2118                         return r;
2119         }
2120
2121         switch (adev->asic_type) {
2122 #ifdef CONFIG_DRM_AMDGPU_SI
2123         case CHIP_VERDE:
2124         case CHIP_TAHITI:
2125         case CHIP_PITCAIRN:
2126         case CHIP_OLAND:
2127         case CHIP_HAINAN:
2128                 adev->family = AMDGPU_FAMILY_SI;
2129                 r = si_set_ip_blocks(adev);
2130                 if (r)
2131                         return r;
2132                 break;
2133 #endif
2134 #ifdef CONFIG_DRM_AMDGPU_CIK
2135         case CHIP_BONAIRE:
2136         case CHIP_HAWAII:
2137         case CHIP_KAVERI:
2138         case CHIP_KABINI:
2139         case CHIP_MULLINS:
2140                 if (adev->flags & AMD_IS_APU)
2141                         adev->family = AMDGPU_FAMILY_KV;
2142                 else
2143                         adev->family = AMDGPU_FAMILY_CI;
2144
2145                 r = cik_set_ip_blocks(adev);
2146                 if (r)
2147                         return r;
2148                 break;
2149 #endif
2150         case CHIP_TOPAZ:
2151         case CHIP_TONGA:
2152         case CHIP_FIJI:
2153         case CHIP_POLARIS10:
2154         case CHIP_POLARIS11:
2155         case CHIP_POLARIS12:
2156         case CHIP_VEGAM:
2157         case CHIP_CARRIZO:
2158         case CHIP_STONEY:
2159                 if (adev->flags & AMD_IS_APU)
2160                         adev->family = AMDGPU_FAMILY_CZ;
2161                 else
2162                         adev->family = AMDGPU_FAMILY_VI;
2163
2164                 r = vi_set_ip_blocks(adev);
2165                 if (r)
2166                         return r;
2167                 break;
2168         default:
2169                 r = amdgpu_discovery_set_ip_blocks(adev);
2170                 if (r)
2171                         return r;
2172                 break;
2173         }
2174
2175         if (amdgpu_has_atpx() &&
2176             (amdgpu_is_atpx_hybrid() ||
2177              amdgpu_has_atpx_dgpu_power_cntl()) &&
2178             ((adev->flags & AMD_IS_APU) == 0) &&
2179             !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2180                 adev->flags |= AMD_IS_PX;
2181
2182         if (!(adev->flags & AMD_IS_APU)) {
2183                 parent = pci_upstream_bridge(adev->pdev);
2184                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2185         }
2186
2187         amdgpu_amdkfd_device_probe(adev);
2188
2189         adev->pm.pp_feature = amdgpu_pp_feature_mask;
2190         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2191                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2192         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2193                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2194
2195         total = true;
2196         for (i = 0; i < adev->num_ip_blocks; i++) {
2197                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2198                         DRM_ERROR("disabled ip block: %d <%s>\n",
2199                                   i, adev->ip_blocks[i].version->funcs->name);
2200                         adev->ip_blocks[i].status.valid = false;
2201                 } else {
2202                         if (adev->ip_blocks[i].version->funcs->early_init) {
2203                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2204                                 if (r == -ENOENT) {
2205                                         adev->ip_blocks[i].status.valid = false;
2206                                 } else if (r) {
2207                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
2208                                                   adev->ip_blocks[i].version->funcs->name, r);
2209                                         total = false;
2210                                 } else {
2211                                         adev->ip_blocks[i].status.valid = true;
2212                                 }
2213                         } else {
2214                                 adev->ip_blocks[i].status.valid = true;
2215                         }
2216                 }
2217                 /* get the vbios after the asic_funcs are set up */
2218                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2219                         r = amdgpu_device_parse_gpu_info_fw(adev);
2220                         if (r)
2221                                 return r;
2222
2223                         /* Read BIOS */
2224                         if (!amdgpu_get_bios(adev))
2225                                 return -EINVAL;
2226
2227                         r = amdgpu_atombios_init(adev);
2228                         if (r) {
2229                                 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2230                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2231                                 return r;
2232                         }
2233
2234                         /*get pf2vf msg info at it's earliest time*/
2235                         if (amdgpu_sriov_vf(adev))
2236                                 amdgpu_virt_init_data_exchange(adev);
2237
2238                 }
2239         }
2240         if (!total)
2241                 return -ENODEV;
2242
2243         adev->cg_flags &= amdgpu_cg_mask;
2244         adev->pg_flags &= amdgpu_pg_mask;
2245
2246         return 0;
2247 }
2248
2249 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2250 {
2251         int i, r;
2252
2253         for (i = 0; i < adev->num_ip_blocks; i++) {
2254                 if (!adev->ip_blocks[i].status.sw)
2255                         continue;
2256                 if (adev->ip_blocks[i].status.hw)
2257                         continue;
2258                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2259                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2260                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2261                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2262                         if (r) {
2263                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2264                                           adev->ip_blocks[i].version->funcs->name, r);
2265                                 return r;
2266                         }
2267                         adev->ip_blocks[i].status.hw = true;
2268                 }
2269         }
2270
2271         return 0;
2272 }
2273
2274 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2275 {
2276         int i, r;
2277
2278         for (i = 0; i < adev->num_ip_blocks; i++) {
2279                 if (!adev->ip_blocks[i].status.sw)
2280                         continue;
2281                 if (adev->ip_blocks[i].status.hw)
2282                         continue;
2283                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2284                 if (r) {
2285                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2286                                   adev->ip_blocks[i].version->funcs->name, r);
2287                         return r;
2288                 }
2289                 adev->ip_blocks[i].status.hw = true;
2290         }
2291
2292         return 0;
2293 }
2294
2295 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2296 {
2297         int r = 0;
2298         int i;
2299         uint32_t smu_version;
2300
2301         if (adev->asic_type >= CHIP_VEGA10) {
2302                 for (i = 0; i < adev->num_ip_blocks; i++) {
2303                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2304                                 continue;
2305
2306                         if (!adev->ip_blocks[i].status.sw)
2307                                 continue;
2308
2309                         /* no need to do the fw loading again if already done*/
2310                         if (adev->ip_blocks[i].status.hw == true)
2311                                 break;
2312
2313                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
2314                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
2315                                 if (r) {
2316                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
2317                                                           adev->ip_blocks[i].version->funcs->name, r);
2318                                         return r;
2319                                 }
2320                         } else {
2321                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2322                                 if (r) {
2323                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2324                                                           adev->ip_blocks[i].version->funcs->name, r);
2325                                         return r;
2326                                 }
2327                         }
2328
2329                         adev->ip_blocks[i].status.hw = true;
2330                         break;
2331                 }
2332         }
2333
2334         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2335                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2336
2337         return r;
2338 }
2339
2340 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2341 {
2342         long timeout;
2343         int r, i;
2344
2345         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2346                 struct amdgpu_ring *ring = adev->rings[i];
2347
2348                 /* No need to setup the GPU scheduler for rings that don't need it */
2349                 if (!ring || ring->no_scheduler)
2350                         continue;
2351
2352                 switch (ring->funcs->type) {
2353                 case AMDGPU_RING_TYPE_GFX:
2354                         timeout = adev->gfx_timeout;
2355                         break;
2356                 case AMDGPU_RING_TYPE_COMPUTE:
2357                         timeout = adev->compute_timeout;
2358                         break;
2359                 case AMDGPU_RING_TYPE_SDMA:
2360                         timeout = adev->sdma_timeout;
2361                         break;
2362                 default:
2363                         timeout = adev->video_timeout;
2364                         break;
2365                 }
2366
2367                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2368                                    ring->num_hw_submission, amdgpu_job_hang_limit,
2369                                    timeout, adev->reset_domain->wq,
2370                                    ring->sched_score, ring->name,
2371                                    adev->dev);
2372                 if (r) {
2373                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
2374                                   ring->name);
2375                         return r;
2376                 }
2377         }
2378
2379         return 0;
2380 }
2381
2382
2383 /**
2384  * amdgpu_device_ip_init - run init for hardware IPs
2385  *
2386  * @adev: amdgpu_device pointer
2387  *
2388  * Main initialization pass for hardware IPs.  The list of all the hardware
2389  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2390  * are run.  sw_init initializes the software state associated with each IP
2391  * and hw_init initializes the hardware associated with each IP.
2392  * Returns 0 on success, negative error code on failure.
2393  */
2394 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2395 {
2396         int i, r;
2397
2398         r = amdgpu_ras_init(adev);
2399         if (r)
2400                 return r;
2401
2402         for (i = 0; i < adev->num_ip_blocks; i++) {
2403                 if (!adev->ip_blocks[i].status.valid)
2404                         continue;
2405                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2406                 if (r) {
2407                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2408                                   adev->ip_blocks[i].version->funcs->name, r);
2409                         goto init_failed;
2410                 }
2411                 adev->ip_blocks[i].status.sw = true;
2412
2413                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2414                         /* need to do common hw init early so everything is set up for gmc */
2415                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2416                         if (r) {
2417                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2418                                 goto init_failed;
2419                         }
2420                         adev->ip_blocks[i].status.hw = true;
2421                 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2422                         /* need to do gmc hw init early so we can allocate gpu mem */
2423                         /* Try to reserve bad pages early */
2424                         if (amdgpu_sriov_vf(adev))
2425                                 amdgpu_virt_exchange_data(adev);
2426
2427                         r = amdgpu_device_mem_scratch_init(adev);
2428                         if (r) {
2429                                 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2430                                 goto init_failed;
2431                         }
2432                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2433                         if (r) {
2434                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2435                                 goto init_failed;
2436                         }
2437                         r = amdgpu_device_wb_init(adev);
2438                         if (r) {
2439                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2440                                 goto init_failed;
2441                         }
2442                         adev->ip_blocks[i].status.hw = true;
2443
2444                         /* right after GMC hw init, we create CSA */
2445                         if (amdgpu_mcbp) {
2446                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2447                                                                AMDGPU_GEM_DOMAIN_VRAM |
2448                                                                AMDGPU_GEM_DOMAIN_GTT,
2449                                                                AMDGPU_CSA_SIZE);
2450                                 if (r) {
2451                                         DRM_ERROR("allocate CSA failed %d\n", r);
2452                                         goto init_failed;
2453                                 }
2454                         }
2455                 }
2456         }
2457
2458         if (amdgpu_sriov_vf(adev))
2459                 amdgpu_virt_init_data_exchange(adev);
2460
2461         r = amdgpu_ib_pool_init(adev);
2462         if (r) {
2463                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2464                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2465                 goto init_failed;
2466         }
2467
2468         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2469         if (r)
2470                 goto init_failed;
2471
2472         r = amdgpu_device_ip_hw_init_phase1(adev);
2473         if (r)
2474                 goto init_failed;
2475
2476         r = amdgpu_device_fw_loading(adev);
2477         if (r)
2478                 goto init_failed;
2479
2480         r = amdgpu_device_ip_hw_init_phase2(adev);
2481         if (r)
2482                 goto init_failed;
2483
2484         /*
2485          * retired pages will be loaded from eeprom and reserved here,
2486          * it should be called after amdgpu_device_ip_hw_init_phase2  since
2487          * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2488          * for I2C communication which only true at this point.
2489          *
2490          * amdgpu_ras_recovery_init may fail, but the upper only cares the
2491          * failure from bad gpu situation and stop amdgpu init process
2492          * accordingly. For other failed cases, it will still release all
2493          * the resource and print error message, rather than returning one
2494          * negative value to upper level.
2495          *
2496          * Note: theoretically, this should be called before all vram allocations
2497          * to protect retired page from abusing
2498          */
2499         r = amdgpu_ras_recovery_init(adev);
2500         if (r)
2501                 goto init_failed;
2502
2503         /**
2504          * In case of XGMI grab extra reference for reset domain for this device
2505          */
2506         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2507                 if (amdgpu_xgmi_add_device(adev) == 0) {
2508                         if (!amdgpu_sriov_vf(adev)) {
2509                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2510
2511                                 if (WARN_ON(!hive)) {
2512                                         r = -ENOENT;
2513                                         goto init_failed;
2514                                 }
2515
2516                                 if (!hive->reset_domain ||
2517                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2518                                         r = -ENOENT;
2519                                         amdgpu_put_xgmi_hive(hive);
2520                                         goto init_failed;
2521                                 }
2522
2523                                 /* Drop the early temporary reset domain we created for device */
2524                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
2525                                 adev->reset_domain = hive->reset_domain;
2526                                 amdgpu_put_xgmi_hive(hive);
2527                         }
2528                 }
2529         }
2530
2531         r = amdgpu_device_init_schedulers(adev);
2532         if (r)
2533                 goto init_failed;
2534
2535         /* Don't init kfd if whole hive need to be reset during init */
2536         if (!adev->gmc.xgmi.pending_reset)
2537                 amdgpu_amdkfd_device_init(adev);
2538
2539         amdgpu_fru_get_product_info(adev);
2540
2541 init_failed:
2542         if (amdgpu_sriov_vf(adev))
2543                 amdgpu_virt_release_full_gpu(adev, true);
2544
2545         return r;
2546 }
2547
2548 /**
2549  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2550  *
2551  * @adev: amdgpu_device pointer
2552  *
2553  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2554  * this function before a GPU reset.  If the value is retained after a
2555  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2556  */
2557 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2558 {
2559         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2560 }
2561
2562 /**
2563  * amdgpu_device_check_vram_lost - check if vram is valid
2564  *
2565  * @adev: amdgpu_device pointer
2566  *
2567  * Checks the reset magic value written to the gart pointer in VRAM.
2568  * The driver calls this after a GPU reset to see if the contents of
2569  * VRAM is lost or now.
2570  * returns true if vram is lost, false if not.
2571  */
2572 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2573 {
2574         if (memcmp(adev->gart.ptr, adev->reset_magic,
2575                         AMDGPU_RESET_MAGIC_NUM))
2576                 return true;
2577
2578         if (!amdgpu_in_reset(adev))
2579                 return false;
2580
2581         /*
2582          * For all ASICs with baco/mode1 reset, the VRAM is
2583          * always assumed to be lost.
2584          */
2585         switch (amdgpu_asic_reset_method(adev)) {
2586         case AMD_RESET_METHOD_BACO:
2587         case AMD_RESET_METHOD_MODE1:
2588                 return true;
2589         default:
2590                 return false;
2591         }
2592 }
2593
2594 /**
2595  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2596  *
2597  * @adev: amdgpu_device pointer
2598  * @state: clockgating state (gate or ungate)
2599  *
2600  * The list of all the hardware IPs that make up the asic is walked and the
2601  * set_clockgating_state callbacks are run.
2602  * Late initialization pass enabling clockgating for hardware IPs.
2603  * Fini or suspend, pass disabling clockgating for hardware IPs.
2604  * Returns 0 on success, negative error code on failure.
2605  */
2606
2607 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2608                                enum amd_clockgating_state state)
2609 {
2610         int i, j, r;
2611
2612         if (amdgpu_emu_mode == 1)
2613                 return 0;
2614
2615         for (j = 0; j < adev->num_ip_blocks; j++) {
2616                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2617                 if (!adev->ip_blocks[i].status.late_initialized)
2618                         continue;
2619                 /* skip CG for GFX, SDMA on S0ix */
2620                 if (adev->in_s0ix &&
2621                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2622                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2623                         continue;
2624                 /* skip CG for VCE/UVD, it's handled specially */
2625                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2626                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2627                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2628                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2629                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2630                         /* enable clockgating to save power */
2631                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2632                                                                                      state);
2633                         if (r) {
2634                                 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2635                                           adev->ip_blocks[i].version->funcs->name, r);
2636                                 return r;
2637                         }
2638                 }
2639         }
2640
2641         return 0;
2642 }
2643
2644 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2645                                enum amd_powergating_state state)
2646 {
2647         int i, j, r;
2648
2649         if (amdgpu_emu_mode == 1)
2650                 return 0;
2651
2652         for (j = 0; j < adev->num_ip_blocks; j++) {
2653                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2654                 if (!adev->ip_blocks[i].status.late_initialized)
2655                         continue;
2656                 /* skip PG for GFX, SDMA on S0ix */
2657                 if (adev->in_s0ix &&
2658                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2659                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2660                         continue;
2661                 /* skip CG for VCE/UVD, it's handled specially */
2662                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2663                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2664                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2665                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2666                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
2667                         /* enable powergating to save power */
2668                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2669                                                                                         state);
2670                         if (r) {
2671                                 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2672                                           adev->ip_blocks[i].version->funcs->name, r);
2673                                 return r;
2674                         }
2675                 }
2676         }
2677         return 0;
2678 }
2679
2680 static int amdgpu_device_enable_mgpu_fan_boost(void)
2681 {
2682         struct amdgpu_gpu_instance *gpu_ins;
2683         struct amdgpu_device *adev;
2684         int i, ret = 0;
2685
2686         mutex_lock(&mgpu_info.mutex);
2687
2688         /*
2689          * MGPU fan boost feature should be enabled
2690          * only when there are two or more dGPUs in
2691          * the system
2692          */
2693         if (mgpu_info.num_dgpu < 2)
2694                 goto out;
2695
2696         for (i = 0; i < mgpu_info.num_dgpu; i++) {
2697                 gpu_ins = &(mgpu_info.gpu_ins[i]);
2698                 adev = gpu_ins->adev;
2699                 if (!(adev->flags & AMD_IS_APU) &&
2700                     !gpu_ins->mgpu_fan_enabled) {
2701                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2702                         if (ret)
2703                                 break;
2704
2705                         gpu_ins->mgpu_fan_enabled = 1;
2706                 }
2707         }
2708
2709 out:
2710         mutex_unlock(&mgpu_info.mutex);
2711
2712         return ret;
2713 }
2714
2715 /**
2716  * amdgpu_device_ip_late_init - run late init for hardware IPs
2717  *
2718  * @adev: amdgpu_device pointer
2719  *
2720  * Late initialization pass for hardware IPs.  The list of all the hardware
2721  * IPs that make up the asic is walked and the late_init callbacks are run.
2722  * late_init covers any special initialization that an IP requires
2723  * after all of the have been initialized or something that needs to happen
2724  * late in the init process.
2725  * Returns 0 on success, negative error code on failure.
2726  */
2727 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2728 {
2729         struct amdgpu_gpu_instance *gpu_instance;
2730         int i = 0, r;
2731
2732         for (i = 0; i < adev->num_ip_blocks; i++) {
2733                 if (!adev->ip_blocks[i].status.hw)
2734                         continue;
2735                 if (adev->ip_blocks[i].version->funcs->late_init) {
2736                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2737                         if (r) {
2738                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2739                                           adev->ip_blocks[i].version->funcs->name, r);
2740                                 return r;
2741                         }
2742                 }
2743                 adev->ip_blocks[i].status.late_initialized = true;
2744         }
2745
2746         r = amdgpu_ras_late_init(adev);
2747         if (r) {
2748                 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2749                 return r;
2750         }
2751
2752         amdgpu_ras_set_error_query_ready(adev, true);
2753
2754         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2755         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2756
2757         amdgpu_device_fill_reset_magic(adev);
2758
2759         r = amdgpu_device_enable_mgpu_fan_boost();
2760         if (r)
2761                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2762
2763         /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2764         if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2765                                adev->asic_type == CHIP_ALDEBARAN ))
2766                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2767
2768         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2769                 mutex_lock(&mgpu_info.mutex);
2770
2771                 /*
2772                  * Reset device p-state to low as this was booted with high.
2773                  *
2774                  * This should be performed only after all devices from the same
2775                  * hive get initialized.
2776                  *
2777                  * However, it's unknown how many device in the hive in advance.
2778                  * As this is counted one by one during devices initializations.
2779                  *
2780                  * So, we wait for all XGMI interlinked devices initialized.
2781                  * This may bring some delays as those devices may come from
2782                  * different hives. But that should be OK.
2783                  */
2784                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2785                         for (i = 0; i < mgpu_info.num_gpu; i++) {
2786                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
2787                                 if (gpu_instance->adev->flags & AMD_IS_APU)
2788                                         continue;
2789
2790                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2791                                                 AMDGPU_XGMI_PSTATE_MIN);
2792                                 if (r) {
2793                                         DRM_ERROR("pstate setting failed (%d).\n", r);
2794                                         break;
2795                                 }
2796                         }
2797                 }
2798
2799                 mutex_unlock(&mgpu_info.mutex);
2800         }
2801
2802         return 0;
2803 }
2804
2805 /**
2806  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2807  *
2808  * @adev: amdgpu_device pointer
2809  *
2810  * For ASICs need to disable SMC first
2811  */
2812 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2813 {
2814         int i, r;
2815
2816         if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2817                 return;
2818
2819         for (i = 0; i < adev->num_ip_blocks; i++) {
2820                 if (!adev->ip_blocks[i].status.hw)
2821                         continue;
2822                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2823                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2824                         /* XXX handle errors */
2825                         if (r) {
2826                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2827                                           adev->ip_blocks[i].version->funcs->name, r);
2828                         }
2829                         adev->ip_blocks[i].status.hw = false;
2830                         break;
2831                 }
2832         }
2833 }
2834
2835 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2836 {
2837         int i, r;
2838
2839         for (i = 0; i < adev->num_ip_blocks; i++) {
2840                 if (!adev->ip_blocks[i].version->funcs->early_fini)
2841                         continue;
2842
2843                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2844                 if (r) {
2845                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2846                                   adev->ip_blocks[i].version->funcs->name, r);
2847                 }
2848         }
2849
2850         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2851         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2852
2853         amdgpu_amdkfd_suspend(adev, false);
2854
2855         /* Workaroud for ASICs need to disable SMC first */
2856         amdgpu_device_smu_fini_early(adev);
2857
2858         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2859                 if (!adev->ip_blocks[i].status.hw)
2860                         continue;
2861
2862                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2863                 /* XXX handle errors */
2864                 if (r) {
2865                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2866                                   adev->ip_blocks[i].version->funcs->name, r);
2867                 }
2868
2869                 adev->ip_blocks[i].status.hw = false;
2870         }
2871
2872         if (amdgpu_sriov_vf(adev)) {
2873                 if (amdgpu_virt_release_full_gpu(adev, false))
2874                         DRM_ERROR("failed to release exclusive mode on fini\n");
2875         }
2876
2877         return 0;
2878 }
2879
2880 /**
2881  * amdgpu_device_ip_fini - run fini for hardware IPs
2882  *
2883  * @adev: amdgpu_device pointer
2884  *
2885  * Main teardown pass for hardware IPs.  The list of all the hardware
2886  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2887  * are run.  hw_fini tears down the hardware associated with each IP
2888  * and sw_fini tears down any software state associated with each IP.
2889  * Returns 0 on success, negative error code on failure.
2890  */
2891 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2892 {
2893         int i, r;
2894
2895         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2896                 amdgpu_virt_release_ras_err_handler_data(adev);
2897
2898         if (adev->gmc.xgmi.num_physical_nodes > 1)
2899                 amdgpu_xgmi_remove_device(adev);
2900
2901         amdgpu_amdkfd_device_fini_sw(adev);
2902
2903         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2904                 if (!adev->ip_blocks[i].status.sw)
2905                         continue;
2906
2907                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2908                         amdgpu_ucode_free_bo(adev);
2909                         amdgpu_free_static_csa(&adev->virt.csa_obj);
2910                         amdgpu_device_wb_fini(adev);
2911                         amdgpu_device_mem_scratch_fini(adev);
2912                         amdgpu_ib_pool_fini(adev);
2913                 }
2914
2915                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2916                 /* XXX handle errors */
2917                 if (r) {
2918                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2919                                   adev->ip_blocks[i].version->funcs->name, r);
2920                 }
2921                 adev->ip_blocks[i].status.sw = false;
2922                 adev->ip_blocks[i].status.valid = false;
2923         }
2924
2925         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2926                 if (!adev->ip_blocks[i].status.late_initialized)
2927                         continue;
2928                 if (adev->ip_blocks[i].version->funcs->late_fini)
2929                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2930                 adev->ip_blocks[i].status.late_initialized = false;
2931         }
2932
2933         amdgpu_ras_fini(adev);
2934
2935         return 0;
2936 }
2937
2938 /**
2939  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2940  *
2941  * @work: work_struct.
2942  */
2943 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2944 {
2945         struct amdgpu_device *adev =
2946                 container_of(work, struct amdgpu_device, delayed_init_work.work);
2947         int r;
2948
2949         r = amdgpu_ib_ring_tests(adev);
2950         if (r)
2951                 DRM_ERROR("ib ring test failed (%d).\n", r);
2952 }
2953
2954 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2955 {
2956         struct amdgpu_device *adev =
2957                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2958
2959         WARN_ON_ONCE(adev->gfx.gfx_off_state);
2960         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2961
2962         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2963                 adev->gfx.gfx_off_state = true;
2964 }
2965
2966 /**
2967  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2968  *
2969  * @adev: amdgpu_device pointer
2970  *
2971  * Main suspend function for hardware IPs.  The list of all the hardware
2972  * IPs that make up the asic is walked, clockgating is disabled and the
2973  * suspend callbacks are run.  suspend puts the hardware and software state
2974  * in each IP into a state suitable for suspend.
2975  * Returns 0 on success, negative error code on failure.
2976  */
2977 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2978 {
2979         int i, r;
2980
2981         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2982         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2983
2984         /*
2985          * Per PMFW team's suggestion, driver needs to handle gfxoff
2986          * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2987          * scenario. Add the missing df cstate disablement here.
2988          */
2989         if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2990                 dev_warn(adev->dev, "Failed to disallow df cstate");
2991
2992         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2993                 if (!adev->ip_blocks[i].status.valid)
2994                         continue;
2995
2996                 /* displays are handled separately */
2997                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2998                         continue;
2999
3000                 /* XXX handle errors */
3001                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3002                 /* XXX handle errors */
3003                 if (r) {
3004                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3005                                   adev->ip_blocks[i].version->funcs->name, r);
3006                         return r;
3007                 }
3008
3009                 adev->ip_blocks[i].status.hw = false;
3010         }
3011
3012         return 0;
3013 }
3014
3015 /**
3016  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3017  *
3018  * @adev: amdgpu_device pointer
3019  *
3020  * Main suspend function for hardware IPs.  The list of all the hardware
3021  * IPs that make up the asic is walked, clockgating is disabled and the
3022  * suspend callbacks are run.  suspend puts the hardware and software state
3023  * in each IP into a state suitable for suspend.
3024  * Returns 0 on success, negative error code on failure.
3025  */
3026 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3027 {
3028         int i, r;
3029
3030         if (adev->in_s0ix)
3031                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3032
3033         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3034                 if (!adev->ip_blocks[i].status.valid)
3035                         continue;
3036                 /* displays are handled in phase1 */
3037                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3038                         continue;
3039                 /* PSP lost connection when err_event_athub occurs */
3040                 if (amdgpu_ras_intr_triggered() &&
3041                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3042                         adev->ip_blocks[i].status.hw = false;
3043                         continue;
3044                 }
3045
3046                 /* skip unnecessary suspend if we do not initialize them yet */
3047                 if (adev->gmc.xgmi.pending_reset &&
3048                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3049                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3050                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3051                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3052                         adev->ip_blocks[i].status.hw = false;
3053                         continue;
3054                 }
3055
3056                 /* skip suspend of gfx/mes and psp for S0ix
3057                  * gfx is in gfxoff state, so on resume it will exit gfxoff just
3058                  * like at runtime. PSP is also part of the always on hardware
3059                  * so no need to suspend it.
3060                  */
3061                 if (adev->in_s0ix &&
3062                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3063                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3064                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3065                         continue;
3066
3067                 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3068                 if (adev->in_s0ix &&
3069                     (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3070                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3071                         continue;
3072
3073                 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3074                  * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3075                  * from this location and RLC Autoload automatically also gets loaded
3076                  * from here based on PMFW -> PSP message during re-init sequence.
3077                  * Therefore, the psp suspend & resume should be skipped to avoid destroy
3078                  * the TMR and reload FWs again for IMU enabled APU ASICs.
3079                  */
3080                 if (amdgpu_in_reset(adev) &&
3081                     (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3082                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3083                         continue;
3084
3085                 /* XXX handle errors */
3086                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3087                 /* XXX handle errors */
3088                 if (r) {
3089                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3090                                   adev->ip_blocks[i].version->funcs->name, r);
3091                 }
3092                 adev->ip_blocks[i].status.hw = false;
3093                 /* handle putting the SMC in the appropriate state */
3094                 if(!amdgpu_sriov_vf(adev)){
3095                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3096                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3097                                 if (r) {
3098                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3099                                                         adev->mp1_state, r);
3100                                         return r;
3101                                 }
3102                         }
3103                 }
3104         }
3105
3106         return 0;
3107 }
3108
/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs.  Runs suspend phase 1 (displays)
 * followed by suspend phase 2 (all remaining blocks); phase 2 is skipped
 * when phase 1 fails.  On SR-IOV VFs the data exchange is shut down and
 * full GPU access is requested before the phases run, and that access is
 * released again afterwards on both the success and the failure path.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_request_full_gpu(adev, false);
	}

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (!r)
		r = amdgpu_device_ip_suspend_phase2(adev);

	/*
	 * Hand the full GPU access back even when a phase failed, so an
	 * error in phase 1 does not leave the VF holding it.
	 */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return r;
}
3139
3140 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3141 {
3142         int i, r;
3143
3144         static enum amd_ip_block_type ip_order[] = {
3145                 AMD_IP_BLOCK_TYPE_COMMON,
3146                 AMD_IP_BLOCK_TYPE_GMC,
3147                 AMD_IP_BLOCK_TYPE_PSP,
3148                 AMD_IP_BLOCK_TYPE_IH,
3149         };
3150
3151         for (i = 0; i < adev->num_ip_blocks; i++) {
3152                 int j;
3153                 struct amdgpu_ip_block *block;
3154
3155                 block = &adev->ip_blocks[i];
3156                 block->status.hw = false;
3157
3158                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3159
3160                         if (block->version->type != ip_order[j] ||
3161                                 !block->status.valid)
3162                                 continue;
3163
3164                         r = block->version->funcs->hw_init(adev);
3165                         DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3166                         if (r)
3167                                 return r;
3168                         block->status.hw = true;
3169                 }
3170         }
3171
3172         return 0;
3173 }
3174
3175 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3176 {
3177         int i, r;
3178
3179         static enum amd_ip_block_type ip_order[] = {
3180                 AMD_IP_BLOCK_TYPE_SMC,
3181                 AMD_IP_BLOCK_TYPE_DCE,
3182                 AMD_IP_BLOCK_TYPE_GFX,
3183                 AMD_IP_BLOCK_TYPE_SDMA,
3184                 AMD_IP_BLOCK_TYPE_MES,
3185                 AMD_IP_BLOCK_TYPE_UVD,
3186                 AMD_IP_BLOCK_TYPE_VCE,
3187                 AMD_IP_BLOCK_TYPE_VCN,
3188                 AMD_IP_BLOCK_TYPE_JPEG
3189         };
3190
3191         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3192                 int j;
3193                 struct amdgpu_ip_block *block;
3194
3195                 for (j = 0; j < adev->num_ip_blocks; j++) {
3196                         block = &adev->ip_blocks[j];
3197
3198                         if (block->version->type != ip_order[i] ||
3199                                 !block->status.valid ||
3200                                 block->status.hw)
3201                                 continue;
3202
3203                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3204                                 r = block->version->funcs->resume(adev);
3205                         else
3206                                 r = block->version->funcs->hw_init(adev);
3207
3208                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3209                         if (r)
3210                                 return r;
3211                         block->status.hw = true;
3212                 }
3213         }
3214
3215         return 0;
3216 }
3217
3218 /**
3219  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3220  *
3221  * @adev: amdgpu_device pointer
3222  *
3223  * First resume function for hardware IPs.  The list of all the hardware
3224  * IPs that make up the asic is walked and the resume callbacks are run for
3225  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3226  * after a suspend and updates the software state as necessary.  This
3227  * function is also used for restoring the GPU after a GPU reset.
3228  * Returns 0 on success, negative error code on failure.
3229  */
3230 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3231 {
3232         int i, r;
3233
3234         for (i = 0; i < adev->num_ip_blocks; i++) {
3235                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3236                         continue;
3237                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3238                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3239                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3240                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3241
3242                         r = adev->ip_blocks[i].version->funcs->resume(adev);
3243                         if (r) {
3244                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
3245                                           adev->ip_blocks[i].version->funcs->name, r);
3246                                 return r;
3247                         }
3248                         adev->ip_blocks[i].status.hw = true;
3249                 }
3250         }
3251
3252         return 0;
3253 }
3254
3255 /**
3256  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3257  *
3258  * @adev: amdgpu_device pointer
3259  *
3260  * First resume function for hardware IPs.  The list of all the hardware
3261  * IPs that make up the asic is walked and the resume callbacks are run for
3262  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3263  * functional state after a suspend and updates the software state as
3264  * necessary.  This function is also used for restoring the GPU after a GPU
3265  * reset.
3266  * Returns 0 on success, negative error code on failure.
3267  */
3268 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3269 {
3270         int i, r;
3271
3272         for (i = 0; i < adev->num_ip_blocks; i++) {
3273                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3274                         continue;
3275                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3276                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3277                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3278                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3279                         continue;
3280                 r = adev->ip_blocks[i].version->funcs->resume(adev);
3281                 if (r) {
3282                         DRM_ERROR("resume of IP block <%s> failed %d\n",
3283                                   adev->ip_blocks[i].version->funcs->name, r);
3284                         return r;
3285                 }
3286                 adev->ip_blocks[i].status.hw = true;
3287         }
3288
3289         return 0;
3290 }
3291
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  The hardware IPs are split into
 * two resume functions because they are also used in recovering from a GPU
 * reset, where some additional steps need to be taken between them.  In this
 * case (S3/S4) they are run sequentially, with firmware loading in between
 * and the KFD IOMMU resume first.  The sequence stops at the first step that
 * fails.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int r;

	/* Each step only runs if everything before it succeeded. */
	r = amdgpu_amdkfd_resume_iommu(adev);
	if (!r)
		r = amdgpu_device_ip_resume_phase1(adev);
	if (!r)
		r = amdgpu_device_fw_loading(adev);
	if (!r)
		r = amdgpu_device_ip_resume_phase2(adev);

	return r;
}
3324
3325 /**
3326  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3327  *
3328  * @adev: amdgpu_device pointer
3329  *
3330  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3331  */
3332 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3333 {
3334         if (amdgpu_sriov_vf(adev)) {
3335                 if (adev->is_atom_fw) {
3336                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3337                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3338                 } else {
3339                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3340                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3341                 }
3342
3343                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3344                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3345         }
3346 }
3347
3348 /**
3349  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3350  *
3351  * @asic_type: AMD asic type
3352  *
3353  * Check if there is DC (new modesetting infrastructre) support for an asic.
3354  * returns true if DC has support, false if not.
3355  */
3356 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3357 {
3358         switch (asic_type) {
3359 #ifdef CONFIG_DRM_AMDGPU_SI
3360         case CHIP_HAINAN:
3361 #endif
3362         case CHIP_TOPAZ:
3363                 /* chips with no display hardware */
3364                 return false;
3365 #if defined(CONFIG_DRM_AMD_DC)
3366         case CHIP_TAHITI:
3367         case CHIP_PITCAIRN:
3368         case CHIP_VERDE:
3369         case CHIP_OLAND:
3370                 /*
3371                  * We have systems in the wild with these ASICs that require
3372                  * LVDS and VGA support which is not supported with DC.
3373                  *
3374                  * Fallback to the non-DC driver here by default so as not to
3375                  * cause regressions.
3376                  */
3377 #if defined(CONFIG_DRM_AMD_DC_SI)
3378                 return amdgpu_dc > 0;
3379 #else
3380                 return false;
3381 #endif
3382         case CHIP_BONAIRE:
3383         case CHIP_KAVERI:
3384         case CHIP_KABINI:
3385         case CHIP_MULLINS:
3386                 /*
3387                  * We have systems in the wild with these ASICs that require
3388                  * VGA support which is not supported with DC.
3389                  *
3390                  * Fallback to the non-DC driver here by default so as not to
3391                  * cause regressions.
3392                  */
3393                 return amdgpu_dc > 0;
3394         default:
3395                 return amdgpu_dc != 0;
3396 #else
3397         default:
3398                 if (amdgpu_dc > 0)
3399                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3400                                          "but isn't supported by ASIC, ignoring\n");
3401                 return false;
3402 #endif
3403         }
3404 }
3405
3406 /**
3407  * amdgpu_device_has_dc_support - check if dc is supported
3408  *
3409  * @adev: amdgpu_device pointer
3410  *
3411  * Returns true for supported, false for not supported
3412  */
3413 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3414 {
3415         if (adev->enable_virtual_display ||
3416             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3417                 return false;
3418
3419         return amdgpu_device_asic_has_dc_support(adev->asic_type);
3420 }
3421
3422 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3423 {
3424         struct amdgpu_device *adev =
3425                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3426         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3427
3428         /* It's a bug to not have a hive within this function */
3429         if (WARN_ON(!hive))
3430                 return;
3431
3432         /*
3433          * Use task barrier to synchronize all xgmi reset works across the
3434          * hive. task_barrier_enter and task_barrier_exit will block
3435          * until all the threads running the xgmi reset works reach
3436          * those points. task_barrier_full will do both blocks.
3437          */
3438         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3439
3440                 task_barrier_enter(&hive->tb);
3441                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3442
3443                 if (adev->asic_reset_res)
3444                         goto fail;
3445
3446                 task_barrier_exit(&hive->tb);
3447                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3448
3449                 if (adev->asic_reset_res)
3450                         goto fail;
3451
3452                 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3453                     adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3454                         adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3455         } else {
3456
3457                 task_barrier_full(&hive->tb);
3458                 adev->asic_reset_res =  amdgpu_asic_reset(adev);
3459         }
3460
3461 fail:
3462         if (adev->asic_reset_res)
3463                 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3464                          adev->asic_reset_res, adev_to_drm(adev)->unique);
3465         amdgpu_put_xgmi_hive(hive);
3466 }
3467
3468 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3469 {
3470         char *input = amdgpu_lockup_timeout;
3471         char *timeout_setting = NULL;
3472         int index = 0;
3473         long timeout;
3474         int ret = 0;
3475
3476         /*
3477          * By default timeout for non compute jobs is 10000
3478          * and 60000 for compute jobs.
3479          * In SR-IOV or passthrough mode, timeout for compute
3480          * jobs are 60000 by default.
3481          */
3482         adev->gfx_timeout = msecs_to_jiffies(10000);
3483         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3484         if (amdgpu_sriov_vf(adev))
3485                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3486                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3487         else
3488                 adev->compute_timeout =  msecs_to_jiffies(60000);
3489
3490         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3491                 while ((timeout_setting = strsep(&input, ",")) &&
3492                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3493                         ret = kstrtol(timeout_setting, 0, &timeout);
3494                         if (ret)
3495                                 return ret;
3496
3497                         if (timeout == 0) {
3498                                 index++;
3499                                 continue;
3500                         } else if (timeout < 0) {
3501                                 timeout = MAX_SCHEDULE_TIMEOUT;
3502                                 dev_warn(adev->dev, "lockup timeout disabled");
3503                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3504                         } else {
3505                                 timeout = msecs_to_jiffies(timeout);
3506                         }
3507
3508                         switch (index++) {
3509                         case 0:
3510                                 adev->gfx_timeout = timeout;
3511                                 break;
3512                         case 1:
3513                                 adev->compute_timeout = timeout;
3514                                 break;
3515                         case 2:
3516                                 adev->sdma_timeout = timeout;
3517                                 break;
3518                         case 3:
3519                                 adev->video_timeout = timeout;
3520                                 break;
3521                         default:
3522                                 break;
3523                         }
3524                 }
3525                 /*
3526                  * There is only one value specified and
3527                  * it should apply to all non-compute jobs.
3528                  */
3529                 if (index == 1) {
3530                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3531                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3532                                 adev->compute_timeout = adev->gfx_timeout;
3533                 }
3534         }
3535
3536         return ret;
3537 }
3538
3539 /**
3540  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3541  *
3542  * @adev: amdgpu_device pointer
3543  *
3544  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3545  */
3546 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3547 {
3548         struct iommu_domain *domain;
3549
3550         domain = iommu_get_domain_for_dev(adev->dev);
3551         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3552                 adev->ram_is_direct_mapped = true;
3553 }
3554
3555 static const struct attribute *amdgpu_dev_attributes[] = {
3556         &dev_attr_product_name.attr,
3557         &dev_attr_product_number.attr,
3558         &dev_attr_serial_number.attr,
3559         &dev_attr_pcie_replay_count.attr,
3560         NULL
3561 };
3562
3563 /**
3564  * amdgpu_device_init - initialize the driver
3565  *
3566  * @adev: amdgpu_device pointer
3567  * @flags: driver flags
3568  *
3569  * Initializes the driver info and hw (all asics).
3570  * Returns 0 for success or an error on failure.
3571  * Called at driver startup.
3572  */
3573 int amdgpu_device_init(struct amdgpu_device *adev,
3574                        uint32_t flags)
3575 {
3576         struct drm_device *ddev = adev_to_drm(adev);
3577         struct pci_dev *pdev = adev->pdev;
3578         int r, i;
3579         bool px = false;
3580         u32 max_MBps;
3581
3582         adev->shutdown = false;
3583         adev->flags = flags;
3584
3585         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3586                 adev->asic_type = amdgpu_force_asic_type;
3587         else
3588                 adev->asic_type = flags & AMD_ASIC_MASK;
3589
3590         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3591         if (amdgpu_emu_mode == 1)
3592                 adev->usec_timeout *= 10;
3593         adev->gmc.gart_size = 512 * 1024 * 1024;
3594         adev->accel_working = false;
3595         adev->num_rings = 0;
3596         RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3597         adev->mman.buffer_funcs = NULL;
3598         adev->mman.buffer_funcs_ring = NULL;
3599         adev->vm_manager.vm_pte_funcs = NULL;
3600         adev->vm_manager.vm_pte_num_scheds = 0;
3601         adev->gmc.gmc_funcs = NULL;
3602         adev->harvest_ip_mask = 0x0;
3603         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3604         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3605
3606         adev->smc_rreg = &amdgpu_invalid_rreg;
3607         adev->smc_wreg = &amdgpu_invalid_wreg;
3608         adev->pcie_rreg = &amdgpu_invalid_rreg;
3609         adev->pcie_wreg = &amdgpu_invalid_wreg;
3610         adev->pciep_rreg = &amdgpu_invalid_rreg;
3611         adev->pciep_wreg = &amdgpu_invalid_wreg;
3612         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3613         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3614         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3615         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3616         adev->didt_rreg = &amdgpu_invalid_rreg;
3617         adev->didt_wreg = &amdgpu_invalid_wreg;
3618         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3619         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3620         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3621         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3622
3623         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3624                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3625                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3626
3627         /* mutex initialization are all done here so we
3628          * can recall function without having locking issues */
3629         mutex_init(&adev->firmware.mutex);
3630         mutex_init(&adev->pm.mutex);
3631         mutex_init(&adev->gfx.gpu_clock_mutex);
3632         mutex_init(&adev->srbm_mutex);
3633         mutex_init(&adev->gfx.pipe_reserve_mutex);
3634         mutex_init(&adev->gfx.gfx_off_mutex);
3635         mutex_init(&adev->grbm_idx_mutex);
3636         mutex_init(&adev->mn_lock);
3637         mutex_init(&adev->virt.vf_errors.lock);
3638         hash_init(adev->mn_hash);
3639         mutex_init(&adev->psp.mutex);
3640         mutex_init(&adev->notifier_lock);
3641         mutex_init(&adev->pm.stable_pstate_ctx_lock);
3642         mutex_init(&adev->benchmark_mutex);
3643
3644         amdgpu_device_init_apu_flags(adev);
3645
3646         r = amdgpu_device_check_arguments(adev);
3647         if (r)
3648                 return r;
3649
3650         spin_lock_init(&adev->mmio_idx_lock);
3651         spin_lock_init(&adev->smc_idx_lock);
3652         spin_lock_init(&adev->pcie_idx_lock);
3653         spin_lock_init(&adev->uvd_ctx_idx_lock);
3654         spin_lock_init(&adev->didt_idx_lock);
3655         spin_lock_init(&adev->gc_cac_idx_lock);
3656         spin_lock_init(&adev->se_cac_idx_lock);
3657         spin_lock_init(&adev->audio_endpt_idx_lock);
3658         spin_lock_init(&adev->mm_stats.lock);
3659
3660         INIT_LIST_HEAD(&adev->shadow_list);
3661         mutex_init(&adev->shadow_list_lock);
3662
3663         INIT_LIST_HEAD(&adev->reset_list);
3664
3665         INIT_LIST_HEAD(&adev->ras_list);
3666
3667         INIT_DELAYED_WORK(&adev->delayed_init_work,
3668                           amdgpu_device_delayed_init_work_handler);
3669         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3670                           amdgpu_device_delay_enable_gfx_off);
3671
3672         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3673
3674         adev->gfx.gfx_off_req_count = 1;
3675         adev->gfx.gfx_off_residency = 0;
3676         adev->gfx.gfx_off_entrycount = 0;
3677         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3678
3679         atomic_set(&adev->throttling_logging_enabled, 1);
3680         /*
3681          * If throttling continues, logging will be performed every minute
3682          * to avoid log flooding. "-1" is subtracted since the thermal
3683          * throttling interrupt comes every second. Thus, the total logging
3684          * interval is 59 seconds(retelimited printk interval) + 1(waiting
3685          * for throttling interrupt) = 60 seconds.
3686          */
3687         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3688         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3689
3690         /* Registers mapping */
3691         /* TODO: block userspace mapping of io register */
3692         if (adev->asic_type >= CHIP_BONAIRE) {
3693                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3694                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3695         } else {
3696                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3697                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3698         }
3699
3700         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3701                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3702
3703         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3704         if (adev->rmmio == NULL) {
3705                 return -ENOMEM;
3706         }
3707         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3708         DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3709
3710         amdgpu_device_get_pcie_info(adev);
3711
3712         if (amdgpu_mcbp)
3713                 DRM_INFO("MCBP is enabled\n");
3714
3715         /*
3716          * Reset domain needs to be present early, before XGMI hive discovered
3717          * (if any) and intitialized to use reset sem and in_gpu reset flag
3718          * early on during init and before calling to RREG32.
3719          */
3720         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3721         if (!adev->reset_domain)
3722                 return -ENOMEM;
3723
3724         /* detect hw virtualization here */
3725         amdgpu_detect_virtualization(adev);
3726
3727         r = amdgpu_device_get_job_timeout_settings(adev);
3728         if (r) {
3729                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3730                 return r;
3731         }
3732
3733         /* early init functions */
3734         r = amdgpu_device_ip_early_init(adev);
3735         if (r)
3736                 return r;
3737
3738         /* Get rid of things like offb */
3739         r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3740         if (r)
3741                 return r;
3742
3743         /* Enable TMZ based on IP_VERSION */
3744         amdgpu_gmc_tmz_set(adev);
3745
3746         amdgpu_gmc_noretry_set(adev);
3747         /* Need to get xgmi info early to decide the reset behavior*/
3748         if (adev->gmc.xgmi.supported) {
3749                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3750                 if (r)
3751                         return r;
3752         }
3753
3754         /* enable PCIE atomic ops */
3755         if (amdgpu_sriov_vf(adev))
3756                 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3757                         adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3758                         (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3759         else
3760                 adev->have_atomics_support =
3761                         !pci_enable_atomic_ops_to_root(adev->pdev,
3762                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3763                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3764         if (!adev->have_atomics_support)
3765                 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3766
3767         /* doorbell bar mapping and doorbell index init*/
3768         amdgpu_device_doorbell_init(adev);
3769
3770         if (amdgpu_emu_mode == 1) {
3771                 /* post the asic on emulation mode */
3772                 emu_soc_asic_init(adev);
3773                 goto fence_driver_init;
3774         }
3775
3776         amdgpu_reset_init(adev);
3777
3778         /* detect if we are with an SRIOV vbios */
3779         amdgpu_device_detect_sriov_bios(adev);
3780
3781         /* check if we need to reset the asic
3782          *  E.g., driver was not cleanly unloaded previously, etc.
3783          */
3784         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3785                 if (adev->gmc.xgmi.num_physical_nodes) {
3786                         dev_info(adev->dev, "Pending hive reset.\n");
3787                         adev->gmc.xgmi.pending_reset = true;
3788                         /* Only need to init necessary block for SMU to handle the reset */
3789                         for (i = 0; i < adev->num_ip_blocks; i++) {
3790                                 if (!adev->ip_blocks[i].status.valid)
3791                                         continue;
3792                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3793                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3794                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3795                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3796                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
3797                                                 adev->ip_blocks[i].version->funcs->name);
3798                                         adev->ip_blocks[i].status.hw = true;
3799                                 }
3800                         }
3801                 } else {
3802                         r = amdgpu_asic_reset(adev);
3803                         if (r) {
3804                                 dev_err(adev->dev, "asic reset on init failed\n");
3805                                 goto failed;
3806                         }
3807                 }
3808         }
3809
3810         /* Post card if necessary */
3811         if (amdgpu_device_need_post(adev)) {
3812                 if (!adev->bios) {
3813                         dev_err(adev->dev, "no vBIOS found\n");
3814                         r = -EINVAL;
3815                         goto failed;
3816                 }
3817                 DRM_INFO("GPU posting now...\n");
3818                 r = amdgpu_device_asic_init(adev);
3819                 if (r) {
3820                         dev_err(adev->dev, "gpu post error!\n");
3821                         goto failed;
3822                 }
3823         }
3824
3825         if (adev->is_atom_fw) {
3826                 /* Initialize clocks */
3827                 r = amdgpu_atomfirmware_get_clock_info(adev);
3828                 if (r) {
3829                         dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3830                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3831                         goto failed;
3832                 }
3833         } else {
3834                 /* Initialize clocks */
3835                 r = amdgpu_atombios_get_clock_info(adev);
3836                 if (r) {
3837                         dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3838                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3839                         goto failed;
3840                 }
3841                 /* init i2c buses */
3842                 if (!amdgpu_device_has_dc_support(adev))
3843                         amdgpu_atombios_i2c_init(adev);
3844         }
3845
3846 fence_driver_init:
3847         /* Fence driver */
3848         r = amdgpu_fence_driver_sw_init(adev);
3849         if (r) {
3850                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3851                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3852                 goto failed;
3853         }
3854
3855         /* init the mode config */
3856         drm_mode_config_init(adev_to_drm(adev));
3857
3858         r = amdgpu_device_ip_init(adev);
3859         if (r) {
3860                 /* failed in exclusive mode due to timeout */
3861                 if (amdgpu_sriov_vf(adev) &&
3862                     !amdgpu_sriov_runtime(adev) &&
3863                     amdgpu_virt_mmio_blocked(adev) &&
3864                     !amdgpu_virt_wait_reset(adev)) {
3865                         dev_err(adev->dev, "VF exclusive mode timeout\n");
3866                         /* Don't send request since VF is inactive. */
3867                         adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3868                         adev->virt.ops = NULL;
3869                         r = -EAGAIN;
3870                         goto release_ras_con;
3871                 }
3872                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3873                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3874                 goto release_ras_con;
3875         }
3876
3877         amdgpu_fence_driver_hw_init(adev);
3878
3879         dev_info(adev->dev,
3880                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3881                         adev->gfx.config.max_shader_engines,
3882                         adev->gfx.config.max_sh_per_se,
3883                         adev->gfx.config.max_cu_per_sh,
3884                         adev->gfx.cu_info.number);
3885
3886         adev->accel_working = true;
3887
3888         amdgpu_vm_check_compute_bug(adev);
3889
3890         /* Initialize the buffer migration limit. */
3891         if (amdgpu_moverate >= 0)
3892                 max_MBps = amdgpu_moverate;
3893         else
3894                 max_MBps = 8; /* Allow 8 MB/s. */
3895         /* Get a log2 for easy divisions. */
3896         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3897
3898         r = amdgpu_pm_sysfs_init(adev);
3899         if (r)
3900                 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3901
3902         r = amdgpu_ucode_sysfs_init(adev);
3903         if (r) {
3904                 adev->ucode_sysfs_en = false;
3905                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3906         } else
3907                 adev->ucode_sysfs_en = true;
3908
3909         r = amdgpu_psp_sysfs_init(adev);
3910         if (r) {
3911                 adev->psp_sysfs_en = false;
3912                 if (!amdgpu_sriov_vf(adev))
3913                         DRM_ERROR("Creating psp sysfs failed\n");
3914         } else
3915                 adev->psp_sysfs_en = true;
3916
3917         /*
3918          * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3919          * Otherwise the mgpu fan boost feature will be skipped due to the
3920          * gpu instance is counted less.
3921          */
3922         amdgpu_register_gpu_instance(adev);
3923
3924         /* enable clockgating, etc. after ib tests, etc. since some blocks require
3925          * explicit gating rather than handling it automatically.
3926          */
3927         if (!adev->gmc.xgmi.pending_reset) {
3928                 r = amdgpu_device_ip_late_init(adev);
3929                 if (r) {
3930                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3931                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3932                         goto release_ras_con;
3933                 }
3934                 /* must succeed. */
3935                 amdgpu_ras_resume(adev);
3936                 queue_delayed_work(system_wq, &adev->delayed_init_work,
3937                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
3938         }
3939
3940         if (amdgpu_sriov_vf(adev))
3941                 flush_delayed_work(&adev->delayed_init_work);
3942
3943         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3944         if (r)
3945                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3946
3947         if (IS_ENABLED(CONFIG_PERF_EVENTS))
3948                 r = amdgpu_pmu_init(adev);
3949         if (r)
3950                 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3951
3952         /* Have stored pci confspace at hand for restore in sudden PCI error */
3953         if (amdgpu_device_cache_pci_state(adev->pdev))
3954                 pci_restore_state(pdev);
3955
3956         /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3957         /* this will fail for cards that aren't VGA class devices, just
3958          * ignore it */
3959         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3960                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3961
3962         px = amdgpu_device_supports_px(ddev);
3963
3964         if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3965                                 apple_gmux_detect(NULL, NULL)))
3966                 vga_switcheroo_register_client(adev->pdev,
3967                                                &amdgpu_switcheroo_ops, px);
3968
3969         if (px)
3970                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3971
3972         if (adev->gmc.xgmi.pending_reset)
3973                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3974                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
3975
3976         amdgpu_device_check_iommu_direct_map(adev);
3977
3978         return 0;
3979
3980 release_ras_con:
3981         amdgpu_release_ras_context(adev);
3982
3983 failed:
3984         amdgpu_vf_error_trans_all(adev);
3985
3986         return r;
3987 }
3988
3989 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3990 {
3991
3992         /* Clear all CPU mappings pointing to this device */
3993         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3994
3995         /* Unmap all mapped bars - Doorbell, registers and VRAM */
3996         amdgpu_device_doorbell_fini(adev);
3997
3998         iounmap(adev->rmmio);
3999         adev->rmmio = NULL;
4000         if (adev->mman.aper_base_kaddr)
4001                 iounmap(adev->mman.aper_base_kaddr);
4002         adev->mman.aper_base_kaddr = NULL;
4003
4004         /* Memory manager related */
4005         if (!adev->gmc.xgmi.connected_to_cpu) {
4006                 arch_phys_wc_del(adev->gmc.vram_mtrr);
4007                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4008         }
4009 }
4010
4011 /**
4012  * amdgpu_device_fini_hw - tear down the driver
4013  *
4014  * @adev: amdgpu_device pointer
4015  *
4016  * Tear down the driver info (all asics).
4017  * Called at driver shutdown.
4018  */
4019 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4020 {
4021         dev_info(adev->dev, "amdgpu: finishing device.\n");
4022         flush_delayed_work(&adev->delayed_init_work);
4023         adev->shutdown = true;
4024
4025         /* make sure IB test finished before entering exclusive mode
4026          * to avoid preemption on IB test
4027          * */
4028         if (amdgpu_sriov_vf(adev)) {
4029                 amdgpu_virt_request_full_gpu(adev, false);
4030                 amdgpu_virt_fini_data_exchange(adev);
4031         }
4032
4033         /* disable all interrupts */
4034         amdgpu_irq_disable_all(adev);
4035         if (adev->mode_info.mode_config_initialized){
4036                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4037                         drm_helper_force_disable_all(adev_to_drm(adev));
4038                 else
4039                         drm_atomic_helper_shutdown(adev_to_drm(adev));
4040         }
4041         amdgpu_fence_driver_hw_fini(adev);
4042
4043         if (adev->mman.initialized)
4044                 drain_workqueue(adev->mman.bdev.wq);
4045
4046         if (adev->pm.sysfs_initialized)
4047                 amdgpu_pm_sysfs_fini(adev);
4048         if (adev->ucode_sysfs_en)
4049                 amdgpu_ucode_sysfs_fini(adev);
4050         if (adev->psp_sysfs_en)
4051                 amdgpu_psp_sysfs_fini(adev);
4052         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4053
4054         /* disable ras feature must before hw fini */
4055         amdgpu_ras_pre_fini(adev);
4056
4057         amdgpu_device_ip_fini_early(adev);
4058
4059         amdgpu_irq_fini_hw(adev);
4060
4061         if (adev->mman.initialized)
4062                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4063
4064         amdgpu_gart_dummy_page_fini(adev);
4065
4066         if (drm_dev_is_unplugged(adev_to_drm(adev)))
4067                 amdgpu_device_unmap_mmio(adev);
4068
4069 }
4070
4071 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4072 {
4073         int idx;
4074         bool px;
4075
4076         amdgpu_fence_driver_sw_fini(adev);
4077         amdgpu_device_ip_fini(adev);
4078         amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4079         adev->accel_working = false;
4080         dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4081
4082         amdgpu_reset_fini(adev);
4083
4084         /* free i2c buses */
4085         if (!amdgpu_device_has_dc_support(adev))
4086                 amdgpu_i2c_fini(adev);
4087
4088         if (amdgpu_emu_mode != 1)
4089                 amdgpu_atombios_fini(adev);
4090
4091         kfree(adev->bios);
4092         adev->bios = NULL;
4093
4094         px = amdgpu_device_supports_px(adev_to_drm(adev));
4095
4096         if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4097                                 apple_gmux_detect(NULL, NULL)))
4098                 vga_switcheroo_unregister_client(adev->pdev);
4099
4100         if (px)
4101                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4102
4103         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4104                 vga_client_unregister(adev->pdev);
4105
4106         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4107
4108                 iounmap(adev->rmmio);
4109                 adev->rmmio = NULL;
4110                 amdgpu_device_doorbell_fini(adev);
4111                 drm_dev_exit(idx);
4112         }
4113
4114         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4115                 amdgpu_pmu_fini(adev);
4116         if (adev->mman.discovery_bin)
4117                 amdgpu_discovery_fini(adev);
4118
4119         amdgpu_reset_put_reset_domain(adev->reset_domain);
4120         adev->reset_domain = NULL;
4121
4122         kfree(adev->pci_state);
4123
4124 }
4125
4126 /**
4127  * amdgpu_device_evict_resources - evict device resources
4128  * @adev: amdgpu device object
4129  *
4130  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4131  * of the vram memory type. Mainly used for evicting device resources
4132  * at suspend time.
4133  *
4134  */
4135 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4136 {
4137         int ret;
4138
4139         /* No need to evict vram on APUs for suspend to ram or s2idle */
4140         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4141                 return 0;
4142
4143         ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4144         if (ret)
4145                 DRM_WARN("evicting device resources failed\n");
4146         return ret;
4147 }
4148
4149 /*
4150  * Suspend & resume.
4151  */
4152 /**
4153  * amdgpu_device_suspend - initiate device suspend
4154  *
4155  * @dev: drm dev pointer
4156  * @fbcon : notify the fbdev of suspend
4157  *
4158  * Puts the hw in the suspend state (all asics).
4159  * Returns 0 for success or an error on failure.
4160  * Called at driver suspend.
4161  */
4162 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4163 {
4164         struct amdgpu_device *adev = drm_to_adev(dev);
4165         int r = 0;
4166
4167         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4168                 return 0;
4169
4170         adev->in_suspend = true;
4171
4172         /* Evict the majority of BOs before grabbing the full access */
4173         r = amdgpu_device_evict_resources(adev);
4174         if (r)
4175                 return r;
4176
4177         if (amdgpu_sriov_vf(adev)) {
4178                 amdgpu_virt_fini_data_exchange(adev);
4179                 r = amdgpu_virt_request_full_gpu(adev, false);
4180                 if (r)
4181                         return r;
4182         }
4183
4184         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4185                 DRM_WARN("smart shift update failed\n");
4186
4187         if (fbcon)
4188                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4189
4190         cancel_delayed_work_sync(&adev->delayed_init_work);
4191
4192         amdgpu_ras_suspend(adev);
4193
4194         amdgpu_device_ip_suspend_phase1(adev);
4195
4196         if (!adev->in_s0ix)
4197                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4198
4199         r = amdgpu_device_evict_resources(adev);
4200         if (r)
4201                 return r;
4202
4203         amdgpu_fence_driver_hw_fini(adev);
4204
4205         amdgpu_device_ip_suspend_phase2(adev);
4206
4207         if (amdgpu_sriov_vf(adev))
4208                 amdgpu_virt_release_full_gpu(adev, false);
4209
4210         return 0;
4211 }
4212
4213 /**
4214  * amdgpu_device_resume - initiate device resume
4215  *
4216  * @dev: drm dev pointer
4217  * @fbcon : notify the fbdev of resume
4218  *
4219  * Bring the hw back to operating state (all asics).
4220  * Returns 0 for success or an error on failure.
4221  * Called at driver resume.
4222  */
4223 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4224 {
4225         struct amdgpu_device *adev = drm_to_adev(dev);
4226         int r = 0;
4227
4228         if (amdgpu_sriov_vf(adev)) {
4229                 r = amdgpu_virt_request_full_gpu(adev, true);
4230                 if (r)
4231                         return r;
4232         }
4233
4234         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4235                 return 0;
4236
4237         if (adev->in_s0ix)
4238                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4239
4240         /* post card */
4241         if (amdgpu_device_need_post(adev)) {
4242                 r = amdgpu_device_asic_init(adev);
4243                 if (r)
4244                         dev_err(adev->dev, "amdgpu asic init failed\n");
4245         }
4246
4247         r = amdgpu_device_ip_resume(adev);
4248
4249         if (r) {
4250                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4251                 goto exit;
4252         }
4253         amdgpu_fence_driver_hw_init(adev);
4254
4255         r = amdgpu_device_ip_late_init(adev);
4256         if (r)
4257                 goto exit;
4258
4259         queue_delayed_work(system_wq, &adev->delayed_init_work,
4260                            msecs_to_jiffies(AMDGPU_RESUME_MS));
4261
4262         if (!adev->in_s0ix) {
4263                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4264                 if (r)
4265                         goto exit;
4266         }
4267
4268 exit:
4269         if (amdgpu_sriov_vf(adev)) {
4270                 amdgpu_virt_init_data_exchange(adev);
4271                 amdgpu_virt_release_full_gpu(adev, true);
4272         }
4273
4274         if (r)
4275                 return r;
4276
4277         /* Make sure IB tests flushed */
4278         flush_delayed_work(&adev->delayed_init_work);
4279
4280         if (fbcon)
4281                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4282
4283         amdgpu_ras_resume(adev);
4284
4285         if (adev->mode_info.num_crtc) {
4286                 /*
4287                  * Most of the connector probing functions try to acquire runtime pm
4288                  * refs to ensure that the GPU is powered on when connector polling is
4289                  * performed. Since we're calling this from a runtime PM callback,
4290                  * trying to acquire rpm refs will cause us to deadlock.
4291                  *
4292                  * Since we're guaranteed to be holding the rpm lock, it's safe to
4293                  * temporarily disable the rpm helpers so this doesn't deadlock us.
4294                  */
4295 #ifdef CONFIG_PM
4296                 dev->dev->power.disable_depth++;
4297 #endif
4298                 if (!adev->dc_enabled)
4299                         drm_helper_hpd_irq_event(dev);
4300                 else
4301                         drm_kms_helper_hotplug_event(dev);
4302 #ifdef CONFIG_PM
4303                 dev->dev->power.disable_depth--;
4304 #endif
4305         }
4306         adev->in_suspend = false;
4307
4308         if (adev->enable_mes)
4309                 amdgpu_mes_self_test(adev);
4310
4311         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4312                 DRM_WARN("smart shift update failed\n");
4313
4314         return 0;
4315 }
4316
4317 /**
4318  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4319  *
4320  * @adev: amdgpu_device pointer
4321  *
4322  * The list of all the hardware IPs that make up the asic is walked and
4323  * the check_soft_reset callbacks are run.  check_soft_reset determines
4324  * if the asic is still hung or not.
4325  * Returns true if any of the IPs are still in a hung state, false if not.
4326  */
4327 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4328 {
4329         int i;
4330         bool asic_hang = false;
4331
4332         if (amdgpu_sriov_vf(adev))
4333                 return true;
4334
4335         if (amdgpu_asic_need_full_reset(adev))
4336                 return true;
4337
4338         for (i = 0; i < adev->num_ip_blocks; i++) {
4339                 if (!adev->ip_blocks[i].status.valid)
4340                         continue;
4341                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4342                         adev->ip_blocks[i].status.hang =
4343                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4344                 if (adev->ip_blocks[i].status.hang) {
4345                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4346                         asic_hang = true;
4347                 }
4348         }
4349         return asic_hang;
4350 }
4351
4352 /**
4353  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4354  *
4355  * @adev: amdgpu_device pointer
4356  *
4357  * The list of all the hardware IPs that make up the asic is walked and the
4358  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4359  * handles any IP specific hardware or software state changes that are
4360  * necessary for a soft reset to succeed.
4361  * Returns 0 on success, negative error code on failure.
4362  */
4363 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4364 {
4365         int i, r = 0;
4366
4367         for (i = 0; i < adev->num_ip_blocks; i++) {
4368                 if (!adev->ip_blocks[i].status.valid)
4369                         continue;
4370                 if (adev->ip_blocks[i].status.hang &&
4371                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4372                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4373                         if (r)
4374                                 return r;
4375                 }
4376         }
4377
4378         return 0;
4379 }
4380
4381 /**
4382  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4383  *
4384  * @adev: amdgpu_device pointer
4385  *
4386  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4387  * reset is necessary to recover.
4388  * Returns true if a full asic reset is required, false if not.
4389  */
4390 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4391 {
4392         int i;
4393
4394         if (amdgpu_asic_need_full_reset(adev))
4395                 return true;
4396
4397         for (i = 0; i < adev->num_ip_blocks; i++) {
4398                 if (!adev->ip_blocks[i].status.valid)
4399                         continue;
4400                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4401                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4402                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4403                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4404                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4405                         if (adev->ip_blocks[i].status.hang) {
4406                                 dev_info(adev->dev, "Some block need full reset!\n");
4407                                 return true;
4408                         }
4409                 }
4410         }
4411         return false;
4412 }
4413
4414 /**
4415  * amdgpu_device_ip_soft_reset - do a soft reset
4416  *
4417  * @adev: amdgpu_device pointer
4418  *
4419  * The list of all the hardware IPs that make up the asic is walked and the
4420  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4421  * IP specific hardware or software state changes that are necessary to soft
4422  * reset the IP.
4423  * Returns 0 on success, negative error code on failure.
4424  */
4425 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4426 {
4427         int i, r = 0;
4428
4429         for (i = 0; i < adev->num_ip_blocks; i++) {
4430                 if (!adev->ip_blocks[i].status.valid)
4431                         continue;
4432                 if (adev->ip_blocks[i].status.hang &&
4433                     adev->ip_blocks[i].version->funcs->soft_reset) {
4434                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4435                         if (r)
4436                                 return r;
4437                 }
4438         }
4439
4440         return 0;
4441 }
4442
4443 /**
4444  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4445  *
4446  * @adev: amdgpu_device pointer
4447  *
4448  * The list of all the hardware IPs that make up the asic is walked and the
4449  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4450  * handles any IP specific hardware or software state changes that are
4451  * necessary after the IP has been soft reset.
4452  * Returns 0 on success, negative error code on failure.
4453  */
4454 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4455 {
4456         int i, r = 0;
4457
4458         for (i = 0; i < adev->num_ip_blocks; i++) {
4459                 if (!adev->ip_blocks[i].status.valid)
4460                         continue;
4461                 if (adev->ip_blocks[i].status.hang &&
4462                     adev->ip_blocks[i].version->funcs->post_soft_reset)
4463                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4464                 if (r)
4465                         return r;
4466         }
4467
4468         return 0;
4469 }
4470
4471 /**
4472  * amdgpu_device_recover_vram - Recover some VRAM contents
4473  *
4474  * @adev: amdgpu_device pointer
4475  *
4476  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4477  * restore things like GPUVM page tables after a GPU reset where
4478  * the contents of VRAM might be lost.
4479  *
4480  * Returns:
4481  * 0 on success, negative error code on failure.
4482  */
4483 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4484 {
4485         struct dma_fence *fence = NULL, *next = NULL;
4486         struct amdgpu_bo *shadow;
4487         struct amdgpu_bo_vm *vmbo;
4488         long r = 1, tmo;
4489
4490         if (amdgpu_sriov_runtime(adev))
4491                 tmo = msecs_to_jiffies(8000);
4492         else
4493                 tmo = msecs_to_jiffies(100);
4494
4495         dev_info(adev->dev, "recover vram bo from shadow start\n");
4496         mutex_lock(&adev->shadow_list_lock);
4497         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4498                 shadow = &vmbo->bo;
4499                 /* No need to recover an evicted BO */
4500                 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4501                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4502                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4503                         continue;
4504
4505                 r = amdgpu_bo_restore_shadow(shadow, &next);
4506                 if (r)
4507                         break;
4508
4509                 if (fence) {
4510                         tmo = dma_fence_wait_timeout(fence, false, tmo);
4511                         dma_fence_put(fence);
4512                         fence = next;
4513                         if (tmo == 0) {
4514                                 r = -ETIMEDOUT;
4515                                 break;
4516                         } else if (tmo < 0) {
4517                                 r = tmo;
4518                                 break;
4519                         }
4520                 } else {
4521                         fence = next;
4522                 }
4523         }
4524         mutex_unlock(&adev->shadow_list_lock);
4525
4526         if (fence)
4527                 tmo = dma_fence_wait_timeout(fence, false, tmo);
4528         dma_fence_put(fence);
4529
4530         if (r < 0 || tmo <= 0) {
4531                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4532                 return -EIO;
4533         }
4534
4535         dev_info(adev->dev, "recover vram bo from shadow done\n");
4536         return 0;
4537 }
4538
4539
4540 /**
4541  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4542  *
4543  * @adev: amdgpu_device pointer
4544  * @from_hypervisor: request from hypervisor
4545  *
4546  * do VF FLR and reinitialize Asic
4547  * return 0 means succeeded otherwise failed
4548  */
4549 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4550                                      bool from_hypervisor)
4551 {
4552         int r;
4553         struct amdgpu_hive_info *hive = NULL;
4554         int retry_limit = 0;
4555
4556 retry:
4557         amdgpu_amdkfd_pre_reset(adev);
4558
4559         if (from_hypervisor)
4560                 r = amdgpu_virt_request_full_gpu(adev, true);
4561         else
4562                 r = amdgpu_virt_reset_gpu(adev);
4563         if (r)
4564                 return r;
4565
4566         /* Resume IP prior to SMC */
4567         r = amdgpu_device_ip_reinit_early_sriov(adev);
4568         if (r)
4569                 goto error;
4570
4571         amdgpu_virt_init_data_exchange(adev);
4572
4573         r = amdgpu_device_fw_loading(adev);
4574         if (r)
4575                 return r;
4576
4577         /* now we are okay to resume SMC/CP/SDMA */
4578         r = amdgpu_device_ip_reinit_late_sriov(adev);
4579         if (r)
4580                 goto error;
4581
4582         hive = amdgpu_get_xgmi_hive(adev);
4583         /* Update PSP FW topology after reset */
4584         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4585                 r = amdgpu_xgmi_update_topology(hive, adev);
4586
4587         if (hive)
4588                 amdgpu_put_xgmi_hive(hive);
4589
4590         if (!r) {
4591                 amdgpu_irq_gpu_reset_resume_helper(adev);
4592                 r = amdgpu_ib_ring_tests(adev);
4593
4594                 amdgpu_amdkfd_post_reset(adev);
4595         }
4596
4597 error:
4598         if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4599                 amdgpu_inc_vram_lost(adev);
4600                 r = amdgpu_device_recover_vram(adev);
4601         }
4602         amdgpu_virt_release_full_gpu(adev, true);
4603
4604         if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4605                 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4606                         retry_limit++;
4607                         goto retry;
4608                 } else
4609                         DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4610         }
4611
4612         return r;
4613 }
4614
4615 /**
4616  * amdgpu_device_has_job_running - check if there is any job in mirror list
4617  *
4618  * @adev: amdgpu_device pointer
4619  *
4620  * check if there is any job in mirror list
4621  */
4622 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4623 {
4624         int i;
4625         struct drm_sched_job *job;
4626
4627         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4628                 struct amdgpu_ring *ring = adev->rings[i];
4629
4630                 if (!ring || !ring->sched.thread)
4631                         continue;
4632
4633                 spin_lock(&ring->sched.job_list_lock);
4634                 job = list_first_entry_or_null(&ring->sched.pending_list,
4635                                                struct drm_sched_job, list);
4636                 spin_unlock(&ring->sched.job_list_lock);
4637                 if (job)
4638                         return true;
4639         }
4640         return false;
4641 }
4642
4643 /**
4644  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4645  *
4646  * @adev: amdgpu_device pointer
4647  *
4648  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4649  * a hung GPU.
4650  */
4651 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4652 {
4653
4654         if (amdgpu_gpu_recovery == 0)
4655                 goto disabled;
4656
4657         /* Skip soft reset check in fatal error mode */
4658         if (!amdgpu_ras_is_poison_mode_supported(adev))
4659                 return true;
4660
4661         if (amdgpu_sriov_vf(adev))
4662                 return true;
4663
4664         if (amdgpu_gpu_recovery == -1) {
4665                 switch (adev->asic_type) {
4666 #ifdef CONFIG_DRM_AMDGPU_SI
4667                 case CHIP_VERDE:
4668                 case CHIP_TAHITI:
4669                 case CHIP_PITCAIRN:
4670                 case CHIP_OLAND:
4671                 case CHIP_HAINAN:
4672 #endif
4673 #ifdef CONFIG_DRM_AMDGPU_CIK
4674                 case CHIP_KAVERI:
4675                 case CHIP_KABINI:
4676                 case CHIP_MULLINS:
4677 #endif
4678                 case CHIP_CARRIZO:
4679                 case CHIP_STONEY:
4680                 case CHIP_CYAN_SKILLFISH:
4681                         goto disabled;
4682                 default:
4683                         break;
4684                 }
4685         }
4686
4687         return true;
4688
4689 disabled:
4690                 dev_info(adev->dev, "GPU recovery disabled.\n");
4691                 return false;
4692 }
4693
4694 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4695 {
4696         u32 i;
4697         int ret = 0;
4698
4699         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4700
4701         dev_info(adev->dev, "GPU mode1 reset\n");
4702
4703         /* disable BM */
4704         pci_clear_master(adev->pdev);
4705
4706         amdgpu_device_cache_pci_state(adev->pdev);
4707
4708         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4709                 dev_info(adev->dev, "GPU smu mode1 reset\n");
4710                 ret = amdgpu_dpm_mode1_reset(adev);
4711         } else {
4712                 dev_info(adev->dev, "GPU psp mode1 reset\n");
4713                 ret = psp_gpu_reset(adev);
4714         }
4715
4716         if (ret)
4717                 dev_err(adev->dev, "GPU mode1 reset failed\n");
4718
4719         amdgpu_device_load_pci_state(adev->pdev);
4720
4721         /* wait for asic to come out of reset */
4722         for (i = 0; i < adev->usec_timeout; i++) {
4723                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4724
4725                 if (memsize != 0xffffffff)
4726                         break;
4727                 udelay(1);
4728         }
4729
4730         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4731         return ret;
4732 }
4733
4734 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4735                                  struct amdgpu_reset_context *reset_context)
4736 {
4737         int i, r = 0;
4738         struct amdgpu_job *job = NULL;
4739         bool need_full_reset =
4740                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4741
4742         if (reset_context->reset_req_dev == adev)
4743                 job = reset_context->job;
4744
4745         if (amdgpu_sriov_vf(adev)) {
4746                 /* stop the data exchange thread */
4747                 amdgpu_virt_fini_data_exchange(adev);
4748         }
4749
4750         amdgpu_fence_driver_isr_toggle(adev, true);
4751
4752         /* block all schedulers and reset given job's ring */
4753         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4754                 struct amdgpu_ring *ring = adev->rings[i];
4755
4756                 if (!ring || !ring->sched.thread)
4757                         continue;
4758
4759                 /*clear job fence from fence drv to avoid force_completion
4760                  *leave NULL and vm flush fence in fence drv */
4761                 amdgpu_fence_driver_clear_job_fences(ring);
4762
4763                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4764                 amdgpu_fence_driver_force_completion(ring);
4765         }
4766
4767         amdgpu_fence_driver_isr_toggle(adev, false);
4768
4769         if (job && job->vm)
4770                 drm_sched_increase_karma(&job->base);
4771
4772         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4773         /* If reset handler not implemented, continue; otherwise return */
4774         if (r == -ENOSYS)
4775                 r = 0;
4776         else
4777                 return r;
4778
4779         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4780         if (!amdgpu_sriov_vf(adev)) {
4781
4782                 if (!need_full_reset)
4783                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4784
4785                 if (!need_full_reset && amdgpu_gpu_recovery &&
4786                     amdgpu_device_ip_check_soft_reset(adev)) {
4787                         amdgpu_device_ip_pre_soft_reset(adev);
4788                         r = amdgpu_device_ip_soft_reset(adev);
4789                         amdgpu_device_ip_post_soft_reset(adev);
4790                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4791                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4792                                 need_full_reset = true;
4793                         }
4794                 }
4795
4796                 if (need_full_reset)
4797                         r = amdgpu_device_ip_suspend(adev);
4798                 if (need_full_reset)
4799                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4800                 else
4801                         clear_bit(AMDGPU_NEED_FULL_RESET,
4802                                   &reset_context->flags);
4803         }
4804
4805         return r;
4806 }
4807
4808 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4809 {
4810         int i;
4811
4812         lockdep_assert_held(&adev->reset_domain->sem);
4813
4814         for (i = 0; i < adev->num_regs; i++) {
4815                 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4816                 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4817                                              adev->reset_dump_reg_value[i]);
4818         }
4819
4820         return 0;
4821 }
4822
4823 #ifdef CONFIG_DEV_COREDUMP
4824 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4825                 size_t count, void *data, size_t datalen)
4826 {
4827         struct drm_printer p;
4828         struct amdgpu_device *adev = data;
4829         struct drm_print_iterator iter;
4830         int i;
4831
4832         iter.data = buffer;
4833         iter.offset = 0;
4834         iter.start = offset;
4835         iter.remain = count;
4836
4837         p = drm_coredump_printer(&iter);
4838
4839         drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4840         drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4841         drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4842         drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4843         if (adev->reset_task_info.pid)
4844                 drm_printf(&p, "process_name: %s PID: %d\n",
4845                            adev->reset_task_info.process_name,
4846                            adev->reset_task_info.pid);
4847
4848         if (adev->reset_vram_lost)
4849                 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4850         if (adev->num_regs) {
4851                 drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
4852
4853                 for (i = 0; i < adev->num_regs; i++)
4854                         drm_printf(&p, "0x%08x: 0x%08x\n",
4855                                    adev->reset_dump_reg_list[i],
4856                                    adev->reset_dump_reg_value[i]);
4857         }
4858
4859         return count - iter.remain;
4860 }
4861
/*
 * devcoredump free callback. Intentionally does nothing with @data: the
 * pointer registered by amdgpu_reset_capture_coredumpm() is the
 * amdgpu_device itself, not a separately allocated snapshot.
 */
static void amdgpu_devcoredump_free(void *data)
{
	/* nothing to release */
}
4865
4866 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4867 {
4868         struct drm_device *dev = adev_to_drm(adev);
4869
4870         ktime_get_ts64(&adev->reset_time);
4871         dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4872                       amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4873 }
4874 #endif
4875
4876 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4877                          struct amdgpu_reset_context *reset_context)
4878 {
4879         struct amdgpu_device *tmp_adev = NULL;
4880         bool need_full_reset, skip_hw_reset, vram_lost = false;
4881         int r = 0;
4882         bool gpu_reset_for_dev_remove = 0;
4883
4884         /* Try reset handler method first */
4885         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4886                                     reset_list);
4887         amdgpu_reset_reg_dumps(tmp_adev);
4888
4889         reset_context->reset_device_list = device_list_handle;
4890         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4891         /* If reset handler not implemented, continue; otherwise return */
4892         if (r == -ENOSYS)
4893                 r = 0;
4894         else
4895                 return r;
4896
4897         /* Reset handler not implemented, use the default method */
4898         need_full_reset =
4899                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4900         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4901
4902         gpu_reset_for_dev_remove =
4903                 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4904                         test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4905
4906         /*
4907          * ASIC reset has to be done on all XGMI hive nodes ASAP
4908          * to allow proper links negotiation in FW (within 1 sec)
4909          */
4910         if (!skip_hw_reset && need_full_reset) {
4911                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4912                         /* For XGMI run all resets in parallel to speed up the process */
4913                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4914                                 tmp_adev->gmc.xgmi.pending_reset = false;
4915                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4916                                         r = -EALREADY;
4917                         } else
4918                                 r = amdgpu_asic_reset(tmp_adev);
4919
4920                         if (r) {
4921                                 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4922                                          r, adev_to_drm(tmp_adev)->unique);
4923                                 break;
4924                         }
4925                 }
4926
4927                 /* For XGMI wait for all resets to complete before proceed */
4928                 if (!r) {
4929                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4930                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4931                                         flush_work(&tmp_adev->xgmi_reset_work);
4932                                         r = tmp_adev->asic_reset_res;
4933                                         if (r)
4934                                                 break;
4935                                 }
4936                         }
4937                 }
4938         }
4939
4940         if (!r && amdgpu_ras_intr_triggered()) {
4941                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4942                         if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4943                             tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4944                                 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4945                 }
4946
4947                 amdgpu_ras_intr_cleared();
4948         }
4949
4950         /* Since the mode1 reset affects base ip blocks, the
4951          * phase1 ip blocks need to be resumed. Otherwise there
4952          * will be a BIOS signature error and the psp bootloader
4953          * can't load kdb on the next amdgpu install.
4954          */
4955         if (gpu_reset_for_dev_remove) {
4956                 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4957                         amdgpu_device_ip_resume_phase1(tmp_adev);
4958
4959                 goto end;
4960         }
4961
4962         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4963                 if (need_full_reset) {
4964                         /* post card */
4965                         r = amdgpu_device_asic_init(tmp_adev);
4966                         if (r) {
4967                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
4968                         } else {
4969                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4970                                 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4971                                 if (r)
4972                                         goto out;
4973
4974                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4975                                 if (r)
4976                                         goto out;
4977
4978                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4979 #ifdef CONFIG_DEV_COREDUMP
4980                                 tmp_adev->reset_vram_lost = vram_lost;
4981                                 memset(&tmp_adev->reset_task_info, 0,
4982                                                 sizeof(tmp_adev->reset_task_info));
4983                                 if (reset_context->job && reset_context->job->vm)
4984                                         tmp_adev->reset_task_info =
4985                                                 reset_context->job->vm->task_info;
4986                                 amdgpu_reset_capture_coredumpm(tmp_adev);
4987 #endif
4988                                 if (vram_lost) {
4989                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
4990                                         amdgpu_inc_vram_lost(tmp_adev);
4991                                 }
4992
4993                                 r = amdgpu_device_fw_loading(tmp_adev);
4994                                 if (r)
4995                                         return r;
4996
4997                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4998                                 if (r)
4999                                         goto out;
5000
5001                                 if (vram_lost)
5002                                         amdgpu_device_fill_reset_magic(tmp_adev);
5003
5004                                 /*
5005                                  * Add this ASIC as tracked as reset was already
5006                                  * complete successfully.
5007                                  */
5008                                 amdgpu_register_gpu_instance(tmp_adev);
5009
5010                                 if (!reset_context->hive &&
5011                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5012                                         amdgpu_xgmi_add_device(tmp_adev);
5013
5014                                 r = amdgpu_device_ip_late_init(tmp_adev);
5015                                 if (r)
5016                                         goto out;
5017
5018                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5019
5020                                 /*
5021                                  * The GPU enters bad state once faulty pages
5022                                  * by ECC has reached the threshold, and ras
5023                                  * recovery is scheduled next. So add one check
5024                                  * here to break recovery if it indeed exceeds
5025                                  * bad page threshold, and remind user to
5026                                  * retire this GPU or setting one bigger
5027                                  * bad_page_threshold value to fix this once
5028                                  * probing driver again.
5029                                  */
5030                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5031                                         /* must succeed. */
5032                                         amdgpu_ras_resume(tmp_adev);
5033                                 } else {
5034                                         r = -EINVAL;
5035                                         goto out;
5036                                 }
5037
5038                                 /* Update PSP FW topology after reset */
5039                                 if (reset_context->hive &&
5040                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5041                                         r = amdgpu_xgmi_update_topology(
5042                                                 reset_context->hive, tmp_adev);
5043                         }
5044                 }
5045
5046 out:
5047                 if (!r) {
5048                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5049                         r = amdgpu_ib_ring_tests(tmp_adev);
5050                         if (r) {
5051                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5052                                 need_full_reset = true;
5053                                 r = -EAGAIN;
5054                                 goto end;
5055                         }
5056                 }
5057
5058                 if (!r)
5059                         r = amdgpu_device_recover_vram(tmp_adev);
5060                 else
5061                         tmp_adev->asic_reset_res = r;
5062         }
5063
5064 end:
5065         if (need_full_reset)
5066                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5067         else
5068                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5069         return r;
5070 }
5071
5072 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5073 {
5074
5075         switch (amdgpu_asic_reset_method(adev)) {
5076         case AMD_RESET_METHOD_MODE1:
5077                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5078                 break;
5079         case AMD_RESET_METHOD_MODE2:
5080                 adev->mp1_state = PP_MP1_STATE_RESET;
5081                 break;
5082         default:
5083                 adev->mp1_state = PP_MP1_STATE_NONE;
5084                 break;
5085         }
5086 }
5087
5088 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5089 {
5090         amdgpu_vf_error_trans_all(adev);
5091         adev->mp1_state = PP_MP1_STATE_NONE;
5092 }
5093
5094 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5095 {
5096         struct pci_dev *p = NULL;
5097
5098         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5099                         adev->pdev->bus->number, 1);
5100         if (p) {
5101                 pm_runtime_enable(&(p->dev));
5102                 pm_runtime_resume(&(p->dev));
5103         }
5104
5105         pci_dev_put(p);
5106 }
5107
5108 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5109 {
5110         enum amd_reset_method reset_method;
5111         struct pci_dev *p = NULL;
5112         u64 expires;
5113
5114         /*
5115          * For now, only BACO and mode1 reset are confirmed
5116          * to suffer the audio issue without proper suspended.
5117          */
5118         reset_method = amdgpu_asic_reset_method(adev);
5119         if ((reset_method != AMD_RESET_METHOD_BACO) &&
5120              (reset_method != AMD_RESET_METHOD_MODE1))
5121                 return -EINVAL;
5122
5123         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5124                         adev->pdev->bus->number, 1);
5125         if (!p)
5126                 return -ENODEV;
5127
5128         expires = pm_runtime_autosuspend_expiration(&(p->dev));
5129         if (!expires)
5130                 /*
5131                  * If we cannot get the audio device autosuspend delay,
5132                  * a fixed 4S interval will be used. Considering 3S is
5133                  * the audio controller default autosuspend delay setting.
5134                  * 4S used here is guaranteed to cover that.
5135                  */
5136                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5137
5138         while (!pm_runtime_status_suspended(&(p->dev))) {
5139                 if (!pm_runtime_suspend(&(p->dev)))
5140                         break;
5141
5142                 if (expires < ktime_get_mono_fast_ns()) {
5143                         dev_warn(adev->dev, "failed to suspend display audio\n");
5144                         pci_dev_put(p);
5145                         /* TODO: abort the succeeding gpu reset? */
5146                         return -ETIMEDOUT;
5147                 }
5148         }
5149
5150         pm_runtime_disable(&(p->dev));
5151
5152         pci_dev_put(p);
5153         return 0;
5154 }
5155
5156 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5157 {
5158         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5159
5160 #if defined(CONFIG_DEBUG_FS)
5161         if (!amdgpu_sriov_vf(adev))
5162                 cancel_work(&adev->reset_work);
5163 #endif
5164
5165         if (adev->kfd.dev)
5166                 cancel_work(&adev->kfd.reset_work);
5167
5168         if (amdgpu_sriov_vf(adev))
5169                 cancel_work(&adev->virt.flr_work);
5170
5171         if (con && adev->ras_enabled)
5172                 cancel_work(&con->recovery_work);
5173
5174 }
5175
5176 /**
5177  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5178  *
5179  * @adev: amdgpu_device pointer
5180  * @job: which job trigger hang
5181  * @reset_context: amdgpu reset context pointer
5182  *
5183  * Attempt to reset the GPU if it has hung (all asics).
5184  * Attempt to do soft-reset or full-reset and reinitialize Asic
5185  * Returns 0 for success or an error on failure.
5186  */
5187
5188 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5189                               struct amdgpu_job *job,
5190                               struct amdgpu_reset_context *reset_context)
5191 {
5192         struct list_head device_list, *device_list_handle =  NULL;
5193         bool job_signaled = false;
5194         struct amdgpu_hive_info *hive = NULL;
5195         struct amdgpu_device *tmp_adev = NULL;
5196         int i, r = 0;
5197         bool need_emergency_restart = false;
5198         bool audio_suspended = false;
5199         bool gpu_reset_for_dev_remove = false;
5200
5201         gpu_reset_for_dev_remove =
5202                         test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5203                                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5204
5205         /*
5206          * Special case: RAS triggered and full reset isn't supported
5207          */
5208         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5209
5210         /*
5211          * Flush RAM to disk so that after reboot
5212          * the user can read log and see why the system rebooted.
5213          */
5214         if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5215                 DRM_WARN("Emergency reboot.");
5216
5217                 ksys_sync_helper();
5218                 emergency_restart();
5219         }
5220
5221         dev_info(adev->dev, "GPU %s begin!\n",
5222                 need_emergency_restart ? "jobs stop":"reset");
5223
5224         if (!amdgpu_sriov_vf(adev))
5225                 hive = amdgpu_get_xgmi_hive(adev);
5226         if (hive)
5227                 mutex_lock(&hive->hive_lock);
5228
5229         reset_context->job = job;
5230         reset_context->hive = hive;
5231         /*
5232          * Build list of devices to reset.
5233          * In case we are in XGMI hive mode, resort the device list
5234          * to put adev in the 1st position.
5235          */
5236         INIT_LIST_HEAD(&device_list);
5237         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5238                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5239                         list_add_tail(&tmp_adev->reset_list, &device_list);
5240                         if (gpu_reset_for_dev_remove && adev->shutdown)
5241                                 tmp_adev->shutdown = true;
5242                 }
5243                 if (!list_is_first(&adev->reset_list, &device_list))
5244                         list_rotate_to_front(&adev->reset_list, &device_list);
5245                 device_list_handle = &device_list;
5246         } else {
5247                 list_add_tail(&adev->reset_list, &device_list);
5248                 device_list_handle = &device_list;
5249         }
5250
5251         /* We need to lock reset domain only once both for XGMI and single device */
5252         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5253                                     reset_list);
5254         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5255
5256         /* block all schedulers and reset given job's ring */
5257         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5258
5259                 amdgpu_device_set_mp1_state(tmp_adev);
5260
5261                 /*
5262                  * Try to put the audio codec into suspend state
5263                  * before gpu reset started.
5264                  *
5265                  * Due to the power domain of the graphics device
5266                  * is shared with AZ power domain. Without this,
5267                  * we may change the audio hardware from behind
5268                  * the audio driver's back. That will trigger
5269                  * some audio codec errors.
5270                  */
5271                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5272                         audio_suspended = true;
5273
5274                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5275
5276                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5277
5278                 if (!amdgpu_sriov_vf(tmp_adev))
5279                         amdgpu_amdkfd_pre_reset(tmp_adev);
5280
5281                 /*
5282                  * Mark these ASICs to be reseted as untracked first
5283                  * And add them back after reset completed
5284                  */
5285                 amdgpu_unregister_gpu_instance(tmp_adev);
5286
5287                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5288
5289                 /* disable ras on ALL IPs */
5290                 if (!need_emergency_restart &&
5291                       amdgpu_device_ip_need_full_reset(tmp_adev))
5292                         amdgpu_ras_suspend(tmp_adev);
5293
5294                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5295                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5296
5297                         if (!ring || !ring->sched.thread)
5298                                 continue;
5299
5300                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5301
5302                         if (need_emergency_restart)
5303                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5304                 }
5305                 atomic_inc(&tmp_adev->gpu_reset_counter);
5306         }
5307
5308         if (need_emergency_restart)
5309                 goto skip_sched_resume;
5310
5311         /*
5312          * Must check guilty signal here since after this point all old
5313          * HW fences are force signaled.
5314          *
5315          * job->base holds a reference to parent fence
5316          */
5317         if (job && dma_fence_is_signaled(&job->hw_fence)) {
5318                 job_signaled = true;
5319                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5320                 goto skip_hw_reset;
5321         }
5322
5323 retry:  /* Rest of adevs pre asic reset from XGMI hive. */
5324         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5325                 if (gpu_reset_for_dev_remove) {
5326                         /* Workaroud for ASICs need to disable SMC first */
5327                         amdgpu_device_smu_fini_early(tmp_adev);
5328                 }
5329                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5330                 /*TODO Should we stop ?*/
5331                 if (r) {
5332                         dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5333                                   r, adev_to_drm(tmp_adev)->unique);
5334                         tmp_adev->asic_reset_res = r;
5335                 }
5336
5337                 /*
5338                  * Drop all pending non scheduler resets. Scheduler resets
5339                  * were already dropped during drm_sched_stop
5340                  */
5341                 amdgpu_device_stop_pending_resets(tmp_adev);
5342         }
5343
5344         /* Actual ASIC resets if needed.*/
5345         /* Host driver will handle XGMI hive reset for SRIOV */
5346         if (amdgpu_sriov_vf(adev)) {
5347                 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5348                 if (r)
5349                         adev->asic_reset_res = r;
5350
5351                 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5352                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5353                     adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
5354                         amdgpu_ras_resume(adev);
5355         } else {
5356                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5357                 if (r && r == -EAGAIN)
5358                         goto retry;
5359
5360                 if (!r && gpu_reset_for_dev_remove)
5361                         goto recover_end;
5362         }
5363
5364 skip_hw_reset:
5365
5366         /* Post ASIC reset for all devs .*/
5367         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5368
5369                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5370                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5371
5372                         if (!ring || !ring->sched.thread)
5373                                 continue;
5374
5375                         drm_sched_start(&ring->sched, true);
5376                 }
5377
5378                 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5379                         amdgpu_mes_self_test(tmp_adev);
5380
5381                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5382                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5383                 }
5384
5385                 if (tmp_adev->asic_reset_res)
5386                         r = tmp_adev->asic_reset_res;
5387
5388                 tmp_adev->asic_reset_res = 0;
5389
5390                 if (r) {
5391                         /* bad news, how to tell it to userspace ? */
5392                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5393                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5394                 } else {
5395                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5396                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5397                                 DRM_WARN("smart shift update failed\n");
5398                 }
5399         }
5400
5401 skip_sched_resume:
5402         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5403                 /* unlock kfd: SRIOV would do it separately */
5404                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5405                         amdgpu_amdkfd_post_reset(tmp_adev);
5406
5407                 /* kfd_post_reset will do nothing if kfd device is not initialized,
5408                  * need to bring up kfd here if it's not be initialized before
5409                  */
5410                 if (!adev->kfd.init_complete)
5411                         amdgpu_amdkfd_device_init(adev);
5412
5413                 if (audio_suspended)
5414                         amdgpu_device_resume_display_audio(tmp_adev);
5415
5416                 amdgpu_device_unset_mp1_state(tmp_adev);
5417
5418                 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5419         }
5420
5421 recover_end:
5422         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5423                                             reset_list);
5424         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5425
5426         if (hive) {
5427                 mutex_unlock(&hive->hive_lock);
5428                 amdgpu_put_xgmi_hive(hive);
5429         }
5430
5431         if (r)
5432                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5433
5434         atomic_set(&adev->reset_domain->reset_res, r);
5435         return r;
5436 }
5437
5438 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5440  *
5441  * @adev: amdgpu_device pointer
5442  *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
5444  * and lanes) of the slot the device is in. Handles APUs and
5445  * virtualized environments where PCIE config space may not be available.
5446  */
5447 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5448 {
5449         struct pci_dev *pdev;
5450         enum pci_bus_speed speed_cap, platform_speed_cap;
5451         enum pcie_link_width platform_link_width;
5452
5453         if (amdgpu_pcie_gen_cap)
5454                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5455
5456         if (amdgpu_pcie_lane_cap)
5457                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5458
5459         /* covers APUs as well */
5460         if (pci_is_root_bus(adev->pdev->bus)) {
5461                 if (adev->pm.pcie_gen_mask == 0)
5462                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5463                 if (adev->pm.pcie_mlw_mask == 0)
5464                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5465                 return;
5466         }
5467
5468         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5469                 return;
5470
5471         pcie_bandwidth_available(adev->pdev, NULL,
5472                                  &platform_speed_cap, &platform_link_width);
5473
5474         if (adev->pm.pcie_gen_mask == 0) {
5475                 /* asic caps */
5476                 pdev = adev->pdev;
5477                 speed_cap = pcie_get_speed_cap(pdev);
5478                 if (speed_cap == PCI_SPEED_UNKNOWN) {
5479                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5480                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5481                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5482                 } else {
5483                         if (speed_cap == PCIE_SPEED_32_0GT)
5484                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5485                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5486                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5487                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5488                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5489                         else if (speed_cap == PCIE_SPEED_16_0GT)
5490                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5491                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5492                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5493                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5494                         else if (speed_cap == PCIE_SPEED_8_0GT)
5495                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5496                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5497                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5498                         else if (speed_cap == PCIE_SPEED_5_0GT)
5499                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5500                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5501                         else
5502                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5503                 }
5504                 /* platform caps */
5505                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5506                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5507                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5508                 } else {
5509                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
5510                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5511                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5512                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5513                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5514                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5515                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5516                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5517                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5518                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5519                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5520                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5521                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5522                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5523                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5524                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5525                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5526                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5527                         else
5528                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5529
5530                 }
5531         }
5532         if (adev->pm.pcie_mlw_mask == 0) {
5533                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5534                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5535                 } else {
5536                         switch (platform_link_width) {
5537                         case PCIE_LNK_X32:
5538                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5539                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5540                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5541                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5542                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5543                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5544                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5545                                 break;
5546                         case PCIE_LNK_X16:
5547                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5548                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5549                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5550                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5551                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5552                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5553                                 break;
5554                         case PCIE_LNK_X12:
5555                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5556                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5557                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5558                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5559                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5560                                 break;
5561                         case PCIE_LNK_X8:
5562                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5563                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5564                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5565                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5566                                 break;
5567                         case PCIE_LNK_X4:
5568                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5569                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5570                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5571                                 break;
5572                         case PCIE_LNK_X2:
5573                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5574                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5575                                 break;
5576                         case PCIE_LNK_X1:
5577                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5578                                 break;
5579                         default:
5580                                 break;
5581                         }
5582                 }
5583         }
5584 }
5585
5586 /**
5587  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5588  *
5589  * @adev: amdgpu_device pointer
5590  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5591  *
5592  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5593  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5594  * @peer_adev.
5595  */
5596 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5597                                       struct amdgpu_device *peer_adev)
5598 {
5599 #ifdef CONFIG_HSA_AMD_P2P
5600         uint64_t address_mask = peer_adev->dev->dma_mask ?
5601                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5602         resource_size_t aper_limit =
5603                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5604         bool p2p_access =
5605                 !adev->gmc.xgmi.connected_to_cpu &&
5606                 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5607
5608         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5609                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5610                 !(adev->gmc.aper_base & address_mask ||
5611                   aper_limit & address_mask));
5612 #else
5613         return false;
5614 #endif
5615 }
5616
5617 int amdgpu_device_baco_enter(struct drm_device *dev)
5618 {
5619         struct amdgpu_device *adev = drm_to_adev(dev);
5620         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5621
5622         if (!amdgpu_device_supports_baco(dev))
5623                 return -ENOTSUPP;
5624
5625         if (ras && adev->ras_enabled &&
5626             adev->nbio.funcs->enable_doorbell_interrupt)
5627                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5628
5629         return amdgpu_dpm_baco_enter(adev);
5630 }
5631
5632 int amdgpu_device_baco_exit(struct drm_device *dev)
5633 {
5634         struct amdgpu_device *adev = drm_to_adev(dev);
5635         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5636         int ret = 0;
5637
5638         if (!amdgpu_device_supports_baco(dev))
5639                 return -ENOTSUPP;
5640
5641         ret = amdgpu_dpm_baco_exit(adev);
5642         if (ret)
5643                 return ret;
5644
5645         if (ras && adev->ras_enabled &&
5646             adev->nbio.funcs->enable_doorbell_interrupt)
5647                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5648
5649         if (amdgpu_passthrough(adev) &&
5650             adev->nbio.funcs->clear_doorbell_interrupt)
5651                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5652
5653         return 0;
5654 }
5655
5656 /**
5657  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5658  * @pdev: PCI device struct
5659  * @state: PCI channel state
5660  *
5661  * Description: Called when a PCI error is detected.
5662  *
5663  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5664  */
5665 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5666 {
5667         struct drm_device *dev = pci_get_drvdata(pdev);
5668         struct amdgpu_device *adev = drm_to_adev(dev);
5669         int i;
5670
5671         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5672
5673         if (adev->gmc.xgmi.num_physical_nodes > 1) {
5674                 DRM_WARN("No support for XGMI hive yet...");
5675                 return PCI_ERS_RESULT_DISCONNECT;
5676         }
5677
5678         adev->pci_channel_state = state;
5679
5680         switch (state) {
5681         case pci_channel_io_normal:
5682                 return PCI_ERS_RESULT_CAN_RECOVER;
5683         /* Fatal error, prepare for slot reset */
5684         case pci_channel_io_frozen:
5685                 /*
5686                  * Locking adev->reset_domain->sem will prevent any external access
5687                  * to GPU during PCI error recovery
5688                  */
5689                 amdgpu_device_lock_reset_domain(adev->reset_domain);
5690                 amdgpu_device_set_mp1_state(adev);
5691
5692                 /*
5693                  * Block any work scheduling as we do for regular GPU reset
5694                  * for the duration of the recovery
5695                  */
5696                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5697                         struct amdgpu_ring *ring = adev->rings[i];
5698
5699                         if (!ring || !ring->sched.thread)
5700                                 continue;
5701
5702                         drm_sched_stop(&ring->sched, NULL);
5703                 }
5704                 atomic_inc(&adev->gpu_reset_counter);
5705                 return PCI_ERS_RESULT_NEED_RESET;
5706         case pci_channel_io_perm_failure:
5707                 /* Permanent error, prepare for device removal */
5708                 return PCI_ERS_RESULT_DISCONNECT;
5709         }
5710
5711         return PCI_ERS_RESULT_NEED_RESET;
5712 }
5713
5714 /**
5715  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5716  * @pdev: pointer to PCI device
5717  */
5718 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5719 {
5720
5721         DRM_INFO("PCI error: mmio enabled callback!!\n");
5722
5723         /* TODO - dump whatever for debugging purposes */
5724
5725         /* This called only if amdgpu_pci_error_detected returns
5726          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5727          * works, no need to reset slot.
5728          */
5729
5730         return PCI_ERS_RESULT_RECOVERED;
5731 }
5732
5733 /**
5734  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5735  * @pdev: PCI device struct
5736  *
5737  * Description: This routine is called by the pci error recovery
5738  * code after the PCI slot has been reset, just before we
5739  * should resume normal operations.
5740  */
5741 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5742 {
5743         struct drm_device *dev = pci_get_drvdata(pdev);
5744         struct amdgpu_device *adev = drm_to_adev(dev);
5745         int r, i;
5746         struct amdgpu_reset_context reset_context;
5747         u32 memsize;
5748         struct list_head device_list;
5749
5750         DRM_INFO("PCI error: slot reset callback!!\n");
5751
5752         memset(&reset_context, 0, sizeof(reset_context));
5753
5754         INIT_LIST_HEAD(&device_list);
5755         list_add_tail(&adev->reset_list, &device_list);
5756
5757         /* wait for asic to come out of reset */
5758         msleep(500);
5759
5760         /* Restore PCI confspace */
5761         amdgpu_device_load_pci_state(pdev);
5762
5763         /* confirm  ASIC came out of reset */
5764         for (i = 0; i < adev->usec_timeout; i++) {
5765                 memsize = amdgpu_asic_get_config_memsize(adev);
5766
5767                 if (memsize != 0xffffffff)
5768                         break;
5769                 udelay(1);
5770         }
5771         if (memsize == 0xffffffff) {
5772                 r = -ETIME;
5773                 goto out;
5774         }
5775
5776         reset_context.method = AMD_RESET_METHOD_NONE;
5777         reset_context.reset_req_dev = adev;
5778         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5779         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5780
5781         adev->no_hw_access = true;
5782         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5783         adev->no_hw_access = false;
5784         if (r)
5785                 goto out;
5786
5787         r = amdgpu_do_asic_reset(&device_list, &reset_context);
5788
5789 out:
5790         if (!r) {
5791                 if (amdgpu_device_cache_pci_state(adev->pdev))
5792                         pci_restore_state(adev->pdev);
5793
5794                 DRM_INFO("PCIe error recovery succeeded\n");
5795         } else {
5796                 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5797                 amdgpu_device_unset_mp1_state(adev);
5798                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5799         }
5800
5801         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5802 }
5803
5804 /**
5805  * amdgpu_pci_resume() - resume normal ops after PCI reset
5806  * @pdev: pointer to PCI device
5807  *
5808  * Called when the error recovery driver tells us that its
5809  * OK to resume normal operation.
5810  */
5811 void amdgpu_pci_resume(struct pci_dev *pdev)
5812 {
5813         struct drm_device *dev = pci_get_drvdata(pdev);
5814         struct amdgpu_device *adev = drm_to_adev(dev);
5815         int i;
5816
5817
5818         DRM_INFO("PCI error: resume callback!!\n");
5819
5820         /* Only continue execution for the case of pci_channel_io_frozen */
5821         if (adev->pci_channel_state != pci_channel_io_frozen)
5822                 return;
5823
5824         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5825                 struct amdgpu_ring *ring = adev->rings[i];
5826
5827                 if (!ring || !ring->sched.thread)
5828                         continue;
5829
5830                 drm_sched_start(&ring->sched, true);
5831         }
5832
5833         amdgpu_device_unset_mp1_state(adev);
5834         amdgpu_device_unlock_reset_domain(adev->reset_domain);
5835 }
5836
5837 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5838 {
5839         struct drm_device *dev = pci_get_drvdata(pdev);
5840         struct amdgpu_device *adev = drm_to_adev(dev);
5841         int r;
5842
5843         r = pci_save_state(pdev);
5844         if (!r) {
5845                 kfree(adev->pci_state);
5846
5847                 adev->pci_state = pci_store_saved_state(pdev);
5848
5849                 if (!adev->pci_state) {
5850                         DRM_ERROR("Failed to store PCI saved state");
5851                         return false;
5852                 }
5853         } else {
5854                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5855                 return false;
5856         }
5857
5858         return true;
5859 }
5860
5861 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5862 {
5863         struct drm_device *dev = pci_get_drvdata(pdev);
5864         struct amdgpu_device *adev = drm_to_adev(dev);
5865         int r;
5866
5867         if (!adev->pci_state)
5868                 return false;
5869
5870         r = pci_load_saved_state(pdev, adev->pci_state);
5871
5872         if (!r) {
5873                 pci_restore_state(pdev);
5874         } else {
5875                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5876                 return false;
5877         }
5878
5879         return true;
5880 }
5881
5882 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5883                 struct amdgpu_ring *ring)
5884 {
5885 #ifdef CONFIG_X86_64
5886         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5887                 return;
5888 #endif
5889         if (adev->gmc.xgmi.connected_to_cpu)
5890                 return;
5891
5892         if (ring && ring->funcs->emit_hdp_flush)
5893                 amdgpu_ring_emit_hdp_flush(ring);
5894         else
5895                 amdgpu_asic_flush_hdp(adev, ring);
5896 }
5897
5898 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5899                 struct amdgpu_ring *ring)
5900 {
5901 #ifdef CONFIG_X86_64
5902         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5903                 return;
5904 #endif
5905         if (adev->gmc.xgmi.connected_to_cpu)
5906                 return;
5907
5908         amdgpu_asic_invalidate_hdp(adev, ring);
5909 }
5910
5911 int amdgpu_in_reset(struct amdgpu_device *adev)
5912 {
5913         return atomic_read(&adev->reset_domain->in_gpu_reset);
5914 }
5915
5916 /**
5917  * amdgpu_device_halt() - bring hardware to some kind of halt state
5918  *
5919  * @adev: amdgpu_device pointer
5920  *
5921  * Bring hardware to some kind of halt state so that no one can touch it
5922  * any more. It will help to maintain error context when error occurred.
5923  * Compare to a simple hang, the system will keep stable at least for SSH
5924  * access. Then it should be trivial to inspect the hardware state and
5925  * see what's going on. Implemented as following:
5926  *
5927  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5928  *    clears all CPU mappings to device, disallows remappings through page faults
5929  * 2. amdgpu_irq_disable_all() disables all interrupts
5930  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5931  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
5932  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5933  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5934  *    flush any in flight DMA operations
5935  */
5936 void amdgpu_device_halt(struct amdgpu_device *adev)
5937 {
5938         struct pci_dev *pdev = adev->pdev;
5939         struct drm_device *ddev = adev_to_drm(adev);
5940
5941         drm_dev_unplug(ddev);
5942
5943         amdgpu_irq_disable_all(adev);
5944
5945         amdgpu_fence_driver_hw_fini(adev);
5946
5947         adev->no_hw_access = true;
5948
5949         amdgpu_device_unmap_mmio(adev);
5950
5951         pci_disable_device(pdev);
5952         pci_wait_for_pending_transaction(pdev);
5953 }
5954
5955 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5956                                 u32 reg)
5957 {
5958         unsigned long flags, address, data;
5959         u32 r;
5960
5961         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5962         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5963
5964         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5965         WREG32(address, reg * 4);
5966         (void)RREG32(address);
5967         r = RREG32(data);
5968         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5969         return r;
5970 }
5971
5972 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5973                                 u32 reg, u32 v)
5974 {
5975         unsigned long flags, address, data;
5976
5977         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5978         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5979
5980         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5981         WREG32(address, reg * 4);
5982         (void)RREG32(address);
5983         WREG32(data, v);
5984         (void)RREG32(data);
5985         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5986 }
5987
5988 /**
5989  * amdgpu_device_switch_gang - switch to a new gang
5990  * @adev: amdgpu_device pointer
5991  * @gang: the gang to switch to
5992  *
5993  * Try to switch to a new gang.
5994  * Returns: NULL if we switched to the new gang or a reference to the current
5995  * gang leader.
5996  */
5997 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5998                                             struct dma_fence *gang)
5999 {
6000         struct dma_fence *old = NULL;
6001
6002         do {
6003                 dma_fence_put(old);
6004                 rcu_read_lock();
6005                 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6006                 rcu_read_unlock();
6007
6008                 if (old == gang)
6009                         break;
6010
6011                 if (!dma_fence_is_signaled(old))
6012                         return old;
6013
6014         } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6015                          old, gang) != old);
6016
6017         dma_fence_put(old);
6018         return NULL;
6019 }
6020
6021 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6022 {
6023         switch (adev->asic_type) {
6024 #ifdef CONFIG_DRM_AMDGPU_SI
6025         case CHIP_HAINAN:
6026 #endif
6027         case CHIP_TOPAZ:
6028                 /* chips with no display hardware */
6029                 return false;
6030 #ifdef CONFIG_DRM_AMDGPU_SI
6031         case CHIP_TAHITI:
6032         case CHIP_PITCAIRN:
6033         case CHIP_VERDE:
6034         case CHIP_OLAND:
6035 #endif
6036 #ifdef CONFIG_DRM_AMDGPU_CIK
6037         case CHIP_BONAIRE:
6038         case CHIP_HAWAII:
6039         case CHIP_KAVERI:
6040         case CHIP_KABINI:
6041         case CHIP_MULLINS:
6042 #endif
6043         case CHIP_TONGA:
6044         case CHIP_FIJI:
6045         case CHIP_POLARIS10:
6046         case CHIP_POLARIS11:
6047         case CHIP_POLARIS12:
6048         case CHIP_VEGAM:
6049         case CHIP_CARRIZO:
6050         case CHIP_STONEY:
6051                 /* chips with display hardware */
6052                 return true;
6053         default:
6054                 /* IP discovery */
6055                 if (!adev->ip_versions[DCE_HWIP][0] ||
6056                     (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6057                         return false;
6058                 return true;
6059         }
6060 }