drm/amdgpu: set vm size and block size by individual gmc by default (v3)
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
index cc01b3f27a1fed5b5ef9555c8a0416c48d2dd8ab..483660742f75c9a9521a3bdef9f9bf3016639209 100644 (file)
@@ -40,6 +40,7 @@
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
 #include "amd_pcie.h"
 #ifdef CONFIG_DRM_AMDGPU_SI
 #include "si.h"
 #include "cik.h"
 #endif
 #include "vi.h"
+#include "soc15.h"
 #include "bif/bif_4_1_d.h"
 #include <linux/pci.h>
 #include <linux/firmware.h>
+#include "amdgpu_pm.h"
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
@@ -74,6 +77,7 @@ static const char *amdgpu_asic_name[] = {
        "POLARIS10",
        "POLARIS11",
        "POLARIS12",
+       "VEGA10",
        "LAST",
 };
 
@@ -194,6 +198,44 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
        }
 }
 
+/**
+ * amdgpu_mm_rdoorbell64 - fetch a 64-bit doorbell value
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index, must be below adev->doorbell.num_doorbells
+ *
+ * Returns the Qword read with atomic64_read() from the doorbell aperture
+ * at @index (VEGA10+); logs an error and returns 0 when out of range.
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+       if (index >= adev->doorbell.num_doorbells) {
+               DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+               return 0;
+       }
+
+       return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - store a 64-bit doorbell value
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index, must be below adev->doorbell.num_doorbells
+ * @v: Qword to store
+ *
+ * Stores @v with atomic64_set() into the doorbell aperture at @index
+ * (VEGA10+).  An out-of-range @index is logged and nothing is written.
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+       if (index >= adev->doorbell.num_doorbells) {
+               DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+               return;
+       }
+       atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+}
+
 /**
  * amdgpu_invalid_rreg - dummy reg read function
  *
@@ -515,6 +557,29 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
        }
 }
 
+/**
+ * amdgpu_wb_get_64bit - Allocate a pair of wb slots for a 64-bit value
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: set to the index of the first of the two slots on success
+ *
+ * Marks two consecutive free wb slots as used (all asics).
+ * Returns 0 on success or -EINVAL if no such pair is available.
+ */
+int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb)
+{
+       unsigned long first = bitmap_find_next_zero_area_off(adev->wb.used,
+                               adev->wb.num_wb, 0, 2, 7, 0);
+
+       if ((first + 1) >= adev->wb.num_wb)
+               return -EINVAL;
+
+       __set_bit(first, adev->wb.used);
+       __set_bit(first + 1, adev->wb.used);
+       *wb = first;
+       return 0;
+}
+
 /**
  * amdgpu_wb_free - Free a wb entry
  *
@@ -529,6 +594,22 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb)
                __clear_bit(wb, adev->wb.used);
 }
 
+/**
+ * amdgpu_wb_free_64bit - Free a pair of wb slots
+ * @adev: amdgpu_device pointer
+ * @wb: index of the first slot of the pair
+ *
+ * Clears the used bits of @wb and @wb + 1 when both are in range (all asics)
+ */
+void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
+{
+       /* bound wb as well: for wb == U32_MAX the u32 sum wb + 1 wraps to 0 */
+       if (wb < adev->wb.num_wb && (wb + 1) < adev->wb.num_wb) {
+               __clear_bit(wb, adev->wb.used);
+               __clear_bit(wb + 1, adev->wb.used);
+       }
+}
+
 /**
  * amdgpu_vram_location - try to find VRAM location
  * @adev: amdgpu device structure holding all necessary informations
@@ -636,9 +717,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev)
                return true;
        }
        /* then check MEM_SIZE, in case the crtcs are off */
-       reg = RREG32(mmCONFIG_MEMSIZE);
+       reg = amdgpu_asic_get_config_memsize(adev);
 
-       if (reg)
+       if ((reg != 0) && (reg != 0xffffffff))
                return false;
 
        return true;
@@ -915,8 +996,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev)
        }
 
        mutex_init(&adev->mode_info.atom_context->mutex);
-       amdgpu_atombios_scratch_regs_init(adev);
-       amdgpu_atom_allocate_fb_scratch(adev->mode_info.atom_context);
+       if (adev->is_atom_fw) {
+               amdgpu_atomfirmware_scratch_regs_init(adev);
+               amdgpu_atomfirmware_allocate_fb_scratch(adev);
+       } else {
+               amdgpu_atombios_scratch_regs_init(adev);
+               amdgpu_atombios_allocate_fb_scratch(adev);
+       }
        return 0;
 }
 
@@ -954,6 +1040,62 @@ static bool amdgpu_check_pot_argument(int arg)
        return (arg & (arg - 1)) == 0;
 }
 
+static void amdgpu_check_block_size(struct amdgpu_device *adev)
+{
+       /* The block size is the number of address bits covered by a page
+        * table rather than the page directory: a page is 4KB (12 offset
+        * bits), a page table takes at least 9 bits and the directory
+        * gets whatever is left.  A value of -1 is not validated. */
+       if (amdgpu_vm_block_size == -1)
+               return;
+
+       if (amdgpu_vm_block_size < 9) {
+               /* below the 9 bit minimum */
+               dev_warn(adev->dev, "VM page table size (%d) too small\n",
+                        amdgpu_vm_block_size);
+       } else if (amdgpu_vm_block_size > 24 ||
+                  (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) {
+               /* over 24 bits, or 1 << size exceeds amdgpu_vm_size * 1024 */
+               dev_warn(adev->dev, "VM page table size (%d) too large\n",
+                        amdgpu_vm_block_size);
+       } else {
+               /* accepted as is */
+               return;
+       }
+
+       /* every rejected value is replaced by -1 */
+       amdgpu_vm_block_size = -1;
+}
+
+/*
+ * Sanity check amdgpu_vm_size.  A value of -1 is left alone without
+ * any warning (it is also what rejected values are reset to); other
+ * values must be a power of 2 from 1 to 1024 (1GB to 1TB according to
+ * the warning texts), otherwise a warning is printed and -1 is stored.
+ */
+static void amdgpu_check_vm_size(struct amdgpu_device *adev)
+{
+       /* nothing to validate, and -1 would fail the power of 2 test */
+       if (amdgpu_vm_size == -1)
+               return;
+
+       if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
+               dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
+                        amdgpu_vm_size);
+       } else if (amdgpu_vm_size < 1) {
+               dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
+                        amdgpu_vm_size);
+       } else if (amdgpu_vm_size > 1024) {
+               /* Max GPUVM size for Cayman, SI, CI and VI is 40 bits. */
+               dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n",
+                        amdgpu_vm_size);
+       } else {
+               return;
+       }
+
+       amdgpu_vm_size = -1;
+}
+
 /**
  * amdgpu_check_arguments - validate module params
  *
@@ -983,54 +1125,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
                }
        }
 
-       if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
-               dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
-                        amdgpu_vm_size);
-               amdgpu_vm_size = 8;
-       }
+       amdgpu_check_vm_size(adev);
 
-       if (amdgpu_vm_size < 1) {
-               dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
-                        amdgpu_vm_size);
-               amdgpu_vm_size = 8;
-       }
-
-       /*
-        * Max GPUVM size for Cayman, SI and CI are 40 bits.
-        */
-       if (amdgpu_vm_size > 1024) {
-               dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n",
-                        amdgpu_vm_size);
-               amdgpu_vm_size = 8;
-       }
-
-       /* defines number of bits in page table versus page directory,
-        * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
-        * page table and the remaining bits are in the page directory */
-       if (amdgpu_vm_block_size == -1) {
-
-               /* Total bits covered by PD + PTs */
-               unsigned bits = ilog2(amdgpu_vm_size) + 18;
-
-               /* Make sure the PD is 4K in size up to 8GB address space.
-                  Above that split equal between PD and PTs */
-               if (amdgpu_vm_size <= 8)
-                       amdgpu_vm_block_size = bits - 9;
-               else
-                       amdgpu_vm_block_size = (bits + 3) / 2;
-
-       } else if (amdgpu_vm_block_size < 9) {
-               dev_warn(adev->dev, "VM page table size (%d) too small\n",
-                        amdgpu_vm_block_size);
-               amdgpu_vm_block_size = 9;
-       }
-
-       if (amdgpu_vm_block_size > 24 ||
-           (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) {
-               dev_warn(adev->dev, "VM page table size (%d) too large\n",
-                        amdgpu_vm_block_size);
-               amdgpu_vm_block_size = 9;
-       }
+       amdgpu_check_block_size(adev);
 
        if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
            !amdgpu_check_pot_argument(amdgpu_vram_page_split))) {
@@ -1059,7 +1156,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
        if (state == VGA_SWITCHEROO_ON) {
                unsigned d3_delay = dev->pdev->d3_delay;
 
-               printk(KERN_INFO "amdgpu: switched on\n");
+               pr_info("amdgpu: switched on\n");
                /* don't suspend or resume card normally */
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
@@ -1070,7 +1167,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
                dev->switch_power_state = DRM_SWITCH_POWER_ON;
                drm_kms_helper_poll_enable(dev);
        } else {
-               printk(KERN_INFO "amdgpu: switched off\n");
+               pr_info("amdgpu: switched off\n");
                drm_kms_helper_poll_disable(dev);
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
                amdgpu_device_suspend(dev, true, true);
@@ -1114,13 +1211,15 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
-               if (adev->ip_blocks[i].version->type == block_type) {
-                       r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-                                                                                    state);
-                       if (r)
-                               return r;
-                       break;
-               }
+               if (adev->ip_blocks[i].version->type != block_type)
+                       continue;
+               if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
+                       continue;
+               r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
+                       (void *)adev, state);
+               if (r)
+                       DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
+                                 adev->ip_blocks[i].version->funcs->name, r);
        }
        return r;
 }
@@ -1134,13 +1233,15 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
-               if (adev->ip_blocks[i].version->type == block_type) {
-                       r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
-                                                                                    state);
-                       if (r)
-                               return r;
-                       break;
-               }
+               if (adev->ip_blocks[i].version->type != block_type)
+                       continue;
+               if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
+                       continue;
+               r = adev->ip_blocks[i].version->funcs->set_powergating_state(
+                       (void *)adev, state);
+               if (r)
+                       DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
+                                 adev->ip_blocks[i].version->funcs->name, r);
        }
        return r;
 }
@@ -1345,6 +1446,13 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
                        return r;
                break;
 #endif
+       case CHIP_VEGA10:
+               adev->family = AMDGPU_FAMILY_AI;
+
+               r = soc15_set_ip_blocks(adev);
+               if (r)
+                       return r;
+               break;
        default:
                /* FIXME: not supported yet */
                return -EINVAL;
@@ -1476,6 +1584,9 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
                }
        }
 
+       amdgpu_dpm_enable_uvd(adev, false);
+       amdgpu_dpm_enable_vce(adev, false);
+
        return 0;
 }
 
@@ -1674,8 +1785,13 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
-       if (amdgpu_atombios_has_gpu_virtualization_table(adev))
-               adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+       if (adev->is_atom_fw) {
+               if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
+                       adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+       } else {
+               if (amdgpu_atombios_has_gpu_virtualization_table(adev))
+                       adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+       }
 }
 
 /**
@@ -1740,6 +1856,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
         * can recall function without having locking issues */
        mutex_init(&adev->vm_manager.lock);
        atomic_set(&adev->irq.ih.lock, 0);
+       mutex_init(&adev->firmware.mutex);
        mutex_init(&adev->pm.mutex);
        mutex_init(&adev->gfx.gpu_clock_mutex);
        mutex_init(&adev->srbm_mutex);
@@ -1810,7 +1927,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                runtime = true;
        if (amdgpu_device_is_px(ddev))
                runtime = true;
-       vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
+       if (!pci_is_thunderbolt_attached(adev->pdev))
+               vga_switcheroo_register_client(adev->pdev,
+                                              &amdgpu_switcheroo_ops, runtime);
        if (runtime)
                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
@@ -1846,14 +1965,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                DRM_INFO("GPU post is not needed\n");
        }
 
-       /* Initialize clocks */
-       r = amdgpu_atombios_get_clock_info(adev);
-       if (r) {
-               dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-               goto failed;
+       if (!adev->is_atom_fw) {
+               /* Initialize clocks */
+               r = amdgpu_atombios_get_clock_info(adev);
+               if (r) {
+                       dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+                       return r;
+               }
+               /* init i2c buses */
+               amdgpu_atombios_i2c_init(adev);
        }
-       /* init i2c buses */
-       amdgpu_atombios_i2c_init(adev);
 
        /* Fence driver */
        r = amdgpu_fence_driver_init(adev);
@@ -1912,12 +2033,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                else
                        DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
        }
-       if ((amdgpu_testing & 2)) {
-               if (adev->accel_working)
-                       amdgpu_test_syncing(adev);
-               else
-                       DRM_INFO("amdgpu: acceleration disabled, skipping sync tests\n");
-       }
        if (amdgpu_benchmarking) {
                if (adev->accel_working)
                        amdgpu_benchmark(adev, amdgpu_benchmarking);
@@ -1969,7 +2084,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
        amdgpu_atombios_fini(adev);
        kfree(adev->bios);
        adev->bios = NULL;
-       vga_switcheroo_unregister_client(adev->pdev);
+       if (!pci_is_thunderbolt_attached(adev->pdev))
+               vga_switcheroo_unregister_client(adev->pdev);
        if (adev->flags & AMD_IS_PX)
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
        vga_client_register(adev->pdev, NULL, NULL, NULL);
@@ -2063,7 +2179,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
         */
        amdgpu_bo_evict_vram(adev);
 
-       amdgpu_atombios_scratch_regs_save(adev);
+       if (adev->is_atom_fw)
+               amdgpu_atomfirmware_scratch_regs_save(adev);
+       else
+               amdgpu_atombios_scratch_regs_save(adev);
        pci_save_state(dev->pdev);
        if (suspend) {
                /* Shut down the device */
@@ -2115,7 +2234,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                        return r;
                }
        }
-       amdgpu_atombios_scratch_regs_restore(adev);
+       if (adev->is_atom_fw)
+               amdgpu_atomfirmware_scratch_regs_restore(adev);
+       else
+               amdgpu_atombios_scratch_regs_restore(adev);
 
        /* post card */
        if (amdgpu_need_post(adev)) {
@@ -2125,9 +2247,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
        }
 
        r = amdgpu_resume(adev);
-       if (r)
+       if (r) {
                DRM_ERROR("amdgpu_resume failed (%d).\n", r);
-
+               return r;
+       }
        amdgpu_fence_driver_resume(adev);
 
        if (resume) {
@@ -2500,9 +2623,15 @@ retry:
                        amdgpu_display_stop_mc_access(adev, &save);
                        amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
                }
-               amdgpu_atombios_scratch_regs_save(adev);
+               if (adev->is_atom_fw)
+                       amdgpu_atomfirmware_scratch_regs_save(adev);
+               else
+                       amdgpu_atombios_scratch_regs_save(adev);
                r = amdgpu_asic_reset(adev);
-               amdgpu_atombios_scratch_regs_restore(adev);
+               if (adev->is_atom_fw)
+                       amdgpu_atomfirmware_scratch_regs_restore(adev);
+               else
+                       amdgpu_atombios_scratch_regs_restore(adev);
                /* post card */
                amdgpu_atom_asic_init(adev->mode_info.atom_context);