Merge tag 'topic/drm-misc-2015-05-27' of git://anongit.freedesktop.org/drm-intel...
author Dave Airlie <airlied@redhat.com>
Wed, 3 Jun 2015 23:17:45 +0000 (09:17 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 3 Jun 2015 23:17:45 +0000 (09:17 +1000)
One more round of drm-misc, again mostly atomic. Big thing is the
userspace blob code from Daniel Stone, with support for the mode_id blob
now added to the atomic ioctl. Finally we can do atomic modesets!

Note that the atomic ioctl is still behind the module knob since the
weston patches aren't quite ready yet imo - they lack TEST_ONLY support,
which is a fairly crucial bit of the atomic api. But besides that I think
it's all good to go. That's also why we didn't bother to hide the new blob
ioctls behind the knob; that part won't need to change. And if the weston
patches get into shape in time we could throw the "atomic by default" patch
on top for 4.2.
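For reference, roughly what the userspace side of this looks like with
libdrm's atomic helpers - a hedged sketch, not code from this pull: wrap a
mode in a blob, point the CRTC's MODE_ID at it and do a TEST_ONLY commit.
Assumes a libdrm with the atomic/blob entry points; find_prop_id() is just
a local helper and error handling is trimmed.

/* Sketch: TEST_ONLY atomic modeset through the new MODE_ID blob. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Resolve a property name to its id for one KMS object. */
static uint32_t find_prop_id(int fd, uint32_t obj_id, uint32_t obj_type,
			     const char *name)
{
	drmModeObjectProperties *props;
	uint32_t i, id = 0;

	props = drmModeObjectGetProperties(fd, obj_id, obj_type);
	if (!props)
		return 0;
	for (i = 0; i < props->count_props; i++) {
		drmModePropertyRes *prop =
			drmModeGetProperty(fd, props->props[i]);
		if (prop && strcmp(prop->name, name) == 0)
			id = prop->prop_id;
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);
	return id;
}

/* Validate a full modeset without touching the hardware. */
int test_modeset(int fd, uint32_t crtc_id, uint32_t connector_id,
		 const drmModeModeInfo *mode)
{
	drmModeAtomicReq *req;
	uint32_t blob_id = 0;
	int ret;

	drmSetClientCap(fd, DRM_CLIENT_CAP_ATOMIC, 1);

	/* Wrap the mode in a blob (the new blob-creation ioctl). */
	ret = drmModeCreatePropertyBlob(fd, mode, sizeof(*mode), &blob_id);
	if (ret)
		return ret;

	req = drmModeAtomicAlloc();
	drmModeAtomicAddProperty(req, crtc_id,
		find_prop_id(fd, crtc_id, DRM_MODE_OBJECT_CRTC, "MODE_ID"),
		blob_id);
	drmModeAtomicAddProperty(req, crtc_id,
		find_prop_id(fd, crtc_id, DRM_MODE_OBJECT_CRTC, "ACTIVE"), 1);
	drmModeAtomicAddProperty(req, connector_id,
		find_prop_id(fd, connector_id, DRM_MODE_OBJECT_CONNECTOR,
			     "CRTC_ID"), crtc_id);

	ret = drmModeAtomicCommit(fd, req,
				  DRM_MODE_ATOMIC_TEST_ONLY |
				  DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);

	drmModeAtomicFree(req);
	drmModeDestroyPropertyBlob(fd, blob_id);
	return ret;
}

A real client would also set FB_ID/CRTC_ID on a plane, but this is the
minimum that exercises the MODE_ID blob path end to end.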

* tag 'topic/drm-misc-2015-05-27' of git://anongit.freedesktop.org/drm-intel:
  drm: Fix off-by-one in vblank hardware counter wraparound handling
  drm/atomic: fix out of bounds read in for_each_*_in_state helpers
  drm/atomic: Add MODE_ID property
  drm/atomic: Add current-mode blob to CRTC state
  drm: Add drm_atomic_set_mode_for_crtc
  drm: check for garbage in unused addfb2 fields
  drm: Retain reference to blob properties in lookup
  drm/mode: Add user blob-creation ioctl
  drm: Return error value from blob creation
  drm: Allow creating blob properties without copy
  drm/mode: Unstatic kernel-userspace mode conversion
  drm/mode: Validate modes inside drm_crtc_convert_umode
  drm/crtc_helper: Replace open-coded CRTC state helpers
  drm: kerneldoc fixes for blob properties
  drm/DocBook: Add more drm_bridge documentation
  drm: bridge: Allow daisy chaining of bridges
  drm/atomic: add all affected planes in drm_atomic_helper_check_modeset
  drm/atomic: add drm_atomic_add_affected_planes
  drm/atomic: add commit_planes_on_crtc helper

96 files changed:
Documentation/DocBook/drm.tmpl
drivers/gpu/drm/amd/amdkfd/Makefile
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/i2c/adv7511.c
drivers/gpu/drm/i2c/tda998x_drv.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gem_debug.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_dsi.c
drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
drivers/gpu/drm/i915/intel_dsi_pll.c
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_i2c.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_overlay.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/i915/intel_sideband.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/radeon/atombios_dp.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_reg.h
drivers/gpu/drm/radeon/cikd.h
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/nid.h
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r300.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_audio.h
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_irq_kms.c
drivers/gpu/drm/radeon/radeon_kfd.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_mode.h
drivers/gpu/drm/radeon/radeon_vce.c
drivers/gpu/drm/radeon/radeon_vm.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/radeon/sid.h
drivers/gpu/drm/radeon/trinity_dpm.c
drivers/gpu/drm/radeon/vce_v1_0.c
drivers/gpu/drm/radeon/vce_v2_0.c
drivers/gpu/drm/rcar-du/rcar_du_crtc.c
drivers/gpu/drm/rcar-du/rcar_du_crtc.h
drivers/gpu/drm/rcar-du/rcar_du_drv.h
drivers/gpu/drm/rcar-du/rcar_du_group.c
drivers/gpu/drm/rcar-du/rcar_du_group.h
drivers/gpu/drm/rcar-du/rcar_du_kms.c
drivers/gpu/drm/rcar-du/rcar_du_plane.c
drivers/gpu/drm/rcar-du/rcar_du_plane.h
drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
include/drm/i915_pciids.h
include/uapi/drm/i915_drm.h
include/uapi/drm/radeon_drm.h
include/uapi/linux/kfd_ioctl.h

index 5d9d851d8623053aac26062f47f3f58541562d09..109fde8b4a281a3c4b54dcc9f78da6ba7c002bc9 100644 (file)
@@ -4165,6 +4165,12 @@ int num_ioctls;</synopsis>
          </tgroup>
        </table>
       </sect2>
+
+      <sect2>
+       <title>CSR firmware support for DMC</title>
+!Pdrivers/gpu/drm/i915/intel_csr.c csr support for dmc
+!Idrivers/gpu/drm/i915/intel_csr.c
+      </sect2>
     </sect1>
 
     <sect1>
@@ -4216,7 +4222,6 @@ int num_ioctls;</synopsis>
 !Idrivers/gpu/drm/i915/i915_gem_shrinker.c
       </sect2>
     </sect1>
-
     <sect1>
       <title> Tracing </title>
       <para>
index 652d25478fd550bdc1e1d4005a8788e877f586c4..28551153ec6d0ea18031f361618a5158a537c88e 100644 (file)
@@ -12,6 +12,7 @@ amdkfd-y      := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
                kfd_kernel_queue_vi.o kfd_packet_manager.o \
                kfd_process_queue_manager.o kfd_device_queue_manager.o \
                kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
-               kfd_interrupt.o kfd_events.o cik_event_interrupt.o
+               kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+               kfd_dbgdev.o kfd_dbgmgr.o
 
 obj-$(CONFIG_HSA_AMD)  += amdkfd.o
index b2c6109bd7af12fcff5d5ebaa27e8a23f1dc1827..96c904b3acb7a889ddcfff58e491752056a7dc45 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_dbgmgr.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -432,6 +433,301 @@ out:
        return err;
 }
 
+static int kfd_ioctl_dbg_register(struct file *filep,
+                               struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_dbg_register_args *args = data;
+       struct kfd_dev *dev;
+       struct kfd_dbgmgr *dbgmgr_ptr;
+       struct kfd_process_device *pdd;
+       bool create_ok;
+       long status = 0;
+
+       dev = kfd_device_by_id(args->gpu_id);
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->device_info->asic_family == CHIP_CARRIZO) {
+               pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(kfd_get_dbgmgr_mutex());
+       mutex_lock(&p->mutex);
+
+       /*
+        * make sure that we have a pdd, in case this is the first queue
+        * created for this process
+        */
+       pdd = kfd_bind_process_to_device(dev, p);
+       if (IS_ERR(pdd)) {
+               mutex_unlock(&p->mutex);
+               mutex_unlock(kfd_get_dbgmgr_mutex());
+               return PTR_ERR(pdd);
+       }
+
+       if (dev->dbgmgr == NULL) {
+               /* In case of a legal call, we have no dbgmgr yet */
+               create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
+               if (create_ok) {
+                       status = kfd_dbgmgr_register(dbgmgr_ptr, p);
+                       if (status != 0)
+                               kfd_dbgmgr_destroy(dbgmgr_ptr);
+                       else
+                               dev->dbgmgr = dbgmgr_ptr;
+               }
+       } else {
+               pr_debug("debugger already registered\n");
+               status = -EINVAL;
+       }
+
+       mutex_unlock(&p->mutex);
+       mutex_unlock(kfd_get_dbgmgr_mutex());
+
+       return status;
+}
+
+static int kfd_ioctl_dbg_unregister(struct file *filep,
+                               struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_dbg_unregister_args *args = data;
+       struct kfd_dev *dev;
+       long status;
+
+       dev = kfd_device_by_id(args->gpu_id);
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->device_info->asic_family == CHIP_CARRIZO) {
+               pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(kfd_get_dbgmgr_mutex());
+
+       status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
+       if (status == 0) {
+               kfd_dbgmgr_destroy(dev->dbgmgr);
+               dev->dbgmgr = NULL;
+       }
+
+       mutex_unlock(kfd_get_dbgmgr_mutex());
+
+       return status;
+}
+
+/*
+ * Parse and generate a variable size data structure for address watch.
+ * The total size of the buffer and the number of watch points are limited
+ * in order to prevent kernel abuse (this has no bearing on the much
+ * smaller HW limitation, which is enforced by the dbgdev module).
+ * Please also note that the watch addresses themselves are not separately
+ * copied from user, since they are set into the HW as user-mode values.
+ */
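+/*
+ * Buffer layout, as parsed below on a 64-bit kernel (packed after the
+ * ioctl args header):
+ *   uint32_t num_watch_points
+ *   enum HSA_DBG_WATCH_MODE watch_mode[num_watch_points]
+ *   uint64_t watch_address[num_watch_points]
+ *   uint64_t watch_mask[num_watch_points], or a single zero qword when
+ *   no masks are passed
+ */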
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+                                       struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_dbg_address_watch_args *args = data;
+       struct kfd_dev *dev;
+       struct dbg_address_watch_info aw_info;
+       unsigned char *args_buff;
+       long status;
+       void __user *cmd_from_user;
+       uint64_t watch_mask_value = 0;
+       unsigned int args_idx = 0;
+
+       memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
+
+       dev = kfd_device_by_id(args->gpu_id);
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->device_info->asic_family == CHIP_CARRIZO) {
+               pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+               return -EINVAL;
+       }
+
+       cmd_from_user = (void __user *) args->content_ptr;
+
+       /* Validate arguments */
+
+       if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
+               (args->buf_size_in_bytes <= sizeof(*args)) ||
+               (cmd_from_user == NULL))
+               return -EINVAL;
+
+       /* this is the actual buffer to work with */
+
+       args_buff = kmalloc(args->buf_size_in_bytes -
+                                       sizeof(*args), GFP_KERNEL);
+       if (args_buff == NULL)
+               return -ENOMEM;
+
+       status = copy_from_user(args_buff, cmd_from_user,
+                               args->buf_size_in_bytes - sizeof(*args));
+
+       if (status != 0) {
+               pr_debug("Failed to copy address watch user data\n");
+               kfree(args_buff);
+               return -EINVAL;
+       }
+
+       aw_info.process = p;
+
+       aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+       args_idx += sizeof(aw_info.num_watch_points);
+
+       aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+       args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
+
+       /*
+        * set watch address base pointer to point on the array base
+        * within args_buff
+        */
+       aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+
+       /* skip over the addresses buffer */
+       args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
+
+       /* make sure a full mask qword is inside the copied buffer */
+       if (args_idx + sizeof(watch_mask_value) >
+                       args->buf_size_in_bytes - sizeof(*args)) {
+               kfree(args_buff);
+               return -EINVAL;
+       }
+
+       watch_mask_value = *((uint64_t *)(&args_buff[args_idx]));
+
+       if (watch_mask_value > 0) {
+               /*
+                * There is an array of masks.
+                * set watch mask base pointer to point on the array base
+                * within args_buff
+                */
+               aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+
+               /* skip over the masks buffer */
+               args_idx += sizeof(aw_info.watch_mask) *
+                               aw_info.num_watch_points;
+       } else {
+               /* just the NULL mask, set to NULL and skip over it */
+               aw_info.watch_mask = NULL;
+               args_idx += sizeof(aw_info.watch_mask);
+       }
+
+       if (args_idx > args->buf_size_in_bytes - sizeof(*args)) {
+               kfree(args_buff);
+               return -EINVAL;
+       }
+
+       /* Currently HSA Event is not supported for DBG */
+       aw_info.watch_event = NULL;
+
+       mutex_lock(kfd_get_dbgmgr_mutex());
+
+       status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+       mutex_unlock(kfd_get_dbgmgr_mutex());
+
+       kfree(args_buff);
+
+       return status;
+}
+
+/* Parse and generate fixed size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+                                       struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_dbg_wave_control_args *args = data;
+       struct kfd_dev *dev;
+       struct dbg_wave_control_info wac_info;
+       unsigned char *args_buff;
+       uint32_t computed_buff_size;
+       long status;
+       void __user *cmd_from_user;
+       unsigned int args_idx = 0;
+
+       memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
+
+       /* we use compact form, independent of the packing attribute value */
+       computed_buff_size = sizeof(*args) +
+                               sizeof(wac_info.mode) +
+                               sizeof(wac_info.operand) +
+                               sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
+                               sizeof(wac_info.dbgWave_msg.MemoryVA) +
+                               sizeof(wac_info.trapId);
+
+       dev = kfd_device_by_id(args->gpu_id);
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->device_info->asic_family == CHIP_CARRIZO) {
+               pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+               return -EINVAL;
+       }
+
+       /* input size must match the computed "compact" size */
+       if (args->buf_size_in_bytes != computed_buff_size) {
+               pr_debug("size mismatch, computed : actual %u : %u\n",
+                               args->buf_size_in_bytes, computed_buff_size);
+               return -EINVAL;
+       }
+
+       cmd_from_user = (void __user *) args->content_ptr;
+
+       if (cmd_from_user == NULL)
+               return -EINVAL;
+
+       /* this is the actual buffer to work with */
+
+       args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args),
+                       GFP_KERNEL);
+
+       if (args_buff == NULL)
+               return -ENOMEM;
+
+       /* Now copy the entire buffer from user */
+       status = copy_from_user(args_buff, cmd_from_user,
+                               args->buf_size_in_bytes - sizeof(*args));
+       if (status != 0) {
+               pr_debug("Failed to copy wave control user data\n");
+               kfree(args_buff);
+               return -EINVAL;
+       }
+
+       wac_info.process = p;
+
+       /* now parse the compact "pay-load" area, field by field */
+       wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+       args_idx += sizeof(wac_info.operand);
+
+       wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+       args_idx += sizeof(wac_info.mode);
+
+       wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
+       args_idx += sizeof(wac_info.trapId);
+
+       wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
+                                       *((uint32_t *)(&args_buff[args_idx]));
+       wac_info.dbgWave_msg.MemoryVA = NULL;
+
+       mutex_lock(kfd_get_dbgmgr_mutex());
+
+       pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+                       wac_info.process, wac_info.operand,
+                       wac_info.mode, wac_info.trapId,
+                       wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+
+       status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+
+       pr_debug("Returned status of dbg manager is %ld\n", status);
+
+       mutex_unlock(kfd_get_dbgmgr_mutex());
+
+       kfree(args_buff);
+
+       return status;
+}
+
 static int kfd_ioctl_get_clock_counters(struct file *filep,
                                struct kfd_process *p, void *data)
 {
@@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
                        kfd_ioctl_wait_events, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+                       kfd_ioctl_dbg_register, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+                       kfd_ioctl_dbg_unregister, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+                       kfd_ioctl_dbg_address_watch, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+                       kfd_ioctl_dbg_wave_control, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
new file mode 100644 (file)
index 0000000..96153f2
--- /dev/null
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_regs.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
+#include "../../radeon/cik_reg.h"
+
+static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
+{
+       BUG_ON(!dev || !dev->kfd2kgd);
+
+       dev->kfd2kgd->address_watch_disable(dev->kgd);
+}
+
+static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+                               unsigned int pasid, uint64_t vmid0_address,
+                               uint32_t *packet_buff, size_t size_in_bytes)
+{
+       struct pm4__release_mem *rm_packet;
+       struct pm4__indirect_buffer_pasid *ib_packet;
+       struct kfd_mem_obj *mem_obj;
+       size_t pq_packets_size_in_bytes;
+       union ULARGE_INTEGER *largep;
+       union ULARGE_INTEGER addr;
+       struct kernel_queue *kq;
+       uint64_t *rm_state;
+       unsigned int *ib_packet_buff;
+       int status;
+
+       BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+
+       kq = dbgdev->kq;
+
+       pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
+                               sizeof(struct pm4__indirect_buffer_pasid);
+
+       /*
+        * We acquire a buffer from DIQ
+        * The receive packet buff will be sitting on the Indirect Buffer
+        * and in the PQ we put the IB packet + sync packet(s).
+        */
+       status = kq->ops.acquire_packet_buffer(kq,
+                               pq_packets_size_in_bytes / sizeof(uint32_t),
+                               &ib_packet_buff);
+       if (status != 0) {
+               pr_err("amdkfd: acquire_packet_buffer failed\n");
+               return status;
+       }
+
+       memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
+
+       ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
+
+       ib_packet->header.count = 3;
+       ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
+       ib_packet->header.type = PM4_TYPE_3;
+
+       largep = (union ULARGE_INTEGER *) &vmid0_address;
+
+       ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
+       ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
+
+       ib_packet->control = (1 << 23) | (1 << 31) |
+                       ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+
+       ib_packet->bitfields5.pasid = pasid;
+
+       /*
+        * for now we use release mem for GPU-CPU synchronization
+        * Consider WaitRegMem + WriteData as a better alternative
+        * we get a GART allocations ( gpu/cpu mapping),
+        * for the sync variable, and wait until:
+        * (a) Sync with HW
+        * (b) Sync var is written by CP to mem.
+        */
+       rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
+                       (sizeof(struct pm4__indirect_buffer_pasid) /
+                                       sizeof(unsigned int)));
+
+       status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
+                                       &mem_obj);
+
+       if (status != 0) {
+               pr_err("amdkfd: Failed to allocate GART memory\n");
+               kq->ops.rollback_packet(kq);
+               return status;
+       }
+
+       rm_state = (uint64_t *) mem_obj->cpu_ptr;
+
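+       /* the CP overwrites this with QUEUESTATE__ACTIVE via release_mem */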
+       *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
+
+       rm_packet->header.opcode = IT_RELEASE_MEM;
+       rm_packet->header.type = PM4_TYPE_3;
+       rm_packet->header.count = sizeof(struct pm4__release_mem) /
+                                       sizeof(unsigned int) - 2;
+
+       rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+       rm_packet->bitfields2.event_index =
+                               event_index___release_mem__end_of_pipe;
+
+       rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+       rm_packet->bitfields2.atc = 0;
+       rm_packet->bitfields2.tc_wb_action_ena = 1;
+
+       addr.quad_part = mem_obj->gpu_addr;
+
+       rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
+       rm_packet->address_hi = addr.u.high_part;
+
+       rm_packet->bitfields3.data_sel =
+                               data_sel___release_mem__send_64_bit_data;
+
+       rm_packet->bitfields3.int_sel =
+                       int_sel___release_mem__send_data_after_write_confirm;
+
+       rm_packet->bitfields3.dst_sel =
+                       dst_sel___release_mem__memory_controller;
+
+       rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+       kq->ops.submit_packet(kq);
+
+       /* Wait till CP writes sync code: */
+       status = amdkfd_fence_wait_timeout(
+                       (unsigned int *) rm_state,
+                       QUEUESTATE__ACTIVE, 1500);
+
+       kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+       return status;
+}
+
+static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
+{
+       BUG_ON(!dbgdev);
+
+       /*
+        * no action is needed in this case,
+        * just make sure diq will not be used
+        */
+
+       dbgdev->kq = NULL;
+
+       return 0;
+}
+
+static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
+{
+       struct queue_properties properties;
+       unsigned int qid;
+       struct kernel_queue *kq = NULL;
+       int status;
+
+       BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
+
+       status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
+                               &properties, 0, KFD_QUEUE_TYPE_DIQ,
+                               &qid);
+
+       if (status) {
+               pr_err("amdkfd: Failed to create DIQ\n");
+               return status;
+       }
+
+       pr_debug("DIQ Created with queue id: %d\n", qid);
+
+       kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
+
+       if (kq == NULL) {
+               pr_err("amdkfd: Error getting DIQ\n");
+               pqm_destroy_queue(dbgdev->pqm, qid);
+               return -EFAULT;
+       }
+
+       dbgdev->kq = kq;
+
+       return status;
+}
+
+static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
+{
+       BUG_ON(!dbgdev || !dbgdev->dev);
+
+       /* disable watch address */
+       dbgdev_address_watch_disable_nodiq(dbgdev->dev);
+       return 0;
+}
+
+static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
+{
+       /* todo - disable address watch */
+       int status;
+
+       BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
+
+       status = pqm_destroy_queue(dbgdev->pqm,
+                       dbgdev->kq->queue->properties.queue_id);
+       dbgdev->kq = NULL;
+
+       return status;
+}
+
+static void dbgdev_address_watch_set_registers(
+                       const struct dbg_address_watch_info *adw_info,
+                       union TCP_WATCH_ADDR_H_BITS *addrHi,
+                       union TCP_WATCH_ADDR_L_BITS *addrLo,
+                       union TCP_WATCH_CNTL_BITS *cntl,
+                       unsigned int index, unsigned int vmid)
+{
+       union ULARGE_INTEGER addr;
+
+       BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
+
+       addr.quad_part = 0;
+       addrHi->u32All = 0;
+       addrLo->u32All = 0;
+       cntl->u32All = 0;
+
+       if (adw_info->watch_mask != NULL)
+               cntl->bitfields.mask =
+                       (uint32_t) (adw_info->watch_mask[index] &
+                                       ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
+       else
+               cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+
+       addr.quad_part = (unsigned long long) adw_info->watch_address[index];
+
+       addrHi->bitfields.addr = addr.u.high_part &
+                                       ADDRESS_WATCH_REG_ADDHIGH_MASK;
+       addrLo->bitfields.addr =
+                       (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
+
+       cntl->bitfields.mode = adw_info->watch_mode[index];
+       cntl->bitfields.vmid = (uint32_t) vmid;
+       /* for now assume it is an ATC address */
+       cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
+
+       pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
+       pr_debug("\t\t%20s %08x\n", "set reg add high :",
+                       addrHi->bitfields.addr);
+       pr_debug("\t\t%20s %08x\n", "set reg add low :",
+                       addrLo->bitfields.addr);
+}
+
+static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+                                       struct dbg_address_watch_info *adw_info)
+{
+       union TCP_WATCH_ADDR_H_BITS addrHi;
+       union TCP_WATCH_ADDR_L_BITS addrLo;
+       union TCP_WATCH_CNTL_BITS cntl;
+       struct kfd_process_device *pdd;
+       unsigned int i;
+
+       BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+       /* taking the vmid for that process on the safe way using pdd */
+       pdd = kfd_get_process_device_data(dbgdev->dev,
+                                       adw_info->process);
+       if (!pdd) {
+               pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+               return -EFAULT;
+       }
+
+       addrHi.u32All = 0;
+       addrLo.u32All = 0;
+       cntl.u32All = 0;
+
+       if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+                       (adw_info->num_watch_points == 0)) {
+               pr_err("amdkfd: num_watch_points is invalid\n");
+               return -EINVAL;
+       }
+
+       if ((adw_info->watch_mode == NULL) ||
+               (adw_info->watch_address == NULL)) {
+               pr_err("amdkfd: adw_info fields are not valid\n");
+               return -EINVAL;
+       }
+
+       for (i = 0; i < adw_info->num_watch_points; i++) {
+               dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
+                                               &cntl, i, pdd->qpd.vmid);
+
+               pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+               pr_debug("\t\t%20s %08x\n", "register index :", i);
+               pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
+               pr_debug("\t\t%20s %08x\n", "Address Low is :",
+                               addrLo.bitfields.addr);
+               pr_debug("\t\t%20s %08x\n", "Address high is :",
+                               addrHi.bitfields.addr);
+               pr_debug("\t\t%20s %08x\n", "Address high is :",
+                               addrHi.bitfields.addr);
+               pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+                               cntl.bitfields.mask);
+               pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+                               cntl.bitfields.mode);
+               pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+                               cntl.bitfields.vmid);
+               pr_debug("\t\t%20s %08x\n", "Control atc  is :",
+                               cntl.bitfields.atc);
+               pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+               pdd->dev->kfd2kgd->address_watch_execute(
+                                               dbgdev->dev->kgd,
+                                               i,
+                                               cntl.u32All,
+                                               addrHi.u32All,
+                                               addrLo.u32All);
+       }
+
+       return 0;
+}
+
+static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+                                       struct dbg_address_watch_info *adw_info)
+{
+       struct pm4__set_config_reg *packets_vec;
+       union TCP_WATCH_ADDR_H_BITS addrHi;
+       union TCP_WATCH_ADDR_L_BITS addrLo;
+       union TCP_WATCH_CNTL_BITS cntl;
+       struct kfd_mem_obj *mem_obj;
+       unsigned int aw_reg_add_dword;
+       uint32_t *packet_buff_uint;
+       unsigned int i;
+       int status;
+       size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
+       /* we do not control the vmid in DIQ mode, just a placeholder */
+       unsigned int vmid = 0;
+
+       BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+       addrHi.u32All = 0;
+       addrLo.u32All = 0;
+       cntl.u32All = 0;
+
+       if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+                       (adw_info->num_watch_points == 0)) {
+               pr_err("amdkfd: num_watch_points is invalid\n");
+               return -EINVAL;
+       }
+
+       if ((NULL == adw_info->watch_mode) ||
+                       (NULL == adw_info->watch_address)) {
+               pr_err("amdkfd: adw_info fields are not valid\n");
+               return -EINVAL;
+       }
+
+       status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+       if (status != 0) {
+               pr_err("amdkfd: Failed to allocate GART memory\n");
+               return status;
+       }
+
+       packet_buff_uint = mem_obj->cpu_ptr;
+
+       memset(packet_buff_uint, 0, ib_size);
+
+       packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+
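+       /*
+        * Four SET_CONFIG_REG packets per watch point, filled in below:
+        * [0] CNTL (vmid inserted by CP), [1] ADDR_HI, [2] ADDR_LO and
+        * [3] CNTL again, now with the valid bit resolved.
+        */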
+       packets_vec[0].header.count = 1;
+       packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
+       packets_vec[0].header.type = PM4_TYPE_3;
+       packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+       packets_vec[0].bitfields2.insert_vmid = 1;
+       packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
+       packets_vec[1].bitfields2.insert_vmid = 0;
+       packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+       packets_vec[2].bitfields2.insert_vmid = 0;
+       packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
+       packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+       packets_vec[3].bitfields2.insert_vmid = 1;
+
+       for (i = 0; i < adw_info->num_watch_points; i++) {
+               dbgdev_address_watch_set_registers(adw_info,
+                                               &addrHi,
+                                               &addrLo,
+                                               &cntl,
+                                               i,
+                                               vmid);
+
+               pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+               pr_debug("\t\t%20s %08x\n", "register index :", i);
+               pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+               pr_debug("\t\t%20s %p\n", "Add ptr is :",
+                               adw_info->watch_address);
+               pr_debug("\t\t%20s %08llx\n", "Add     is :",
+                               adw_info->watch_address[i]);
+               pr_debug("\t\t%20s %08x\n", "Address Low is :",
+                               addrLo.bitfields.addr);
+               pr_debug("\t\t%20s %08x\n", "Address high is :",
+                               addrHi.bitfields.addr);
+               pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+                               cntl.bitfields.mask);
+               pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+                               cntl.bitfields.mode);
+               pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+                               cntl.bitfields.vmid);
+               pr_debug("\t\t%20s %08x\n", "Control atc  is :",
+                               cntl.bitfields.atc);
+               pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+               aw_reg_add_dword =
+                               dbgdev->dev->kfd2kgd->address_watch_get_offset(
+                                       dbgdev->dev->kgd,
+                                       i,
+                                       ADDRESS_WATCH_REG_CNTL);
+
+               aw_reg_add_dword /= sizeof(uint32_t);
+
+               packets_vec[0].bitfields2.reg_offset =
+                                       aw_reg_add_dword - CONFIG_REG_BASE;
+
+               packets_vec[0].reg_data[0] = cntl.u32All;
+
+               aw_reg_add_dword =
+                               dbgdev->dev->kfd2kgd->address_watch_get_offset(
+                                       dbgdev->dev->kgd,
+                                       i,
+                                       ADDRESS_WATCH_REG_ADDR_HI);
+
+               aw_reg_add_dword /= sizeof(uint32_t);
+
+               packets_vec[1].bitfields2.reg_offset =
+                                       aw_reg_add_dword - CONFIG_REG_BASE;
+               packets_vec[1].reg_data[0] = addrHi.u32All;
+
+               aw_reg_add_dword =
+                               dbgdev->dev->kfd2kgd->address_watch_get_offset(
+                                       dbgdev->dev->kgd,
+                                       i,
+                                       ADDRESS_WATCH_REG_ADDR_LO);
+
+               aw_reg_add_dword /= sizeof(uint32_t);
+
+               packets_vec[2].bitfields2.reg_offset =
+                               aw_reg_add_dword - CONFIG_REG_BASE;
+               packets_vec[2].reg_data[0] = addrLo.u32All;
+
+               /* enable watch flag if address is not zero */
+               if (adw_info->watch_address[i] > 0)
+                       cntl.bitfields.valid = 1;
+               else
+                       cntl.bitfields.valid = 0;
+
+               aw_reg_add_dword =
+                               dbgdev->dev->kfd2kgd->address_watch_get_offset(
+                                       dbgdev->dev->kgd,
+                                       i,
+                                       ADDRESS_WATCH_REG_CNTL);
+
+               aw_reg_add_dword /= sizeof(uint32_t);
+
+               packets_vec[3].bitfields2.reg_offset =
+                                       aw_reg_add_dword - CONFIG_REG_BASE;
+               packets_vec[3].reg_data[0] = cntl.u32All;
+
+               status = dbgdev_diq_submit_ib(
+                                       dbgdev,
+                                       adw_info->process->pasid,
+                                       mem_obj->gpu_addr,
+                                       packet_buff_uint,
+                                       ib_size);
+
+               if (status != 0) {
+                       pr_err("amdkfd: Failed to submit IB to DIQ\n");
+                       break;
+               }
+       }
+
+       kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+       return status;
+}
+
+static int dbgdev_wave_control_set_registers(
+                               struct dbg_wave_control_info *wac_info,
+                               union SQ_CMD_BITS *in_reg_sq_cmd,
+                               union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
+{
+       int status = 0;
+       union SQ_CMD_BITS reg_sq_cmd;
+       union GRBM_GFX_INDEX_BITS reg_gfx_index;
+       struct HsaDbgWaveMsgAMDGen2 *pMsg;
+
+       BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
+
+       reg_sq_cmd.u32All = 0;
+       reg_gfx_index.u32All = 0;
+       pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
+
+       switch (wac_info->mode) {
+       /* Send command to single wave */
+       case HSA_DBG_WAVEMODE_SINGLE:
+               /*
+                * Limit access to the process waves only,
+                * by setting vmid check
+                */
+               reg_sq_cmd.bits.check_vmid = 1;
+               reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
+               reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
+               reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
+
+               reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+               reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+               reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+               break;
+
+       /* Send command to all waves with matching VMID */
+       case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
+
+               reg_gfx_index.bits.sh_broadcast_writes = 1;
+               reg_gfx_index.bits.se_broadcast_writes = 1;
+               reg_gfx_index.bits.instance_broadcast_writes = 1;
+
+               reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+               break;
+
+       /* Send command to all CU waves with matching VMID */
+       case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
+
+               reg_sq_cmd.bits.check_vmid = 1;
+               reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+               reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+               reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+               reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       switch (wac_info->operand) {
+       case HSA_DBG_WAVEOP_HALT:
+               reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
+               break;
+
+       case HSA_DBG_WAVEOP_RESUME:
+               reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
+               break;
+
+       case HSA_DBG_WAVEOP_KILL:
+               reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+               break;
+
+       case HSA_DBG_WAVEOP_DEBUG:
+               reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
+               break;
+
+       case HSA_DBG_WAVEOP_TRAP:
+               if (wac_info->trapId < MAX_TRAPID) {
+                       reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
+                       reg_sq_cmd.bits.trap_id = wac_info->trapId;
+               } else {
+                       status = -EINVAL;
+               }
+               break;
+
+       default:
+               status = -EINVAL;
+               break;
+       }
+
+       if (status == 0) {
+               *in_reg_sq_cmd = reg_sq_cmd;
+               *in_reg_gfx_index = reg_gfx_index;
+       }
+
+       return status;
+}
+
+static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+                                       struct dbg_wave_control_info *wac_info)
+{
+       int status;
+       union SQ_CMD_BITS reg_sq_cmd;
+       union GRBM_GFX_INDEX_BITS reg_gfx_index;
+       struct kfd_mem_obj *mem_obj;
+       uint32_t *packet_buff_uint;
+       struct pm4__set_config_reg *packets_vec;
+       size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+       BUG_ON(!dbgdev || !wac_info);
+
+       reg_sq_cmd.u32All = 0;
+
+       status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+                                                       &reg_gfx_index);
+       if (status) {
+               pr_err("amdkfd: Failed to set wave control registers\n");
+               return status;
+       }
+
+       /* we do not control the VMID in DIQ, so reset it to a known value */
+       reg_sq_cmd.bits.vm_id = 0;
+
+       pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+       pr_debug("\t\t mode      is: %u\n", wac_info->mode);
+       pr_debug("\t\t operand   is: %u\n", wac_info->operand);
+       pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
+       pr_debug("\t\t msg value is: %u\n",
+                       wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+       pr_debug("\t\t vmid      is: N/A\n");
+
+       pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+       pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
+       pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
+       pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
+       pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
+       pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
+       pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+       pr_debug("\t\t ibw       is : %u\n",
+                       reg_gfx_index.bitfields.instance_broadcast_writes);
+       pr_debug("\t\t ii        is : %u\n",
+                       reg_gfx_index.bitfields.instance_index);
+       pr_debug("\t\t sebw      is : %u\n",
+                       reg_gfx_index.bitfields.se_broadcast_writes);
+       pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
+       pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
+       pr_debug("\t\t sbw       is : %u\n",
+                       reg_gfx_index.bitfields.sh_broadcast_writes);
+
+       pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+       status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+       if (status != 0) {
+               pr_err("amdkfd: Failed to allocate GART memory\n");
+               return status;
+       }
+
+       packet_buff_uint = mem_obj->cpu_ptr;
+
+       memset(packet_buff_uint, 0, ib_size);
+
+       packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
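+
+       /*
+        * Three packets: [0] select the target waves via GRBM_GFX_INDEX,
+        * [1] write SQ_CMD with the VMID inserted by CP, [2] restore
+        * GRBM_GFX_INDEX to broadcast.
+        */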
+       packets_vec[0].header.count = 1;
+       packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
+       packets_vec[0].header.type = PM4_TYPE_3;
+       packets_vec[0].bitfields2.reg_offset =
+                       GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+                               USERCONFIG_REG_BASE;
+
+       packets_vec[0].bitfields2.insert_vmid = 0;
+       packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
+
+       packets_vec[1].header.count = 1;
+       packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
+       packets_vec[1].header.type = PM4_TYPE_3;
+       packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
+                                               CONFIG_REG_BASE;
+
+       packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
+       packets_vec[1].bitfields2.insert_vmid = 1;
+       packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
+
+       /* Restore the GRBM_GFX_INDEX register */
+
+       reg_gfx_index.u32All = 0;
+       reg_gfx_index.bits.sh_broadcast_writes = 1;
+       reg_gfx_index.bits.instance_broadcast_writes = 1;
+       reg_gfx_index.bits.se_broadcast_writes = 1;
+
+       packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+       packets_vec[2].bitfields2.reg_offset =
+                               GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+                                       USERCONFIG_REG_BASE;
+
+       packets_vec[2].bitfields2.insert_vmid = 0;
+       packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+       status = dbgdev_diq_submit_ib(
+                       dbgdev,
+                       wac_info->process->pasid,
+                       mem_obj->gpu_addr,
+                       packet_buff_uint,
+                       ib_size);
+
+       if (status != 0)
+               pr_err("amdkfd: Failed to submit IB to DIQ\n");
+
+       kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+       return status;
+}
+
+static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
+                                       struct dbg_wave_control_info *wac_info)
+{
+       int status;
+       union SQ_CMD_BITS reg_sq_cmd;
+       union GRBM_GFX_INDEX_BITS reg_gfx_index;
+       struct kfd_process_device *pdd;
+
+       BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
+
+       reg_sq_cmd.u32All = 0;
+
+       /* taking the VMID for that process on the safe way using PDD */
+       pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
+
+       if (!pdd) {
+               pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+               return -EFAULT;
+       }
+       status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+                                                       &reg_gfx_index);
+       if (status) {
+               pr_err("amdkfd: Failed to set wave control registers\n");
+               return status;
+       }
+
+       /* for non DIQ we need to patch the VMID: */
+
+       reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
+
+       pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+       pr_debug("\t\t mode      is: %u\n", wac_info->mode);
+       pr_debug("\t\t operand   is: %u\n", wac_info->operand);
+       pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
+       pr_debug("\t\t msg value is: %u\n",
+                       wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+       pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
+
+       pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+       pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
+       pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
+       pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
+       pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
+       pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
+       pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+       pr_debug("\t\t ibw       is : %u\n",
+                       reg_gfx_index.bitfields.instance_broadcast_writes);
+       pr_debug("\t\t ii        is : %u\n",
+                       reg_gfx_index.bitfields.instance_index);
+       pr_debug("\t\t sebw      is : %u\n",
+                       reg_gfx_index.bitfields.se_broadcast_writes);
+       pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
+       pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
+       pr_debug("\t\t sbw       is : %u\n",
+                       reg_gfx_index.bitfields.sh_broadcast_writes);
+
+       pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+       return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
+                                                       reg_gfx_index.u32All,
+                                                       reg_sq_cmd.u32All);
+}
+
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+       int status = 0;
+       unsigned int vmid;
+       union SQ_CMD_BITS reg_sq_cmd;
+       union GRBM_GFX_INDEX_BITS reg_gfx_index;
+       struct kfd_process_device *pdd;
+       struct dbg_wave_control_info wac_info;
+       int temp;
+       int first_vmid_to_scan;
+       int last_vmid_to_scan;
+
+       first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
+       temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
+       last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
+
+       reg_sq_cmd.u32All = 0;
+       status = 0;
+
+       wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
+       wac_info.operand = HSA_DBG_WAVEOP_KILL;
+
+       pr_debug("Killing all process wavefronts\n");
+
+       /*
+        * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+        * ATC_VMID15_PASID_MAPPING to check which VMID the current process
+        * is mapped to.
+        */
+
+       for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+               if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+                               (dev->kgd, vmid)) {
+                       if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
+                                       (dev->kgd, vmid) == p->pasid) {
+                               pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
+                                               vmid, p->pasid);
+                               break;
+                       }
+               }
+       }
+
+       if (vmid > last_vmid_to_scan) {
+               pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
+               return -EFAULT;
+       }
+
+       /* taking the VMID for that process on the safe way using PDD */
+       pdd = kfd_get_process_device_data(dev, p);
+       if (!pdd)
+               return -EFAULT;
+
+       status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
+                       &reg_gfx_index);
+       if (status != 0)
+               return -EINVAL;
+
+       /* for non DIQ we need to patch the VMID: */
+       reg_sq_cmd.bits.vm_id = vmid;
+
+       dev->kfd2kgd->wave_control_execute(dev->kgd,
+                                       reg_gfx_index.u32All,
+                                       reg_sq_cmd.u32All);
+
+       return 0;
+}
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+                       enum DBGDEV_TYPE type)
+{
+       BUG_ON(!pdbgdev || !pdev);
+
+       pdbgdev->dev = pdev;
+       pdbgdev->kq = NULL;
+       pdbgdev->type = type;
+       pdbgdev->pqm = NULL;
+
+       switch (type) {
+       case DBGDEV_TYPE_NODIQ:
+               pdbgdev->dbgdev_register = dbgdev_register_nodiq;
+               pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
+               pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
+               pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
+               break;
+       case DBGDEV_TYPE_DIQ:
+       default:
+               pdbgdev->dbgdev_register = dbgdev_register_diq;
+               pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
+               pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
+               pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
+               break;
+       }
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
new file mode 100644 (file)
index 0000000..4b0dd5a
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DBGDEV_H_
+#define KFD_DBGDEV_H_
+
+enum {
+       SQ_CMD_VMID_OFFSET = 28,
+       ADDRESS_WATCH_CNTL_OFFSET = 24
+};
+
+enum {
+       PRIV_QUEUE_SYNC_TIME_MS = 200
+};
+
+/* CONTEXT reg space definition */
+enum {
+       CONTEXT_REG_BASE = 0xA000,
+       CONTEXT_REG_END = 0xA400,
+       CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
+};
+
+/* USER CONFIG reg space definition */
+enum {
+       USERCONFIG_REG_BASE = 0xC000,
+       USERCONFIG_REG_END = 0x10000,
+       USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
+};
+
+/* CONFIG reg space definition */
+enum {
+       CONFIG_REG_BASE = 0x2000,       /* in dwords */
+       CONFIG_REG_END = 0x2B00,
+       CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
+};
+
+/* SH reg space definition */
+enum {
+       SH_REG_BASE = 0x2C00,
+       SH_REG_END = 0x3000,
+       SH_REG_SIZE = SH_REG_END - SH_REG_BASE
+};
+
+enum SQ_IND_CMD_CMD {
+       SQ_IND_CMD_CMD_NULL = 0x00000000,
+       SQ_IND_CMD_CMD_HALT = 0x00000001,
+       SQ_IND_CMD_CMD_RESUME = 0x00000002,
+       SQ_IND_CMD_CMD_KILL = 0x00000003,
+       SQ_IND_CMD_CMD_DEBUG = 0x00000004,
+       SQ_IND_CMD_CMD_TRAP = 0x00000005,
+};
+
+enum SQ_IND_CMD_MODE {
+       SQ_IND_CMD_MODE_SINGLE = 0x00000000,
+       SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
+       SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
+       SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
+       SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
+};
+
+union SQ_IND_INDEX_BITS {
+       struct {
+               uint32_t wave_id:4;
+               uint32_t simd_id:2;
+               uint32_t thread_id:6;
+                uint32_t:1;
+               uint32_t force_read:1;
+               uint32_t read_timeout:1;
+               uint32_t unindexed:1;
+               uint32_t index:16;
+
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union SQ_IND_CMD_BITS {
+       struct {
+               uint32_t data:32;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union SQ_CMD_BITS {
+       struct {
+               uint32_t cmd:3;
+                uint32_t:1;
+               uint32_t mode:3;
+               uint32_t check_vmid:1;
+               uint32_t trap_id:3;
+                uint32_t:5;
+               uint32_t wave_id:4;
+               uint32_t simd_id:2;
+                uint32_t:2;
+               uint32_t queue_id:3;
+                uint32_t:1;
+               uint32_t vm_id:4;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union SQ_IND_DATA_BITS {
+       struct {
+               uint32_t data:32;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+       struct {
+               uint32_t instance_index:8;
+               uint32_t sh_index:8;
+               uint32_t se_index:8;
+               uint32_t:5;
+               uint32_t sh_broadcast_writes:1;
+               uint32_t instance_broadcast_writes:1;
+               uint32_t se_broadcast_writes:1;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union TCP_WATCH_ADDR_H_BITS {
+       struct {
+               uint32_t addr:16;
+               uint32_t:16;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+union TCP_WATCH_ADDR_L_BITS {
+       struct {
+               uint32_t:6;
+               uint32_t addr:26;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
+enum {
+       QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
+       QUEUESTATE__ACTIVE_COMPLETION_PENDING,
+       QUEUESTATE__ACTIVE
+};
+
+union ULARGE_INTEGER {
+       struct {
+               uint32_t low_part;
+               uint32_t high_part;
+       } u;
+       unsigned long long quad_part;
+};
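For orientation, these unions compose as follows: a 64-bit watch address can be split via ULARGE_INTEGER and packed into the TCP_WATCH_ADDR_* registers. A minimal sketch, assuming the 6 skipped low bits of TCP_WATCH_ADDR_L_BITS imply 64-byte watch granularity; the helper below is illustrative, not part of the patch:

        static void split_watch_address(uint64_t watch_address,
                                        union TCP_WATCH_ADDR_H_BITS *hi,
                                        union TCP_WATCH_ADDR_L_BITS *lo)
        {
                union ULARGE_INTEGER addr;

                addr.quad_part = watch_address; /* hypothetical 64-bit GPUVM address */
                hi->u32All = 0;
                lo->u32All = 0;
                hi->bitfields.addr = addr.u.high_part & 0xFFFF; /* address bits 32..47 */
                lo->bitfields.addr = addr.u.low_part >> 6;      /* address bits 6..31 */
        }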
+
+#define KFD_CIK_VMID_START_OFFSET (8)
+#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+                       enum DBGDEV_TYPE type);
+
+#endif /* KFD_DBGDEV_H_ */
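Note how the definitions above line up: in SQ_CMD_BITS the vm_id field starts at bit 28 (3+1+3+1+3+5+4+2+2+3+1 bits precede it), matching SQ_CMD_VMID_OFFSET. A hedged sketch of composing an SQ command word that halts every wave of one VMID, using only names from this header (illustrative, not code from the patch):

        union SQ_CMD_BITS reg = { .u32All = 0 };

        reg.bitfields.cmd = SQ_IND_CMD_CMD_HALT;         /* halt the wavefronts */
        reg.bitfields.mode = SQ_IND_CMD_MODE_BROADCAST;  /* every wave ...      */
        reg.bitfields.check_vmid = 1;                    /* ... of a single VMID */
        reg.bitfields.vm_id = KFD_CIK_VMID_START_OFFSET; /* first KFD VMID (8)  */

        /* reg.u32All now holds the value to hand to the wave-control path */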
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
new file mode 100644 (file)
index 0000000..56d6763
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include "kfd_priv.h"
+#include "cik_regs.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+
+static DEFINE_MUTEX(kfd_dbgmgr_mutex);
+
+struct mutex *kfd_get_dbgmgr_mutex(void)
+{
+       return &kfd_dbgmgr_mutex;
+}
+
+static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+{
+       BUG_ON(!pmgr);
+
+       kfree(pmgr->dbgdev);
+
+       pmgr->dbgdev = NULL;
+       pmgr->pasid = 0;
+       pmgr->dev = NULL;
+}
+
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+{
+       if (pmgr != NULL) {
+               kfd_dbgmgr_uninitialize(pmgr);
+               kfree(pmgr);
+       }
+}
+
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+{
+       enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
+       struct kfd_dbgmgr *new_buff;
+
+       BUG_ON(pdev == NULL);
+       BUG_ON(!pdev->init_complete);
+
+       new_buff = kfd_alloc_struct(new_buff);
+       if (!new_buff) {
+               pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+               return false;
+       }
+
+       new_buff->pasid = 0;
+       new_buff->dev = pdev;
+       new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
+       if (!new_buff->dbgdev) {
+               pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+               kfree(new_buff);
+               return false;
+       }
+
+       /* determine the actual debug device type: non-DIQ when there is no HW scheduling (cpsch) */
+       if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+               type = DBGDEV_TYPE_NODIQ;
+
+       kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
+       *ppmgr = new_buff;
+
+       return true;
+}
+
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+       BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+       if (pmgr->pasid != 0) {
+               pr_debug("H/W debugger is already active using pasid %d\n",
+                               pmgr->pasid);
+               return -EBUSY;
+       }
+
+       /* remember pasid */
+       pmgr->pasid = p->pasid;
+
+       /* provide the pqm for DIQ generation */
+       pmgr->dbgdev->pqm = &p->pqm;
+
+       /* perform the actual registration */
+       pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
+
+       return 0;
+}
+
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+       BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+       /* Is the request coming from the already registered process? */
+       if (pmgr->pasid != p->pasid) {
+               pr_debug("H/W debugger is not registered by calling pasid %d\n",
+                               p->pasid);
+               return -EINVAL;
+       }
+
+       pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
+
+       pmgr->pasid = 0;
+
+       return 0;
+}
+
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+                               struct dbg_wave_control_info *wac_info)
+{
+       BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
+
+       /* Is the request coming from the already registered process? */
+       if (pmgr->pasid != wac_info->process->pasid) {
+               pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+                               wac_info->process->pasid);
+               return -EINVAL;
+       }
+
+       return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
+}
+
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+                               struct dbg_address_watch_info *adw_info)
+{
+       BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
+
+       /* Is the request coming from the already registered process? */
+       if (pmgr->pasid != adw_info->process->pasid) {
+               pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+                               adw_info->process->pasid);
+               return -EINVAL;
+       }
+
+       return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
+                                                       adw_info);
+}
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
new file mode 100644 (file)
index 0000000..257a745
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DBGMGR_H_
+#define KFD_DBGMGR_H_
+
+#include "kfd_priv.h"
+
+/* must align with hsakmttypes definition */
+#pragma pack(push, 4)
+
+enum HSA_DBG_WAVEOP {
+       HSA_DBG_WAVEOP_HALT = 1,        /* Halts a wavefront            */
+       HSA_DBG_WAVEOP_RESUME = 2,      /* Resumes a wavefront          */
+       HSA_DBG_WAVEOP_KILL = 3,        /* Kills a wavefront            */
+       HSA_DBG_WAVEOP_DEBUG = 4,       /* Causes wavefront to enter
+                                               debug mode              */
+       HSA_DBG_WAVEOP_TRAP = 5,        /* Causes wavefront to take
+                                               a trap                  */
+       HSA_DBG_NUM_WAVEOP = 5,
+       HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMODE {
+       /* send command to a single wave */
+       HSA_DBG_WAVEMODE_SINGLE = 0,
+       /*
+        * Broadcast to all wavefronts of all processes is not
+        * supported for HSA user mode
+        */
+
+       /* send to waves within current process */
+       HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
+       /* send to waves within current process on CU  */
+       HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
+       HSA_DBG_NUM_WAVEMODE = 3,
+       HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMSG_TYPE {
+       HSA_DBG_WAVEMSG_AUTO = 0,
+       HSA_DBG_WAVEMSG_USER = 1,
+       HSA_DBG_WAVEMSG_ERROR = 2,
+       HSA_DBG_NUM_WAVEMSG,
+       HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WATCH_MODE {
+       HSA_DBG_WATCH_READ = 0,         /* Read operations only */
+       HSA_DBG_WATCH_NONREAD = 1,      /* Write or Atomic operations only */
+       HSA_DBG_WATCH_ATOMIC = 2,       /* Atomic Operations only */
+       HSA_DBG_WATCH_ALL = 3,          /* Read, Write or Atomic operations */
+       HSA_DBG_WATCH_NUM,
+       HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+};
+
+/* This structure is hardware specific and may change in the future */
+struct HsaDbgWaveMsgAMDGen2 {
+       union {
+               struct ui32 {
+                       uint32_t UserData:8;    /* user data */
+                       uint32_t ShaderArray:1; /* Shader array */
+                       uint32_t Priv:1;        /* Privileged */
+                       uint32_t Reserved0:4;   /* This field is reserved,
+                                                  should be 0 */
+                       uint32_t WaveId:4;      /* wave id */
+                       uint32_t SIMD:2;        /* SIMD id */
+                       uint32_t HSACU:4;       /* Compute unit */
+                       uint32_t ShaderEngine:2;/* Shader engine */
+                       uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
+                       uint32_t Reserved1:4;   /* This field is reserved,
+                                                  should be 0 */
+               } ui32;
+               uint32_t Value;
+       };
+       uint32_t Reserved2;
+};
+
+union HsaDbgWaveMessageAMD {
+       struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+       /* room for a future HsaDbgWaveMsgAMDGen3 */
+};
+
+struct HsaDbgWaveMessage {
+       void *MemoryVA;         /* ptr to associated host-accessible data */
+       union HsaDbgWaveMessageAMD DbgWaveMsg;
+};
+
+/*
+ * TODO: These definitions are to be moved to kfd_event once it is implemented.
+ *
+ * HSA sync primitive, Event and HW Exception notification API definitions.
+ * The API functions allow the runtime to define a so-called sync-primitive,
+ * a SW object combining a user-mode provided "syncvar" and a scheduler event
+ * that can be signaled through a defined GPU interrupt. A syncvar is
+ * a process virtual memory location of a certain size that can be accessed
+ * by CPU and GPU shader code within the process to set and query the content
+ * within that memory. The definition of the content is determined by the HSA
+ * runtime and potentially GPU shader code interfacing with the HSA runtime.
+ * The syncvar values may commonly be written through a PM4 WRITE_DATA packet
+ * in the user mode instruction stream. The OS scheduler event is typically
+ * associated and signaled by an interrupt issued by the GPU, but other HSA
+ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
+ * by the KFD by this mechanism, too.
+ */
+
+/* these are the new definitions for events */
+enum HSA_EVENTTYPE {
+       HSA_EVENTTYPE_SIGNAL = 0,       /* user-mode generated GPU signal */
+       HSA_EVENTTYPE_NODECHANGE = 1,   /* HSA node change (attach/detach) */
+       HSA_EVENTTYPE_DEVICESTATECHANGE = 2,    /* HSA device state change
+                                                  (start/stop) */
+       HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
+       HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
+       HSA_EVENTTYPE_DEBUG_EVENT = 5,  /* GPU signal for debugging */
+       HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
+       HSA_EVENTTYPE_QUEUE_EVENT = 7,  /* GPU signal queue idle state
+                                          (EOP pm4) */
+       /* ...  */
+       HSA_EVENTTYPE_MAXID,
+       HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+};
+
+/* Sub-definitions for various event types: Syncvar */
+struct HsaSyncVar {
+       union SyncVar {
+               void *UserData; /* pointer to user mode data */
+               uint64_t UserDataPtrValue; /* 64bit compatibility of value */
+       } SyncVar;
+       uint64_t SyncVarSize;
+};
+
+/* Sub-definitions for various event types: NodeChange */
+
+enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
+       HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+       HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+       HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+};
+
+struct HsaNodeChange {
+       /* HSA node added/removed on the platform */
+       enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
+};
+
+/* Sub-definitions for various event types: DeviceStateChange */
+enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
+       /* device started (and available) */
+       HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
+       /* device stopped (i.e. unavailable) */
+       HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
+       HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+};
+
+enum HSA_DEVICE {
+       HSA_DEVICE_CPU = 0,
+       HSA_DEVICE_GPU = 1,
+       MAX_HSA_DEVICE = 2
+};
+
+struct HsaDeviceStateChange {
+       uint32_t NodeId;        /* H-NUMA node that contains the device */
+       enum HSA_DEVICE Device; /* device type: GPU or CPU */
+       enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
+};
+
+struct HsaEventData {
+       enum HSA_EVENTTYPE EventType; /* event type */
+       union EventData {
+               /*
+                * return data associated with HSA_EVENTTYPE_SIGNAL
+                * and other events
+                */
+               struct HsaSyncVar SyncVar;
+
+               /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
+               struct HsaNodeChange NodeChangeState;
+
+               /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
+               struct HsaDeviceStateChange DeviceState;
+       } EventData;
+
+       /* the following data entries are internal to the KFD & thunk itself */
+
+       /* internal thunk store for Event data (OsEventHandle) */
+       uint64_t HWData1;
+       /* internal thunk store for Event data (HWAddress) */
+       uint64_t HWData2;
+       /* internal thunk store for Event data (HWData) */
+       uint32_t HWData3;
+};
+
+struct HsaEventDescriptor {
+       /* event type to allocate */
+       enum HSA_EVENTTYPE EventType;
+       /* H-NUMA node containing GPU device that is event source */
+       uint32_t NodeId;
+       /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
+        * may be NULL
+        */
+       struct HsaSyncVar SyncVar;
+};
+
+struct HsaEvent {
+       uint32_t EventId;
+       struct HsaEventData EventData;
+};
+
+#pragma pack(pop)
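As a quick illustration of the event definitions above (a sketch under assumed semantics, not code from this patch), a signal-event descriptor would be filled in roughly like so; NULL syncvar data is explicitly allowed per the comment on HsaEventDescriptor:

        struct HsaEventDescriptor desc = {
                .EventType = HSA_EVENTTYPE_SIGNAL,      /* user-mode generated GPU signal */
                .NodeId = 0,                            /* H-NUMA node of the source GPU */
                .SyncVar.SyncVar.UserData = NULL,       /* may be NULL */
                .SyncVar.SyncVarSize = 0,
        };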
+
+enum DBGDEV_TYPE {
+       DBGDEV_TYPE_ILLEGAL = 0,
+       DBGDEV_TYPE_NODIQ = 1,
+       DBGDEV_TYPE_DIQ = 2,
+       DBGDEV_TYPE_TEST = 3
+};
+
+struct dbg_address_watch_info {
+       struct kfd_process *process;
+       enum HSA_DBG_WATCH_MODE *watch_mode;
+       uint64_t *watch_address;
+       uint64_t *watch_mask;
+       struct HsaEvent *watch_event;
+       uint32_t num_watch_points;
+};
+
+struct dbg_wave_control_info {
+       struct kfd_process *process;
+       uint32_t trapId;
+       enum HSA_DBG_WAVEOP operand;
+       enum HSA_DBG_WAVEMODE mode;
+       struct HsaDbgWaveMessage dbgWave_msg;
+};
+
+struct kfd_dbgdev {
+       /* The device that owns this data. */
+       struct kfd_dev *dev;
+
+       /* kernel queue for DIQ */
+       struct kernel_queue *kq;
+
+       /* a pointer to the pqm of the calling process */
+       struct process_queue_manager *pqm;
+
+       /* type of debug device (DIQ, non-DIQ, etc.) */
+       enum DBGDEV_TYPE type;
+
+       /* virtualized function pointers to device dbg */
+       int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
+       int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
+       int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
+                               struct dbg_address_watch_info *adw_info);
+       int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
+                               struct dbg_wave_control_info *wac_info);
+};
+
+struct kfd_dbgmgr {
+       unsigned int pasid;
+       struct kfd_dev *dev;
+       struct kfd_dbgdev *dbgdev;
+};
+
+/* prototypes for debug manager functions */
+struct mutex *kfd_get_dbgmgr_mutex(void);
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+                               struct dbg_wave_control_info *wac_info);
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+                       struct dbg_address_watch_info *adw_info);
+#endif /* KFD_DBGMGR_H_ */
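Taken together, the prototypes above suggest the following caller lifecycle. A hedged sketch only: holding kfd_get_dbgmgr_mutex() around the sequence is an assumption based on the mutex being exported, and dev/p stand in for a bound kfd_dev and kfd_process:

        struct kfd_dbgmgr *dbgmgr;

        mutex_lock(kfd_get_dbgmgr_mutex());
        if (kfd_dbgmgr_create(&dbgmgr, dev)) {          /* picks DIQ vs. non-DIQ */
                if (!kfd_dbgmgr_register(dbgmgr, p)) {  /* binds to p->pasid */
                        /* kfd_dbgmgr_wave_control() / kfd_dbgmgr_address_watch() ... */
                        kfd_dbgmgr_unregister(dbgmgr, p);
                }
                kfd_dbgmgr_destroy(dbgmgr);
        }
        mutex_unlock(kfd_get_dbgmgr_mutex());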
index 52cab0f53ebc6e03c194208d65c10abd39754d2e..1d1e2e952a79b145d3f9855dd5a063d83d52cc64 100644 (file)
 static const struct kfd_device_info kaveri_device_info = {
        .asic_family = CHIP_KAVERI,
        .max_pasid_bits = 16,
+       /* max num of queues for KV. TODO: should be a dynamic value */
+       .max_no_of_hqd  = 24,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
+       .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED
 };
 
@@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                goto dqm_start_error;
        }
 
+       kfd->dbgmgr = NULL;
+
        kfd->init_complete = true;
        dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
                 kfd->pdev->device);
index 4e215bd4d41f0ae872c2568f1746b9c9ec68e6c8..547b0a589693615b6da7ae7d5ea87435ee71313f 100644 (file)
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+                               bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 
        BUG_ON(!dqm);
 
-       destroy_queues_cpsch(dqm, true);
+       destroy_queues_cpsch(dqm, true, true);
 
        list_for_each_entry(node, &dqm->queues, list) {
                pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
        pr_debug("kfd: In %s\n", __func__);
 
        mutex_lock(&dqm->lock);
-       destroy_queues_cpsch(dqm, false);
+       /* here we actually preempt the DIQ */
+       destroy_queues_cpsch(dqm, true, false);
        list_del(&kq->list);
        dqm->queue_count--;
        qpd->is_debug = false;
@@ -913,7 +915,7 @@ out:
        return retval;
 }
 
-static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
                                unsigned int fence_value,
                                unsigned long timeout)
 {
@@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
                                unsigned int sdma_engine)
 {
        return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+                       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
                        sdma_engine);
 }
 
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+                               bool preempt_static_queues, bool lock)
 {
        int retval;
+       enum kfd_preempt_type_filter preempt_type;
+       struct kfd_process *p;
 
        BUG_ON(!dqm);
 
@@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
                destroy_sdma_queues(dqm, 1);
        }
 
+       preempt_type = preempt_static_queues ?
+                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+                       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
        retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
-                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+                       preempt_type, 0, false, 0);
        if (retval != 0)
                goto out;
 
@@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
        pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
                                KFD_FENCE_COMPLETED);
        /* should be timed out */
-       amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+       retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
                                QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+       if (retval != 0) {
+               p = kfd_get_process(current);
+               p->reset_wavefronts = true;
+               goto out;
+       }
        pm_release_ib(&dqm->packets);
        dqm->active_runlist = false;
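If this preemption fence wait times out, the hunk above flags the calling process (reset_wavefronts = true) instead of releasing the runlist IB, so that its wavefronts are reset at teardown; the matching consumers are the kfd_process.c hunks further below, which call dbgdev_wave_reset_wavefronts() on workqueue release and on device unbind.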
 
@@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
        if (lock)
                mutex_lock(&dqm->lock);
 
-       retval = destroy_queues_cpsch(dqm, false);
+       retval = destroy_queues_cpsch(dqm, false, false);
        if (retval != 0) {
                pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
                goto out;
@@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 {
        int retval;
        struct mqd_manager *mqd;
+       bool preempt_all_queues;
 
        BUG_ON(!dqm || !qpd || !q);
 
+       preempt_all_queues = false;
+
        retval = 0;
 
        /* remove queue from list to prevent rescheduling after preemption */
        mutex_lock(&dqm->lock);
+
+       if (qpd->is_debug) {
+               /*
+                * error: we currently do not allow destroying a queue
+                * of a process that is being debugged
+                */
+               retval = -EBUSY;
+               goto failed_try_destroy_debugged_queue;
+       }
+
        mqd = dqm->ops.get_mqd_manager(dqm,
                        get_mqd_type_from_queue_type(q->properties.type));
        if (!mqd) {
@@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        return 0;
 
 failed:
+failed_try_destroy_debugged_queue:
+
        mutex_unlock(&dqm->lock);
        return retval;
 }
index 57278e2d72e0ae6b76c4fc519144016ee024dcdc..ec4036a09f3e78f7343a4ba87d88dc1aa0d71b21 100644 (file)
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
                                struct queue *q,
                                struct qcm_process_device *qpd,
                                int *allocate_vmid);
+
        int     (*destroy_queue)(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q);
+
        int     (*update_queue)(struct device_queue_manager *dqm,
                                struct queue *q);
 
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
 
        int     (*register_process)(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
+
        int     (*unregister_process)(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
+
        int     (*initialize)(struct device_queue_manager *dqm);
        int     (*start)(struct device_queue_manager *dqm);
        int     (*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
        int     (*create_kernel_queue)(struct device_queue_manager *dqm,
                                        struct kernel_queue *kq,
                                        struct qcm_process_device *qpd);
+
        void    (*destroy_kernel_queue)(struct device_queue_manager *dqm,
                                        struct kernel_queue *kq,
                                        struct qcm_process_device *qpd);
+
        bool    (*set_cache_memory_policy)(struct device_queue_manager *dqm,
                                           struct qcm_process_device *qpd,
                                           enum cache_policy default_policy,
index e2533d875f4311e2c6fa426b4ea606c981b1675b..99b6d28a11c3e9030c734a58170ec4bb35bac56f 100644 (file)
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
        num_queues = 0;
        list_for_each_entry(cur, &qpd->queues_list, list)
                num_queues++;
-       packet->bitfields10.num_queues = num_queues;
+       packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
 
        packet->sh_mem_config = qpd->sh_mem_config;
        packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 }
 
 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
-                               struct queue *q)
+                               struct queue *q, bool is_static)
 {
        struct pm4_map_queues *packet;
+       bool use_static = is_static;
 
        BUG_ON(!pm || !buffer || !q);
 
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
        case KFD_QUEUE_TYPE_SDMA:
                packet->bitfields2.engine_sel =
                                engine_sel__mes_map_queues__sdma0;
+               use_static = false; /* no static queues under SDMA */
                break;
        default:
                BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
        packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
                        q->properties.doorbell_off;
 
+       packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+                       use_static ? 1 : 0;
+
        packet->mes_map_queues_ordinals[0].mqd_addr_lo =
                        lower_32_bits(q->gart_mqd_addr);
 
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                        pm_release_ib(pm);
                        return -ENOMEM;
                }
+
                retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
                if (retval != 0)
                        return retval;
+
                proccesses_mapped++;
                inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
                                alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                list_for_each_entry(kq, &qpd->priv_queue_list, list) {
                        if (kq->queue->properties.is_active != true)
                                continue;
+
+                       pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+                               kq->queue->queue, qpd->is_debug);
+
                        retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
-                                                       kq->queue);
+                                               kq->queue, qpd->is_debug);
                        if (retval != 0)
                                return retval;
-                       inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-                                       alloc_size_bytes);
+
+                       inc_wptr(&rl_wptr,
+                               sizeof(struct pm4_map_queues),
+                               alloc_size_bytes);
                }
 
                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (q->properties.is_active != true)
                                continue;
-                       retval = pm_create_map_queue(pm,
-                                               &rl_buffer[rl_wptr], q);
+
+                       pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+                               q->queue, qpd->is_debug);
+
+                       retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+                                               q, qpd->is_debug);
+
                        if (retval != 0)
                                return retval;
-                       inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-                                       alloc_size_bytes);
+
+                       inc_wptr(&rl_wptr,
+                               sizeof(struct pm4_map_queues),
+                               alloc_size_bytes);
                }
        }
 
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
        packet = (struct pm4_unmap_queues *)buffer;
        memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+       pr_debug("kfd: static_queue: unmapping queues: mode is %d, reset is %d, type is %d\n",
+               mode, reset, type);
        packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
                                        sizeof(struct pm4_unmap_queues));
        switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
                break;
+       case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+               /* in this case, we do not preempt static queues */
+               packet->bitfields2.queue_sel =
+                               queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+               break;
        default:
                BUG();
                break;
index 071ad5724bd2cd12d42963fe23a7a9ad7cda24c3..5b393f3e34a9f0a0082c1eb6edd8370cb638e223 100644 (file)
@@ -237,7 +237,8 @@ struct pm4_map_queues {
        struct {
                union {
                        struct {
-                               uint32_t reserved5:2;
+                               uint32_t is_static:1;
+                               uint32_t reserved5:1;
                                uint32_t doorbell_offset:21;
                                uint32_t reserved6:3;
                                uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
 enum unmap_queues_queue_sel_enum {
        queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
        queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
-       queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+       queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+       queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
 };
 
 enum unmap_queues_engine_sel_enum {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
new file mode 100644 (file)
index 0000000..a0ff348
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_DIQ_H_
+#define KFD_PM4_HEADERS_DIQ_H_
+
+/*--------------------_INDIRECT_BUFFER-------------------- */
+
+#ifndef _PM4__INDIRECT_BUFFER_DEFINED
+#define _PM4__INDIRECT_BUFFER_DEFINED
+enum _INDIRECT_BUFFER_cache_policy_enum {
+       cache_policy___indirect_buffer__lru = 0,
+       cache_policy___indirect_buffer__stream = 1,
+       cache_policy___indirect_buffer__bypass = 2
+};
+
+enum {
+       IT_INDIRECT_BUFFER_PASID = 0x5C
+};
+
+struct pm4__indirect_buffer_pasid {
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* header */
+               unsigned int ordinal1;
+       };
+
+       union {
+               struct {
+                       unsigned int reserved1:2;
+                       unsigned int ib_base_lo:30;
+               } bitfields2;
+               unsigned int ordinal2;
+       };
+
+       union {
+               struct {
+                       unsigned int ib_base_hi:16;
+                       unsigned int reserved2:16;
+               } bitfields3;
+               unsigned int ordinal3;
+       };
+
+       union {
+               unsigned int control;
+               unsigned int ordinal4;
+       };
+
+       union {
+               struct {
+                       unsigned int pasid:10;
+                       unsigned int reserved4:22;
+               } bitfields5;
+               unsigned int ordinal5;
+       };
+};
+
+#endif
+
+/*--------------------_RELEASE_MEM-------------------- */
+
+#ifndef _PM4__RELEASE_MEM_DEFINED
+#define _PM4__RELEASE_MEM_DEFINED
+enum _RELEASE_MEM_event_index_enum {
+       event_index___release_mem__end_of_pipe = 5,
+       event_index___release_mem__shader_done = 6
+};
+
+enum _RELEASE_MEM_cache_policy_enum {
+       cache_policy___release_mem__lru = 0,
+       cache_policy___release_mem__stream = 1,
+       cache_policy___release_mem__bypass = 2
+};
+
+enum _RELEASE_MEM_dst_sel_enum {
+       dst_sel___release_mem__memory_controller = 0,
+       dst_sel___release_mem__tc_l2 = 1,
+       dst_sel___release_mem__queue_write_pointer_register = 2,
+       dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum _RELEASE_MEM_int_sel_enum {
+       int_sel___release_mem__none = 0,
+       int_sel___release_mem__send_interrupt_only = 1,
+       int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+       int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum _RELEASE_MEM_data_sel_enum {
+       data_sel___release_mem__none = 0,
+       data_sel___release_mem__send_32_bit_low = 1,
+       data_sel___release_mem__send_64_bit_data = 2,
+       data_sel___release_mem__send_gpu_clock_counter = 3,
+       data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+       data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4__release_mem {
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* header */
+               unsigned int ordinal1;
+       };
+
+       union {
+               struct {
+                       unsigned int event_type:6;
+                       unsigned int reserved1:2;
+                       enum _RELEASE_MEM_event_index_enum event_index:4;
+                       unsigned int tcl1_vol_action_ena:1;
+                       unsigned int tc_vol_action_ena:1;
+                       unsigned int reserved2:1;
+                       unsigned int tc_wb_action_ena:1;
+                       unsigned int tcl1_action_ena:1;
+                       unsigned int tc_action_ena:1;
+                       unsigned int reserved3:6;
+                       unsigned int atc:1;
+                       enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
+                       unsigned int reserved4:5;
+               } bitfields2;
+               unsigned int ordinal2;
+       };
+
+       union {
+               struct {
+                       unsigned int reserved5:16;
+                       enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
+                       unsigned int reserved6:6;
+                       enum _RELEASE_MEM_int_sel_enum int_sel:3;
+                       unsigned int reserved7:2;
+                       enum _RELEASE_MEM_data_sel_enum data_sel:3;
+               } bitfields3;
+               unsigned int ordinal3;
+       };
+
+       union {
+               struct {
+                       unsigned int reserved8:2;
+                       unsigned int address_lo_32b:30;
+               } bitfields4;
+               struct {
+                       unsigned int reserved9:3;
+                       unsigned int address_lo_64b:29;
+               } bitfields5;
+               unsigned int ordinal4;
+       };
+
+       unsigned int address_hi;
+
+       unsigned int data_lo;
+
+       unsigned int data_hi;
+};
+#endif
+
+/*--------------------_SET_CONFIG_REG-------------------- */
+
+#ifndef _PM4__SET_CONFIG_REG_DEFINED
+#define _PM4__SET_CONFIG_REG_DEFINED
+
+struct pm4__set_config_reg {
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* header */
+               unsigned int ordinal1;
+       };
+
+       union {
+               struct {
+                       unsigned int reg_offset:16;
+                       unsigned int reserved1:7;
+                       unsigned int vmid_shift:5;
+                       unsigned int insert_vmid:1;
+                       unsigned int reserved2:3;
+               } bitfields2;
+               unsigned int ordinal2;
+       };
+
+       unsigned int reg_data[1];       /* 1..N of these fields */
+};
+#endif
+
+/*--------------------_WAIT_REG_MEM-------------------- */
+
+#ifndef _PM4__WAIT_REG_MEM_DEFINED
+#define _PM4__WAIT_REG_MEM_DEFINED
+enum _WAIT_REG_MEM_function_enum {
+       function___wait_reg_mem__always_pass = 0,
+       function___wait_reg_mem__less_than_ref_value = 1,
+       function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
+       function___wait_reg_mem__equal_to_the_reference_value = 3,
+       function___wait_reg_mem__not_equal_reference_value = 4,
+       function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
+       function___wait_reg_mem__greater_than_reference_value = 6,
+       function___wait_reg_mem__reserved = 7
+};
+
+enum _WAIT_REG_MEM_mem_space_enum {
+       mem_space___wait_reg_mem__register_space = 0,
+       mem_space___wait_reg_mem__memory_space = 1
+};
+
+enum _WAIT_REG_MEM_operation_enum {
+       operation___wait_reg_mem__wait_reg_mem = 0,
+       operation___wait_reg_mem__wr_wait_wr_reg = 1
+};
+
+struct pm4__wait_reg_mem {
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* header */
+               unsigned int ordinal1;
+       };
+
+       union {
+               struct {
+                       enum _WAIT_REG_MEM_function_enum function:3;
+                       unsigned int reserved1:1;
+                       enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
+                       enum _WAIT_REG_MEM_operation_enum operation:2;
+                       unsigned int reserved2:24;
+               } bitfields2;
+               unsigned int ordinal2;
+       };
+
+       union {
+               struct {
+                       unsigned int reserved3:2;
+                       unsigned int memory_poll_addr_lo:30;
+               } bitfields3;
+               struct {
+                       unsigned int register_poll_addr:16;
+                       unsigned int reserved4:16;
+               } bitfields4;
+               struct {
+                       unsigned int register_write_addr:16;
+                       unsigned int reserved5:16;
+               } bitfields5;
+               unsigned int ordinal3;
+       };
+
+       union {
+               struct {
+                       unsigned int poll_address_hi:16;
+                       unsigned int reserved6:16;
+               } bitfields6;
+               struct {
+                       unsigned int register_write_addr:16;
+                       unsigned int reserved7:16;
+               } bitfields7;
+               unsigned int ordinal4;
+       };
+
+       unsigned int reference;
+
+       unsigned int mask;
+
+       union {
+               struct {
+                       unsigned int poll_interval:16;
+                       unsigned int reserved8:16;
+               } bitfields8;
+               unsigned int ordinal7;
+       };
+};
+#endif
+
+#endif /* KFD_PM4_HEADERS_DIQ_H_ */
index b6f838f56589664297232a4e2700a0c39cca5bcc..cb79046e5c8007050ca1eab89f0754cb7cce7acf 100644 (file)
@@ -128,6 +128,7 @@ struct kfd_device_info {
        unsigned int asic_family;
        const struct kfd_event_interrupt_class *event_interrupt_class;
        unsigned int max_pasid_bits;
+       unsigned int max_no_of_hqd;
        size_t ih_ring_entry_size;
        uint8_t num_of_watch_points;
        uint16_t mqd_size_aligned;
@@ -167,8 +168,8 @@ struct kfd_dev {
 
        const struct kfd2kgd_calls *kfd2kgd;
        struct mutex doorbell_mutex;
-       unsigned long doorbell_available_index[DIV_ROUND_UP(
-               KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+       DECLARE_BITMAP(doorbell_available_index,
+                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 
        void *gtt_mem;
        uint64_t gtt_start_gpu_addr;
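The DECLARE_BITMAP() conversion above is purely cosmetic; in include/linux/types.h the macro expands to the same open-coded array it replaces:

        #define DECLARE_BITMAP(name, bits) \
                unsigned long name[BITS_TO_LONGS(bits)]

so the storage layout of doorbell_available_index is unchanged and only the intent becomes clearer.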
@@ -195,6 +196,9 @@ struct kfd_dev {
         * from the HW ring into a SW ring.
         */
        bool interrupts_active;
+
+       /* Debug manager */
+       struct kfd_dbgmgr           *dbgmgr;
 };
 
 /* KGD2KFD callbacks */
@@ -231,6 +235,7 @@ struct device *kfd_chardev(void);
 enum kfd_preempt_type_filter {
        KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
        KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
        KFD_PREEMPT_TYPE_FILTER_BY_PASID
 };
 
@@ -503,8 +508,6 @@ struct kfd_process {
        /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
        struct kfd_queue **queues;
 
-       unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
-
        /*Is the user space process 32 bit?*/
        bool is_32bit_user_mode;
 
@@ -516,6 +519,11 @@ struct kfd_process {
                                                                event_pages */
        u32 next_nonsignal_event_id;
        size_t signal_event_count;
+       /*
+        * This flag indicates whether we should reset all wavefronts
+        * on process termination
+        */
+       bool reset_wavefronts;
 };
 
 /**
@@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
                        struct queue_properties *p);
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
+                                               unsigned int qid);
+
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+                               unsigned int fence_value,
+                               unsigned long timeout);
 
 /* Packet Manager */
 
@@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
                     uint64_t *event_page_offset, uint32_t *event_slot_index);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+
 #endif
index dc910af2bb3c45aa6e344a86e16d9ba4ae389713..56b904f5bdb19352cb0c7d15fa26669e3e4a97aa 100644 (file)
@@ -31,6 +31,7 @@
 struct mm_struct;
 
 #include "kfd_priv.h"
+#include "kfd_dbgmgr.h"
 
 /*
  * Initial size for the array of queues.
@@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work)
                pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
                                pdd->dev->id, p->pasid);
 
+               if (p->reset_wavefronts)
+                       dbgdev_wave_reset_wavefronts(pdd->dev, p);
+
                amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
                list_del(&pdd->per_device_list);
 
@@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
        if (kfd_init_apertures(process) != 0)
                goto err_init_apretures;
 
+       process->reset_wavefronts = false;
+
        return process;
 
 err_init_apretures:
@@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 
        mutex_lock(&p->mutex);
 
+       if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid)
+               kfd_dbgmgr_destroy(dev->dbgmgr);
+
        pqm_uninit(&p->pqm);
+       if (p->reset_wavefronts)
+               dbgdev_wave_reset_wavefronts(dev, p);
 
        pdd = kfd_get_process_device_data(dev, p);
 
index 530b82c4e78b1eca87d6ebfe72e7ecf763fc8a8c..7b69070f7ecc5eb58833f0ff828f7a934f7bd1d6 100644 (file)
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
        struct queue *q;
        struct process_queue_node *pqn;
        struct kernel_queue *kq;
+       int num_queues = 0;
+       struct queue *cur;
 
        BUG_ON(!pqm || !dev || !properties || !qid);
 
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
                return -1;
        }
 
+       /*
+        * For a debug process, verify that it is within the static queues
+        * limit; the limit is currently set to half of the total available
+        * HQD slots. If we are just about to create a DIQ, the is_debug
+        * flag is not set yet, so we check the queue type as well.
+        */
+       if (pdd->qpd.is_debug || type == KFD_QUEUE_TYPE_DIQ) {
+               list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+                       num_queues++;
+               if (num_queues >= dev->device_info->max_no_of_hqd/2)
+                       return -ENOSPC;
+       }
+
        retval = find_available_queue_slot(pqm, qid);
        if (retval != 0)
                return retval;
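Worked example: with the Kaveri entry earlier in this series setting max_no_of_hqd to 24, the check above caps a debugged process (or one about to create a DIQ) at 24 / 2 = 12 queues; the request that would make it 13 fails with -ENOSPC.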
@@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
        return 0;
 }
 
-static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue(
+struct kernel_queue *pqm_get_kernel_queue(
                                        struct process_queue_manager *pqm,
                                        unsigned int qid)
 {
index 4ea21ae88b07f81babb01725ae8545e94ec131c3..9080daa116b60031e156f0d10e200d2017c2950e 100644 (file)
@@ -163,6 +163,27 @@ struct kfd2kgd_calls {
        int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd,
                                unsigned int timeout);
 
+       int (*address_watch_disable)(struct kgd_dev *kgd);
+       int (*address_watch_execute)(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       uint32_t cntl_val,
+                                       uint32_t addr_hi,
+                                       uint32_t addr_lo);
+       int (*wave_control_execute)(struct kgd_dev *kgd,
+                                       uint32_t gfx_index_val,
+                                       uint32_t sq_cmd);
+       uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       unsigned int reg_offset);
+       bool (*get_atc_vmid_pasid_mapping_valid)(
+                                       struct kgd_dev *kgd,
+                                       uint8_t vmid);
+       uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
+                                       struct kgd_dev *kgd,
+                                       uint8_t vmid);
+       void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
+                                       uint8_t vmid);
+
        uint16_t (*get_fw_version)(struct kgd_dev *kgd,
                                enum kgd_engine_type type);
 };
index 1134526286c819c87bc523a1b8852dc485804046..3427b115e2bb895e0e54a7d21c16efad41210d73 100644 (file)
@@ -825,7 +825,7 @@ static u64 drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry)
                hole_start = drm_mm_hole_node_start(entry);
                hole_end = drm_mm_hole_node_end(entry);
                hole_size = hole_end - hole_start;
-               seq_printf(m, "%#llx-%#llx: %llu: free\n", hole_start,
+               seq_printf(m, "%#018llx-%#018llx: %llu: free\n", hole_start,
                           hole_end, hole_size);
                return hole_size;
        }
@@ -846,7 +846,7 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm)
        total_free += drm_mm_dump_hole(m, &mm->head_node);
 
        drm_mm_for_each_node(entry, mm) {
-               seq_printf(m, "%#016llx-%#016llx: %llu: used\n", entry->start,
+               seq_printf(m, "%#018llx-%#018llx: %llu: used\n", entry->start,
                           entry->start + entry->size, entry->size);
                total_used += entry->size;
                total_free += drm_mm_dump_hole(m, entry);
index b728523e194f7581b8e9740b7ddb13e6f5606d56..2aaa3c88999e32c0f951312b48aedded13f837e0 100644 (file)
@@ -438,7 +438,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
        regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
        regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-       if (irq0 & ADV7511_INT0_HDP)
+       if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
                drm_helper_hpd_irq_event(adv7511->encoder->dev);
 
        if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
index 011e1cac3e4ce0b17e1d3fa2a5bd31a1ad38b3ff..fe1599d75f14e39b2a39364b78d088d4715c368f 100644 (file)
@@ -644,7 +644,8 @@ tda998x_write_avi(struct tda998x_priv *priv, struct drm_display_mode *mode)
 
        len = hdmi_avi_infoframe_pack(&frame, buf, sizeof(buf));
        if (len < 0) {
-               dev_err(&priv->hdmi->dev, "hdmi_avi_infoframe_pack() failed: %d\n", len);
+               dev_err(&priv->hdmi->dev,
+                       "hdmi_avi_infoframe_pack() failed: %zd\n", len);
                return;
        }
 
index adbbddab42c65278d78ff83ca6b6347ade7cc6b0..88cc793c46d36777ca07331c18c2f4634b3aab7e 100644 (file)
@@ -120,10 +120,13 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+       struct intel_engine_cs *ring;
        struct i915_vma *vma;
        int pin_count = 0;
+       int i;
 
-       seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+       seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ",
                   &obj->base,
                   obj->active ? "*" : " ",
                   get_pin_flag(obj),
@@ -131,8 +134,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
                   get_global_flag(obj),
                   obj->base.size / 1024,
                   obj->base.read_domains,
-                  obj->base.write_domain,
-                  i915_gem_request_get_seqno(obj->last_read_req),
+                  obj->base.write_domain);
+       for_each_ring(ring, dev_priv, i)
+               seq_printf(m, "%x ",
+                               i915_gem_request_get_seqno(obj->last_read_req[i]));
+       seq_printf(m, "] %x %x%s%s%s",
                   i915_gem_request_get_seqno(obj->last_write_req),
                   i915_gem_request_get_seqno(obj->last_fenced_req),
                   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
@@ -169,9 +175,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
                *t = '\0';
                seq_printf(m, " (%s mappable)", s);
        }
-       if (obj->last_read_req != NULL)
+       if (obj->last_write_req != NULL)
                seq_printf(m, " (%s)",
-                          i915_gem_request_get_ring(obj->last_read_req)->name);
+                          i915_gem_request_get_ring(obj->last_write_req)->name);
        if (obj->frontbuffer_bits)
                seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -665,7 +671,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
        struct drm_device *dev = node->minor->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
-       struct drm_i915_gem_request *rq;
+       struct drm_i915_gem_request *req;
        int ret, any, i;
 
        ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -677,22 +683,22 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
                int count;
 
                count = 0;
-               list_for_each_entry(rq, &ring->request_list, list)
+               list_for_each_entry(req, &ring->request_list, list)
                        count++;
                if (count == 0)
                        continue;
 
                seq_printf(m, "%s requests: %d\n", ring->name, count);
-               list_for_each_entry(rq, &ring->request_list, list) {
+               list_for_each_entry(req, &ring->request_list, list) {
                        struct task_struct *task;
 
                        rcu_read_lock();
                        task = NULL;
-                       if (rq->pid)
-                               task = pid_task(rq->pid, PIDTYPE_PID);
+                       if (req->pid)
+                               task = pid_task(req->pid, PIDTYPE_PID);
                        seq_printf(m, "    %x @ %d: %s [%d]\n",
-                                  rq->seqno,
-                                  (int) (jiffies - rq->emitted_jiffies),
+                                  req->seqno,
+                                  (int) (jiffies - req->emitted_jiffies),
                                   task ? task->comm : "<unknown>",
                                   task ? task->pid : -1);
                        rcu_read_unlock();
@@ -2276,22 +2282,35 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
        return 0;
 }
 
+static int count_irq_waiters(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *ring;
+       int count = 0;
+       int i;
+
+       for_each_ring(ring, i915, i)
+               count += ring->irq_refcount;
+
+       return count;
+}
+
 static int i915_rps_boost_info(struct seq_file *m, void *data)
 {
        struct drm_info_node *node = m->private;
        struct drm_device *dev = node->minor->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_file *file;
-       int ret;
-
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
-       if (ret)
-               return ret;
-
-       ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
-       if (ret)
-               goto unlock;
 
+       seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
+       seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy);
+       seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
+       seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+                  intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
+                  intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+                  intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
+                  intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
+                  intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+       spin_lock(&dev_priv->rps.client_lock);
        list_for_each_entry_reverse(file, &dev->filelist, lhead) {
                struct drm_i915_file_private *file_priv = file->driver_priv;
                struct task_struct *task;
@@ -2301,17 +2320,20 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
                seq_printf(m, "%s [%d]: %d boosts%s\n",
                           task ? task->comm : "<unknown>",
                           task ? task->pid : -1,
-                          file_priv->rps_boosts,
-                          list_empty(&file_priv->rps_boost) ? "" : ", active");
+                          file_priv->rps.boosts,
+                          list_empty(&file_priv->rps.link) ? "" : ", active");
                rcu_read_unlock();
        }
+       seq_printf(m, "Semaphore boosts: %d%s\n",
+                  dev_priv->rps.semaphores.boosts,
+                  list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
+       seq_printf(m, "MMIO flip boosts: %d%s\n",
+                  dev_priv->rps.mmioflips.boosts,
+                  list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
        seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+       spin_unlock(&dev_priv->rps.client_lock);
 
-       mutex_unlock(&dev_priv->rps.hw_lock);
-unlock:
-       mutex_unlock(&dev->struct_mutex);
-
-       return ret;
+       return 0;
 }
 
 static int i915_llc(struct seq_file *m, void *data)
@@ -5154,6 +5176,9 @@ static int i915_dpcd_show(struct seq_file *m, void *data)
        ssize_t err;
        int i;
 
+       if (connector->status != connector_status_connected)
+               return -ENODEV;
+
        for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
                const struct dpcd_block *b = &i915_dpcd_debug[i];
                size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
index a238889630d8a6d5dd302b3701235d8f8bc53aa0..d2df321ba6349832d300a9551fe863806b2eddce 100644 (file)
@@ -814,7 +814,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        spin_lock_init(&dev_priv->uncore.lock);
        spin_lock_init(&dev_priv->mm.object_stat_lock);
        spin_lock_init(&dev_priv->mmio_flip_lock);
-       mutex_init(&dev_priv->dpio_lock);
+       mutex_init(&dev_priv->sb_lock);
        mutex_init(&dev_priv->modeset_restore_lock);
        mutex_init(&dev_priv->csr_lock);
 
index 51149fb75e96a6f7fd99b554cfab63ed15fd557d..884b4f9b81c4abb163ec181deebb44c3cfc6abce 100644 (file)
@@ -595,6 +595,7 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv);
 static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
                              bool rpm_resume);
 static int skl_resume_prepare(struct drm_i915_private *dev_priv);
+static int bxt_resume_prepare(struct drm_i915_private *dev_priv);
 
 
 static int i915_drm_suspend(struct drm_device *dev)
@@ -811,14 +812,17 @@ static int i915_drm_resume_early(struct drm_device *dev)
        if (IS_VALLEYVIEW(dev_priv))
                ret = vlv_resume_prepare(dev_priv, false);
        if (ret)
-               DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret);
+               DRM_ERROR("Resume prepare failed: %d, continuing anyway\n",
+                         ret);
 
        intel_uncore_early_sanitize(dev, true);
 
-       if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-               hsw_disable_pc8(dev_priv);
+       if (IS_BROXTON(dev))
+               ret = bxt_resume_prepare(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                ret = skl_resume_prepare(dev_priv);
+       else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+               hsw_disable_pc8(dev_priv);
 
        intel_uncore_sanitize(dev);
        intel_power_domains_init_hw(dev_priv);
@@ -989,7 +993,7 @@ static int i915_pm_suspend_late(struct device *dev)
        struct drm_device *drm_dev = dev_to_i915(dev)->dev;
 
        /*
-        * We have a suspedn ordering issue with the snd-hda driver also
+        * We have a suspend ordering issue with the snd-hda driver also
         * requiring our device to be powered up. Due to the lack of a
         * parent/child relationship we currently solve this with a late
         * suspend hook.
@@ -1043,6 +1047,8 @@ static int skl_suspend_complete(struct drm_i915_private *dev_priv)
         */
        intel_csr_load_status_set(dev_priv, FW_UNINITIALIZED);
 
+       skl_uninit_cdclk(dev_priv);
+
        return 0;
 }
 
@@ -1089,6 +1095,7 @@ static int skl_resume_prepare(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
 
+       skl_init_cdclk(dev_priv);
        intel_csr_load_program(dev);
 
        return 0;
@@ -1586,16 +1593,15 @@ static int intel_runtime_resume(struct device *device)
  */
 static int intel_suspend_complete(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
        int ret;
 
-       if (IS_BROXTON(dev))
+       if (IS_BROXTON(dev_priv))
                ret = bxt_suspend_complete(dev_priv);
-       else if (IS_SKYLAKE(dev))
+       else if (IS_SKYLAKE(dev_priv))
                ret = skl_suspend_complete(dev_priv);
-       else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+       else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
                ret = hsw_suspend_complete(dev_priv);
-       else if (IS_VALLEYVIEW(dev))
+       else if (IS_VALLEYVIEW(dev_priv))
                ret = vlv_suspend_complete(dev_priv);
        else
                ret = 0;
index acfa4fc93803ceaa210229807db90b410d086ee8..72f5a3f9dbf243c2da93a4151454eb33ff638274 100644 (file)
@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME            "i915"
 #define DRIVER_DESC            "Intel Graphics"
-#define DRIVER_DATE            "20150508"
+#define DRIVER_DATE            "20150522"
 
 #undef WARN_ON
 /* Many gcc seem to not see through this and fall over :( */
@@ -272,6 +272,30 @@ struct drm_i915_private;
 struct i915_mm_struct;
 struct i915_mmu_object;
 
+struct drm_i915_file_private {
+       struct drm_i915_private *dev_priv;
+       struct drm_file *file;
+
+       struct {
+               spinlock_t lock;
+               struct list_head request_list;
+/* 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+       } mm;
+       struct idr context_idr;
+
+       struct intel_rps_client {
+               struct list_head link;
+               unsigned boosts;
+       } rps;
+
+       struct intel_engine_cs *bsd_ring;
+};
+
 enum intel_dpll_id {
        DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
        /* real shared dpll ids must be >= 0 */
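
The DRM_I915_THROTTLE_JIFFIES window defined in drm_i915_file_private above is what the throttle ioctl consumes: requests younger than 20ms are ignored, and the newest request older than that becomes the wait target (see the recent_enough computation in i915_gem_ring_throttle() further down). A minimal sketch of that selection, assuming client_list links a client's requests oldest-first:

    static struct drm_i915_gem_request *
    throttle_target(struct drm_i915_file_private *file_priv)
    {
            /* Ignore anything emitted within the last frame's worth of time. */
            unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
            struct drm_i915_gem_request *request, *target = NULL;

            spin_lock(&file_priv->mm.lock);
            list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
                    if (time_after_eq(request->emitted_jiffies, recent_enough))
                            break;
                    target = request;
            }
            spin_unlock(&file_priv->mm.lock);

            return target; /* NULL: nothing old enough to throttle against */
    }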
@@ -309,7 +333,7 @@ struct intel_dpll_hw_state {
        uint32_t cfgcr1, cfgcr2;
 
        /* bxt */
-       uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pcsdw12;
+       uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pll10, pcsdw12;
 };
 
 struct intel_shared_dpll_config {
@@ -508,7 +532,7 @@ struct drm_i915_error_state {
        struct drm_i915_error_buffer {
                u32 size;
                u32 name;
-               u32 rseqno, wseqno;
+               u32 rseqno[I915_NUM_RINGS], wseqno;
                u32 gtt_offset;
                u32 read_domains;
                u32 write_domain;
@@ -1065,17 +1089,24 @@ struct intel_gen6_power_mgmt {
        int last_adj;
        enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
+       spinlock_t client_lock;
+       struct list_head clients;
+       bool client_boost;
+
        bool enabled;
        struct delayed_work delayed_resume_work;
-       struct list_head clients;
        unsigned boosts;
 
+       struct intel_rps_client semaphores, mmioflips;
+
        /* manual wa residency calculations */
        struct intel_rps_ei up_ei, down_ei;
 
        /*
         * Protects RPS/RC6 register access and PCU communication.
-        * Must be taken after struct_mutex if nested.
+        * Must be taken after struct_mutex if nested. Note that
+        * this lock may be held for long periods of time when
+        * talking to hw - so only take it when talking to hw!
         */
        struct mutex hw_lock;
 };
@@ -1468,7 +1499,8 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
 
 struct skl_ddb_allocation {
        struct skl_ddb_entry pipe[I915_MAX_PIPES];
-       struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
+       struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */
+       struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* y-plane */
        struct skl_ddb_entry cursor[I915_MAX_PIPES];
 };
 
@@ -1634,8 +1666,8 @@ struct drm_i915_private {
        /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
        struct pm_qos_request pm_qos;
 
-       /* DPIO indirect register protection */
-       struct mutex dpio_lock;
+       /* Sideband mailbox protection */
+       struct mutex sb_lock;
 
        /** Cached value of IMR to avoid reads in updating the bitfield */
        union {
@@ -1684,6 +1716,7 @@ struct drm_i915_private {
        int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
        unsigned int fsb_freq, mem_freq, is_ddr3;
+       unsigned int skl_boot_cdclk;
        unsigned int cdclk_freq;
        unsigned int hpll_freq;
 
@@ -1938,7 +1971,7 @@ struct drm_i915_gem_object {
        struct drm_mm_node *stolen;
        struct list_head global_list;
 
-       struct list_head ring_list;
+       struct list_head ring_list[I915_NUM_RINGS];
        /** Used in execbuf to temporarily hold a ref */
        struct list_head obj_exec_link;
 
@@ -1949,7 +1982,7 @@ struct drm_i915_gem_object {
         * rendering and so a non-zero seqno), and is not set if it is on
         * inactive (ready to be unbound) list.
         */
-       unsigned int active:1;
+       unsigned int active:I915_NUM_RINGS;
 
        /**
         * This is set if the object has been written to since last bound
@@ -2020,8 +2053,17 @@ struct drm_i915_gem_object {
        void *dma_buf_vmapping;
        int vmapping_count;
 
-       /** Breadcrumb of last rendering to the buffer. */
-       struct drm_i915_gem_request *last_read_req;
+       /** Breadcrumb of last rendering to the buffer.
+        * There can only be one writer, but we allow for multiple readers.
+        * If there is a writer, that necessarily implies that all other
+        * read requests are complete - but we may only be lazily clearing
+        * the read requests. A read request is naturally the most recent
+        * request on a ring, so we may have two different write and read
+        * requests on one ring where the write request is older than the
+        * read request. This allows for the CPU to read from an active
+        * buffer by only waiting for the write to complete.
+        */
+       struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
        struct drm_i915_gem_request *last_write_req;
        /** Breadcrumb of last fenced GPU access to the buffer. */
        struct drm_i915_gem_request *last_fenced_req;
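
With one last_read_req slot per ring plus the widened active bitfield, "is this object busy?" becomes a per-ring question rather than a single flag. A hedged sketch of the resulting predicates (illustrative helpers, not functions this patch adds):

    static bool obj_busy_on_ring(struct drm_i915_gem_object *obj, int ring)
    {
            /* active now carries one bit per ring (see the bitfield above). */
            return obj->active & (1 << ring);
    }

    static bool obj_has_pending_write(struct drm_i915_gem_object *obj)
    {
            /* One writer at most; a writer implies reads elsewhere are done. */
            return obj->last_write_req != NULL;
    }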
@@ -2160,10 +2202,12 @@ i915_gem_request_get_ring(struct drm_i915_gem_request *req)
        return req ? req->ring : NULL;
 }
 
-static inline void
+static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
-       kref_get(&req->ref);
+       if (req)
+               kref_get(&req->ref);
+       return req;
 }
 
 static inline void
@@ -2204,22 +2248,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
  * a later patch when the call to i915_seqno_passed() is obsoleted...
  */
 
-struct drm_i915_file_private {
-       struct drm_i915_private *dev_priv;
-       struct drm_file *file;
-
-       struct {
-               spinlock_t lock;
-               struct list_head request_list;
-       } mm;
-       struct idr context_idr;
-
-       struct list_head rps_boost;
-       struct intel_engine_cs *bsd_ring;
-
-       unsigned rps_boosts;
-};
-
 /*
  * A command that requires special handling by the command parser.
  */
@@ -2375,6 +2403,7 @@ struct drm_i915_cmd_table {
 #define SKL_REVID_C0           (0x2)
 #define SKL_REVID_D0           (0x3)
 #define SKL_REVID_E0           (0x4)
+#define SKL_REVID_F0           (0x5)
 
 #define BXT_REVID_A0           (0x0)
 #define BXT_REVID_B0           (0x3)
@@ -2445,6 +2474,9 @@ struct drm_i915_cmd_table {
 
 #define HAS_IPS(dev)           (IS_HSW_ULT(dev) || IS_BROADWELL(dev))
 
+#define HAS_DP_MST(dev)                (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
+                                INTEL_INFO(dev)->gen >= 9)
+
 #define HAS_DDI(dev)           (INTEL_INFO(dev)->has_ddi)
 #define HAS_FPGA_DBG_UNCLAIMED(dev)    (INTEL_INFO(dev)->has_fpga_dbg)
 #define HAS_PSR(dev)           (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
@@ -2820,7 +2852,6 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
-int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -2838,10 +2869,13 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        unsigned reset_counter,
                        bool interruptible,
                        s64 *timeout,
-                       struct drm_i915_file_private *file_priv);
+                       struct intel_rps_client *rps);
 int __must_check i915_wait_request(struct drm_i915_gem_request *req);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+                              bool readonly);
+int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
                                  bool write);
 int __must_check
index f128ed8d6f65d7b40f325d60014f975bc1df9284..be35f0486202d6d5b36db6e6e74a2a3b0c73353a 100644 (file)
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#define RQ_BUG_ON(expr)
+
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-                              bool readonly);
 static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
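
RQ_BUG_ON() is defined away, so the request-state assertions added throughout this file cost nothing in normal builds. A sketch of how a debug build could re-enable them (CONFIG_DRM_I915_DEBUG_REQUESTS is a hypothetical Kconfig knob, not one this patch adds):

    #ifdef CONFIG_DRM_I915_DEBUG_REQUESTS /* hypothetical */
    #define RQ_BUG_ON(expr) BUG_ON(expr)
    #else
    #define RQ_BUG_ON(expr)
    #endif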
@@ -518,8 +518,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                ret = i915_gem_object_wait_rendering(obj, true);
                if (ret)
                        return ret;
-
-               i915_gem_object_retire(obj);
        }
 
        ret = i915_gem_object_get_pages(obj);
@@ -939,8 +937,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                ret = i915_gem_object_wait_rendering(obj, false);
                if (ret)
                        return ret;
-
-               i915_gem_object_retire(obj);
        }
        /* Same trick applies to invalidate partially written cachelines read
         * before writing. */
@@ -1181,16 +1177,16 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
        return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *rq)
+static int __i915_spin_request(struct drm_i915_gem_request *req)
 {
        unsigned long timeout;
 
-       if (i915_gem_request_get_ring(rq)->irq_refcount)
+       if (i915_gem_request_get_ring(req)->irq_refcount)
                return -EBUSY;
 
        timeout = jiffies + 1;
        while (!need_resched()) {
-               if (i915_gem_request_completed(rq, true))
+               if (i915_gem_request_completed(req, true))
                        return 0;
 
                if (time_after_eq(jiffies, timeout))
@@ -1198,7 +1194,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *rq)
 
                cpu_relax_lowlatency();
        }
-       if (i915_gem_request_completed(rq, false))
+       if (i915_gem_request_completed(req, false))
                return 0;
 
        return -EAGAIN;
@@ -1225,7 +1221,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        unsigned reset_counter,
                        bool interruptible,
                        s64 *timeout,
-                       struct drm_i915_file_private *file_priv)
+                       struct intel_rps_client *rps)
 {
        struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
        struct drm_device *dev = ring->dev;
@@ -1239,14 +1235,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
+       if (list_empty(&req->list))
+               return 0;
+
        if (i915_gem_request_completed(req, true))
                return 0;
 
        timeout_expire = timeout ?
                jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
 
-       if (INTEL_INFO(dev)->gen >= 6)
-               gen6_rps_boost(dev_priv, file_priv);
+       if (INTEL_INFO(dev_priv)->gen >= 6)
+               gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
 
        /* Record current time in case interrupted by signal, or wedged */
        trace_i915_gem_request_wait_begin(req);
@@ -1338,6 +1337,63 @@ out:
        return ret;
 }
 
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+{
+       struct drm_i915_file_private *file_priv = request->file_priv;
+
+       if (!file_priv)
+               return;
+
+       spin_lock(&file_priv->mm.lock);
+       list_del(&request->client_list);
+       request->file_priv = NULL;
+       spin_unlock(&file_priv->mm.lock);
+}
+
+static void i915_gem_request_retire(struct drm_i915_gem_request *request)
+{
+       trace_i915_gem_request_retire(request);
+
+       /* We know the GPU must have read the request to have
+        * sent us the seqno + interrupt, so use the position
+        * of tail of the request to update the last known position
+        * of the GPU head.
+        *
+        * Note this requires that we are always called in request
+        * completion order.
+        */
+       request->ringbuf->last_retired_head = request->postfix;
+
+       list_del_init(&request->list);
+       i915_gem_request_remove_from_client(request);
+
+       put_pid(request->pid);
+
+       i915_gem_request_unreference(request);
+}
+
+static void
+__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
+{
+       struct intel_engine_cs *engine = req->ring;
+       struct drm_i915_gem_request *tmp;
+
+       lockdep_assert_held(&engine->dev->struct_mutex);
+
+       if (list_empty(&req->list))
+               return;
+
+       do {
+               tmp = list_first_entry(&engine->request_list,
+                                      typeof(*tmp), list);
+
+               i915_gem_request_retire(tmp);
+       } while (tmp != req);
+
+       WARN_ON(i915_verify_lists(engine->dev));
+}
+
 /**
  * Waits for a request to be signaled, and cleans up the
  * request and object lists appropriately for that event.
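
Because last_retired_head may only move forward, callers never retire a request in isolation; everything older on the same ring is retired first, which is exactly what __i915_gem_request_retire__upto() encodes. In sketch form:

    /* Correct: retires [oldest .. req] in completion order. */
    __i915_gem_request_retire__upto(req);

    /* Wrong: retiring req alone would advance last_retired_head past
     * older, still-unretired requests on the same ring.
     */
    /* i915_gem_request_retire(req); */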
@@ -1348,7 +1404,6 @@ i915_wait_request(struct drm_i915_gem_request *req)
        struct drm_device *dev;
        struct drm_i915_private *dev_priv;
        bool interruptible;
-       unsigned reset_counter;
        int ret;
 
        BUG_ON(req == NULL);
@@ -1367,29 +1422,13 @@ i915_wait_request(struct drm_i915_gem_request *req)
        if (ret)
                return ret;
 
-       reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-       i915_gem_request_reference(req);
-       ret = __i915_wait_request(req, reset_counter,
+       ret = __i915_wait_request(req,
+                                 atomic_read(&dev_priv->gpu_error.reset_counter),
                                  interruptible, NULL, NULL);
-       i915_gem_request_unreference(req);
-       return ret;
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
-{
-       if (!obj->active)
-               return 0;
-
-       /* Manually manage the write flush as we may have not yet
-        * retired the buffer.
-        *
-        * Note that the last_write_req is always the earlier of
-        * the two (read/write) requests, so if we haved successfully waited,
-        * we know we have passed the last write.
-        */
-       i915_gem_request_assign(&obj->last_write_req, NULL);
+       if (ret)
+               return ret;
 
+       __i915_gem_request_retire__upto(req);
        return 0;
 }
 
@@ -1397,22 +1436,56 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
  */
-static __must_check int
+int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
                               bool readonly)
 {
-       struct drm_i915_gem_request *req;
-       int ret;
+       int ret, i;
 
-       req = readonly ? obj->last_write_req : obj->last_read_req;
-       if (!req)
+       if (!obj->active)
                return 0;
 
-       ret = i915_wait_request(req);
-       if (ret)
-               return ret;
+       if (readonly) {
+               if (obj->last_write_req != NULL) {
+                       ret = i915_wait_request(obj->last_write_req);
+                       if (ret)
+                               return ret;
 
-       return i915_gem_object_wait_rendering__tail(obj);
+                       i = obj->last_write_req->ring->id;
+                       if (obj->last_read_req[i] == obj->last_write_req)
+                               i915_gem_object_retire__read(obj, i);
+                       else
+                               i915_gem_object_retire__write(obj);
+               }
+       } else {
+               for (i = 0; i < I915_NUM_RINGS; i++) {
+                       if (obj->last_read_req[i] == NULL)
+                               continue;
+
+                       ret = i915_wait_request(obj->last_read_req[i]);
+                       if (ret)
+                               return ret;
+
+                       i915_gem_object_retire__read(obj, i);
+               }
+               RQ_BUG_ON(obj->active);
+       }
+
+       return 0;
+}
+
+static void
+i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
+                              struct drm_i915_gem_request *req)
+{
+       int ring = req->ring->id;
+
+       if (obj->last_read_req[ring] == req)
+               i915_gem_object_retire__read(obj, ring);
+       else if (obj->last_write_req == req)
+               i915_gem_object_retire__write(obj);
+
+       __i915_gem_request_retire__upto(req);
 }
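
The readonly flavour is what lets the CPU read from a buffer the GPU is still reading: only the last write is waited for, per the breadcrumb comment in i915_drv.h. A sketch of the two call flavours:

    /* CPU read: wait for the last GPU write only. */
    ret = i915_gem_object_wait_rendering(obj, true);

    /* CPU write: also drain every ring still reading the object. */
    ret = i915_gem_object_wait_rendering(obj, false);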
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1420,40 +1493,75 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
  */
 static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-                                           struct drm_i915_file_private *file_priv,
+                                           struct intel_rps_client *rps,
                                            bool readonly)
 {
-       struct drm_i915_gem_request *req;
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_gem_request *requests[I915_NUM_RINGS];
        unsigned reset_counter;
-       int ret;
+       int ret, i, n = 0;
 
        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
        BUG_ON(!dev_priv->mm.interruptible);
 
-       req = readonly ? obj->last_write_req : obj->last_read_req;
-       if (!req)
+       if (!obj->active)
                return 0;
 
        ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
        if (ret)
                return ret;
 
-       ret = i915_gem_check_olr(req);
-       if (ret)
-               return ret;
-
        reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-       i915_gem_request_reference(req);
+
+       if (readonly) {
+               struct drm_i915_gem_request *req;
+
+               req = obj->last_write_req;
+               if (req == NULL)
+                       return 0;
+
+               ret = i915_gem_check_olr(req);
+               if (ret)
+                       goto err;
+
+               requests[n++] = i915_gem_request_reference(req);
+       } else {
+               for (i = 0; i < I915_NUM_RINGS; i++) {
+                       struct drm_i915_gem_request *req;
+
+                       req = obj->last_read_req[i];
+                       if (req == NULL)
+                               continue;
+
+                       ret = i915_gem_check_olr(req);
+                       if (ret)
+                               goto err;
+
+                       requests[n++] = i915_gem_request_reference(req);
+               }
+       }
+
        mutex_unlock(&dev->struct_mutex);
-       ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv);
+       for (i = 0; ret == 0 && i < n; i++)
+               ret = __i915_wait_request(requests[i], reset_counter, true,
+                                         NULL, rps);
        mutex_lock(&dev->struct_mutex);
-       i915_gem_request_unreference(req);
-       if (ret)
-               return ret;
 
-       return i915_gem_object_wait_rendering__tail(obj);
+err:
+       for (i = 0; i < n; i++) {
+               if (ret == 0)
+                       i915_gem_object_retire_request(obj, requests[i]);
+               i915_gem_request_unreference(requests[i]);
+       }
+
+       return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+       struct drm_i915_file_private *fpriv = file->driver_priv;
+       return &fpriv->rps;
 }
 
 /**
@@ -1498,7 +1606,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
         * to catch cases where we are gazumped.
         */
        ret = i915_gem_object_wait_rendering__nonblocking(obj,
-                                                         file->driver_priv,
+                                                         to_rps_client(file),
                                                          !write_domain);
        if (ret)
                goto unref;
@@ -1919,7 +2027,6 @@ i915_gem_mmap_gtt(struct drm_file *file,
                  uint32_t handle,
                  uint64_t *offset)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj;
        int ret;
 
@@ -2235,78 +2342,58 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
        return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-                              struct intel_engine_cs *ring)
+void i915_vma_move_to_active(struct i915_vma *vma,
+                            struct intel_engine_cs *ring)
 {
-       struct drm_i915_gem_request *req;
-       struct intel_engine_cs *old_ring;
-
-       BUG_ON(ring == NULL);
-
-       req = intel_ring_get_request(ring);
-       old_ring = i915_gem_request_get_ring(obj->last_read_req);
-
-       if (old_ring != ring && obj->last_write_req) {
-               /* Keep the request relative to the current ring */
-               i915_gem_request_assign(&obj->last_write_req, req);
-       }
+       struct drm_i915_gem_object *obj = vma->obj;
 
        /* Add a reference if we're newly entering the active list. */
-       if (!obj->active) {
+       if (obj->active == 0)
                drm_gem_object_reference(&obj->base);
-               obj->active = 1;
-       }
+       obj->active |= intel_ring_flag(ring);
 
-       list_move_tail(&obj->ring_list, &ring->active_list);
+       list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
+       i915_gem_request_assign(&obj->last_read_req[ring->id],
+                               intel_ring_get_request(ring));
 
-       i915_gem_request_assign(&obj->last_read_req, req);
+       list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-                            struct intel_engine_cs *ring)
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 {
-       list_move_tail(&vma->mm_list, &vma->vm->active_list);
-       return i915_gem_object_move_to_active(vma->obj, ring);
+       RQ_BUG_ON(obj->last_write_req == NULL);
+       RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
+
+       i915_gem_request_assign(&obj->last_write_req, NULL);
+       intel_fb_obj_flush(obj, true);
 }
 
 static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 {
        struct i915_vma *vma;
 
-       BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-       BUG_ON(!obj->active);
+       RQ_BUG_ON(obj->last_read_req[ring] == NULL);
+       RQ_BUG_ON(!(obj->active & (1 << ring)));
+
+       list_del_init(&obj->ring_list[ring]);
+       i915_gem_request_assign(&obj->last_read_req[ring], NULL);
+
+       if (obj->last_write_req && obj->last_write_req->ring->id == ring)
+               i915_gem_object_retire__write(obj);
+
+       obj->active &= ~(1 << ring);
+       if (obj->active)
+               return;
 
        list_for_each_entry(vma, &obj->vma_list, vma_link) {
                if (!list_empty(&vma->mm_list))
                        list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
        }
 
-       intel_fb_obj_flush(obj, true);
-
-       list_del_init(&obj->ring_list);
-
-       i915_gem_request_assign(&obj->last_read_req, NULL);
-       i915_gem_request_assign(&obj->last_write_req, NULL);
-       obj->base.write_domain = 0;
-
        i915_gem_request_assign(&obj->last_fenced_req, NULL);
-
-       obj->active = 0;
        drm_gem_object_unreference(&obj->base);
-
-       WARN_ON(i915_verify_lists(dev));
-}
-
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-       if (obj->last_read_req == NULL)
-               return;
-
-       if (i915_gem_request_completed(obj->last_read_req, true))
-               i915_gem_object_move_to_inactive(obj);
 }
 
 static int
@@ -2483,20 +2570,6 @@ int __i915_add_request(struct intel_engine_cs *ring,
        return 0;
 }
 
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
-       struct drm_i915_file_private *file_priv = request->file_priv;
-
-       if (!file_priv)
-               return;
-
-       spin_lock(&file_priv->mm.lock);
-       list_del(&request->client_list);
-       request->file_priv = NULL;
-       spin_unlock(&file_priv->mm.lock);
-}
-
 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
                                   const struct intel_context *ctx)
 {
@@ -2542,16 +2615,6 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
        }
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
-{
-       list_del(&request->list);
-       i915_gem_request_remove_from_client(request);
-
-       put_pid(request->pid);
-
-       i915_gem_request_unreference(request);
-}
-
 void i915_gem_request_free(struct kref *req_ref)
 {
        struct drm_i915_gem_request *req = container_of(req_ref,
@@ -2576,38 +2639,38 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
                           struct intel_context *ctx)
 {
        struct drm_i915_private *dev_priv = to_i915(ring->dev);
-       struct drm_i915_gem_request *rq;
+       struct drm_i915_gem_request *req;
        int ret;
 
        if (ring->outstanding_lazy_request)
                return 0;
 
-       rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
-       if (rq == NULL)
+       req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+       if (req == NULL)
                return -ENOMEM;
 
-       kref_init(&rq->ref);
-       rq->i915 = dev_priv;
+       kref_init(&req->ref);
+       req->i915 = dev_priv;
 
-       ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
-       if (ret) {
-               kfree(rq);
-               return ret;
-       }
+       ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+       if (ret)
+               goto err;
 
-       rq->ring = ring;
+       req->ring = ring;
 
        if (i915.enable_execlists)
-               ret = intel_logical_ring_alloc_request_extras(rq, ctx);
+               ret = intel_logical_ring_alloc_request_extras(req, ctx);
        else
-               ret = intel_ring_alloc_request_extras(rq);
-       if (ret) {
-               kfree(rq);
-               return ret;
-       }
+               ret = intel_ring_alloc_request_extras(req);
+       if (ret)
+               goto err;
 
-       ring->outstanding_lazy_request = rq;
+       ring->outstanding_lazy_request = req;
        return 0;
+
+err:
+       kmem_cache_free(dev_priv->requests, req);
+       return ret;
 }
 
 struct drm_i915_gem_request *
@@ -2652,9 +2715,9 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 
                obj = list_first_entry(&ring->active_list,
                                       struct drm_i915_gem_object,
-                                      ring_list);
+                                      ring_list[ring->id]);
 
-               i915_gem_object_move_to_inactive(obj);
+               i915_gem_object_retire__read(obj, ring->id);
        }
 
        /*
@@ -2690,7 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
                                           struct drm_i915_gem_request,
                                           list);
 
-               i915_gem_free_request(request);
+               i915_gem_request_retire(request);
        }
 
        /* This may not have been flushed before the reset, so clean it now */
@@ -2738,6 +2801,8 @@ void i915_gem_reset(struct drm_device *dev)
        i915_gem_context_reset(dev);
 
        i915_gem_restore_fences(dev);
+
+       WARN_ON(i915_verify_lists(dev));
 }
 
 /**
@@ -2746,11 +2811,11 @@ void i915_gem_reset(struct drm_device *dev)
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
-       if (list_empty(&ring->request_list))
-               return;
-
        WARN_ON(i915_verify_lists(ring->dev));
 
+       if (list_empty(&ring->active_list))
+               return;
+
        /* Retire requests first as we use it above for the early return.
         * If we retire requests last, we may use a later seqno and so clear
         * the request lists without clearing the active list, leading to
@@ -2766,16 +2831,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
                if (!i915_gem_request_completed(request, true))
                        break;
 
-               trace_i915_gem_request_retire(request);
-
-               /* We know the GPU must have read the request to have
-                * sent us the seqno + interrupt, so use the position
-                * of tail of the request to update the last known position
-                * of the GPU head.
-                */
-               request->ringbuf->last_retired_head = request->postfix;
-
-               i915_gem_free_request(request);
+               i915_gem_request_retire(request);
        }
 
        /* Move any buffers on the active list that are no longer referenced
@@ -2787,12 +2843,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
                obj = list_first_entry(&ring->active_list,
                                      struct drm_i915_gem_object,
-                                     ring_list);
+                                     ring_list[ring->id]);
 
-               if (!i915_gem_request_completed(obj->last_read_req, true))
+               if (!list_empty(&obj->last_read_req[ring->id]->list))
                        break;
 
-               i915_gem_object_move_to_inactive(obj);
+               i915_gem_object_retire__read(obj, ring->id);
        }
 
        if (unlikely(ring->trace_irq_req &&
@@ -2887,17 +2943,30 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-       struct intel_engine_cs *ring;
-       int ret;
+       int ret, i;
+
+       if (!obj->active)
+               return 0;
+
+       for (i = 0; i < I915_NUM_RINGS; i++) {
+               struct drm_i915_gem_request *req;
+
+               req = obj->last_read_req[i];
+               if (req == NULL)
+                       continue;
 
-       if (obj->active) {
-               ring = i915_gem_request_get_ring(obj->last_read_req);
+               if (list_empty(&req->list))
+                       goto retire;
 
-               ret = i915_gem_check_olr(obj->last_read_req);
+               ret = i915_gem_check_olr(req);
                if (ret)
                        return ret;
 
-               i915_gem_retire_requests_ring(ring);
+               if (i915_gem_request_completed(req, true)) {
+                       __i915_gem_request_retire__upto(req);
+retire:
+                       i915_gem_object_retire__read(obj, i);
+               }
        }
 
        return 0;
@@ -2931,9 +3000,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_wait *args = data;
        struct drm_i915_gem_object *obj;
-       struct drm_i915_gem_request *req;
+       struct drm_i915_gem_request *req[I915_NUM_RINGS];
        unsigned reset_counter;
-       int ret = 0;
+       int i, n = 0;
+       int ret;
 
        if (args->flags != 0)
                return -EINVAL;
@@ -2953,11 +3023,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        if (ret)
                goto out;
 
-       if (!obj->active || !obj->last_read_req)
+       if (!obj->active)
                goto out;
 
-       req = obj->last_read_req;
-
        /* Do this after OLR check to make sure we make forward progress polling
         * on this IOCTL with a timeout == 0 (like busy ioctl)
         */
@@ -2968,13 +3036,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
        drm_gem_object_unreference(&obj->base);
        reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-       i915_gem_request_reference(req);
+
+       for (i = 0; i < I915_NUM_RINGS; i++) {
+               if (obj->last_read_req[i] == NULL)
+                       continue;
+
+               req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
+       }
+
        mutex_unlock(&dev->struct_mutex);
 
-       ret = __i915_wait_request(req, reset_counter, true,
-                                 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-                                 file->driver_priv);
-       i915_gem_request_unreference__unlocked(req);
+       for (i = 0; i < n; i++) {
+               if (ret == 0)
+                       ret = __i915_wait_request(req[i], reset_counter, true,
+                                                 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
+                                                 file->driver_priv);
+               i915_gem_request_unreference__unlocked(req[i]);
+       }
        return ret;
 
 out:
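
The wait ioctl now follows a reference-then-drop-the-lock pattern: every outstanding read request is referenced under struct_mutex, the lock is released, and only then does the ioctl sleep. i915_gem_request_reference() returning its argument is what makes the req[n++] = ... collection read cleanly, and the ret == 0 guard keeps the loop running after a failed wait purely to drop the remaining references. A condensed sketch (declarations as in the ioctl body above):

    /* 1. Under the lock: snapshot and reference the wait targets. */
    for (i = n = 0; i < I915_NUM_RINGS; i++)
            if (obj->last_read_req[i])
                    req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
    mutex_unlock(&dev->struct_mutex);

    /* 2. Lockless: wait, but always unreference, even after an error. */
    for (i = 0; i < n; i++) {
            if (ret == 0)
                    ret = __i915_wait_request(req[i], reset_counter,
                                              true, NULL, NULL);
            i915_gem_request_unreference__unlocked(req[i]);
    }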
@@ -2983,6 +3061,59 @@ out:
        return ret;
 }
 
+static int
+__i915_gem_object_sync(struct drm_i915_gem_object *obj,
+                      struct intel_engine_cs *to,
+                      struct drm_i915_gem_request *req)
+{
+       struct intel_engine_cs *from;
+       int ret;
+
+       from = i915_gem_request_get_ring(req);
+       if (to == from)
+               return 0;
+
+       if (i915_gem_request_completed(req, true))
+               return 0;
+
+       ret = i915_gem_check_olr(req);
+       if (ret)
+               return ret;
+
+       if (!i915_semaphore_is_enabled(obj->base.dev)) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               ret = __i915_wait_request(req,
+                                         atomic_read(&i915->gpu_error.reset_counter),
+                                         i915->mm.interruptible,
+                                         NULL,
+                                         &i915->rps.semaphores);
+               if (ret)
+                       return ret;
+
+               i915_gem_object_retire_request(obj, req);
+       } else {
+               int idx = intel_ring_sync_index(from, to);
+               u32 seqno = i915_gem_request_get_seqno(req);
+
+               if (seqno <= from->semaphore.sync_seqno[idx])
+                       return 0;
+
+               trace_i915_gem_ring_sync_to(from, to, req);
+               ret = to->semaphore.sync_to(to, from, seqno);
+               if (ret)
+                       return ret;
+
+               /* We use last_read_req because sync_to()
+                * might have just caused seqno wrap under
+                * the radar.
+                */
+               from->semaphore.sync_seqno[idx] =
+                       i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+       }
+
+       return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -2991,7 +3122,17 @@ out:
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
- * rather than a particular GPU ring.
+ * rather than a particular GPU ring. Conceptually we serialise writes
+ * between engines inside the GPU. We only allow one engine to write
+ * into a buffer at any time, but multiple readers. To ensure each has
+ * a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
  *
  * Returns 0 if successful, else propagates up the lower layer error.
  */
@@ -2999,41 +3140,32 @@ int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
                     struct intel_engine_cs *to)
 {
-       struct intel_engine_cs *from;
-       u32 seqno;
-       int ret, idx;
-
-       from = i915_gem_request_get_ring(obj->last_read_req);
-
-       if (from == NULL || to == from)
-               return 0;
+       const bool readonly = obj->base.pending_write_domain == 0;
+       struct drm_i915_gem_request *req[I915_NUM_RINGS];
+       int ret, i, n;
 
-       if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-               return i915_gem_object_wait_rendering(obj, false);
-
-       idx = intel_ring_sync_index(from, to);
-
-       seqno = i915_gem_request_get_seqno(obj->last_read_req);
-       /* Optimization: Avoid semaphore sync when we are sure we already
-        * waited for an object with higher seqno */
-       if (seqno <= from->semaphore.sync_seqno[idx])
+       if (!obj->active)
                return 0;
 
-       ret = i915_gem_check_olr(obj->last_read_req);
-       if (ret)
-               return ret;
+       if (to == NULL)
+               return i915_gem_object_wait_rendering(obj, readonly);
 
-       trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
-       ret = to->semaphore.sync_to(to, from, seqno);
-       if (!ret)
-               /* We use last_read_req because sync_to()
-                * might have just caused seqno wrap under
-                * the radar.
-                */
-               from->semaphore.sync_seqno[idx] =
-                               i915_gem_request_get_seqno(obj->last_read_req);
+       n = 0;
+       if (readonly) {
+               if (obj->last_write_req)
+                       req[n++] = obj->last_write_req;
+       } else {
+               for (i = 0; i < I915_NUM_RINGS; i++)
+                       if (obj->last_read_req[i])
+                               req[n++] = obj->last_read_req[i];
+       }
+       for (i = 0; i < n; i++) {
+               ret = __i915_gem_object_sync(obj, to, req[i]);
+               if (ret)
+                       return ret;
+       }
 
-       return ret;
+       return 0;
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
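
The two rules above reduce to a little request bookkeeping per engine. A worked example with hypothetical ring names, purely for illustration (pending_write_domain is normally set by execbuffer, not assigned by hand): suppose obj was last written on the render ring and last read on the blitter.

    /* Queueing a read on a third ring: only the write matters. */
    obj->base.pending_write_domain = 0;          /* readonly request */
    ret = i915_gem_object_sync(obj, vebox_ring); /* orders after last_write_req */

    /* Queueing a write: every outstanding read matters too. */
    obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
    ret = i915_gem_object_sync(obj, vebox_ring); /* orders after all last_read_req[] */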
@@ -3079,7 +3211,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 
        BUG_ON(obj->pages == NULL);
 
-       ret = i915_gem_object_finish_gpu(obj);
+       ret = i915_gem_object_wait_rendering(obj, false);
        if (ret)
                return ret;
        /* Continue on if we fail due to EIO, the GPU is hung so we
@@ -3119,10 +3251,6 @@ int i915_vma_unbind(struct i915_vma *vma)
        /* Since the unbound list is global, only move to that list if
         * no more VMAs exist. */
        if (list_empty(&obj->vma_list)) {
-               /* Throw away the active reference before
-                * moving to the unbound list. */
-               i915_gem_object_retire(obj);
-
                i915_gem_gtt_finish_object(obj);
                list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
        }
@@ -3155,6 +3283,7 @@ int i915_gpu_idle(struct drm_device *dev)
                        return ret;
        }
 
+       WARN_ON(i915_verify_lists(dev));
        return 0;
 }
 
@@ -3777,8 +3906,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_retire(obj);
-
        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
@@ -3854,7 +3981,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
        }
 
        if (i915_gem_obj_bound_any(obj)) {
-               ret = i915_gem_object_finish_gpu(obj);
+               ret = i915_gem_object_wait_rendering(obj, false);
                if (ret)
                        return ret;
 
@@ -3976,11 +4103,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        u32 old_read_domains, old_write_domain;
        int ret;
 
-       if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
-               ret = i915_gem_object_sync(obj, pipelined);
-               if (ret)
-                       return ret;
-       }
+       ret = i915_gem_object_sync(obj, pipelined);
+       if (ret)
+               return ret;
 
        /* Mark the pin_display early so that we account for the
         * display coherency whilst setting up the cache domains.
@@ -4045,23 +4170,6 @@ i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
        obj->pin_display--;
 }
 
-int
-i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
-{
-       int ret;
-
-       if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
-               return 0;
-
-       ret = i915_gem_object_wait_rendering(obj, false);
-       if (ret)
-               return ret;
-
-       /* Ensure that we invalidate the GPU's caches and TLBs. */
-       obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
-       return 0;
-}
-
 /**
  * Moves a single object to the CPU read, and possibly write domain.
  *
@@ -4081,7 +4189,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_retire(obj);
        i915_gem_object_flush_gtt_write_domain(obj);
 
        old_write_domain = obj->base.write_domain;
@@ -4132,7 +4239,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_file_private *file_priv = file->driver_priv;
-       unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
+       unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
        struct drm_i915_gem_request *request, *target = NULL;
        unsigned reset_counter;
        int ret;
@@ -4370,15 +4477,15 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
         * necessary flushes here.
         */
        ret = i915_gem_object_flush_active(obj);
+       if (ret)
+               goto unref;
 
-       args->busy = obj->active;
-       if (obj->last_read_req) {
-               struct intel_engine_cs *ring;
-               BUILD_BUG_ON(I915_NUM_RINGS > 16);
-               ring = i915_gem_request_get_ring(obj->last_read_req);
-               args->busy |= intel_ring_flag(ring) << 16;
-       }
+       BUILD_BUG_ON(I915_NUM_RINGS > 16);
+       args->busy = obj->active << 16;
+       if (obj->last_write_req)
+               args->busy |= obj->last_write_req->ring->id;
 
+unref:
        drm_gem_object_unreference(&obj->base);
 unlock:
        mutex_unlock(&dev->struct_mutex);
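
For userspace, the busy word is now a read bitmask in the top 16 bits and the last writer's ring id in the low bits. A hedged decode sketch (libdrm-style; the meaning of the low word assumes the kernel's ring numbering, and a low word of 0 with read bits set can mean either "no writer" or "render ring writer"):

    struct drm_i915_gem_busy busy = { .handle = handle };

    drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);

    uint32_t read_rings = busy.busy >> 16;    /* rings still reading */
    uint32_t write_ring = busy.busy & 0xffff; /* ring still writing, if any */
    int idle = busy.busy == 0;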
@@ -4452,8 +4559,11 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
                          const struct drm_i915_gem_object_ops *ops)
 {
+       int i;
+
        INIT_LIST_HEAD(&obj->global_list);
-       INIT_LIST_HEAD(&obj->ring_list);
+       for (i = 0; i < I915_NUM_RINGS; i++)
+               INIT_LIST_HEAD(&obj->ring_list[i]);
        INIT_LIST_HEAD(&obj->obj_exec_link);
        INIT_LIST_HEAD(&obj->vma_list);
        INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -5112,10 +5222,10 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
        }
        spin_unlock(&file_priv->mm.lock);
 
-       if (!list_empty(&file_priv->rps_boost)) {
-               mutex_lock(&to_i915(dev)->rps.hw_lock);
-               list_del(&file_priv->rps_boost);
-               mutex_unlock(&to_i915(dev)->rps.hw_lock);
+       if (!list_empty(&file_priv->rps.link)) {
+               spin_lock(&to_i915(dev)->rps.client_lock);
+               list_del(&file_priv->rps.link);
+               spin_unlock(&to_i915(dev)->rps.client_lock);
        }
 }
 
@@ -5133,7 +5243,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
        file->driver_priv = file_priv;
        file_priv->dev_priv = dev->dev_private;
        file_priv->file = file;
-       INIT_LIST_HEAD(&file_priv->rps_boost);
+       INIT_LIST_HEAD(&file_priv->rps.link);
 
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);
index 5a47eb5e3c5dcadb0c776dc7df7f48a970c1c1fc..8867818b140173fc3c107b299a7b553a6287f8f9 100644 (file)
@@ -753,8 +753,6 @@ static int do_switch(struct intel_engine_cs *ring,
                 * swapped, but there is no way to do that yet.
                 */
                from->legacy_hw_ctx.rcs_state->dirty = 1;
-               BUG_ON(i915_gem_request_get_ring(
-                       from->legacy_hw_ctx.rcs_state->last_read_req) != ring);
 
                /* obj is kept alive until the next request by its active ref */
                i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
index f462d1b51d973db98dc48492a2223b4eb065e626..17299d04189fcc91b30b703787174914681bcdbf 100644 (file)
@@ -34,82 +34,34 @@ int
 i915_verify_lists(struct drm_device *dev)
 {
        static int warned;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_object *obj;
+       struct intel_engine_cs *ring;
        int err = 0;
+       int i;
 
        if (warned)
                return 0;
 
-       list_for_each_entry(obj, &dev_priv->render_ring.active_list, list) {
-               if (obj->base.dev != dev ||
-                   !atomic_read(&obj->base.refcount.refcount)) {
-                       DRM_ERROR("freed render active %p\n", obj);
-                       err++;
-                       break;
-               } else if (!obj->active ||
-                          (obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) {
-                       DRM_ERROR("invalid render active %p (a %d r %x)\n",
-                                 obj,
-                                 obj->active,
-                                 obj->base.read_domains);
-                       err++;
-               } else if (obj->base.write_domain && list_empty(&obj->gpu_write_list)) {
-                       DRM_ERROR("invalid render active %p (w %x, gwl %d)\n",
-                                 obj,
-                                 obj->base.write_domain,
-                                 !list_empty(&obj->gpu_write_list));
-                       err++;
-               }
-       }
-
-       list_for_each_entry(obj, &dev_priv->mm.flushing_list, list) {
-               if (obj->base.dev != dev ||
-                   !atomic_read(&obj->base.refcount.refcount)) {
-                       DRM_ERROR("freed flushing %p\n", obj);
-                       err++;
-                       break;
-               } else if (!obj->active ||
-                          (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 ||
-                          list_empty(&obj->gpu_write_list)) {
-                       DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n",
-                                 obj,
-                                 obj->active,
-                                 obj->base.write_domain,
-                                 !list_empty(&obj->gpu_write_list));
-                       err++;
-               }
-       }
-
-       list_for_each_entry(obj, &dev_priv->mm.gpu_write_list, gpu_write_list) {
-               if (obj->base.dev != dev ||
-                   !atomic_read(&obj->base.refcount.refcount)) {
-                       DRM_ERROR("freed gpu write %p\n", obj);
-                       err++;
-                       break;
-               } else if (!obj->active ||
-                          (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) {
-                       DRM_ERROR("invalid gpu write %p (a %d w %x)\n",
-                                 obj,
-                                 obj->active,
-                                 obj->base.write_domain);
-                       err++;
-               }
-       }
-
-       list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
-               if (obj->base.dev != dev ||
-                   !atomic_read(&obj->base.refcount.refcount)) {
-                       DRM_ERROR("freed inactive %p\n", obj);
-                       err++;
-                       break;
-               } else if (obj->pin_count || obj->active ||
-                          (obj->base.write_domain & I915_GEM_GPU_DOMAINS)) {
-                       DRM_ERROR("invalid inactive %p (p %d a %d w %x)\n",
-                                 obj,
-                                 obj->pin_count, obj->active,
-                                 obj->base.write_domain);
-                       err++;
+       for_each_ring(ring, dev_priv, i) {
+               list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) {
+                       if (obj->base.dev != dev ||
+                           !atomic_read(&obj->base.refcount.refcount)) {
+                               DRM_ERROR("%s: freed active obj %p\n",
+                                         ring->name, obj);
+                               err++;
+                               break;
+                       } else if (!obj->active ||
+                                  obj->last_read_req[ring->id] == NULL) {
+                               DRM_ERROR("%s: invalid active obj %p\n",
+                                         ring->name, obj);
+                               err++;
+                       } else if (obj->base.write_domain) {
+                               DRM_ERROR("%s: invalid write obj %p (w %x)\n",
+                                         ring->name,
+                                         obj, obj->base.write_domain);
+                               err++;
+                       }
                }
        }
 
index 560c79a8a43d5cac451ce2001ffac0578d89259c..bd0e4bda2c649a54bb97a1b791ecb6d52e4438f1 100644 (file)
@@ -889,6 +889,7 @@ static int
 i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
                                struct list_head *vmas)
 {
+       const unsigned other_rings = ~intel_ring_flag(ring);
        struct i915_vma *vma;
        uint32_t flush_domains = 0;
        bool flush_chipset = false;
@@ -896,9 +897,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
-               ret = i915_gem_object_sync(obj, ring);
-               if (ret)
-                       return ret;
+
+               if (obj->active & other_rings) {
+                       ret = i915_gem_object_sync(obj, ring);
+                       if (ret)
+                               return ret;
+               }
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        flush_chipset |= i915_gem_clflush_object(obj, false);
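
The check added above avoids a ring-to-ring synchronisation when an object is busy only on the ring being submitted to: other_rings = ~intel_ring_flag(ring) masks the target ring out of the object's active mask. A minimal user-space sketch of the bitmask idea (hypothetical ring flags, not the i915 API):

    #include <stdio.h>

    /* Hypothetical stand-ins: each ring owns one bit, and an object's
     * "active" word has a bit set for every ring still using it. */
    #define RING_RENDER (1u << 0)
    #define RING_BSD    (1u << 1)
    #define RING_BLT    (1u << 2)

    static int needs_sync(unsigned obj_active, unsigned target_ring)
    {
            /* Only rings other than the target matter: if the object is
             * busy solely on the ring we are submitting to, the ring's
             * own ordering already serialises the access. */
            const unsigned other_rings = ~target_ring;

            return (obj_active & other_rings) != 0;
    }

    int main(void)
    {
            printf("%d\n", needs_sync(RING_RENDER, RING_RENDER)); /* 0 */
            printf("%d\n", needs_sync(RING_BSD, RING_RENDER));    /* 1 */
            return 0;
    }
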
index e3bcc3ba7e405f28dd764752ba72d941222ccb20..619dad1b23863716972a23b12226b16af533a068 100644 (file)
@@ -757,7 +757,7 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
        WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
 
        /* FIXME: upper bound must not overflow 32 bits  */
-       WARN_ON((start + length) >= (1ULL << 32));
+       WARN_ON((start + length) > (1ULL << 32));
 
        gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
                if (pd)
@@ -952,6 +952,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
        ppgtt->base.start = 0;
        ppgtt->base.total = 1ULL << 32;
+       if (IS_ENABLED(CONFIG_X86_32))
+               /* While we have a proliferation of size_t variables,
+                * we cannot represent the full ppgtt size on 32-bit,
+                * so limit it to the same size as the GGTT (currently
+                * 2GiB).
+                */
+               ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
        ppgtt->base.cleanup = gen8_ppgtt_cleanup;
        ppgtt->base.allocate_va_range = gen8_alloc_va_range;
        ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
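
The IS_ENABLED(CONFIG_X86_32) clamp above exists because the full gen8 ppgtt range (1ULL << 32) does not fit in a 32-bit size_t. A quick illustration of the truncation (plain user-space C, not driver code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t total = 1ULL << 32;  /* full gen8 ppgtt: 4 GiB */

            /* On a 32-bit build size_t is 32 bits wide, so the range
             * silently truncates to 0 -- hence the clamp to the GGTT
             * size in the hunk above. */
            size_t truncated = (size_t)total;

            printf("64-bit total: %llu\n", (unsigned long long)total);
            printf("as size_t:    %zu\n", truncated);
            return 0;
    }
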
index 4039ede158be237cb34c165634e5d471159dc710..1f4e5a32a16e568cae0cc340c39482d13af7ef41 100644 (file)
@@ -219,11 +219,14 @@ i915_mmu_notifier_add(struct drm_device *dev,
                      struct i915_mmu_object *mo)
 {
        struct interval_tree_node *it;
-       int ret;
+       int ret = 0;
 
-       ret = i915_mutex_lock_interruptible(dev);
-       if (ret)
-               return ret;
+       /* By this point we have already done a lot of expensive setup that
+        * we do not want to repeat just because the caller (e.g. X) has a
+        * signal pending (and partly because of that expensive setup, X
+        * using an interrupt timer is likely to get stuck in an EINTR loop).
+        */
+       mutex_lock(&dev->struct_mutex);
 
        /* Make sure we drop the final active reference (and thereby
         * remove the objects from the interval tree) before we do
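
Replacing i915_mutex_lock_interruptible() with mutex_lock() here sidesteps a livelock: a caller that always has a signal pending (such as X with an interval timer armed) would otherwise redo the whole expensive setup on every -EINTR. A minimal sketch of that failure mode, with hypothetical stand-ins for the kernel primitives:

    #include <errno.h>
    #include <stdio.h>

    static int signal_always_pending = 1;   /* caller keeps a timer armed */

    static void expensive_setup(void) { puts("expensive setup redone"); }

    /* An interruptible lock backs off whenever a signal is pending. */
    static int lock_interruptible(void)
    {
            return signal_always_pending ? -EINTR : 0;
    }

    int main(void)
    {
            int ret, attempts = 0;

            do {
                    expensive_setup();       /* repeated on every retry */
                    ret = lock_interruptible();
            } while (ret == -EINTR && ++attempts < 3); /* capped for demo */

            printf("gave up after %d retries (ret=%d)\n", attempts, ret);
            return 0;
    }
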
index a3e330d2a1d8531eb1ee7c6fb18aae81a229d4fc..6f4256918f7694804f992cd2466db250d7490ad0 100644 (file)
@@ -192,15 +192,20 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
                                struct drm_i915_error_buffer *err,
                                int count)
 {
+       int i;
+
        err_printf(m, "  %s [%d]:\n", name, count);
 
        while (count--) {
-               err_printf(m, "    %08x %8u %02x %02x %x %x",
+               err_printf(m, "    %08x %8u %02x %02x ",
                           err->gtt_offset,
                           err->size,
                           err->read_domains,
-                          err->write_domain,
-                          err->rseqno, err->wseqno);
+                          err->write_domain);
+               for (i = 0; i < I915_NUM_RINGS; i++)
+                       err_printf(m, "%02x ", err->rseqno[i]);
+
+               err_printf(m, "] %02x", err->wseqno);
                err_puts(m, pin_flag(err->pinned));
                err_puts(m, tiling_flag(err->tiling));
                err_puts(m, dirty_flag(err->dirty));
@@ -681,10 +686,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
                       struct i915_vma *vma)
 {
        struct drm_i915_gem_object *obj = vma->obj;
+       int i;
 
        err->size = obj->base.size;
        err->name = obj->base.name;
-       err->rseqno = i915_gem_request_get_seqno(obj->last_read_req);
+       for (i = 0; i < I915_NUM_RINGS; i++)
+               err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]);
        err->wseqno = i915_gem_request_get_seqno(obj->last_write_req);
        err->gtt_offset = vma->node.start;
        err->read_domains = obj->base.read_domains;
@@ -697,8 +704,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
        err->dirty = obj->dirty;
        err->purgeable = obj->madv != I915_MADV_WILLNEED;
        err->userptr = obj->userptr.mm != NULL;
-       err->ring = obj->last_read_req ?
-                       i915_gem_request_get_ring(obj->last_read_req)->id : -1;
+       err->ring = obj->last_write_req ?
+                       i915_gem_request_get_ring(obj->last_write_req)->id : -1;
        err->cache_level = obj->cache_level;
 }
 
index 9da955e4f3554208f59a15b6ae6b4fc69b9be316..e6bb72dca3ffb15ae732aac0291dc13806eaba92 100644 (file)
@@ -79,7 +79,7 @@ static const u32 hpd_status_g4x[HPD_NUM_PINS] = {
        [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
 };
 
-static const u32 hpd_status_i915[HPD_NUM_PINS] = { /* i915 and valleyview are the same */
+static const u32 hpd_status_i915[HPD_NUM_PINS] = {
        [HPD_CRT] = CRT_HOTPLUG_INT_STATUS,
        [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915,
        [HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I915,
@@ -1070,12 +1070,25 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
        return events;
 }
 
+static bool any_waiters(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *ring;
+       int i;
+
+       for_each_ring(ring, dev_priv, i)
+               if (ring->irq_refcount)
+                       return true;
+
+       return false;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
        struct drm_i915_private *dev_priv =
                container_of(work, struct drm_i915_private, rps.work);
+       bool client_boost;
+       int new_delay, adj, min, max;
        u32 pm_iir;
-       int new_delay, adj;
 
        spin_lock_irq(&dev_priv->irq_lock);
        /* Speed up work cancelation during disabling rps interrupts. */
@@ -1087,12 +1100,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
        dev_priv->rps.pm_iir = 0;
        /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
        gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+       client_boost = dev_priv->rps.client_boost;
+       dev_priv->rps.client_boost = false;
        spin_unlock_irq(&dev_priv->irq_lock);
 
        /* Make sure we didn't queue anything we're not going to process. */
        WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
 
-       if ((pm_iir & dev_priv->pm_rps_events) == 0)
+       if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
                return;
 
        mutex_lock(&dev_priv->rps.hw_lock);
@@ -1101,7 +1116,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
        adj = dev_priv->rps.last_adj;
        new_delay = dev_priv->rps.cur_freq;
-       if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+       min = dev_priv->rps.min_freq_softlimit;
+       max = dev_priv->rps.max_freq_softlimit;
+
+       if (client_boost) {
+               new_delay = dev_priv->rps.max_freq_softlimit;
+               adj = 0;
+       } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
                if (adj > 0)
                        adj *= 2;
                else /* CHV needs even encode values */
@@ -1114,6 +1135,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
                        new_delay = dev_priv->rps.efficient_freq;
                        adj = 0;
                }
+       } else if (any_waiters(dev_priv)) {
+               adj = 0;
        } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
                if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
                        new_delay = dev_priv->rps.efficient_freq;
@@ -1135,9 +1158,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
         * interrupt
         */
        new_delay += adj;
-       new_delay = clamp_t(int, new_delay,
-                           dev_priv->rps.min_freq_softlimit,
-                           dev_priv->rps.max_freq_softlimit);
+       new_delay = clamp_t(int, new_delay, min, max);
 
        intel_set_rps(dev_priv->dev, new_delay);
 
@@ -1386,7 +1407,7 @@ static int i915_port_to_hotplug_shift(enum port port)
        }
 }
 
-static inline enum port get_port_from_pin(enum hpd_pin pin)
+static enum port get_port_from_pin(enum hpd_pin pin)
 {
        switch (pin) {
        case HPD_PORT_B:
@@ -1400,10 +1421,10 @@ static inline enum port get_port_from_pin(enum hpd_pin pin)
        }
 }
 
-static inline void intel_hpd_irq_handler(struct drm_device *dev,
-                                        u32 hotplug_trigger,
-                                        u32 dig_hotplug_reg,
-                                        const u32 hpd[HPD_NUM_PINS])
+static void intel_hpd_irq_handler(struct drm_device *dev,
+                                 u32 hotplug_trigger,
+                                 u32 dig_hotplug_reg,
+                                 const u32 hpd[HPD_NUM_PINS])
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int i;
@@ -1743,7 +1764,7 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
                 */
                POSTING_READ(PORT_HOTPLUG_STAT);
 
-               if (IS_G4X(dev)) {
+               if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) {
                        u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
 
                        intel_hpd_irq_handler(dev, hotplug_trigger, 0, hpd_status_g4x);
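
The reworked gen6_pm_rps_work() folds three inputs into the frequency decision: a client boost jumps straight to the soft maximum, pending waiters freeze the adjustment, and the result is clamped to the soft limits. A compressed, hypothetical sketch of that policy (it omits the efficient_freq fallback and the CHV even-value encoding):

    #include <stdio.h>

    #define UP_THRESHOLD (1u << 0)
    #define DOWN_TIMEOUT (1u << 1)

    static int clamp_int(int v, int lo, int hi)
    {
            return v < lo ? lo : v > hi ? hi : v;
    }

    static int next_freq(int cur, int adj, unsigned events,
                         int client_boost, int waiters, int min, int max)
    {
            if (client_boost)
                    return max;           /* jump straight to soft max */
            if (events & UP_THRESHOLD)
                    adj = adj > 0 ? adj * 2 : 1;
            else if (waiters)
                    adj = 0;              /* someone is waiting: hold  */
            else if (events & DOWN_TIMEOUT)
                    adj = adj < 0 ? adj * 2 : -1;
            return clamp_int(cur + adj, min, max);
    }

    int main(void)
    {
            printf("%d\n", next_freq(300, 1, UP_THRESHOLD, 0, 0, 200, 600));
            printf("%d\n", next_freq(300, 1, 0, 1, 0, 200, 600));
            return 0;
    }
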
index 58627a3194160c302926cb135eff7c379a316e7d..6d3fead3a3586b2779738d72f7305be6ad2a087f 100644 (file)
@@ -595,10 +595,6 @@ enum punit_power_well {
        PUNIT_POWER_WELL_DPIO_RX0               = 10,
        PUNIT_POWER_WELL_DPIO_RX1               = 11,
        PUNIT_POWER_WELL_DPIO_CMN_D             = 12,
-       /* FIXME: guesswork below */
-       PUNIT_POWER_WELL_DPIO_TX_D_LANES_01     = 13,
-       PUNIT_POWER_WELL_DPIO_TX_D_LANES_23     = 14,
-       PUNIT_POWER_WELL_DPIO_RX2               = 15,
 
        PUNIT_POWER_WELL_NUM,
 };
@@ -1204,6 +1200,12 @@ enum skl_disp_power_wells {
 #define   PORT_PLL_GAIN_CTL(x)         ((x)  << 16)
 /* PORT_PLL_8_A */
 #define   PORT_PLL_TARGET_CNT_MASK     0x3FF
+/* PORT_PLL_9_A */
+#define  PORT_PLL_LOCK_THRESHOLD_MASK  0xe
+/* PORT_PLL_10_A */
+#define  PORT_PLL_DCO_AMP_OVR_EN_H     (1<<27)
+#define  PORT_PLL_DCO_AMP_MASK         0x3c00
+#define  PORT_PLL_DCO_AMP(x)           ((x) << 10)
 #define _PORT_PLL_BASE(port)           _PORT3(port, _PORT_PLL_0_A,     \
                                                _PORT_PLL_0_B,          \
                                                _PORT_PLL_0_C)
@@ -1455,6 +1457,8 @@ enum skl_disp_power_wells {
 #define RING_HWS_PGA(base)     ((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)        ((base)+0x2080)
 
+#define HSW_GTT_CACHE_EN       0x4024
+#define   GTT_CACHE_EN_ALL     0xF0007FFF
 #define GEN7_WR_WATERMARK      0x4028
 #define GEN7_GFX_PRIO_CTRL     0x402C
 #define ARB_MODE               0x4030
@@ -2137,6 +2141,10 @@ enum skl_disp_power_wells {
 #define DPIO_PHY_STATUS                        (VLV_DISPLAY_BASE + 0x6240)
 #define   DPLL_PORTD_READY_MASK                (0xf)
 #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
+#define   PHY_LDO_DELAY_0NS                    0x0
+#define   PHY_LDO_DELAY_200NS                  0x1
+#define   PHY_LDO_DELAY_600NS                  0x2
+#define   PHY_LDO_SEQ_DELAY(delay, phy)                ((delay) << (2*(phy)+23))
 #define   PHY_CH_SU_PSR                                0x1
 #define   PHY_CH_DEEP_PSR                      0x7
 #define   PHY_CH_POWER_MODE(mode, phy, ch)     ((mode) << (6*(phy)+3*(ch)+2))
@@ -5167,6 +5175,8 @@ enum skl_disp_power_wells {
 #define _PLANE_KEYMAX_2_A                      0x702a0
 #define _PLANE_BUF_CFG_1_A                     0x7027c
 #define _PLANE_BUF_CFG_2_A                     0x7037c
+#define _PLANE_NV12_BUF_CFG_1_A                0x70278
+#define _PLANE_NV12_BUF_CFG_2_A                0x70378
 
 #define _PLANE_CTL_1_B                         0x71180
 #define _PLANE_CTL_2_B                         0x71280
@@ -5253,6 +5263,15 @@ enum skl_disp_power_wells {
 #define PLANE_BUF_CFG(pipe, plane)     \
        _PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
 
+#define _PLANE_NV12_BUF_CFG_1_B                0x71278
+#define _PLANE_NV12_BUF_CFG_2_B                0x71378
+#define _PLANE_NV12_BUF_CFG_1(pipe)    \
+       _PIPE(pipe, _PLANE_NV12_BUF_CFG_1_A, _PLANE_NV12_BUF_CFG_1_B)
+#define _PLANE_NV12_BUF_CFG_2(pipe)    \
+       _PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B)
+#define PLANE_NV12_BUF_CFG(pipe, plane)        \
+       _PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
+
 /* SKL new cursor registers */
 #define _CUR_BUF_CFG_A                         0x7017c
 #define _CUR_BUF_CFG_B                         0x7117c
@@ -5774,6 +5793,7 @@ enum skl_disp_power_wells {
 
 /* GEN8 chicken */
 #define HDC_CHICKEN0                           0x7300
+#define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE        (1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE            (1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED       (1<<11)
 #define  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT   (1<<5)
@@ -6422,6 +6442,7 @@ enum skl_disp_power_wells {
 #define  TRANS_DP_PORT_SEL_D   (2<<29)
 #define  TRANS_DP_PORT_SEL_NONE        (3<<29)
 #define  TRANS_DP_PORT_SEL_MASK        (3<<29)
+#define  TRANS_DP_PIPE_TO_PORT(val)    ((((val) & TRANS_DP_PORT_SEL_MASK) >> 29) + PORT_B)
 #define  TRANS_DP_AUDIO_ONLY   (1<<26)
 #define  TRANS_DP_ENH_FRAMING  (1<<18)
 #define  TRANS_DP_8BPC         (0<<9)
@@ -6681,6 +6702,9 @@ enum skl_disp_power_wells {
 #define     GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT   8
 #define     GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT   16
 #define     GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT   24
+#define   SKL_PCODE_CDCLK_CONTROL              0x7
+#define     SKL_CDCLK_PREPARE_FOR_CHANGE       0x3
+#define     SKL_CDCLK_READY_FOR_CHANGE         0x1
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE      0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE       0x9
 #define   GEN6_READ_OC_PARAMS                  0xc
index cee596d0a6a21cb6513b046b3723c61ea7e86a62..198fc3c3291b2ac05540ea36ef853c9826b23efa 100644 (file)
 
 static int panel_type;
 
-static void *
-find_section(struct bdb_header *bdb, int section_id)
+static const void *
+find_section(const void *_bdb, int section_id)
 {
-       u8 *base = (u8 *)bdb;
+       const struct bdb_header *bdb = _bdb;
+       const u8 *base = _bdb;
        int index = 0;
        u16 total, current_size;
        u8 current_id;
@@ -53,7 +54,7 @@ find_section(struct bdb_header *bdb, int section_id)
                current_id = *(base + index);
                index++;
 
-               current_size = *((u16 *)(base + index));
+               current_size = *((const u16 *)(base + index));
                index += 2;
 
                if (index + current_size > total)
@@ -69,7 +70,7 @@ find_section(struct bdb_header *bdb, int section_id)
 }
 
 static u16
-get_blocksize(void *p)
+get_blocksize(const void *p)
 {
        u16 *block_ptr, block_size;
 
@@ -204,7 +205,7 @@ get_lvds_fp_timing(const struct bdb_header *bdb,
 /* Try to find integrated panel data */
 static void
 parse_lfp_panel_data(struct drm_i915_private *dev_priv,
-                           struct bdb_header *bdb)
+                    const struct bdb_header *bdb)
 {
        const struct bdb_lvds_options *lvds_options;
        const struct bdb_lvds_lfp_data *lvds_lfp_data;
@@ -310,7 +311,8 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 }
 
 static void
-parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_lfp_backlight(struct drm_i915_private *dev_priv,
+                   const struct bdb_header *bdb)
 {
        const struct bdb_lfp_backlight_data *backlight_data;
        const struct bdb_lfp_backlight_data_entry *entry;
@@ -348,9 +350,9 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 /* Try to find sdvo panel data */
 static void
 parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
-                     struct bdb_header *bdb)
+                     const struct bdb_header *bdb)
 {
-       struct lvds_dvo_timing *dvo_timing;
+       const struct lvds_dvo_timing *dvo_timing;
        struct drm_display_mode *panel_fixed_mode;
        int index;
 
@@ -361,7 +363,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
        }
 
        if (index == -1) {
-               struct bdb_sdvo_lvds_options *sdvo_lvds_options;
+               const struct bdb_sdvo_lvds_options *sdvo_lvds_options;
 
                sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS);
                if (!sdvo_lvds_options)
@@ -402,10 +404,10 @@ static int intel_bios_ssc_frequency(struct drm_device *dev,
 
 static void
 parse_general_features(struct drm_i915_private *dev_priv,
-                      struct bdb_header *bdb)
+                      const struct bdb_header *bdb)
 {
        struct drm_device *dev = dev_priv->dev;
-       struct bdb_general_features *general;
+       const struct bdb_general_features *general;
 
        general = find_section(bdb, BDB_GENERAL_FEATURES);
        if (general) {
@@ -428,9 +430,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
 
 static void
 parse_general_definitions(struct drm_i915_private *dev_priv,
-                         struct bdb_header *bdb)
+                         const struct bdb_header *bdb)
 {
-       struct bdb_general_definitions *general;
+       const struct bdb_general_definitions *general;
 
        general = find_section(bdb, BDB_GENERAL_DEFINITIONS);
        if (general) {
@@ -447,19 +449,19 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
        }
 }
 
-static union child_device_config *
-child_device_ptr(struct bdb_general_definitions *p_defs, int i)
+static const union child_device_config *
+child_device_ptr(const struct bdb_general_definitions *p_defs, int i)
 {
-       return (void *) &p_defs->devices[i * p_defs->child_dev_size];
+       return (const void *) &p_defs->devices[i * p_defs->child_dev_size];
 }
 
 static void
 parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
-                         struct bdb_header *bdb)
+                         const struct bdb_header *bdb)
 {
        struct sdvo_device_mapping *p_mapping;
-       struct bdb_general_definitions *p_defs;
-       union child_device_config *p_child;
+       const struct bdb_general_definitions *p_defs;
+       const union child_device_config *p_child;
        int i, child_device_num, count;
        u16     block_size;
 
@@ -545,9 +547,9 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 
 static void
 parse_driver_features(struct drm_i915_private *dev_priv,
-                      struct bdb_header *bdb)
+                     const struct bdb_header *bdb)
 {
-       struct bdb_driver_features *driver;
+       const struct bdb_driver_features *driver;
 
        driver = find_section(bdb, BDB_DRIVER_FEATURES);
        if (!driver)
@@ -571,11 +573,11 @@ parse_driver_features(struct drm_i915_private *dev_priv,
 }
 
 static void
-parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-       struct bdb_edp *edp;
-       struct edp_power_seq *edp_pps;
-       struct edp_link_params *edp_link_params;
+       const struct bdb_edp *edp;
+       const struct edp_power_seq *edp_pps;
+       const struct edp_link_params *edp_link_params;
 
        edp = find_section(bdb, BDB_EDP);
        if (!edp) {
@@ -683,10 +685,10 @@ parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 }
 
 static void
-parse_psr(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-       struct bdb_psr *psr;
-       struct psr_table *psr_table;
+       const struct bdb_psr *psr;
+       const struct psr_table *psr_table;
 
        psr = find_section(bdb, BDB_PSR);
        if (!psr) {
@@ -794,13 +796,14 @@ static u8 *goto_next_sequence(u8 *data, int *size)
 }
 
 static void
-parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-       struct bdb_mipi_config *start;
-       struct bdb_mipi_sequence *sequence;
-       struct mipi_config *config;
-       struct mipi_pps_data *pps;
-       u8 *data, *seq_data;
+       const struct bdb_mipi_config *start;
+       const struct bdb_mipi_sequence *sequence;
+       const struct mipi_config *config;
+       const struct mipi_pps_data *pps;
+       u8 *data;
+       const u8 *seq_data;
        int i, panel_id, seq_size;
        u16 block_size;
 
@@ -944,7 +947,7 @@ err:
 }
 
 static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
-                          struct bdb_header *bdb)
+                          const struct bdb_header *bdb)
 {
        union child_device_config *it, *child = NULL;
        struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
@@ -1046,7 +1049,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 }
 
 static void parse_ddi_ports(struct drm_i915_private *dev_priv,
-                           struct bdb_header *bdb)
+                           const struct bdb_header *bdb)
 {
        struct drm_device *dev = dev_priv->dev;
        enum port port;
@@ -1066,10 +1069,11 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv,
 
 static void
 parse_device_mapping(struct drm_i915_private *dev_priv,
-                      struct bdb_header *bdb)
+                    const struct bdb_header *bdb)
 {
-       struct bdb_general_definitions *p_defs;
-       union child_device_config *p_child, *child_dev_ptr;
+       const struct bdb_general_definitions *p_defs;
+       const union child_device_config *p_child;
+       union child_device_config *child_dev_ptr;
        int i, child_device_num, count;
        u16     block_size;
 
@@ -1126,8 +1130,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 
                child_dev_ptr = dev_priv->vbt.child_dev + count;
                count++;
-               memcpy((void *)child_dev_ptr, (void *)p_child,
-                                       sizeof(*p_child));
+               memcpy(child_dev_ptr, p_child, sizeof(*p_child));
        }
        return;
 }
@@ -1196,19 +1199,22 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = {
        { }
 };
 
-static struct bdb_header *validate_vbt(char *base, size_t size,
-                                      struct vbt_header *vbt,
-                                      const char *source)
+static const struct bdb_header *validate_vbt(const void __iomem *_base,
+                                            size_t size,
+                                            const void __iomem *_vbt,
+                                            const char *source)
 {
-       size_t offset;
-       struct bdb_header *bdb;
-
-       if (vbt == NULL) {
-               DRM_DEBUG_DRIVER("VBT signature missing\n");
-               return NULL;
-       }
+       /*
+        * This is the one place where we explicitly discard the address space
+        * (__iomem) of the BIOS/VBT. (And this will cause a sparse complaint.)
+        * From now on everything is based on 'base', and treated as regular
+        * memory.
+        */
+       const void *base = (const void *) _base;
+       size_t offset = _vbt - _base;
+       const struct vbt_header *vbt = base + offset;
+       const struct bdb_header *bdb;
 
-       offset = (char *)vbt - base;
        if (offset + sizeof(struct vbt_header) > size) {
                DRM_DEBUG_DRIVER("VBT header incomplete\n");
                return NULL;
@@ -1225,7 +1231,7 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
                return NULL;
        }
 
-       bdb = (struct bdb_header *)(base + offset);
+       bdb = base + offset;
        if (offset + bdb->bdb_size > size) {
                DRM_DEBUG_DRIVER("BDB incomplete\n");
                return NULL;
@@ -1236,6 +1242,22 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
        return bdb;
 }
 
+static const struct bdb_header *find_vbt(void __iomem *bios, size_t size)
+{
+       const struct bdb_header *bdb = NULL;
+       size_t i;
+
+       /* Scour memory looking for the VBT signature. */
+       for (i = 0; i + 4 < size; i++) {
+               if (ioread32(bios + i) == *((const u32 *) "$VBT")) {
+                       bdb = validate_vbt(bios, size, bios + i, "PCI ROM");
+                       break;
+               }
+       }
+
+       return bdb;
+}
+
 /**
  * intel_parse_bios - find VBT and initialize settings from the BIOS
  * @dev: DRM device
@@ -1250,7 +1272,7 @@ intel_parse_bios(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct pci_dev *pdev = dev->pdev;
-       struct bdb_header *bdb = NULL;
+       const struct bdb_header *bdb = NULL;
        u8 __iomem *bios = NULL;
 
        if (HAS_PCH_NOP(dev))
@@ -1260,27 +1282,17 @@ intel_parse_bios(struct drm_device *dev)
 
        /* XXX Should this validation be moved to intel_opregion.c? */
        if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt)
-               bdb = validate_vbt((char *)dev_priv->opregion.header, OPREGION_SIZE,
-                                  (struct vbt_header *)dev_priv->opregion.vbt,
-                                  "OpRegion");
+               bdb = validate_vbt(dev_priv->opregion.header, OPREGION_SIZE,
+                                  dev_priv->opregion.vbt, "OpRegion");
 
        if (bdb == NULL) {
-               size_t i, size;
+               size_t size;
 
                bios = pci_map_rom(pdev, &size);
                if (!bios)
                        return -1;
 
-               /* Scour memory looking for the VBT signature */
-               for (i = 0; i + 4 < size; i++) {
-                       if (memcmp(bios + i, "$VBT", 4) == 0) {
-                               bdb = validate_vbt(bios, size,
-                                                  (struct vbt_header *)(bios + i),
-                                                  "PCI ROM");
-                               break;
-                       }
-               }
-
+               bdb = find_vbt(bios, size);
                if (!bdb) {
                        pci_unmap_rom(pdev, bios);
                        return -1;
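
find_vbt() above locates the table by scanning the mapped ROM for the literal "$VBT" signature, 32 bits at a time through ioread32(). The same scan over an ordinary buffer, as a self-contained sketch (memcmp in place of the __iomem accessor):

    #include <stdio.h>
    #include <string.h>

    /* Return the offset of the "$VBT" signature in buf, or -1 if absent. */
    static long scan_for_vbt(const unsigned char *buf, size_t size)
    {
            size_t i;

            for (i = 0; i + 4 < size; i++)
                    if (memcmp(buf + i, "$VBT", 4) == 0)
                            return (long)i;
            return -1;
    }

    int main(void)
    {
            unsigned char rom[64] = { 0 };

            memcpy(rom + 17, "$VBT", 4);  /* plant a signature */
            printf("found at offset %ld\n", scan_for_vbt(rom, sizeof(rom)));
            return 0;
    }
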
index 93bb5159d093504c3a9ca2f36ed6320895f01c45..521af2c069cb6aed90e379501d3fbc39c24df6e9 100644 (file)
@@ -207,6 +207,14 @@ static void intel_disable_crt(struct intel_encoder *encoder)
        intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF);
 }
 
+static void pch_disable_crt(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_crt(struct intel_encoder *encoder)
+{
+       intel_disable_crt(encoder);
+}
 
 static void hsw_crt_post_disable(struct intel_encoder *encoder)
 {
@@ -888,7 +896,12 @@ void intel_crt_init(struct drm_device *dev)
                crt->adpa_reg = ADPA;
 
        crt->base.compute_config = intel_crt_compute_config;
-       crt->base.disable = intel_disable_crt;
+       if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) {
+               crt->base.disable = pch_disable_crt;
+               crt->base.post_disable = pch_post_disable_crt;
+       } else {
+               crt->base.disable = intel_disable_crt;
+       }
        crt->base.enable = intel_enable_crt;
        if (I915_HAS_HOTPLUG(dev))
                crt->base.hpd_pin = HPD_CRT;
index 9311cddb86e670a0a23f265839c5802a84462239..5cb8cc18994a37528bef8e1b180b23356dd11509 100644 (file)
 #include "i915_drv.h"
 #include "i915_reg.h"
 
+/**
+ * DOC: csr support for dmc
+ *
+ * Display Context Save and Restore (CSR) firmware support is added from gen9
+ * onwards to drive the newly added DMC (Display Microcontroller) in the
+ * display engine, which saves and restores display engine state when the
+ * engine enters a low-power state and comes back to normal.
+ *
+ * The firmware loading status is one of: FW_UNINITIALIZED, FW_LOADED,
+ * FW_FAILED.
+ *
+ * Once the firmware is written into the registers the status moves from
+ * FW_UNINITIALIZED to FW_LOADED; on any error the status moves to
+ * FW_FAILED.
+ */
+
 #define I915_CSR_SKL "i915/skl_dmc_ver4.bin"
 
 MODULE_FIRMWARE(I915_CSR_SKL);
@@ -183,6 +199,14 @@ static char intel_get_substepping(struct drm_device *dev)
                return -ENODATA;
 }
 
+/**
+ * intel_csr_load_status_get() - query the firmware loading status.
+ * @dev_priv: i915 device.
+ *
+ * Return: the current firmware loading status.
+ */
 enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
 {
        enum csr_state state;
@@ -194,6 +218,13 @@ enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
        return state;
 }
 
+/**
+ * intel_csr_load_status_set() - set the firmware loading status.
+ * @dev_priv: i915 device.
+ * @state: firmware loading status to set.
+ *
+ * Set the firmware loading status.
+ */
 void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
                        enum csr_state state)
 {
@@ -202,6 +233,14 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
        mutex_unlock(&dev_priv->csr_lock);
 }
 
+/**
+ * intel_csr_load_program() - write the firmware from memory to registers.
+ * @dev: drm device.
+ *
+ * CSR firmware is read from a .bin file and kept in internal memory once.
+ * Every time the display comes back from a low-power state this function is
+ * called to copy the firmware from internal memory to the registers.
+ */
 void intel_csr_load_program(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -359,6 +398,13 @@ out:
        release_firmware(fw);
 }
 
+/**
+ * intel_csr_ucode_init() - initialize the firmware loading.
+ * @dev: drm device.
+ *
+ * This function is called when the display driver loads, to read the
+ * firmware from a .bin file and copy it into internal memory.
+ */
 void intel_csr_ucode_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -393,6 +439,13 @@ void intel_csr_ucode_init(struct drm_device *dev)
        }
 }
 
+/**
+ * intel_csr_ucode_fini() - unload the CSR firmware.
+ * @dev: drm device.
+ *
+ * Firmware unloading includes freeing the internal memory and resetting the
+ * firmware loading status.
+ */
 void intel_csr_ucode_fini(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
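
The kerneldoc above describes a three-state loading machine (FW_UNINITIALIZED, FW_LOADED, FW_FAILED) whose accessors serialise on csr_lock. A minimal user-space sketch of that pattern, with a pthread mutex standing in for the kernel mutex:

    #include <pthread.h>
    #include <stdio.h>

    enum csr_state { FW_UNINITIALIZED, FW_LOADED, FW_FAILED };

    static enum csr_state state = FW_UNINITIALIZED;
    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

    static void csr_status_set(enum csr_state s)
    {
            pthread_mutex_lock(&state_lock);   /* csr_lock in the driver */
            state = s;
            pthread_mutex_unlock(&state_lock);
    }

    static enum csr_state csr_status_get(void)
    {
            enum csr_state s;

            pthread_mutex_lock(&state_lock);
            s = state;
            pthread_mutex_unlock(&state_lock);
            return s;
    }

    int main(void)
    {
            csr_status_set(FW_LOADED);   /* firmware written to registers */
            printf("state=%d\n", csr_status_get());
            return 0;
    }
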
index 807e15d41a1bd8eebd73f29a3f8ad43ef46f3398..cacb07b7a8f108a84a209223c4d7604d1c462a13 100644 (file)
@@ -1087,6 +1087,9 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
                      WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
                      WRPLL_DIVIDER_POST(p);
 
+               memset(&crtc_state->dpll_hw_state, 0,
+                      sizeof(crtc_state->dpll_hw_state));
+
                crtc_state->dpll_hw_state.wrpll = val;
 
                pll = intel_get_shared_dpll(intel_crtc, crtc_state);
@@ -1309,6 +1312,9 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc,
        } else /* eDP */
                return true;
 
+       memset(&crtc_state->dpll_hw_state, 0,
+              sizeof(crtc_state->dpll_hw_state));
+
        crtc_state->dpll_hw_state.ctrl1 = ctrl1;
        crtc_state->dpll_hw_state.cfgcr1 = cfgcr1;
        crtc_state->dpll_hw_state.cfgcr2 = cfgcr2;
@@ -1334,22 +1340,17 @@ struct bxt_clk_div {
        uint32_t m2_frac;
        bool m2_frac_en;
        uint32_t n;
-       uint32_t prop_coef;
-       uint32_t int_coef;
-       uint32_t gain_ctl;
-       uint32_t targ_cnt;
-       uint32_t lanestagger;
 };
 
 /* pre-calculated values for DP linkrates */
 static struct bxt_clk_div bxt_dp_clk_val[7] = {
-       /* 162 */ {4, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-       /* 270 */ {4, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0xd},
-       /* 540 */ {2, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0x18},
-       /* 216 */ {3, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-       /* 243 */ {4, 1, 24, 1258291, 1, 1, 5, 11, 2, 9, 0xd},
-       /* 324 */ {4, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-       /* 432 */ {3, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0x18}
+       /* 162 */ {4, 2, 32, 1677722, 1, 1},
+       /* 270 */ {4, 1, 27,       0, 0, 1},
+       /* 540 */ {2, 1, 27,       0, 0, 1},
+       /* 216 */ {3, 2, 32, 1677722, 1, 1},
+       /* 243 */ {4, 1, 24, 1258291, 1, 1},
+       /* 324 */ {4, 1, 32, 1677722, 1, 1},
+       /* 432 */ {3, 1, 32, 1677722, 1, 1}
 };
 
 static bool
@@ -1360,6 +1361,9 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
 {
        struct intel_shared_dpll *pll;
        struct bxt_clk_div clk_div = {0};
+       int vco = 0;
+       uint32_t prop_coef, int_coef, gain_ctl, targ_cnt;
+       uint32_t dcoampovr_en_h, dco_amp, lanestagger;
 
        if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
                intel_clock_t best_clock;
@@ -1383,21 +1387,7 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
                clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1);
                clk_div.m2_frac_en = clk_div.m2_frac != 0;
 
-               /* FIXME: set coef, gain, targcnt based on freq band */
-               clk_div.prop_coef = 5;
-               clk_div.int_coef = 11;
-               clk_div.gain_ctl = 2;
-               clk_div.targ_cnt = 9;
-               if (clock > 270000)
-                       clk_div.lanestagger = 0x18;
-               else if (clock > 135000)
-                       clk_div.lanestagger = 0x0d;
-               else if (clock > 67000)
-                       clk_div.lanestagger = 0x07;
-               else if (clock > 33000)
-                       clk_div.lanestagger = 0x04;
-               else
-                       clk_div.lanestagger = 0x02;
+               vco = best_clock.vco;
        } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
                        intel_encoder->type == INTEL_OUTPUT_EDP) {
                struct drm_encoder *encoder = &intel_encoder->base;
@@ -1417,8 +1407,48 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
                        clk_div = bxt_dp_clk_val[0];
                        DRM_ERROR("Unknown link rate\n");
                }
+               vco = clock * 10 / 2 * clk_div.p1 * clk_div.p2;
+       }
+
+       dco_amp = 15;
+       dcoampovr_en_h = 0;
+       if (vco >= 6200000 && vco <= 6480000) {
+               prop_coef = 4;
+               int_coef = 9;
+               gain_ctl = 3;
+               targ_cnt = 8;
+       } else if ((vco > 5400000 && vco < 6200000) ||
+                       (vco >= 4800000 && vco < 5400000)) {
+               prop_coef = 5;
+               int_coef = 11;
+               gain_ctl = 3;
+               targ_cnt = 9;
+               if (vco >= 4800000 && vco < 5400000)
+                       dcoampovr_en_h = 1;
+       } else if (vco == 5400000) {
+               prop_coef = 3;
+               int_coef = 8;
+               gain_ctl = 1;
+               targ_cnt = 9;
+       } else {
+               DRM_ERROR("Invalid VCO\n");
+               return false;
        }
 
+       memset(&crtc_state->dpll_hw_state, 0,
+              sizeof(crtc_state->dpll_hw_state));
+
+       if (clock > 270000)
+               lanestagger = 0x18;
+       else if (clock > 135000)
+               lanestagger = 0x0d;
+       else if (clock > 67000)
+               lanestagger = 0x07;
+       else if (clock > 33000)
+               lanestagger = 0x04;
+       else
+               lanestagger = 0x02;
+
        crtc_state->dpll_hw_state.ebb0 =
                PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2);
        crtc_state->dpll_hw_state.pll0 = clk_div.m2_int;
@@ -1430,14 +1460,19 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
                        PORT_PLL_M2_FRAC_ENABLE;
 
        crtc_state->dpll_hw_state.pll6 =
-               clk_div.prop_coef | PORT_PLL_INT_COEFF(clk_div.int_coef);
+               prop_coef | PORT_PLL_INT_COEFF(int_coef);
        crtc_state->dpll_hw_state.pll6 |=
-               PORT_PLL_GAIN_CTL(clk_div.gain_ctl);
+               PORT_PLL_GAIN_CTL(gain_ctl);
+
+       crtc_state->dpll_hw_state.pll8 = targ_cnt;
 
-       crtc_state->dpll_hw_state.pll8 = clk_div.targ_cnt;
+       if (dcoampovr_en_h)
+               crtc_state->dpll_hw_state.pll10 = PORT_PLL_DCO_AMP_OVR_EN_H;
+
+       crtc_state->dpll_hw_state.pll10 |= PORT_PLL_DCO_AMP(dco_amp);
 
        crtc_state->dpll_hw_state.pcsdw12 =
-               LANESTAGGER_STRAP_OVRD | clk_div.lanestagger;
+               LANESTAGGER_STRAP_OVRD | lanestagger;
 
        pll = intel_get_shared_dpll(intel_crtc, crtc_state);
        if (pll == NULL) {
@@ -2367,10 +2402,16 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
        temp |= pll->config.hw_state.pll8;
        I915_WRITE(BXT_PORT_PLL(port, 8), temp);
 
-       /*
-        * FIXME: program PORT_PLL_9/i_lockthresh according to the latest
-        * specification update.
-        */
+       temp = I915_READ(BXT_PORT_PLL(port, 9));
+       temp &= ~PORT_PLL_LOCK_THRESHOLD_MASK;
+       temp |= (5 << 1);
+       I915_WRITE(BXT_PORT_PLL(port, 9), temp);
+
+       temp = I915_READ(BXT_PORT_PLL(port, 10));
+       temp &= ~PORT_PLL_DCO_AMP_OVR_EN_H;
+       temp &= ~PORT_PLL_DCO_AMP_MASK;
+       temp |= pll->config.hw_state.pll10;
+       I915_WRITE(BXT_PORT_PLL(port, 10), temp);
 
        /* Recalibrate with new settings */
        temp = I915_READ(BXT_PORT_PLL_EBB_4(port));
@@ -2434,6 +2475,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
        hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3));
        hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6));
        hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8));
+       hw_state->pll10 = I915_READ(BXT_PORT_PLL(port, 10));
        /*
         * While we write to the group register to program all lanes at once we
         * can read only lane registers. We configure all lanes the same way, so
@@ -2468,6 +2510,7 @@ void intel_ddi_pll_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t val = I915_READ(LCPLL_CTL);
+       int cdclk_freq;
 
        if (IS_SKYLAKE(dev))
                skl_shared_dplls_init(dev_priv);
@@ -2476,12 +2519,15 @@ void intel_ddi_pll_init(struct drm_device *dev)
        else
                hsw_shared_dplls_init(dev_priv);
 
-       DRM_DEBUG_KMS("CDCLK running at %dKHz\n",
-                     dev_priv->display.get_display_clock_speed(dev));
+       cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
+       DRM_DEBUG_KMS("CDCLK running at %dKHz\n", cdclk_freq);
 
        if (IS_SKYLAKE(dev)) {
+               dev_priv->skl_boot_cdclk = cdclk_freq;
                if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
                        DRM_ERROR("LCPLL1 is disabled\n");
+               else
+                       intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
        } else if (IS_BROXTON(dev)) {
                broxton_init_cdclk(dev);
                broxton_ddi_phy_init(dev);
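
bxt_ddi_pll_select() now derives the loop-filter coefficients from the computed VCO instead of hard-coding them per link rate. The band edges from the hunk above, condensed into a standalone lookup (hypothetical helper; VCO values in kHz):

    #include <stdio.h>

    struct coeffs { unsigned prop, integ, gain, targ; };

    static int bxt_lookup_coeffs(unsigned vco, struct coeffs *c)
    {
            if (vco >= 6200000 && vco <= 6480000)
                    *c = (struct coeffs){ 4, 9, 3, 8 };
            else if ((vco > 5400000 && vco < 6200000) ||
                     (vco >= 4800000 && vco < 5400000))
                    *c = (struct coeffs){ 5, 11, 3, 9 };
            else if (vco == 5400000)
                    *c = (struct coeffs){ 3, 8, 1, 9 };
            else
                    return -1;            /* invalid VCO */
            return 0;
    }

    int main(void)
    {
            struct coeffs c;

            if (bxt_lookup_coeffs(6480000, &c) == 0)
                    printf("prop=%u int=%u gain=%u targ=%u\n",
                           c.prop, c.integ, c.gain, c.targ);
            return 0;
    }
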
index 240092afc3928bdb81a81f2781fb913080154415..4e3f302d86f790b27f0f6d8af3f13cdcf93d078e 100644 (file)
 #include <drm/drm_rect.h>
 #include <linux/dma_remapping.h>
 
-/* Primary plane formats supported by all gen */
-#define COMMON_PRIMARY_FORMATS \
-       DRM_FORMAT_C8, \
-       DRM_FORMAT_RGB565, \
-       DRM_FORMAT_XRGB8888, \
-       DRM_FORMAT_ARGB8888
-
 /* Primary plane formats for gen <= 3 */
-static const uint32_t intel_primary_formats_gen2[] = {
-       COMMON_PRIMARY_FORMATS,
+static const uint32_t i8xx_primary_formats[] = {
+       DRM_FORMAT_C8,
+       DRM_FORMAT_RGB565,
        DRM_FORMAT_XRGB1555,
-       DRM_FORMAT_ARGB1555,
+       DRM_FORMAT_XRGB8888,
 };
 
 /* Primary plane formats for gen >= 4 */
-static const uint32_t intel_primary_formats_gen4[] = {
-       COMMON_PRIMARY_FORMATS, \
+static const uint32_t i965_primary_formats[] = {
+       DRM_FORMAT_C8,
+       DRM_FORMAT_RGB565,
+       DRM_FORMAT_XRGB8888,
        DRM_FORMAT_XBGR8888,
+       DRM_FORMAT_XRGB2101010,
+       DRM_FORMAT_XBGR2101010,
+};
+
+static const uint32_t skl_primary_formats[] = {
+       DRM_FORMAT_C8,
+       DRM_FORMAT_RGB565,
+       DRM_FORMAT_XRGB8888,
+       DRM_FORMAT_XBGR8888,
+       DRM_FORMAT_ARGB8888,
        DRM_FORMAT_ABGR8888,
        DRM_FORMAT_XRGB2101010,
-       DRM_FORMAT_ARGB2101010,
        DRM_FORMAT_XBGR2101010,
-       DRM_FORMAT_ABGR2101010,
 };
 
 /* Cursor formats */
@@ -1136,9 +1140,9 @@ static void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
        u32 val;
        bool cur_state;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        cur_state = val & DSI_PLL_VCO_EN;
        I915_STATE_WARN(cur_state != state,
@@ -1657,13 +1661,15 @@ static void chv_enable_pll(struct intel_crtc *crtc,
 
        BUG_ON(!IS_CHERRYVIEW(dev_priv->dev));
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* Enable back the 10bit clock to display controller */
        tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
        tmp |= DPIO_DCLKP_EN;
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
 
+       mutex_unlock(&dev_priv->sb_lock);
+
        /*
         * Need to wait > 100ns between dclkp clock enable bit and PLL enable.
         */
@@ -1679,8 +1685,6 @@ static void chv_enable_pll(struct intel_crtc *crtc,
        /* not sure when this should be written */
        I915_WRITE(DPLL_MD(pipe), pipe_config->dpll_hw_state.dpll_md);
        POSTING_READ(DPLL_MD(pipe));
-
-       mutex_unlock(&dev_priv->dpio_lock);
 }
 
 static int intel_num_dvo_pipes(struct drm_device *dev)
@@ -1822,7 +1826,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
        I915_WRITE(DPLL(pipe), val);
        POSTING_READ(DPLL(pipe));
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* Disable 10bit clock to display controller */
        val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
@@ -1840,7 +1844,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
                vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
        }
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
@@ -2206,20 +2210,6 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
                intel_wait_for_pipe_off(crtc);
 }
 
-/*
- * Plane regs are double buffered, going from enabled->disabled needs a
- * trigger in order to latch.  The display address reg provides this.
- */
-void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
-                              enum plane plane)
-{
-       struct drm_device *dev = dev_priv->dev;
-       u32 reg = INTEL_INFO(dev)->gen >= 4 ? DSPSURF(plane) : DSPADDR(plane);
-
-       I915_WRITE(reg, I915_READ(reg));
-       POSTING_READ(reg);
-}
-
 /**
  * intel_enable_primary_hw_plane - enable the primary plane on a given pipe
  * @plane:  plane to be enabled
@@ -2702,26 +2692,21 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
                dspcntr |= DISPPLANE_8BPP;
                break;
        case DRM_FORMAT_XRGB1555:
-       case DRM_FORMAT_ARGB1555:
                dspcntr |= DISPPLANE_BGRX555;
                break;
        case DRM_FORMAT_RGB565:
                dspcntr |= DISPPLANE_BGRX565;
                break;
        case DRM_FORMAT_XRGB8888:
-       case DRM_FORMAT_ARGB8888:
                dspcntr |= DISPPLANE_BGRX888;
                break;
        case DRM_FORMAT_XBGR8888:
-       case DRM_FORMAT_ABGR8888:
                dspcntr |= DISPPLANE_RGBX888;
                break;
        case DRM_FORMAT_XRGB2101010:
-       case DRM_FORMAT_ARGB2101010:
                dspcntr |= DISPPLANE_BGRX101010;
                break;
        case DRM_FORMAT_XBGR2101010:
-       case DRM_FORMAT_ABGR2101010:
                dspcntr |= DISPPLANE_RGBX101010;
                break;
        default:
@@ -2817,19 +2802,15 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
                dspcntr |= DISPPLANE_BGRX565;
                break;
        case DRM_FORMAT_XRGB8888:
-       case DRM_FORMAT_ARGB8888:
                dspcntr |= DISPPLANE_BGRX888;
                break;
        case DRM_FORMAT_XBGR8888:
-       case DRM_FORMAT_ABGR8888:
                dspcntr |= DISPPLANE_RGBX888;
                break;
        case DRM_FORMAT_XRGB2101010:
-       case DRM_FORMAT_ARGB2101010:
                dspcntr |= DISPPLANE_BGRX101010;
                break;
        case DRM_FORMAT_XBGR2101010:
-       case DRM_FORMAT_ABGR2101010:
                dspcntr |= DISPPLANE_RGBX101010;
                break;
        default:
@@ -2953,95 +2934,83 @@ void skl_detach_scalers(struct intel_crtc *intel_crtc)
 
 u32 skl_plane_ctl_format(uint32_t pixel_format)
 {
-       u32 plane_ctl_format = 0;
        switch (pixel_format) {
+       case DRM_FORMAT_C8:
+               return PLANE_CTL_FORMAT_INDEXED;
        case DRM_FORMAT_RGB565:
-               plane_ctl_format = PLANE_CTL_FORMAT_RGB_565;
-               break;
+               return PLANE_CTL_FORMAT_RGB_565;
        case DRM_FORMAT_XBGR8888:
-               plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
-               break;
+               return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
        case DRM_FORMAT_XRGB8888:
-               plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888;
-               break;
+               return PLANE_CTL_FORMAT_XRGB_8888;
        /*
         * XXX: For ARBG/ABGR formats we default to expecting scanout buffers
         * to be already pre-multiplied. We need to add a knob (or a different
         * DRM_FORMAT) for user-space to configure that.
         */
        case DRM_FORMAT_ABGR8888:
-               plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
+               return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
                        PLANE_CTL_ALPHA_SW_PREMULTIPLY;
-               break;
        case DRM_FORMAT_ARGB8888:
-               plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 |
+               return PLANE_CTL_FORMAT_XRGB_8888 |
                        PLANE_CTL_ALPHA_SW_PREMULTIPLY;
-               break;
        case DRM_FORMAT_XRGB2101010:
-               plane_ctl_format = PLANE_CTL_FORMAT_XRGB_2101010;
-               break;
+               return PLANE_CTL_FORMAT_XRGB_2101010;
        case DRM_FORMAT_XBGR2101010:
-               plane_ctl_format = PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
-               break;
+               return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
        case DRM_FORMAT_YUYV:
-               plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
-               break;
+               return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
        case DRM_FORMAT_YVYU:
-               plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
-               break;
+               return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
        case DRM_FORMAT_UYVY:
-               plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
-               break;
+               return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
        case DRM_FORMAT_VYUY:
-               plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
-               break;
+               return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
        default:
-               BUG();
+               MISSING_CASE(pixel_format);
        }
-       return plane_ctl_format;
+
+       return 0;
 }
 
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 {
-       u32 plane_ctl_tiling = 0;
        switch (fb_modifier) {
        case DRM_FORMAT_MOD_NONE:
                break;
        case I915_FORMAT_MOD_X_TILED:
-               plane_ctl_tiling = PLANE_CTL_TILED_X;
-               break;
+               return PLANE_CTL_TILED_X;
        case I915_FORMAT_MOD_Y_TILED:
-               plane_ctl_tiling = PLANE_CTL_TILED_Y;
-               break;
+               return PLANE_CTL_TILED_Y;
        case I915_FORMAT_MOD_Yf_TILED:
-               plane_ctl_tiling = PLANE_CTL_TILED_YF;
-               break;
+               return PLANE_CTL_TILED_YF;
        default:
                MISSING_CASE(fb_modifier);
        }
-       return plane_ctl_tiling;
+
+       return 0;
 }
 
 u32 skl_plane_ctl_rotation(unsigned int rotation)
 {
-       u32 plane_ctl_rotation = 0;
        switch (rotation) {
        case BIT(DRM_ROTATE_0):
                break;
+       /*
+        * DRM_ROTATE_ is counter-clockwise to stay compatible with Xrandr,
+        * while i915 HW rotation is clockwise; that is why the 90/270 values
+        * are swapped below.
+        */
        case BIT(DRM_ROTATE_90):
-               plane_ctl_rotation = PLANE_CTL_ROTATE_90;
-               break;
+               return PLANE_CTL_ROTATE_270;
        case BIT(DRM_ROTATE_180):
-               plane_ctl_rotation = PLANE_CTL_ROTATE_180;
-               break;
+               return PLANE_CTL_ROTATE_180;
        case BIT(DRM_ROTATE_270):
-               plane_ctl_rotation = PLANE_CTL_ROTATE_270;
-               break;
+               return PLANE_CTL_ROTATE_90;
        default:
                MISSING_CASE(rotation);
        }
 
-       return plane_ctl_rotation;
+       return 0;
 }
 
 static void skylake_update_primary_plane(struct drm_crtc *crtc,
@@ -3115,7 +3084,7 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
 
        if (intel_rotation_90_or_270(rotation)) {
                /* stride = Surface height in tiles */
-               tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+               tile_height = intel_tile_height(dev, fb->pixel_format,
                                                fb->modifier[0]);
                stride = DIV_ROUND_UP(fb->height, tile_height);
                x_offset = stride * tile_height - y - src_h;
@@ -3295,27 +3264,30 @@ void intel_finish_reset(struct drm_device *dev)
        drm_modeset_unlock_all(dev);
 }
 
-static int
+static void
 intel_finish_fb(struct drm_framebuffer *old_fb)
 {
        struct drm_i915_gem_object *obj = intel_fb_obj(old_fb);
-       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        bool was_interruptible = dev_priv->mm.interruptible;
        int ret;
 
        /* Big Hammer, we also need to ensure that any pending
         * MI_WAIT_FOR_EVENT inside a user batch buffer on the
         * current scanout is retired before unpinning the old
-        * framebuffer.
+        * framebuffer. Note that we rely on userspace rendering
+        * into the buffer attached to the pipe it is waiting
+        * on; otherwise userspace generates a GPU hang with IPEHR
+        * pointing to the MI_WAIT_FOR_EVENT.
         *
         * This should only fail upon a hung GPU, in which case we
         * can safely continue.
         */
        dev_priv->mm.interruptible = false;
-       ret = i915_gem_object_finish_gpu(obj);
+       ret = i915_gem_object_wait_rendering(obj, true);
        dev_priv->mm.interruptible = was_interruptible;
 
-       return ret;
+       WARN_ON(ret);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -3967,7 +3939,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc)
        u32 divsel, phaseinc, auxdiv, phasedir = 0;
        u32 temp;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* It is necessary to ungate the pixclk gate prior to programming
         * the divisors, and gate it back when it is done.
@@ -4044,7 +4016,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc)
 
        I915_WRITE(PIXCLK_GATE, PIXCLK_GATE_UNGATE);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void ironlake_pch_transcoder_set_timings(struct intel_crtc *crtc,
@@ -4182,8 +4154,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
                temp &= ~(TRANS_DP_PORT_SEL_MASK |
                          TRANS_DP_SYNC_MASK |
                          TRANS_DP_BPC_MASK);
-               temp |= (TRANS_DP_OUTPUT_ENABLE |
-                        TRANS_DP_ENH_FRAMING);
+               temp |= TRANS_DP_OUTPUT_ENABLE;
                temp |= bpc << 9; /* same format but at 11:9 */
 
                if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
@@ -4517,9 +4488,10 @@ skl_update_scaler_users(
        }
 
        /* check colorkey */
-       if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) {
-               DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed",
-                       intel_plane->base.base.id);
+       if (WARN_ON(intel_plane &&
+               intel_plane->ckey.flags != I915_SET_COLORKEY_NONE)) {
+               DRM_DEBUG_KMS("PLANE:%d scaling %ux%u->%ux%u not allowed with colorkey",
+                       intel_plane->base.base.id, src_w, src_h, dst_w, dst_h);
                return -EINVAL;
        }
 
@@ -4532,9 +4504,7 @@ skl_update_scaler_users(
                case DRM_FORMAT_ABGR8888:
                case DRM_FORMAT_ARGB8888:
                case DRM_FORMAT_XRGB2101010:
-               case DRM_FORMAT_ARGB2101010:
                case DRM_FORMAT_XBGR2101010:
-               case DRM_FORMAT_ABGR2101010:
                case DRM_FORMAT_YUYV:
                case DRM_FORMAT_YVYU:
                case DRM_FORMAT_UYVY:
@@ -4858,11 +4828,22 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
 
 static void intel_crtc_enable_planes(struct drm_crtc *crtc)
 {
+       struct drm_device *dev = crtc->dev;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int pipe = intel_crtc->pipe;
+
        intel_enable_primary_hw_plane(crtc->primary, crtc);
        intel_enable_sprite_planes(crtc);
        intel_crtc_update_cursor(crtc, true);
 
        intel_post_enable_primary(crtc);
+
+       /*
+        * FIXME: Once we grow proper nuclear flip support out of this we need
+        * to compute the mask of flip planes precisely. For the time being
+        * consider this a flip to a NULL plane.
+        */
+       intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
 }
 
 static void intel_crtc_disable_planes(struct drm_crtc *crtc)
@@ -5128,13 +5109,14 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 
        ironlake_pfit_disable(intel_crtc);
 
+       if (intel_crtc->config->has_pch_encoder)
+               ironlake_fdi_disable(crtc);
+
        for_each_encoder_on_crtc(dev, crtc, encoder)
                if (encoder->post_disable)
                        encoder->post_disable(encoder);
 
        if (intel_crtc->config->has_pch_encoder) {
-               ironlake_fdi_disable(crtc);
-
                ironlake_disable_pch_transcoder(dev_priv, pipe);
 
                if (HAS_PCH_CPT(dev)) {
@@ -5543,16 +5525,224 @@ void broxton_uninit_cdclk(struct drm_device *dev)
        intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
 }
 
+static const struct skl_cdclk_entry {
+       unsigned int freq;
+       unsigned int vco;
+} skl_cdclk_frequencies[] = {
+       { .freq = 308570, .vco = 8640 },
+       { .freq = 337500, .vco = 8100 },
+       { .freq = 432000, .vco = 8640 },
+       { .freq = 450000, .vco = 8100 },
+       { .freq = 540000, .vco = 8100 },
+       { .freq = 617140, .vco = 8640 },
+       { .freq = 675000, .vco = 8100 },
+};
+
+static unsigned int skl_cdclk_decimal(unsigned int freq)
+{
+       return (freq - 1000) / 500;
+}
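skl_cdclk_decimal() packs more than it shows: (freq - 1000) / 500 is the same as (freq_in_MHz - 1) * 2, i.e. the frequency minus 1 MHz in 0.5 MHz steps, which by all appearances is the encoding the CDCLK_CTL decimal field wants. A worked check (plain arithmetic, not from the patch):

	unsigned int freq = 337500;			/* kHz */
	unsigned int dec  = (freq - 1000) / 500;	/* = 673 */
	/* cross-check: (337.5 - 1) * 2 = 673, so both forms agree */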
+
+static unsigned int skl_cdclk_get_vco(unsigned int freq)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(skl_cdclk_frequencies); i++) {
+               const struct skl_cdclk_entry *e = &skl_cdclk_frequencies[i];
+
+               if (e->freq == freq)
+                       return e->vco;
+       }
+
+       return 8100;
+}
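A few sample lookups against the table, for orientation; the 8640 rows are the cdclk values reachable when DPLL0 runs at the VCO that, per the comment in skl_dpll0_enable() just below, the eDP 1.4 alternate link rates require, and anything unlisted falls back to 8100:

	skl_cdclk_get_vco(432000);	/* 8640 */
	skl_cdclk_get_vco(450000);	/* 8100 */
	skl_cdclk_get_vco(999999);	/* 8100, the default for unknown values */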
+
+static void
+skl_dpll0_enable(struct drm_i915_private *dev_priv, unsigned int required_vco)
+{
+       unsigned int min_freq;
+       u32 val;
+
+       if (required_vco == 8640)
+               min_freq = 308570;
+       else
+               min_freq = 337500;
+
+       /* select the minimum CDCLK before enabling DPLL 0 */
+       val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_freq);
+
+       I915_WRITE(CDCLK_CTL, val);
+       POSTING_READ(CDCLK_CTL);
+
+       /*
+        * We always enable DPLL0 with the lowest link rate possible, but still
+        * taking into account the VCO required to operate the eDP panel at the
+        * desired frequency. The usual DP link rates operate with a VCO of
+        * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640.
+        * The modeset code is responsible for the selection of the exact link
+        * rate later on, with the constraint of choosing a frequency that
+        * works with required_vco.
+        */
+       val = I915_READ(DPLL_CTRL1);
+
+       val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) |
+                DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0));
+       val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0);
+       if (required_vco == 8640)
+               val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080,
+                                           SKL_DPLL0);
+       else
+               val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810,
+                                           SKL_DPLL0);
+
+       I915_WRITE(DPLL_CTRL1, val);
+       POSTING_READ(DPLL_CTRL1);
+
+       I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE);
+
+       if (wait_for(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK, 5))
+               DRM_ERROR("DPLL0 not locked\n");
+}
+
+static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv)
+{
+       int ret;
+       u32 val;
+
+       /* inform PCU we want to change CDCLK */
+       val = SKL_CDCLK_PREPARE_FOR_CHANGE;
+       mutex_lock(&dev_priv->rps.hw_lock);
+       ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE);
+}
+
+static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
+{
+       unsigned int i;
+
+       for (i = 0; i < 15; i++) {
+               if (skl_cdclk_pcu_ready(dev_priv))
+                       return true;
+               udelay(10);
+       }
+
+       return false;
+}
+
+static void skl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int freq)
+{
+       u32 freq_select, pcu_ack;
+
+       DRM_DEBUG_DRIVER("Changing CDCLK to %dKHz\n", freq);
+
+       if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) {
+               DRM_ERROR("failed to inform PCU about cdclk change\n");
+               return;
+       }
+
+       /* set CDCLK_CTL */
+       switch (freq) {
+       case 450000:
+       case 432000:
+               freq_select = CDCLK_FREQ_450_432;
+               pcu_ack = 1;
+               break;
+       case 540000:
+               freq_select = CDCLK_FREQ_540;
+               pcu_ack = 2;
+               break;
+       case 308570:
+       case 337500:
+       default:
+               freq_select = CDCLK_FREQ_337_308;
+               pcu_ack = 0;
+               break;
+       case 617140:
+       case 675000:
+               freq_select = CDCLK_FREQ_675_617;
+               pcu_ack = 3;
+               break;
+       }
+
+       I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(freq));
+       POSTING_READ(CDCLK_CTL);
+
+       /* inform PCU of the change */
+       mutex_lock(&dev_priv->rps.hw_lock);
+       sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
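Stripped of the frequency decoding and error paths, skl_set_cdclk() is a three-step handshake with the PCU; a condensed sketch, not a drop-in. Note that skl_cdclk_wait_for_pcu_ready() above bounds the polling at 15 tries with udelay(10), so roughly 150us of delay plus the pcode round trips:

	if (!skl_cdclk_wait_for_pcu_ready(dev_priv))	/* 1. ask permission */
		return;
	I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(freq));	/* 2. program */
	mutex_lock(&dev_priv->rps.hw_lock);
	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);	/* 3. ack */
	mutex_unlock(&dev_priv->rps.hw_lock);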
+
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
+{
+       /* disable DBUF power */
+       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
+       POSTING_READ(DBUF_CTL);
+
+       udelay(10);
+
+       if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
+               DRM_ERROR("DBuf power disable timeout\n");
+
+       /* disable DPLL0 */
+       I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
+       if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
+               DRM_ERROR("Couldn't disable DPLL0\n");
+
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+}
+
+void skl_init_cdclk(struct drm_i915_private *dev_priv)
+{
+       u32 val;
+       unsigned int required_vco;
+
+       /* enable PCH reset handshake */
+       val = I915_READ(HSW_NDE_RSTWRN_OPT);
+       I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
+
+       /* enable PG1 and Misc I/O */
+       intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
+
+       /* DPLL0 already enabled!? */
+       if (I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE) {
+               DRM_DEBUG_DRIVER("DPLL0 already running\n");
+               return;
+       }
+
+       /* enable DPLL0 */
+       required_vco = skl_cdclk_get_vco(dev_priv->skl_boot_cdclk);
+       skl_dpll0_enable(dev_priv, required_vco);
+
+       /* set CDCLK to the frequency the BIOS chose */
+       skl_set_cdclk(dev_priv, dev_priv->skl_boot_cdclk);
+
+       /* enable DBUF power */
+       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
+       POSTING_READ(DBUF_CTL);
+
+       udelay(10);
+
+       if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
+               DRM_ERROR("DBuf power enable timeout\n");
+}
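skl_init_cdclk() and skl_uninit_cdclk() are mirror images (handshake/DPLL0/cdclk/DBuf up, DBuf/DPLL0/power-well down), and the intel_drv.h hunk further down exports both. A hedged sketch of how a caller might pair them, with hypothetical function names since the call sites are not part of this diff:

	static void skl_display_core_suspend(struct drm_i915_private *dev_priv)
	{
		skl_uninit_cdclk(dev_priv);	/* DBuf off, DPLL0 off, PG1 put */
	}

	static void skl_display_core_resume(struct drm_i915_private *dev_priv)
	{
		skl_init_cdclk(dev_priv);	/* handshake, DPLL0, BIOS cdclk, DBuf */
	}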
+
 /* returns HPLL frequency in kHz */
 static int valleyview_get_vco(struct drm_i915_private *dev_priv)
 {
        int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
        /* Obtain SKU information */
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
                CCK_FUSE_HPLL_FREQ_MASK;
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return vco_freq[hpll_freq] * 1000;
 }
@@ -5601,12 +5791,13 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
 
+       mutex_lock(&dev_priv->sb_lock);
+
        if (cdclk == 400000) {
                u32 divider;
 
                divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1;
 
-               mutex_lock(&dev_priv->dpio_lock);
                /* adjust cdclk divider */
                val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
                val &= ~DISPLAY_FREQUENCY_VALUES;
@@ -5617,10 +5808,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
                              DISPLAY_FREQUENCY_STATUS) == (divider << DISPLAY_FREQUENCY_STATUS_SHIFT),
                             50))
                        DRM_ERROR("timed out waiting for CDclk change\n");
-               mutex_unlock(&dev_priv->dpio_lock);
        }
 
-       mutex_lock(&dev_priv->dpio_lock);
        /* adjust self-refresh exit latency value */
        val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC);
        val &= ~0x7f;
@@ -5634,7 +5823,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
        else
                val |= 3000 / 250; /* 3.0 usec */
        vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
-       mutex_unlock(&dev_priv->dpio_lock);
+
+       mutex_unlock(&dev_priv->sb_lock);
 
        vlv_update_cdclk(dev);
 }
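Beyond the dpio_lock -> sb_lock rename that runs through the whole series, valleyview_set_cdclk() now takes the sideband lock once around the entire sequence instead of twice in a row. The resulting shape, as a sketch:

	mutex_lock(&dev_priv->sb_lock);
	if (cdclk == 400000) {
		/* CCK: bump the cdclk divider and wait for it to stick */
	}
	/* BUNIT: adjust the self-refresh exit latency for the new clock */
	mutex_unlock(&dev_priv->sb_lock);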
@@ -6562,9 +6752,9 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev)
        if (dev_priv->hpll_freq == 0)
                dev_priv->hpll_freq = valleyview_get_vco(dev_priv);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        divider = val & DISPLAY_FREQUENCY_VALUES;
 
@@ -6906,7 +7096,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
        u32 bestn, bestm1, bestm2, bestp1, bestp2;
        u32 coreclk, reg_val;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        bestn = pipe_config->dpll.n;
        bestm1 = pipe_config->dpll.m1;
@@ -6984,7 +7174,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
        vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
 
        vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void chv_update_pll(struct intel_crtc *crtc,
@@ -7029,7 +7219,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
        I915_WRITE(dpll_reg,
                   pipe_config->dpll_hw_state.dpll & ~DPLL_VCO_ENABLE);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* p1 and p2 divider */
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
@@ -7102,7 +7292,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
                        vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
                        DPIO_AFC_RECAL);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 /**
@@ -7477,6 +7667,9 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc,
        struct drm_connector_state *connector_state;
        int i;
 
+       memset(&crtc_state->dpll_hw_state, 0,
+              sizeof(crtc_state->dpll_hw_state));
+
        for_each_connector_in_state(state, connector, connector_state, i) {
                if (connector_state->crtc != &crtc->base)
                        continue;
@@ -7600,9 +7793,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
        if (!(pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE))
                return;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
        clock.m2 = mdiv & DPIO_M2DIV_MASK;
@@ -7696,12 +7889,12 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
        u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2;
        int refclk = 100000;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
        pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
        pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
        pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
        clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff);
@@ -8067,7 +8260,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
                 with_fdi, "LP PCH doesn't have FDI\n"))
                with_fdi = false;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
        tmp &= ~SBI_SSCCTL_DISABLE;
@@ -8093,7 +8286,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
        tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
        intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 /* Sequence to disable CLKOUT_DP */
@@ -8102,7 +8295,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t reg, tmp;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
               SBI_GEN0 : SBI_DBUFF0;
@@ -8121,7 +8314,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev)
                intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
        }
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void lpt_init_pch_refclk(struct drm_device *dev)
@@ -8518,6 +8711,9 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
        bool is_lvds = false;
        struct intel_shared_dpll *pll;
 
+       memset(&crtc_state->dpll_hw_state, 0,
+              sizeof(crtc_state->dpll_hw_state));
+
        is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS);
 
        WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
@@ -9353,6 +9549,12 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
        }
 
        pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+
+       if (INTEL_INFO(dev)->gen >= 9) {
+               pipe_config->scaler_state.scaler_id = -1;
+               pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
+       }
+
        if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
                if (INTEL_INFO(dev)->gen == 9)
                        skylake_get_pfit_config(crtc, pipe_config);
@@ -9360,10 +9562,6 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
                        ironlake_get_pfit_config(crtc, pipe_config);
                else
                        MISSING_CASE(INTEL_INFO(dev)->gen);
-
-       } else {
-               pipe_config->scaler_state.scaler_id = -1;
-               pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
        }
 
        if (IS_HASWELL(dev))
@@ -9868,7 +10066,7 @@ retry:
                goto fail;
        }
 
-       crtc_state->base.enable = true;
+       crtc_state->base.active = crtc_state->base.enable = true;
 
        if (!mode)
                mode = &load_detect_mode;
@@ -9965,7 +10163,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
                connector_state->best_encoder = NULL;
                connector_state->crtc = NULL;
 
-               crtc_state->base.enable = false;
+               crtc_state->base.enable = crtc_state->base.active = false;
 
                ret = intel_modeset_setup_plane_state(state, crtc, NULL, NULL,
                                                      0, 0);
@@ -10690,7 +10888,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
        else if (i915.enable_execlists)
                return true;
        else
-               return ring != i915_gem_request_get_ring(obj->last_read_req);
+               return ring != i915_gem_request_get_ring(obj->last_write_req);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -10790,22 +10988,19 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 
 static void intel_mmio_flip_work_func(struct work_struct *work)
 {
-       struct intel_crtc *crtc =
-               container_of(work, struct intel_crtc, mmio_flip.work);
-       struct intel_mmio_flip *mmio_flip;
+       struct intel_mmio_flip *mmio_flip =
+               container_of(work, struct intel_mmio_flip, work);
 
-       mmio_flip = &crtc->mmio_flip;
        if (mmio_flip->req)
                WARN_ON(__i915_wait_request(mmio_flip->req,
-                                           crtc->reset_counter,
-                                           false, NULL, NULL) != 0);
+                                           mmio_flip->crtc->reset_counter,
+                                           false, NULL,
+                                           &mmio_flip->i915->rps.mmioflips));
 
-       intel_do_mmio_flip(crtc);
-       if (mmio_flip->req) {
-               mutex_lock(&crtc->base.dev->struct_mutex);
-               i915_gem_request_assign(&mmio_flip->req, NULL);
-               mutex_unlock(&crtc->base.dev->struct_mutex);
-       }
+       intel_do_mmio_flip(mmio_flip->crtc);
+
+       i915_gem_request_unreference__unlocked(mmio_flip->req);
+       kfree(mmio_flip);
 }
 
 static int intel_queue_mmio_flip(struct drm_device *dev,
@@ -10815,12 +11010,18 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
                                 struct intel_engine_cs *ring,
                                 uint32_t flags)
 {
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_mmio_flip *mmio_flip;
+
+       mmio_flip = kmalloc(sizeof(*mmio_flip), GFP_KERNEL);
+       if (mmio_flip == NULL)
+               return -ENOMEM;
 
-       i915_gem_request_assign(&intel_crtc->mmio_flip.req,
-                               obj->last_write_req);
+       mmio_flip->i915 = to_i915(dev);
+       mmio_flip->req = i915_gem_request_reference(obj->last_write_req);
+       mmio_flip->crtc = to_intel_crtc(crtc);
 
-       schedule_work(&intel_crtc->mmio_flip.work);
+       INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func);
+       schedule_work(&mmio_flip->work);
 
        return 0;
 }
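With the flip state moved out of intel_crtc into a per-flip allocation, the worker no longer needs struct_mutex just to drop the request; it uses the new unlocked unreference and frees itself. The ownership handoff, sketched as a comment (every call below appears in the hunks above):

	/*
	 * intel_queue_mmio_flip():              intel_mmio_flip_work_func():
	 *   kmalloc(mmio_flip)                    __i915_wait_request(req)
	 *   req = reference(last_write_req)       intel_do_mmio_flip(crtc)
	 *   INIT_WORK + schedule_work()  ---->    unreference__unlocked(req)
	 *                                         kfree(mmio_flip)
	 */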
@@ -11005,7 +11206,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
        } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
                ring = &dev_priv->ring[BCS];
        } else if (INTEL_INFO(dev)->gen >= 7) {
-               ring = i915_gem_request_get_ring(obj->last_read_req);
+               ring = i915_gem_request_get_ring(obj->last_write_req);
                if (ring == NULL || ring->id != RCS)
                        ring = &dev_priv->ring[BCS];
        } else {
@@ -11021,7 +11222,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
         */
        ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
                                         crtc->primary->state,
-                                        mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring);
+                                        mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
        if (ret)
                goto cleanup_pending;
 
@@ -11037,6 +11238,12 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                i915_gem_request_assign(&work->flip_queued_req,
                                        obj->last_write_req);
        } else {
+               if (obj->last_write_req) {
+                       ret = i915_gem_check_olr(obj->last_write_req);
+                       if (ret)
+                               goto cleanup_unpin;
+               }
+
                ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
                                                   page_flip_flags);
                if (ret)
@@ -11303,9 +11510,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
        DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock);
        DRM_DEBUG_KMS("pipe src size: %dx%d\n",
                      pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-       DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers);
-       DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users);
-       DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id);
+       DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n",
+                     crtc->num_scalers,
+                     pipe_config->scaler_state.scaler_users,
+                     pipe_config->scaler_state.scaler_id);
        DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n",
                      pipe_config->gmch_pfit.control,
                      pipe_config->gmch_pfit.pgm_ratios,
@@ -11317,6 +11525,39 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
        DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled);
        DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
 
+       if (IS_BROXTON(dev)) {
+               DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, "
+                             "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
+                             "pll6: 0x%x, pll8: 0x%x, pcsdw12: 0x%x\n",
+                             pipe_config->ddi_pll_sel,
+                             pipe_config->dpll_hw_state.ebb0,
+                             pipe_config->dpll_hw_state.pll0,
+                             pipe_config->dpll_hw_state.pll1,
+                             pipe_config->dpll_hw_state.pll2,
+                             pipe_config->dpll_hw_state.pll3,
+                             pipe_config->dpll_hw_state.pll6,
+                             pipe_config->dpll_hw_state.pll8,
+                             pipe_config->dpll_hw_state.pcsdw12);
+       } else if (IS_SKYLAKE(dev)) {
+               DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
+                             "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
+                             pipe_config->ddi_pll_sel,
+                             pipe_config->dpll_hw_state.ctrl1,
+                             pipe_config->dpll_hw_state.cfgcr1,
+                             pipe_config->dpll_hw_state.cfgcr2);
+       } else if (HAS_DDI(dev)) {
+               DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n",
+                             pipe_config->ddi_pll_sel,
+                             pipe_config->dpll_hw_state.wrpll);
+       } else {
+               DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
+                             "fp0: 0x%x, fp1: 0x%x\n",
+                             pipe_config->dpll_hw_state.dpll,
+                             pipe_config->dpll_hw_state.dpll_md,
+                             pipe_config->dpll_hw_state.fp0,
+                             pipe_config->dpll_hw_state.fp1);
+       }
+
        DRM_DEBUG_KMS("planes on this crtc\n");
        list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
                intel_plane = to_intel_plane(plane);
@@ -11454,12 +11695,18 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
        struct intel_crtc_scaler_state scaler_state;
        struct intel_dpll_hw_state dpll_hw_state;
        enum intel_dpll_id shared_dpll;
+       uint32_t ddi_pll_sel;
+
+       /* FIXME: before the switch to atomic started, a new pipe_config was
+        * kzalloc'd. Code that depends on any field being zero should be
+        * fixed, so that the crtc_state can be safely duplicated. For now,
+        * only fields that are known not to cause problems are preserved. */
 
-       /* Clear only the intel specific part of the crtc state excluding scalers */
        tmp_state = crtc_state->base;
        scaler_state = crtc_state->scaler_state;
        shared_dpll = crtc_state->shared_dpll;
        dpll_hw_state = crtc_state->dpll_hw_state;
+       ddi_pll_sel = crtc_state->ddi_pll_sel;
 
        memset(crtc_state, 0, sizeof *crtc_state);
 
@@ -11467,6 +11714,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
        crtc_state->scaler_state = scaler_state;
        crtc_state->shared_dpll = shared_dpll;
        crtc_state->dpll_hw_state = dpll_hw_state;
+       crtc_state->ddi_pll_sel = ddi_pll_sel;
 }
 
 static int
@@ -12264,8 +12512,6 @@ static int __intel_set_mode_setup_plls(struct drm_atomic_state *state)
                if (needs_modeset(crtc_state)) {
                        clear_pipes |= 1 << intel_crtc->pipe;
                        intel_crtc_state->shared_dpll = DPLL_ID_PRIVATE;
-                       memset(&intel_crtc_state->dpll_hw_state, 0,
-                              sizeof(intel_crtc_state->dpll_hw_state));
                }
        }
 
@@ -12342,7 +12588,6 @@ static int __intel_set_mode(struct drm_crtc *modeset_crtc,
                        continue;
 
                if (!crtc_state->enable) {
-                       crtc_state->active = false;
                        intel_crtc_disable(crtc);
                } else if (crtc->state->enable) {
                        intel_crtc_disable_planes(crtc);
@@ -12492,7 +12737,8 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc)
                        continue;
                }
 
-               crtc_state->base.enable = intel_crtc->new_enabled;
+               crtc_state->base.active = crtc_state->base.enable =
+                       intel_crtc->new_enabled;
 
                if (&intel_crtc->base == crtc)
                        drm_mode_copy(&crtc_state->base.mode, &crtc->mode);
@@ -12617,11 +12863,16 @@ intel_modeset_stage_output_state(struct drm_device *dev,
        }
 
        for_each_crtc_in_state(state, crtc, crtc_state, i) {
+               bool has_connectors;
+
                ret = drm_atomic_add_affected_connectors(state, crtc);
                if (ret)
                        return ret;
 
-               crtc_state->enable = drm_atomic_connectors_for_crtc(state, crtc);
+               has_connectors = !!drm_atomic_connectors_for_crtc(state, crtc);
+               if (has_connectors != crtc_state->enable)
+                       crtc_state->enable = crtc_state->active =
+                               has_connectors;
        }
 
        ret = intel_modeset_setup_plane_state(state, set->crtc, set->mode,
@@ -13008,8 +13259,11 @@ intel_check_primary_plane(struct drm_plane *plane,
                intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL;
 
        if (INTEL_INFO(dev)->gen >= 9) {
-               min_scale = 1;
-               max_scale = skl_max_scale(intel_crtc, crtc_state);
+               /* use scaler when colorkey is not required */
+               if (to_intel_plane(plane)->ckey.flags == I915_SET_COLORKEY_NONE) {
+                       min_scale = 1;
+                       max_scale = skl_max_scale(intel_crtc, crtc_state);
+               }
                can_position = true;
        }
 
@@ -13251,8 +13505,8 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
        primary->max_downscale = 1;
        if (INTEL_INFO(dev)->gen >= 9) {
                primary->can_scale = true;
+               state->scaler_id = -1;
        }
-       state->scaler_id = -1;
        primary->pipe = pipe;
        primary->plane = pipe;
        primary->check_plane = intel_check_primary_plane;
@@ -13262,12 +13516,15 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
        if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
                primary->plane = !pipe;
 
-       if (INTEL_INFO(dev)->gen <= 3) {
-               intel_primary_formats = intel_primary_formats_gen2;
-               num_formats = ARRAY_SIZE(intel_primary_formats_gen2);
+       if (INTEL_INFO(dev)->gen >= 9) {
+               intel_primary_formats = skl_primary_formats;
+               num_formats = ARRAY_SIZE(skl_primary_formats);
+       } else if (INTEL_INFO(dev)->gen >= 4) {
+               intel_primary_formats = i965_primary_formats;
+               num_formats = ARRAY_SIZE(i965_primary_formats);
        } else {
-               intel_primary_formats = intel_primary_formats_gen4;
-               num_formats = ARRAY_SIZE(intel_primary_formats_gen4);
+               intel_primary_formats = i8xx_primary_formats;
+               num_formats = ARRAY_SIZE(i8xx_primary_formats);
        }
 
        drm_universal_plane_init(dev, &primary->base, 0,
@@ -13434,7 +13691,6 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
        cursor->max_downscale = 1;
        cursor->pipe = pipe;
        cursor->plane = pipe;
-       state->scaler_id = -1;
        cursor->check_plane = intel_check_cursor_plane;
        cursor->commit_plane = intel_commit_cursor_plane;
        cursor->disable_plane = intel_disable_cursor_plane;
@@ -13457,6 +13713,9 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
                                state->base.rotation);
        }
 
+       if (INTEL_INFO(dev)->gen >= 9)
+               state->scaler_id = -1;
+
        drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs);
 
        return &cursor->base;
@@ -13550,8 +13809,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
        dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
        dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
 
-       INIT_WORK(&intel_crtc->mmio_flip.work, intel_mmio_flip_work_func);
-
        drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
 
        WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
@@ -13948,25 +14205,35 @@ static int intel_framebuffer_init(struct drm_device *dev,
        case DRM_FORMAT_ARGB8888:
                break;
        case DRM_FORMAT_XRGB1555:
-       case DRM_FORMAT_ARGB1555:
                if (INTEL_INFO(dev)->gen > 3) {
                        DRM_DEBUG("unsupported pixel format: %s\n",
                                  drm_get_format_name(mode_cmd->pixel_format));
                        return -EINVAL;
                }
                break;
-       case DRM_FORMAT_XBGR8888:
        case DRM_FORMAT_ABGR8888:
+               if (!IS_VALLEYVIEW(dev) && INTEL_INFO(dev)->gen < 9) {
+                       DRM_DEBUG("unsupported pixel format: %s\n",
+                                 drm_get_format_name(mode_cmd->pixel_format));
+                       return -EINVAL;
+               }
+               break;
+       case DRM_FORMAT_XBGR8888:
        case DRM_FORMAT_XRGB2101010:
-       case DRM_FORMAT_ARGB2101010:
        case DRM_FORMAT_XBGR2101010:
-       case DRM_FORMAT_ABGR2101010:
                if (INTEL_INFO(dev)->gen < 4) {
                        DRM_DEBUG("unsupported pixel format: %s\n",
                                  drm_get_format_name(mode_cmd->pixel_format));
                        return -EINVAL;
                }
                break;
+       case DRM_FORMAT_ABGR2101010:
+               if (!IS_VALLEYVIEW(dev)) {
+                       DRM_DEBUG("unsupported pixel format: %s\n",
+                                 drm_get_format_name(mode_cmd->pixel_format));
+                       return -EINVAL;
+               }
+               break;
        case DRM_FORMAT_YUYV:
        case DRM_FORMAT_UYVY:
        case DRM_FORMAT_YVYU:
@@ -14595,6 +14862,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 
                WARN_ON(crtc->active);
                crtc->base.state->enable = false;
+               crtc->base.state->active = false;
                crtc->base.enabled = false;
        }
 
@@ -14623,6 +14891,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                              crtc->active ? "enabled" : "disabled");
 
                crtc->base.state->enable = crtc->active;
+               crtc->base.state->active = crtc->active;
                crtc->base.enabled = crtc->active;
 
                /* Because we only establish the connector -> encoder ->
@@ -14761,6 +15030,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
                                                                 crtc->config);
 
                crtc->base.state->enable = crtc->active;
+               crtc->base.state->active = crtc->active;
                crtc->base.enabled = crtc->active;
 
                plane_state = to_intel_plane_state(primary->state);
index eca82cff40b9870684e2edf8a479450bdba2fba4..280c282da9bd6d35b78e7b92d57122442a8f096d 100644 (file)
@@ -1097,6 +1097,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock)
 {
        u32 ctrl1;
 
+       memset(&pipe_config->dpll_hw_state, 0,
+              sizeof(pipe_config->dpll_hw_state));
+
        pipe_config->ddi_pll_sel = SKL_DPLL0;
        pipe_config->dpll_hw_state.cfgcr1 = 0;
        pipe_config->dpll_hw_state.cfgcr2 = 0;
@@ -1266,7 +1269,7 @@ static void snprintf_int_array(char *str, size_t len,
        str[0] = '\0';
 
        for (i = 0; i < nelem; i++) {
-               int r = snprintf(str, len, "%d,", array[i]);
+               int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]);
                if (r >= len)
                        return;
                str += r;
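The separator fix turns the rendered list from a trailing-comma form into a conventional one. A usage sketch with a hypothetical rates array (snprintf_int_array() itself is the function being patched):

	int rates[] = { 162000, 270000, 540000 };
	char str[64];

	snprintf_int_array(str, sizeof(str), rates, ARRAY_SIZE(rates));
	/* before: "162000,270000,540000,"
	 * after:  "162000, 270000, 540000" */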
@@ -1567,7 +1570,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
 
        /* Split out the IBX/CPU vs CPT settings */
 
-       if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+       if (IS_GEN7(dev) && port == PORT_A) {
                if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
                        intel_dp->DP |= DP_SYNC_HS_HIGH;
                if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
@@ -1578,7 +1581,18 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
                        intel_dp->DP |= DP_ENHANCED_FRAMING;
 
                intel_dp->DP |= crtc->pipe << 29;
-       } else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
+       } else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+               u32 trans_dp;
+
+               intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+
+               trans_dp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+               if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
+                       trans_dp |= TRANS_DP_ENH_FRAMING;
+               else
+                       trans_dp &= ~TRANS_DP_ENH_FRAMING;
+               I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp);
+       } else {
                if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev))
                        intel_dp->DP |= intel_dp->color_range;
 
@@ -1591,14 +1605,10 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
                if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
                        intel_dp->DP |= DP_ENHANCED_FRAMING;
 
-               if (!IS_CHERRYVIEW(dev)) {
-                       if (crtc->pipe == 1)
-                               intel_dp->DP |= DP_PIPEB_SELECT;
-               } else {
+               if (IS_CHERRYVIEW(dev))
                        intel_dp->DP |= DP_PIPE_SELECT_CHV(crtc->pipe);
-               }
-       } else {
-               intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+               else if (crtc->pipe == PIPE_B)
+                       intel_dp->DP |= DP_PIPEB_SELECT;
        }
 }
 
@@ -2182,41 +2192,25 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
        if (!(tmp & DP_PORT_EN))
                return false;
 
-       if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+       if (IS_GEN7(dev) && port == PORT_A) {
                *pipe = PORT_TO_PIPE_CPT(tmp);
-       } else if (IS_CHERRYVIEW(dev)) {
-               *pipe = DP_PORT_TO_PIPE_CHV(tmp);
-       } else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
-               *pipe = PORT_TO_PIPE(tmp);
-       } else {
-               u32 trans_sel;
-               u32 trans_dp;
-               int i;
-
-               switch (intel_dp->output_reg) {
-               case PCH_DP_B:
-                       trans_sel = TRANS_DP_PORT_SEL_B;
-                       break;
-               case PCH_DP_C:
-                       trans_sel = TRANS_DP_PORT_SEL_C;
-                       break;
-               case PCH_DP_D:
-                       trans_sel = TRANS_DP_PORT_SEL_D;
-                       break;
-               default:
-                       return true;
-               }
+       } else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+               enum pipe p;
 
-               for_each_pipe(dev_priv, i) {
-                       trans_dp = I915_READ(TRANS_DP_CTL(i));
-                       if ((trans_dp & TRANS_DP_PORT_SEL_MASK) == trans_sel) {
-                               *pipe = i;
+               for_each_pipe(dev_priv, p) {
+                       u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
+                       if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
+                               *pipe = p;
                                return true;
                        }
                }
 
                DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
                              intel_dp->output_reg);
+       } else if (IS_CHERRYVIEW(dev)) {
+               *pipe = DP_PORT_TO_PIPE_CHV(tmp);
+       } else {
+               *pipe = PORT_TO_PIPE(tmp);
        }
 
        return true;
@@ -2237,24 +2231,24 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 
        pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A;
 
-       if ((port == PORT_A) || !HAS_PCH_CPT(dev)) {
-               if (tmp & DP_SYNC_HS_HIGH)
+       if (HAS_PCH_CPT(dev) && port != PORT_A) {
+               tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+               if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
                        flags |= DRM_MODE_FLAG_PHSYNC;
                else
                        flags |= DRM_MODE_FLAG_NHSYNC;
 
-               if (tmp & DP_SYNC_VS_HIGH)
+               if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
                        flags |= DRM_MODE_FLAG_PVSYNC;
                else
                        flags |= DRM_MODE_FLAG_NVSYNC;
        } else {
-               tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
-               if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
+               if (tmp & DP_SYNC_HS_HIGH)
                        flags |= DRM_MODE_FLAG_PHSYNC;
                else
                        flags |= DRM_MODE_FLAG_NHSYNC;
 
-               if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
+               if (tmp & DP_SYNC_VS_HIGH)
                        flags |= DRM_MODE_FLAG_PVSYNC;
                else
                        flags |= DRM_MODE_FLAG_NVSYNC;
@@ -2361,7 +2355,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder)
 
        intel_dp_link_down(intel_dp);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* Propagate soft reset to data lane reset */
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
@@ -2380,7 +2374,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder)
        val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void
@@ -2419,7 +2413,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp,
                }
                I915_WRITE(DP_TP_CTL(port), temp);
 
-       } else if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+       } else if ((IS_GEN7(dev) && port == PORT_A) ||
+                  (HAS_PCH_CPT(dev) && port != PORT_A)) {
                *DP &= ~DP_LINK_TRAIN_MASK_CPT;
 
                switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
@@ -2676,7 +2671,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder)
        int pipe = intel_crtc->pipe;
        u32 val;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
        val = 0;
@@ -2689,7 +2684,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder)
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        intel_enable_dp(encoder);
 }
@@ -2707,7 +2702,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder)
        intel_dp_prepare(encoder);
 
        /* Program Tx lane resets to default */
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
                         DPIO_PCS_TX_LANE2_RESET |
                         DPIO_PCS_TX_LANE1_RESET);
@@ -2721,7 +2716,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder)
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void chv_pre_enable_dp(struct intel_encoder *encoder)
@@ -2737,7 +2732,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder)
        int data, i, stagger;
        u32 val;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* allow hardware to manage TX FIFO reset source */
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -2807,7 +2802,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder)
                       DPIO_TX1_STAGGER_MULT(7) |
                       DPIO_TX2_STAGGER_MULT(5));
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        intel_enable_dp(encoder);
 }
@@ -2825,7 +2820,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder)
 
        intel_dp_prepare(encoder);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* program left/right clock distribution */
        if (pipe != PIPE_B) {
@@ -2875,7 +2870,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder)
                val |= CHV_CMN_USEDCLKCHANNEL;
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 /*
@@ -3100,7 +3095,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp)
                return 0;
        }
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
@@ -3109,7 +3104,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp)
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x80000000);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return 0;
 }
@@ -3196,7 +3191,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp)
                return 0;
        }
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* Clear calc init */
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
@@ -3283,7 +3278,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp)
        val |= DPIO_LRC_BYPASS;
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return 0;
 }
@@ -3848,6 +3843,7 @@ static void
 intel_dp_link_down(struct intel_dp *intel_dp)
 {
        struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+       struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
        enum port port = intel_dig_port->port;
        struct drm_device *dev = intel_dig_port->base.base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3861,36 +3857,41 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 
        DRM_DEBUG_KMS("\n");
 
-       if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+       if ((IS_GEN7(dev) && port == PORT_A) ||
+           (HAS_PCH_CPT(dev) && port != PORT_A)) {
                DP &= ~DP_LINK_TRAIN_MASK_CPT;
-               I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
+               DP |= DP_LINK_TRAIN_PAT_IDLE_CPT;
        } else {
                if (IS_CHERRYVIEW(dev))
                        DP &= ~DP_LINK_TRAIN_MASK_CHV;
                else
                        DP &= ~DP_LINK_TRAIN_MASK;
-               I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE);
+               DP |= DP_LINK_TRAIN_PAT_IDLE;
        }
+       I915_WRITE(intel_dp->output_reg, DP);
        POSTING_READ(intel_dp->output_reg);
 
-       if (HAS_PCH_IBX(dev) &&
-           I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
-               /* Hardware workaround: leaving our transcoder select
-                * set to transcoder B while it's off will prevent the
-                * corresponding HDMI output on transcoder A.
-                *
-                * Combine this with another hardware workaround:
-                * transcoder select bit can only be cleared while the
-                * port is enabled.
-                */
-               DP &= ~DP_PIPEB_SELECT;
+       DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+       I915_WRITE(intel_dp->output_reg, DP);
+       POSTING_READ(intel_dp->output_reg);
+
+       /*
+        * HW workaround for IBX, we need to move the port
+        * to transcoder A after disabling it to allow the
+        * matching HDMI port to be enabled on transcoder A.
+        */
+       if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) {
+               /* always enable with pattern 1 (as per spec) */
+               DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK);
+               DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1;
+               I915_WRITE(intel_dp->output_reg, DP);
+               POSTING_READ(intel_dp->output_reg);
+
+               DP &= ~DP_PORT_EN;
                I915_WRITE(intel_dp->output_reg, DP);
                POSTING_READ(intel_dp->output_reg);
        }
 
-       DP &= ~DP_AUDIO_OUTPUT_ENABLE;
-       I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN);
-       POSTING_READ(intel_dp->output_reg);
        msleep(intel_dp->panel_power_down_delay);
 }
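The link-down rewrite buffers all changes in the local DP variable and writes them out in clearly ordered steps instead of interleaving the IBX workaround with the normal path. In outline, grounded in the hunk above:

	/* 1. switch the link to the idle training pattern (CPT vs. non-CPT)
	 * 2. clear DP_PORT_EN and DP_AUDIO_OUTPUT_ENABLE together
	 * 3. IBX + pipe B + port != A only: re-enable briefly with training
	 *    pattern 1 and transcoder A selected, then disable again, so a
	 *    stale transcoder B selection cannot block HDMI on transcoder A
	 * 4. msleep(intel_dp->panel_power_down_delay)
	 */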
 
@@ -4040,46 +4041,70 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
        u8 buf;
        int test_crc_count;
        int attempts = 6;
+       int ret = 0;
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
-               return -EIO;
+       hsw_disable_ips(intel_crtc);
+
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
+               ret = -EIO;
+               goto out;
+       }
 
-       if (!(buf & DP_TEST_CRC_SUPPORTED))
-               return -ENOTTY;
+       if (!(buf & DP_TEST_CRC_SUPPORTED)) {
+               ret = -ENOTTY;
+               goto out;
+       }
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
-               return -EIO;
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+               ret = -EIO;
+               goto out;
+       }
 
        if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-                               buf | DP_TEST_SINK_START) < 0)
-               return -EIO;
+                               buf | DP_TEST_SINK_START) < 0) {
+               ret = -EIO;
+               goto out;
+       }
+
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
+               ret = -EIO;
+               goto out;
+       }
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
-               return -EIO;
        test_crc_count = buf & DP_TEST_COUNT_MASK;
 
        do {
                if (drm_dp_dpcd_readb(&intel_dp->aux,
-                                     DP_TEST_SINK_MISC, &buf) < 0)
-                       return -EIO;
+                                     DP_TEST_SINK_MISC, &buf) < 0) {
+                       ret = -EIO;
+                       goto out;
+               }
                intel_wait_for_vblank(dev, intel_crtc->pipe);
        } while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count);
 
        if (attempts == 0) {
                DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n");
-               return -ETIMEDOUT;
+               ret = -ETIMEDOUT;
+               goto out;
        }
 
-       if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0)
-               return -EIO;
+       if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
+               ret = -EIO;
+               goto out;
+       }
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
-               return -EIO;
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+               ret = -EIO;
+               goto out;
+       }
        if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-                              buf & ~DP_TEST_SINK_START) < 0)
-               return -EIO;
-
-       return 0;
+                              buf & ~DP_TEST_SINK_START) < 0) {
+               ret = -EIO;
+               goto out;
+       }
+out:
+       hsw_enable_ips(intel_crtc);
+       return ret;
 }
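Wrapping the CRC measurement in hsw_disable_ips()/hsw_enable_ips() is what forces the single-exit rewrite: every early return becomes ret = ...; goto out; so the re-enable always runs. The skeleton, with the DPCD traffic elided and a hypothetical failure predicate:

	hsw_disable_ips(intel_crtc);

	if (something_failed) {		/* hypothetical; stands in for each
					 * drm_dp_dpcd_* check above */
		ret = -EIO;
		goto out;		/* never a bare return past this point */
	}
	/* ... start sink CRC, poll the count over vblanks, read 6 bytes ... */
out:
	hsw_enable_ips(intel_crtc);
	return ret;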
 
 static bool
@@ -4142,7 +4167,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
                if (!drm_dp_dpcd_write(&intel_dp->aux,
                                        DP_TEST_EDID_CHECKSUM,
                                        &intel_connector->detect_edid->checksum,
-                                       1));
+                                       1))
                        DRM_DEBUG_KMS("Failed to write EDID checksum\n");
 
                test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -5814,12 +5839,10 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
        intel_dp_aux_init(intel_dp, intel_connector);
 
        /* init MST on ports that can support it */
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9) {
-               if (port == PORT_B || port == PORT_C || port == PORT_D) {
-                       intel_dp_mst_encoder_init(intel_dig_port,
-                                                 intel_connector->base.base.id);
-               }
-       }
+       if (HAS_DP_MST(dev) &&
+           (port == PORT_B || port == PORT_C || port == PORT_D))
+               intel_dp_mst_encoder_init(intel_dig_port,
+                                         intel_connector->base.base.id);
 
        if (!intel_edp_init_connector(intel_dp, intel_connector)) {
                drm_dp_aux_unregister(&intel_dp->aux);
index ea3368e836264876d1319bf652485b99226d6b1c..2afb31a4627573a3f97d0a4530231f27451e0793 100644 (file)
@@ -459,8 +459,10 @@ struct intel_pipe_wm {
 };
 
 struct intel_mmio_flip {
-       struct drm_i915_gem_request *req;
        struct work_struct work;
+       struct drm_i915_private *i915;
+       struct drm_i915_gem_request *req;
+       struct intel_crtc *crtc;
 };
 
 struct skl_pipe_wm {
@@ -544,7 +546,6 @@ struct intel_crtc {
        } wm;
 
        int scanline_offset;
-       struct intel_mmio_flip mmio_flip;
 
        struct intel_crtc_atomic_commit atomic;
 
@@ -555,7 +556,15 @@ struct intel_crtc {
 struct intel_plane_wm_parameters {
        uint32_t horiz_pixels;
        uint32_t vert_pixels;
+       /*
+        *   For packed pixel formats:
+        *     bytes_per_pixel - holds bytes per pixel
+        *   For planar pixel formats:
+        *     bytes_per_pixel - holds bytes per pixel for uv-plane
+        *     y_bytes_per_pixel - holds bytes per pixel for y-plane
+        */
        uint8_t bytes_per_pixel;
+       uint8_t y_bytes_per_pixel;
        bool enabled;
        bool scaled;
        u64 tiling;
@@ -1059,9 +1068,6 @@ intel_rotation_90_or_270(unsigned int rotation)
        return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270));
 }
 
-unsigned int
-intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel,
-                 uint64_t fb_modifier);
 void intel_create_rotation_property(struct drm_device *dev,
                                        struct intel_plane *plane);
 
@@ -1112,6 +1118,8 @@ void broxton_ddi_phy_init(struct drm_device *dev);
 void broxton_ddi_phy_uninit(struct drm_device *dev);
 void bxt_enable_dc9(struct drm_i915_private *dev_priv);
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
+void skl_init_cdclk(struct drm_i915_private *dev_priv);
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
                      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1359,9 +1367,10 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv,
-                   struct drm_i915_file_private *file_priv);
+                   struct intel_rps_client *rps,
+                   unsigned long submitted);
 void intel_queue_rps_boost_for_request(struct drm_device *dev,
-                                      struct drm_i915_gem_request *rq);
+                                      struct drm_i915_gem_request *req);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
@@ -1374,8 +1383,6 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);
 
 /* intel_sprite.c */
 int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane);
-void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
-                              enum plane plane);
 int intel_plane_restore(struct drm_plane *plane);
 int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
                              struct drm_file *file_priv);
index 51966426addfbd612fa4d34f97e565590b6fbd51..b5a5558ecd6314c9014687125e4b6d5f671ab78b 100644 (file)
@@ -239,7 +239,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs,
 
 static void band_gap_reset(struct drm_i915_private *dev_priv)
 {
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        vlv_flisdsi_write(dev_priv, 0x08, 0x0001);
        vlv_flisdsi_write(dev_priv, 0x0F, 0x0005);
@@ -248,7 +248,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv)
        vlv_flisdsi_write(dev_priv, 0x0F, 0x0000);
        vlv_flisdsi_write(dev_priv, 0x08, 0x0000);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -346,11 +346,11 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder)
 
        DRM_DEBUG_KMS("\n");
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        /* program rcomp for compliance; reduce from 50 ohms to 45 ohms,
         * needed every time after power gating */
        vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        /* bandgap reset is needed every time we power gate */
        band_gap_reset(dev_priv);
index d2cd8d5b27a16bac1caed3fcb50c253ba333eb0c..a5e99ac305daab3ef69471d37b0ec9a97a27425c 100644 (file)
@@ -212,7 +212,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
        function = gtable[gpio].function_reg;
        pad = gtable[gpio].pad_reg;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        if (!gtable[gpio].init) {
                /* program the function */
                /* FIXME: remove constant below */
@@ -224,7 +224,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 
        /* pull up/down */
        vlv_gpio_nc_write(dev_priv, pad, val);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return data;
 }
index 3622d0bafdf8ad0565dd7284ad628125043034d6..d20cf37b6901b68fa9926edeb3019e0b407f5af2 100644 (file)
@@ -162,59 +162,41 @@ static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
 
 #endif
 
-static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp)
+static int dsi_calc_mnp(int target_dsi_clk, struct dsi_mnp *dsi_mnp)
 {
-       u32 m, n, p;
-       u32 ref_clk;
-       u32 error;
-       u32 tmp_error;
-       int target_dsi_clk;
-       int calc_dsi_clk;
-       u32 calc_m;
-       u32 calc_p;
+       unsigned int calc_m = 0, calc_p = 0;
+       unsigned int m, n = 1, p;
+       int ref_clk = 25000;
+       int delta = target_dsi_clk;
        u32 m_seed;
 
-       /* dsi_clk is expected in KHZ */
-       if (dsi_clk < 300000 || dsi_clk > 1150000) {
+       /* target_dsi_clk is expected in kHz */
+       if (target_dsi_clk < 300000 || target_dsi_clk > 1150000) {
                DRM_ERROR("DSI CLK Out of Range\n");
                return -ECHRNG;
        }
 
-       ref_clk = 25000;
-       target_dsi_clk = dsi_clk;
-       error = 0xFFFFFFFF;
-       tmp_error = 0xFFFFFFFF;
-       calc_m = 0;
-       calc_p = 0;
-
-       for (m = 62; m <= 92; m++) {
-               for (p = 2; p <= 6; p++) {
-                       /* Find the optimal m and p divisors
-                          with minimal error +/- the required clock */
-                       calc_dsi_clk = (m * ref_clk) / p;
-                       if (calc_dsi_clk == target_dsi_clk) {
-                               calc_m = m;
-                               calc_p = p;
-                               error = 0;
-                               break;
-                       } else
-                               tmp_error = abs(target_dsi_clk - calc_dsi_clk);
-
-                       if (tmp_error < error) {
-                               error = tmp_error;
+       for (m = 62; m <= 92 && delta; m++) {
+               for (p = 2; p <= 6 && delta; p++) {
+                       /*
+                        * Find the m and p divisors that minimize the
+                        * delta from the required clock
+                        */
+                       int calc_dsi_clk = (m * ref_clk) / (p * n);
+                       int d = abs(target_dsi_clk - calc_dsi_clk);
+                       if (d < delta) {
+                               delta = d;
                                calc_m = m;
                                calc_p = p;
                        }
                }
-
-               if (error == 0)
-                       break;
        }
 
+       /* register holds log2(N1); this works for powers of two */
+       n = ffs(n) - 1;
        m_seed = lfsr_converts[calc_m - 62];
-       n = 1;
        dsi_mnp->dsi_pll_ctrl = 1 << (DSI_PLL_P1_POST_DIV_SHIFT + calc_p - 2);
-       dsi_mnp->dsi_pll_div = (n - 1) << DSI_PLL_N1_DIV_SHIFT |
+       dsi_mnp->dsi_pll_div = n << DSI_PLL_N1_DIV_SHIFT |
                m_seed << DSI_PLL_M1_DIV_SHIFT;
 
        return 0;
@@ -262,7 +244,7 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder)
 
        DRM_DEBUG_KMS("\n");
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        vlv_configure_dsi_pll(encoder);
 
@@ -276,11 +258,11 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder)
        if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) &
                                                DSI_PLL_LOCK, 20)) {
 
-               mutex_unlock(&dev_priv->dpio_lock);
+               mutex_unlock(&dev_priv->sb_lock);
                DRM_ERROR("DSI PLL lock failed\n");
                return;
        }
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        DRM_DEBUG_KMS("DSI PLL locked\n");
 }
@@ -292,14 +274,14 @@ void vlv_disable_dsi_pll(struct intel_encoder *encoder)
 
        DRM_DEBUG_KMS("\n");
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
        tmp &= ~DSI_PLL_VCO_EN;
        tmp |= DSI_PLL_LDO_GATE;
        vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void assert_bpp_mismatch(int pixel_format, int pipe_bpp)
@@ -331,21 +313,25 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
        struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
        u32 dsi_clock, pclk;
        u32 pll_ctl, pll_div;
-       u32 m = 0, p = 0;
+       u32 m = 0, p = 0, n;
        int refclk = 25000;
        int i;
 
        DRM_DEBUG_KMS("\n");
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
        pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        /* mask out other bits and extract the P1 divisor */
        pll_ctl &= DSI_PLL_P1_POST_DIV_MASK;
        pll_ctl = pll_ctl >> (DSI_PLL_P1_POST_DIV_SHIFT - 2);
 
+       /* N1 divisor */
+       n = (pll_div & DSI_PLL_N1_DIV_MASK) >> DSI_PLL_N1_DIV_SHIFT;
+       n = 1 << n; /* register has log2(N1) */
+
        /* mask out the other bits and extract the M1 divisor */
        pll_div &= DSI_PLL_M1_DIV_MASK;
        pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT;
@@ -373,7 +359,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 
        m = i + 62;
 
-       dsi_clock = (m * refclk) / p;
+       dsi_clock = (m * refclk) / (p * n);
 
        /* pixel_format and pipe_bpp should agree */
        assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp);
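
The rewritten dsi_calc_mnp() above boils down to a brute-force divisor search: minimize |target - m*ref/(p*n)| over m in [62, 92] and p in [2, 6], with n fixed at 1 and a 25 MHz reference, exactly as the hunk shows. A minimal standalone sketch of that search under those assumptions (the LFSR M-seed lookup and the DSI_PLL_* register packing are deliberately left out):

/*
 * Standalone model of the divisor search in dsi_calc_mnp().
 */
#include <stdio.h>
#include <stdlib.h>

static int dsi_calc_mnp_model(int target_khz, int *best_m, int *best_p)
{
	const int ref_clk = 25000;		/* kHz */
	const int n = 1;
	int delta = target_khz;			/* worst case to start with */
	int m, p;

	if (target_khz < 300000 || target_khz > 1150000)
		return -1;			/* outside the PLL's range */

	for (m = 62; m <= 92 && delta; m++) {
		for (p = 2; p <= 6 && delta; p++) {
			int calc = (m * ref_clk) / (p * n);
			int d = abs(target_khz - calc);

			if (d < delta) {	/* closer than anything so far */
				delta = d;
				*best_m = m;
				*best_p = p;
			}
		}
	}
	return 0;
}

int main(void)
{
	int m = 0, p = 0;

	/* 648000 kHz settles on m=78, p=3: 78 * 25000 / 3 = 650000 kHz */
	if (dsi_calc_mnp_model(648000, &m, &p) == 0)
		printf("m=%d p=%d -> %d kHz\n", m, p, m * 25000 / p);
	return 0;
}

The early exit on delta == 0 mirrors the `&& delta` loop conditions: once an exact match is found there is nothing left to improve.
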
index 4e7e7da2e03bed4f835ca6d9a0666cd131410502..6372cfc7d0532d1eb8575a8bc5f52fcf202d8d35 100644 (file)
@@ -96,6 +96,32 @@ static int intel_fbdev_blank(int blank, struct fb_info *info)
        return ret;
 }
 
+static int intel_fbdev_pan_display(struct fb_var_screeninfo *var,
+                                  struct fb_info *info)
+{
+       struct drm_fb_helper *fb_helper = info->par;
+       struct intel_fbdev *ifbdev =
+               container_of(fb_helper, struct intel_fbdev, helper);
+       int ret;
+
+       ret = drm_fb_helper_pan_display(var, info);
+
+       if (ret == 0) {
+               /*
+                * FIXME: fbdev presumes that all callbacks also work from
+                * atomic contexts and relies on that for emergency oops
+                * printing. KMS totally doesn't do that and the locking here is
+                * by far not the only place this goes wrong.  Ignore this for
+                * now until we solve this for real.
+                */
+               mutex_lock(&fb_helper->dev->struct_mutex);
+               intel_fb_obj_invalidate(ifbdev->fb->obj, NULL, ORIGIN_GTT);
+               mutex_unlock(&fb_helper->dev->struct_mutex);
+       }
+
+       return ret;
+}
+
 static struct fb_ops intelfb_ops = {
        .owner = THIS_MODULE,
        .fb_check_var = drm_fb_helper_check_var,
@@ -103,7 +129,7 @@ static struct fb_ops intelfb_ops = {
        .fb_fillrect = cfb_fillrect,
        .fb_copyarea = cfb_copyarea,
        .fb_imageblit = cfb_imageblit,
-       .fb_pan_display = drm_fb_helper_pan_display,
+       .fb_pan_display = intel_fbdev_pan_display,
        .fb_blank = intel_fbdev_blank,
        .fb_setcmap = drm_fb_helper_setcmap,
        .fb_debug_enter = drm_fb_helper_debug_enter,
index d04e6dc97fe5ca5538bfaee254f42a78a1819938..e97731aab6dcfee2a380d09b95cefdff57e193e1 100644 (file)
@@ -873,59 +873,59 @@ static void intel_disable_hdmi(struct intel_encoder *encoder)
        struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
        struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
        u32 temp;
-       u32 enable_bits = SDVO_ENABLE | SDVO_AUDIO_ENABLE;
-
-       if (crtc->config->has_audio)
-               intel_audio_codec_disable(encoder);
 
        temp = I915_READ(intel_hdmi->hdmi_reg);
 
-       /* HW workaround for IBX, we need to move the port to transcoder A
-        * before disabling it. */
-       if (HAS_PCH_IBX(dev)) {
-               struct drm_crtc *crtc = encoder->base.crtc;
-               int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
-               if (temp & SDVO_PIPE_B_SELECT) {
-                       temp &= ~SDVO_PIPE_B_SELECT;
-                       I915_WRITE(intel_hdmi->hdmi_reg, temp);
-                       POSTING_READ(intel_hdmi->hdmi_reg);
-
-                       /* Again we need to write this twice. */
-                       I915_WRITE(intel_hdmi->hdmi_reg, temp);
-                       POSTING_READ(intel_hdmi->hdmi_reg);
-
-                       /* Transcoder selection bits only update
-                        * effectively on vblank. */
-                       if (crtc)
-                               intel_wait_for_vblank(dev, pipe);
-                       else
-                               msleep(50);
-               }
-       }
-
-       /* HW workaround, need to toggle enable bit off and on for 12bpc, but
-        * we do this anyway which shows more stable in testing.
-        */
-       if (HAS_PCH_SPLIT(dev)) {
-               I915_WRITE(intel_hdmi->hdmi_reg, temp & ~SDVO_ENABLE);
-               POSTING_READ(intel_hdmi->hdmi_reg);
-       }
-
-       temp &= ~enable_bits;
-
+       temp &= ~(SDVO_ENABLE | SDVO_AUDIO_ENABLE);
        I915_WRITE(intel_hdmi->hdmi_reg, temp);
        POSTING_READ(intel_hdmi->hdmi_reg);
 
-       /* HW workaround, need to write this twice for issue that may result
-        * in first write getting masked.
+       /*
+        * HW workaround for IBX: we need to move the port
+        * to transcoder A after disabling it so that the
+        * matching DP port can be enabled on transcoder A.
         */
-       if (HAS_PCH_SPLIT(dev)) {
+       if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) {
+               temp &= ~SDVO_PIPE_B_SELECT;
+               temp |= SDVO_ENABLE;
+               /*
+                * HW workaround: the first write may get masked, so
+                * write it twice.
+                */
+               I915_WRITE(intel_hdmi->hdmi_reg, temp);
+               POSTING_READ(intel_hdmi->hdmi_reg);
+               I915_WRITE(intel_hdmi->hdmi_reg, temp);
+               POSTING_READ(intel_hdmi->hdmi_reg);
+
+               temp &= ~SDVO_ENABLE;
                I915_WRITE(intel_hdmi->hdmi_reg, temp);
                POSTING_READ(intel_hdmi->hdmi_reg);
        }
 }
 
+static void g4x_disable_hdmi(struct intel_encoder *encoder)
+{
+       struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+       if (crtc->config->has_audio)
+               intel_audio_codec_disable(encoder);
+
+       intel_disable_hdmi(encoder);
+}
+
+static void pch_disable_hdmi(struct intel_encoder *encoder)
+{
+       struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+       if (crtc->config->has_audio)
+               intel_audio_codec_disable(encoder);
+}
+
+static void pch_post_disable_hdmi(struct intel_encoder *encoder)
+{
+       intel_disable_hdmi(encoder);
+}
+
 static int hdmi_portclock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit)
 {
        struct drm_device *dev = intel_hdmi_to_dev(hdmi);
@@ -1036,7 +1036,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
         */
        if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink &&
            clock_12bpc <= portclock_limit &&
-           hdmi_12bpc_possible(pipe_config)) {
+           hdmi_12bpc_possible(pipe_config) &&
+           0 /* FIXME 12bpc support totally broken */) {
                DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
                desired_bpp = 12*3;
 
@@ -1293,7 +1294,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder)
        u32 val;
 
        /* Enable clock channels for this port */
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
        val = 0;
        if (pipe)
@@ -1316,7 +1317,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder)
        /* Program lane clock */
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        intel_hdmi->set_infoframes(&encoder->base,
                                   intel_crtc->config->has_hdmi_sink,
@@ -1340,7 +1341,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
        intel_hdmi_prepare(encoder);
 
        /* Program Tx lane resets to default */
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
                         DPIO_PCS_TX_LANE2_RESET |
                         DPIO_PCS_TX_LANE1_RESET);
@@ -1357,7 +1358,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
 
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), 0x00002000);
        vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
@@ -1373,7 +1374,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
 
        intel_hdmi_prepare(encoder);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* program left/right clock distribution */
        if (pipe != PIPE_B) {
@@ -1423,7 +1424,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
                val |= CHV_CMN_USEDCLKCHANNEL;
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void vlv_hdmi_post_disable(struct intel_encoder *encoder)
@@ -1436,10 +1437,10 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder)
        int pipe = intel_crtc->pipe;
 
        /* Reset lanes to avoid HDMI flicker (VLV w/a) */
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void chv_hdmi_post_disable(struct intel_encoder *encoder)
@@ -1453,7 +1454,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder)
        enum pipe pipe = intel_crtc->pipe;
        u32 val;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* Propagate soft reset to data lane reset */
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
@@ -1472,7 +1473,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder)
        val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
        vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
@@ -1490,7 +1491,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
        int data, i, stagger;
        u32 val;
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
 
        /* allow hardware to manage TX FIFO reset source */
        val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -1633,7 +1634,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
        val |= DPIO_LRC_BYPASS;
        vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
 
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        intel_hdmi->set_infoframes(&encoder->base,
                                   intel_crtc->config->has_hdmi_sink,
@@ -1806,7 +1807,12 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port)
                         DRM_MODE_ENCODER_TMDS);
 
        intel_encoder->compute_config = intel_hdmi_compute_config;
-       intel_encoder->disable = intel_disable_hdmi;
+       if (HAS_PCH_SPLIT(dev)) {
+               intel_encoder->disable = pch_disable_hdmi;
+               intel_encoder->post_disable = pch_post_disable_hdmi;
+       } else {
+               intel_encoder->disable = g4x_disable_hdmi;
+       }
        intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
        intel_encoder->get_config = intel_hdmi_get_config;
        if (IS_CHERRYVIEW(dev)) {
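
The IBX path above leans on a "write twice" quirk: the first write to the port register may be masked by the hardware, so each value is written again after a posting read (the SDVO hunks later in this series apply the same trick). A stub model of that access pattern, with reg_write()/reg_read() standing in for I915_WRITE/POSTING_READ and an illustrative register layout:

/*
 * Model of the IBX "write twice" register quirk.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t fake_regs[1];

static void reg_write(unsigned int reg, uint32_t val) { fake_regs[reg] = val; }
static uint32_t reg_read(unsigned int reg) { return fake_regs[reg]; }

static void write_twice(unsigned int reg, uint32_t val)
{
	reg_write(reg, val);
	(void)reg_read(reg);	/* posting read flushes the write */
	reg_write(reg, val);	/* repeat in case the first was masked */
	(void)reg_read(reg);
}

int main(void)
{
	write_twice(0, 1u << 31);	/* e.g. an SDVO_ENABLE-style bit */
	printf("reg0 = 0x%08x\n", reg_read(0));
	return 0;
}
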
index 3daa7e322326a0e2d60cbcc753f2ee9782ee3e00..92072f56e41811d062d7e39e310352a69d9d677f 100644 (file)
@@ -49,6 +49,19 @@ static const struct gmbus_pin gmbus_pins[] = {
        [GMBUS_PIN_DPD] = { "dpd", GPIOF },
 };
 
+static const struct gmbus_pin gmbus_pins_bdw[] = {
+       [GMBUS_PIN_VGADDC] = { "vga", GPIOA },
+       [GMBUS_PIN_DPC] = { "dpc", GPIOD },
+       [GMBUS_PIN_DPB] = { "dpb", GPIOE },
+       [GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
+static const struct gmbus_pin gmbus_pins_skl[] = {
+       [GMBUS_PIN_DPC] = { "dpc", GPIOD },
+       [GMBUS_PIN_DPB] = { "dpb", GPIOE },
+       [GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
 static const struct gmbus_pin gmbus_pins_bxt[] = {
        [GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
        [GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
@@ -61,6 +74,10 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
 {
        if (IS_BROXTON(dev_priv))
                return &gmbus_pins_bxt[pin];
+       else if (IS_SKYLAKE(dev_priv))
+               return &gmbus_pins_skl[pin];
+       else if (IS_BROADWELL(dev_priv))
+               return &gmbus_pins_bdw[pin];
        else
                return &gmbus_pins[pin];
 }
@@ -72,6 +89,10 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
 
        if (IS_BROXTON(dev_priv))
                size = ARRAY_SIZE(gmbus_pins_bxt);
+       else if (IS_SKYLAKE(dev_priv))
+               size = ARRAY_SIZE(gmbus_pins_skl);
+       else if (IS_BROADWELL(dev_priv))
+               size = ARRAY_SIZE(gmbus_pins_bdw);
        else
                size = ARRAY_SIZE(gmbus_pins);
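
The per-platform GMBUS tables added above are sparse arrays indexed by pin number, so validity reduces to "in range for the platform's table and slot populated". A small userspace model of the lookup; the platform names, pin numbers and register values here are illustrative, not the driver's actual GMBUS_PIN_* layout:

/*
 * Model of sparse per-platform pin tables and the validity test.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum platform { PLAT_DEFAULT, PLAT_SKL };

struct pin { const char *name; int reg; };

static const struct pin pins_default[] = {
	[1] = { "vga", 0xA }, [4] = { "dpc", 0xD },
	[5] = { "dpb", 0xE }, [6] = { "dpd", 0xF },
};
static const struct pin pins_skl[] = {	/* no VGA DDC pin on this table */
	[4] = { "dpc", 0xD }, [5] = { "dpb", 0xE }, [6] = { "dpd", 0xF },
};

static bool pin_is_valid(enum platform plat, size_t pin)
{
	const struct pin *table = pins_default;
	size_t size = sizeof(pins_default) / sizeof(pins_default[0]);

	if (plat == PLAT_SKL) {
		table = pins_skl;
		size = sizeof(pins_skl) / sizeof(pins_skl[0]);
	}
	/* valid iff in range and the sparse slot is populated */
	return pin < size && table[pin].name != NULL;
}

int main(void)
{
	printf("skl pin 1: %d\n", pin_is_valid(PLAT_SKL, 1));	/* 0 */
	printf("skl pin 5: %d\n", pin_is_valid(PLAT_SKL, 5));	/* 1 */
	return 0;
}
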
 
index 0fa9209ff556bf182b336b86ef56b57c54c8260c..9f5485ddcbe6e15d1b18871ae9aebd03b1035c16 100644 (file)
@@ -394,6 +394,12 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 
        assert_spin_locked(&ring->execlist_lock);
 
+       /*
+        * If irqs are not active, generate a warning: batches that finish
+        * without irqs may get lost and a GPU hang may occur.
+        */
+       WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
+
        if (list_empty(&ring->execlist_queue))
                return;
 
@@ -421,7 +427,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
                 * WaIdleLiteRestore: make sure we never cause a lite
                 * restore with HEAD==TAIL
                 */
-               if (req0 && req0->elsp_submitted) {
+               if (req0->elsp_submitted) {
                        /*
                         * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
                         * as we resubmit the request. See gen8_emit_request()
@@ -622,6 +628,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
                                 struct list_head *vmas)
 {
        struct intel_engine_cs *ring = ringbuf->ring;
+       const unsigned other_rings = ~intel_ring_flag(ring);
        struct i915_vma *vma;
        uint32_t flush_domains = 0;
        bool flush_chipset = false;
@@ -630,9 +637,11 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
 
-               ret = i915_gem_object_sync(obj, ring);
-               if (ret)
-                       return ret;
+               if (obj->active & other_rings) {
+                       ret = i915_gem_object_sync(obj, ring);
+                       if (ret)
+                               return ret;
+               }
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        flush_chipset |= i915_gem_clflush_object(obj, false);
@@ -673,7 +682,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
 {
        struct intel_engine_cs *ring = ringbuf->ring;
        struct drm_i915_gem_request *request;
-       int ret, new_space;
+       unsigned space;
+       int ret;
 
        if (intel_ring_space(ringbuf) >= bytes)
                return 0;
@@ -684,14 +694,13 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
                 * from multiple ringbuffers. Here, we must ignore any that
                 * aren't from the ringbuffer we're considering.
                 */
-               struct intel_context *ctx = request->ctx;
-               if (ctx->engine[ring->id].ringbuf != ringbuf)
+               if (request->ringbuf != ringbuf)
                        continue;
 
                /* Would completion of this request free enough space? */
-               new_space = __intel_ring_space(request->postfix, ringbuf->tail,
-                                      ringbuf->size);
-               if (new_space >= bytes)
+               space = __intel_ring_space(request->postfix, ringbuf->tail,
+                                          ringbuf->size);
+               if (space >= bytes)
                        break;
        }
 
@@ -702,11 +711,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
        if (ret)
                return ret;
 
-       i915_gem_retire_requests_ring(ring);
-
-       WARN_ON(intel_ring_space(ringbuf) < new_space);
-
-       return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
+       ringbuf->space = space;
+       return 0;
 }
 
 /*
index 5fd2d5ac02e2dc2692c9a3d6a7d8d3fd0fc95f81..25c8ec697da1fa5364164be9163bc24241172854 100644 (file)
@@ -228,7 +228,6 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
        ret = i915_wait_request(overlay->last_flip_req);
        if (ret)
                return ret;
-       i915_gem_retire_requests(dev);
 
        i915_gem_request_assign(&overlay->last_flip_req, NULL);
        return 0;
@@ -376,7 +375,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
        ret = i915_wait_request(overlay->last_flip_req);
        if (ret)
                return ret;
-       i915_gem_retire_requests(overlay->dev);
 
        if (overlay->flip_tail)
                overlay->flip_tail(overlay);
index 7006f94b94c15c8e84b061fa7b305df0ebbd97b0..c5914564939cedc6059de156cbc554f4e0a9b387 100644 (file)
@@ -1946,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 int ilk_wm_max_level(const struct drm_device *dev)
 {
        /* how many WM levels are we expecting */
-       if (IS_GEN9(dev))
+       if (INTEL_INFO(dev)->gen >= 9)
                return 7;
        else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                return 4;
@@ -2639,8 +2639,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
 {
+       /* for planar format */
+       if (p->y_bytes_per_pixel) {
+               if (y)  /* y-plane data rate */
+                       return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+               else    /* uv-plane data rate */
+                       return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+       }
+
+       /* for packed formats */
        return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
 }
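
For NV12 the two branches above split the bandwidth cost per plane: the y plane is full resolution at its own cpp, while the uv plane covers a quarter of the pixels at twice the cpp. A worked standalone example for a hypothetical 1920x1080 NV12 plane:

/*
 * Worked example of the planar data-rate split for NV12.
 */
#include <stdio.h>

struct plane_params {
	unsigned int horiz_pixels, vert_pixels;
	unsigned int bytes_per_pixel;	/* uv-plane cpp for planar formats */
	unsigned int y_bytes_per_pixel;	/* 0 for packed formats */
};

static unsigned int relative_data_rate(const struct plane_params *p, int y)
{
	if (p->y_bytes_per_pixel) {
		if (y)	/* y plane: full resolution */
			return p->horiz_pixels * p->vert_pixels *
			       p->y_bytes_per_pixel;
		/* uv plane: subsampled by 2 in both dimensions */
		return (p->horiz_pixels / 2) * (p->vert_pixels / 2) *
		       p->bytes_per_pixel;
	}
	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
}

int main(void)
{
	const struct plane_params nv12 = { 1920, 1080, 2, 1 };

	/* y: 1920*1080*1 = 2073600, uv: 960*540*2 = 1036800 */
	printf("y=%u uv=%u\n", relative_data_rate(&nv12, 1),
	       relative_data_rate(&nv12, 0));
	return 0;
}
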
 
@@ -2663,7 +2673,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
                if (!p->enabled)
                        continue;
 
-               total_data_rate += skl_plane_relative_data_rate(p);
+               total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+               if (p->y_bytes_per_pixel) {
+                       total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+               }
        }
 
        return total_data_rate;
@@ -2682,6 +2695,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
        uint16_t alloc_size, start, cursor_blocks;
        uint16_t minimum[I915_MAX_PLANES];
+       uint16_t y_minimum[I915_MAX_PLANES];
        unsigned int total_data_rate;
        int plane;
 
@@ -2710,6 +2724,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 
                minimum[plane] = 8;
                alloc_size -= minimum[plane];
+               y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+               alloc_size -= y_minimum[plane];
        }
 
        /*
@@ -2723,16 +2739,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        start = alloc->start;
        for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
                const struct intel_plane_wm_parameters *p;
-               unsigned int data_rate;
-               uint16_t plane_blocks;
+               unsigned int data_rate, y_data_rate;
+               uint16_t plane_blocks, y_plane_blocks = 0;
 
                p = &params->plane[plane];
                if (!p->enabled)
                        continue;
 
-               data_rate = skl_plane_relative_data_rate(p);
+               data_rate = skl_plane_relative_data_rate(p, 0);
 
                /*
+                * allocation for packed formats, or the uv-plane part of planar formats:
                 * promote the expression to 64 bits to avoid overflowing, the
                 * result is < available as data_rate / total_data_rate < 1
                 */
@@ -2744,6 +2761,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
                ddb->plane[pipe][plane].end = start + plane_blocks;
 
                start += plane_blocks;
+
+               /*
+                * allocation for the y-plane part of planar formats:
+                */
+               if (p->y_bytes_per_pixel) {
+                       y_data_rate = skl_plane_relative_data_rate(p, 1);
+                       y_plane_blocks = y_minimum[plane];
+                       y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+                                               total_data_rate);
+
+                       ddb->y_plane[pipe][plane].start = start;
+                       ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+                       start += y_plane_blocks;
+               }
        }
 
 }
@@ -2856,13 +2889,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
 
                fb = crtc->primary->state->fb;
+               /* For planar formats: Bpp is for the uv plane, y_Bpp for the y plane */
                if (fb) {
                        p->plane[0].enabled = true;
-                       p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+                       p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+                       p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 0) : 0;
                        p->plane[0].tiling = fb->modifier[0];
                } else {
                        p->plane[0].enabled = false;
                        p->plane[0].bytes_per_pixel = 0;
+                       p->plane[0].y_bytes_per_pixel = 0;
                        p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
                }
                p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2870,6 +2908,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->plane[0].rotation = crtc->primary->state->rotation;
 
                fb = crtc->cursor->state->fb;
+               p->cursor.y_bytes_per_pixel = 0;
                if (fb) {
                        p->cursor.enabled = true;
                        p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
@@ -2905,22 +2944,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        uint32_t plane_bytes_per_line, plane_blocks_per_line;
        uint32_t res_blocks, res_lines;
        uint32_t selected_result;
+       uint8_t bytes_per_pixel;
 
        if (latency == 0 || !p->active || !p_params->enabled)
                return false;
 
+       bytes_per_pixel = p_params->y_bytes_per_pixel ?
+               p_params->y_bytes_per_pixel :
+               p_params->bytes_per_pixel;
        method1 = skl_wm_method1(p->pixel_rate,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 latency);
        method2 = skl_wm_method2(p->pixel_rate,
                                 p->pipe_htotal,
                                 p_params->horiz_pixels,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 p_params->tiling,
                                 latency);
 
-       plane_bytes_per_line = p_params->horiz_pixels *
-                                       p_params->bytes_per_pixel;
+       plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
        plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
        if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -3137,10 +3179,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
                                   new->plane_trans[pipe][i]);
                I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
 
-               for (i = 0; i < intel_num_planes(crtc); i++)
+               for (i = 0; i < intel_num_planes(crtc); i++) {
                        skl_ddb_entry_write(dev_priv,
                                            PLANE_BUF_CFG(pipe, i),
                                            &new->ddb.plane[pipe][i]);
+                       skl_ddb_entry_write(dev_priv,
+                                           PLANE_NV12_BUF_CFG(pipe, i),
+                                           &new->ddb.y_plane[pipe][i]);
+               }
 
                skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
                                    &new->ddb.cursor[pipe]);
@@ -3298,6 +3344,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
                return false;
 
        intel_crtc->wm.skl_active = *pipe_wm;
+
        return true;
 }
 
@@ -3391,8 +3438,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
        intel_plane->wm.scaled = scaled;
        intel_plane->wm.horiz_pixels = sprite_width;
        intel_plane->wm.vert_pixels = sprite_height;
-       intel_plane->wm.bytes_per_pixel = pixel_size;
        intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+       /* For planar formats: Bpp is for the uv plane, y_Bpp for the y plane */
+       intel_plane->wm.bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+       intel_plane->wm.y_bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
        /*
         * Framebuffer can be NULL on plane disable, but it does not
         * matter for watermarks if we assume no tiling in that case.
@@ -4042,51 +4097,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to the idle freq if Gfx clocks are down
  *
  * If Gfx is idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Take forcewake on the media well.
+ * 2. Request the idle frequency.
+ * 3. Release forcewake on the media well.
 */
 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
        u32 val = dev_priv->rps.idle_freq;
 
-       /* CHV and latest VLV don't need to force the gfx clock */
-       if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
-               valleyview_set_rps(dev_priv->dev, val);
-               return;
-       }
-
-       /*
-        * When we are idle.  Drop to min voltage state.
-        */
-
        if (dev_priv->rps.cur_freq <= val)
                return;
 
-       /* Mask turbo interrupt so that they will not come in between */
-       I915_WRITE(GEN6_PMINTRMSK,
-                  gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
-       vlv_force_gfx_clock(dev_priv, true);
-
-       dev_priv->rps.cur_freq = val;
-
-       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
-       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
-                               & GENFREQSTATUS) == 0, 100))
-               DRM_ERROR("timed out waiting for Punit\n");
-
-       gen6_set_rps_thresholds(dev_priv, val);
-       vlv_force_gfx_clock(dev_priv, false);
-
-       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+       /* Wake up the media well, as that takes a lot less
+        * power than the Render well. */
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+       valleyview_set_rps(dev_priv->dev, val);
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4114,33 +4143,48 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
                dev_priv->rps.last_adj = 0;
                I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
        }
+       mutex_unlock(&dev_priv->rps.hw_lock);
 
+       spin_lock(&dev_priv->rps.client_lock);
        while (!list_empty(&dev_priv->rps.clients))
                list_del_init(dev_priv->rps.clients.next);
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
 void gen6_rps_boost(struct drm_i915_private *dev_priv,
-                   struct drm_i915_file_private *file_priv)
+                   struct intel_rps_client *rps,
+                   unsigned long submitted)
 {
-       u32 val;
+       /* This is intentionally racy! We peek at the state here, then
+        * validate inside the RPS worker.
+        */
+       if (!(dev_priv->mm.busy &&
+             dev_priv->rps.enabled &&
+             dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
+               return;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       val = dev_priv->rps.max_freq_softlimit;
-       if (dev_priv->rps.enabled &&
-           dev_priv->mm.busy &&
-           dev_priv->rps.cur_freq < val &&
-           (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
-               intel_set_rps(dev_priv->dev, val);
-               dev_priv->rps.last_adj = 0;
+       /* Force an RPS boost (and don't count it against the client) if
+        * the GPU is severely congested.
+        */
+       if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+               rps = NULL;
+
+       spin_lock(&dev_priv->rps.client_lock);
+       if (rps == NULL || list_empty(&rps->link)) {
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->rps.interrupts_enabled) {
+                       dev_priv->rps.client_boost = true;
+                       queue_work(dev_priv->wq, &dev_priv->rps.work);
+               }
+               spin_unlock_irq(&dev_priv->irq_lock);
 
-               if (file_priv != NULL) {
-                       list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
-                       file_priv->rps_boosts++;
+               if (rps != NULL) {
+                       list_add(&rps->link, &dev_priv->rps.clients);
+                       rps->boosts++;
                } else
                        dev_priv->rps.boosts++;
        }
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
 void intel_set_rps(struct drm_device *dev, u8 val)
@@ -4714,24 +4758,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
        return rp1;
 }
 
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
-       u32 val, rpn;
-
-       if (dev->pdev->revision >= 0x20) {
-               val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-               rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-                      FB_GFX_FREQ_FUSE_MASK);
-       } else { /* For pre-production hardware */
-               val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-               rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
-                      PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
-       }
-
-       return rpn;
-}
-
 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
 {
        u32 val, rp1;
@@ -4938,9 +4964,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
 
        mutex_lock(&dev_priv->rps.hw_lock);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        switch ((val >> 2) & 0x7) {
        case 0:
@@ -4983,7 +5009,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
                         intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
                         dev_priv->rps.rp1_freq);
 
-       dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+       /* PUnit validated range is only [RPe, RP0] */
+       dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
                         intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
                         dev_priv->rps.min_freq);
@@ -5859,13 +5886,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)
 static void g4x_disable_trickle_feed(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
+       enum pipe pipe;
 
        for_each_pipe(dev_priv, pipe) {
                I915_WRITE(DSPCNTR(pipe),
                           I915_READ(DSPCNTR(pipe)) |
                           DISPPLANE_TRICKLE_FEED_DISABLE);
-               intel_flush_primary_plane(dev_priv, pipe);
+
+               I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+               POSTING_READ(DSPSURF(pipe));
        }
 }
 
@@ -6155,10 +6184,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        enum pipe pipe;
+       uint32_t misccpctl;
 
-       I915_WRITE(WM3_LP_ILK, 0);
-       I915_WRITE(WM2_LP_ILK, 0);
-       I915_WRITE(WM1_LP_ILK, 0);
+       ilk_init_lp_watermarks(dev);
 
        /* WaSwitchSolVfFArbitrationPriority:bdw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6187,6 +6215,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
+       /*
+        * WaProgramL3SqcReg1Default:bdw
+        * WaTempDisableDOPClkGating:bdw
+        */
+       misccpctl = I915_READ(GEN7_MISCCPCTL);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+       I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+       /*
+        * WaGttCachingOffByDefault:bdw
+        * GTT cache may not work with big pages, so if those are
+        * ever enabled, the GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
        lpt_init_clock_gating(dev);
 }
 
@@ -6462,6 +6506,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
        /* WaDisableSDEUnitClockGating:chv */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+       /*
+        * GTT cache may not work with big pages, so if those are
+        * ever enabled, the GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6830,34 +6880,39 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 struct request_boost {
        struct work_struct work;
-       struct drm_i915_gem_request *rq;
+       struct drm_i915_gem_request *req;
 };
 
 static void __intel_rps_boost_work(struct work_struct *work)
 {
        struct request_boost *boost = container_of(work, struct request_boost, work);
+       struct drm_i915_gem_request *req = boost->req;
 
-       if (!i915_gem_request_completed(boost->rq, true))
-               gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
+       if (!i915_gem_request_completed(req, true))
+               gen6_rps_boost(to_i915(req->ring->dev), NULL,
+                              req->emitted_jiffies);
 
-       i915_gem_request_unreference__unlocked(boost->rq);
+       i915_gem_request_unreference__unlocked(req);
        kfree(boost);
 }
 
 void intel_queue_rps_boost_for_request(struct drm_device *dev,
-                                      struct drm_i915_gem_request *rq)
+                                      struct drm_i915_gem_request *req)
 {
        struct request_boost *boost;
 
-       if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+       if (req == NULL || INTEL_INFO(dev)->gen < 6)
+               return;
+
+       if (i915_gem_request_completed(req, true))
                return;
 
        boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
        if (boost == NULL)
                return;
 
-       i915_gem_request_reference(rq);
-       boost->rq = rq;
+       i915_gem_request_reference(req);
+       boost->req = req;
 
        INIT_WORK(&boost->work, __intel_rps_boost_work);
        queue_work(to_i915(dev)->wq, &boost->work);
@@ -6868,10 +6923,13 @@ void intel_pm_setup(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        mutex_init(&dev_priv->rps.hw_lock);
+       spin_lock_init(&dev_priv->rps.client_lock);
 
        INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
                          intel_gen6_powersave_work);
        INIT_LIST_HEAD(&dev_priv->rps.clients);
+       INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+       INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
        dev_priv->pm.suspended = false;
 }
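
The request-boost path reworked above is a reference-counted fire-and-forget work item: take a reference on the request, queue the work, and let the worker re-check completion before boosting and dropping the reference. A userspace sketch of that lifetime pattern, with a joined pthread standing in for the kernel workqueue and a stubbed-out request:

/*
 * Lifetime model of the request-boost work item.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct request {
	atomic_int refcount;
	atomic_bool completed;
};

struct request_boost { struct request *req; };

static void request_put(struct request *req)
{
	if (atomic_fetch_sub(&req->refcount, 1) == 1)
		free(req);	/* last reference gone */
}

static void *boost_work(void *data)
{
	struct request_boost *boost = data;

	if (!atomic_load(&boost->req->completed))
		printf("boosting: request still outstanding\n");

	request_put(boost->req);
	free(boost);
	return NULL;
}

static void queue_boost(struct request *req)
{
	struct request_boost *boost;
	pthread_t worker;

	if (atomic_load(&req->completed))
		return;			/* nothing left to speed up */

	boost = malloc(sizeof(*boost));
	if (!boost)
		return;			/* boosting is best-effort */

	atomic_fetch_add(&req->refcount, 1);	/* keep req alive for the worker */
	boost->req = req;
	pthread_create(&worker, NULL, boost_work, boost);
	pthread_join(&worker, NULL);	/* joined only to keep the example deterministic */
}

int main(void)
{
	struct request *req = malloc(sizeof(*req));

	atomic_init(&req->refcount, 1);
	atomic_init(&req->completed, false);
	queue_boost(req);
	request_put(req);
	return 0;
}
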
index 9b96ed7de9bbb79284a666c376abb70ba0b91c94..d934f857394ddbc95d276d0fc511b3ac67616cea 100644 (file)
@@ -853,9 +853,6 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
 
-       /* WaProgramL3SqcReg1Default:bdw */
-       WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
-
        return 0;
 }
 
@@ -918,6 +915,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t tmp;
 
        /* WaDisablePartialInstShootdown:skl,bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -961,15 +959,19 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);
 
-       /*
-        * FIXME: don't apply the following on BXT for stepping C. On BXT A0
-        * the flag reads back as 0.
-        */
-       /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
-       if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))
+       /* WaDisableMaskBasedCammingInRCC:skl,bxt */
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
                WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
                                  PIXEL_MASK_CAMMING_DISABLE);
 
+       /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
+       tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+               tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
+       WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+
        return 0;
 }
 
@@ -1060,10 +1062,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring)
                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
        }
 
-       /* WaForceContextSaveRestoreNonCoherent:bxt */
-       WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                         HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
        return 0;
 }
 
@@ -2102,15 +2100,16 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
        struct intel_ringbuffer *ringbuf = ring->buffer;
        struct drm_i915_gem_request *request;
-       int ret, new_space;
+       unsigned space;
+       int ret;
 
        if (intel_ring_space(ringbuf) >= n)
                return 0;
 
        list_for_each_entry(request, &ring->request_list, list) {
-               new_space = __intel_ring_space(request->postfix, ringbuf->tail,
-                                      ringbuf->size);
-               if (new_space >= n)
+               space = __intel_ring_space(request->postfix, ringbuf->tail,
+                                          ringbuf->size);
+               if (space >= n)
                        break;
        }
 
@@ -2121,10 +2120,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
        if (ret)
                return ret;
 
-       i915_gem_retire_requests_ring(ring);
-
-       WARN_ON(intel_ring_space(ringbuf) < new_space);
-
+       ringbuf->space = space;
        return 0;
 }
 
@@ -2168,10 +2164,14 @@ int intel_ring_idle(struct intel_engine_cs *ring)
                return 0;
 
        req = list_entry(ring->request_list.prev,
-                          struct drm_i915_gem_request,
-                          list);
-
-       return i915_wait_request(req);
+                       struct drm_i915_gem_request,
+                       list);
+
+       /* Make sure we do not trigger any retires */
+       return __i915_wait_request(req,
+                                  atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
+                                  to_i915(ring->dev)->mm.interruptible,
+                                  NULL, NULL);
 }
 
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
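
Both wait-for-space paths above now record the space a completed request would free and stash it in ringbuf->space, rather than re-deriving it after retiring. The underlying circular-buffer arithmetic, modeled standalone (the reserved-slack constant is an assumption, not necessarily the driver's exact I915_RING_FREE_SPACE value):

/*
 * Standalone model of the ring-space computation: free space between a
 * request's postfix (acting as HEAD) and the software TAIL, modulo the
 * ring size, minus slack so HEAD and TAIL never collide.
 */
#include <stdio.h>

#define RING_FREE_SLACK 64	/* assumed reserved slack */

static int ring_space(int head, int tail, int size)
{
	int space = head - tail;

	if (space <= 0)
		space += size;	/* tail is ahead of head: wrapped */
	return space - RING_FREE_SLACK;
}

int main(void)
{
	/* 4 KiB ring: 256 + (4096 - 3840) - 64 = 448 bytes free */
	printf("space = %d bytes\n", ring_space(256, 3840, 4096));
	return 0;
}
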
index 317b9b43d1c1000d02d2042a5321aca2f4daeb51..1a45385f4d66947087c8ae072e2e2b2328ad6651 100644 (file)
@@ -771,7 +771,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
        vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
 
        if (wait_for(COND, 100))
-               DRM_ERROR("timout setting power well state %08x (%08x)\n",
+               DRM_ERROR("timeout setting power well state %08x (%08x)\n",
                          state,
                          vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
 
@@ -1029,7 +1029,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
 
        if (wait_for(COND, 100))
-               DRM_ERROR("timout setting power well state %08x (%08x)\n",
+               DRM_ERROR("timeout setting power well state %08x (%08x)\n",
                          state,
                          vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
 
@@ -1233,18 +1233,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
        BIT(POWER_DOMAIN_AUX_C) |               \
        BIT(POWER_DOMAIN_INIT))
 
-#define CHV_PIPE_A_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_A) |      \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_B_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_B) |      \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_C_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_C) |      \
-       BIT(POWER_DOMAIN_INIT))
-
 #define CHV_DPIO_CMN_BC_POWER_DOMAINS (                \
        BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
        BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
@@ -1260,17 +1248,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
        BIT(POWER_DOMAIN_AUX_D) |               \
        BIT(POWER_DOMAIN_INIT))
 
-#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
-       BIT(POWER_DOMAIN_AUX_D) |               \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
-       BIT(POWER_DOMAIN_AUX_D) |               \
-       BIT(POWER_DOMAIN_INIT))
-
 static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
        .sync_hw = i9xx_always_on_power_well_noop,
        .enable = i9xx_always_on_power_well_noop,
@@ -1428,40 +1405,17 @@ static struct i915_power_well chv_power_wells[] = {
                .domains = VLV_ALWAYS_ON_POWER_DOMAINS,
                .ops = &i9xx_always_on_power_well_ops,
        },
-#if 0
        {
                .name = "display",
-               .domains = VLV_DISPLAY_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DISP2D,
-               .ops = &vlv_display_power_well_ops,
-       },
-#endif
-       {
-               .name = "pipe-a",
                /*
-                * FIXME: pipe A power well seems to be the new disp2d well.
-                * At least all registers seem to be housed there. Figure
-                * out if this a a temporary situation in pre-production
-                * hardware or a permanent state of affairs.
+                * Pipe A power well is the new disp2d well. Pipe B and C
+                * power wells don't actually exist. Pipe A power well is
+                * required for any pipe to work.
                 */
-               .domains = CHV_PIPE_A_POWER_DOMAINS | VLV_DISPLAY_POWER_DOMAINS,
+               .domains = VLV_DISPLAY_POWER_DOMAINS,
                .data = PIPE_A,
                .ops = &chv_pipe_power_well_ops,
        },
-#if 0
-       {
-               .name = "pipe-b",
-               .domains = CHV_PIPE_B_POWER_DOMAINS,
-               .data = PIPE_B,
-               .ops = &chv_pipe_power_well_ops,
-       },
-       {
-               .name = "pipe-c",
-               .domains = CHV_PIPE_C_POWER_DOMAINS,
-               .data = PIPE_C,
-               .ops = &chv_pipe_power_well_ops,
-       },
-#endif
        {
                .name = "dpio-common-bc",
                .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS,
@@ -1474,50 +1428,6 @@ static struct i915_power_well chv_power_wells[] = {
                .data = PUNIT_POWER_WELL_DPIO_CMN_D,
                .ops = &chv_dpio_cmn_power_well_ops,
        },
-#if 0
-       {
-               .name = "dpio-tx-b-01",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
-       },
-       {
-               .name = "dpio-tx-b-23",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
-       },
-       {
-               .name = "dpio-tx-c-01",
-               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
-       },
-       {
-               .name = "dpio-tx-c-23",
-               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
-       },
-       {
-               .name = "dpio-tx-d-01",
-               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
-                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01,
-       },
-       {
-               .name = "dpio-tx-d-23",
-               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
-                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23,
-       },
-#endif
 };
 
 static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv,
@@ -1724,6 +1634,8 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv)
         * value.
         */
        dev_priv->chv_phy_control =
+               PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) |
+               PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) |
                PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH0) |
                PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH1) |
                PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY1, DPIO_CH0);
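
The power-well tables above encode each well's coverage as an OR of BIT(domain) values, so pruning the phantom pipe B/C wells amounts to deleting masks no hardware backs. A toy model of the mask scheme; the domain numbering and well contents are illustrative:

/*
 * Toy model of the power-domain bitmask scheme.
 */
#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

enum power_domain { DOMAIN_PIPE_A, DOMAIN_PIPE_B, DOMAIN_AUX_D, DOMAIN_INIT };

struct power_well { const char *name; unsigned int domains; };

static const struct power_well display_well = {
	.name = "display",
	.domains = BIT(DOMAIN_PIPE_A) | BIT(DOMAIN_PIPE_B) | BIT(DOMAIN_INIT),
};

static bool well_powers(const struct power_well *well, enum power_domain d)
{
	return well->domains & BIT(d);	/* well powers domain iff bit set */
}

int main(void)
{
	printf("display powers PIPE_B: %d\n",
	       well_powers(&display_well, DOMAIN_PIPE_B));	/* 1 */
	printf("display powers AUX_D: %d\n",
	       well_powers(&display_well, DOMAIN_AUX_D));	/* 0 */
	return 0;
}
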
index 0a0625761f4265905810e27d4f61526a5914528d..d24ef75596a107871c55d5f99ac7446deaca4449 100644 (file)
@@ -243,6 +243,14 @@ static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val)
        if (intel_sdvo->sdvo_reg == PCH_SDVOB) {
                I915_WRITE(intel_sdvo->sdvo_reg, val);
                POSTING_READ(intel_sdvo->sdvo_reg);
+               /*
+                * HW workaround: the first write may get masked, so
+                * write it twice.
+                */
+               if (HAS_PCH_IBX(dev)) {
+                       I915_WRITE(intel_sdvo->sdvo_reg, val);
+                       POSTING_READ(intel_sdvo->sdvo_reg);
+               }
                return;
        }
 
@@ -1429,6 +1437,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
        struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
+       struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
        u32 temp;
 
        intel_sdvo_set_active_outputs(intel_sdvo, 0);
@@ -1437,35 +1446,34 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
                                                   DRM_MODE_DPMS_OFF);
 
        temp = I915_READ(intel_sdvo->sdvo_reg);
-       if ((temp & SDVO_ENABLE) != 0) {
-               /* HW workaround for IBX, we need to move the port to
-                * transcoder A before disabling it. */
-               if (HAS_PCH_IBX(encoder->base.dev)) {
-                       struct drm_crtc *crtc = encoder->base.crtc;
-                       int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
-                       if (temp & SDVO_PIPE_B_SELECT) {
-                               temp &= ~SDVO_PIPE_B_SELECT;
-                               I915_WRITE(intel_sdvo->sdvo_reg, temp);
-                               POSTING_READ(intel_sdvo->sdvo_reg);
-
-                               /* Again we need to write this twice. */
-                               I915_WRITE(intel_sdvo->sdvo_reg, temp);
-                               POSTING_READ(intel_sdvo->sdvo_reg);
-
-                               /* Transcoder selection bits only update
-                                * effectively on vblank. */
-                               if (crtc)
-                                       intel_wait_for_vblank(encoder->base.dev, pipe);
-                               else
-                                       msleep(50);
-                       }
-               }
 
-               intel_sdvo_write_sdvox(intel_sdvo, temp & ~SDVO_ENABLE);
+       temp &= ~SDVO_ENABLE;
+       intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+       /*
+        * HW workaround for IBX: move the port to transcoder A
+        * after disabling it, so that the matching DP port can
+        * be enabled on transcoder A.
+        */
+       if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) {
+               temp &= ~SDVO_PIPE_B_SELECT;
+               temp |= SDVO_ENABLE;
+               intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+               temp &= ~SDVO_ENABLE;
+               intel_sdvo_write_sdvox(intel_sdvo, temp);
        }
 }
 
+static void pch_disable_sdvo(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_sdvo(struct intel_encoder *encoder)
+{
+       intel_disable_sdvo(encoder);
+}
+
 static void intel_enable_sdvo(struct intel_encoder *encoder)
 {
        struct drm_device *dev = encoder->base.dev;
@@ -1478,14 +1486,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder)
        bool success;
 
        temp = I915_READ(intel_sdvo->sdvo_reg);
-       if ((temp & SDVO_ENABLE) == 0) {
-               /* HW workaround for IBX, we need to move the port
-                * to transcoder A before disabling it, so restore it here. */
-               if (HAS_PCH_IBX(dev))
-                       temp |= SDVO_PIPE_SEL(intel_crtc->pipe);
+       temp |= SDVO_ENABLE;
+       intel_sdvo_write_sdvox(intel_sdvo, temp);
 
-               intel_sdvo_write_sdvox(intel_sdvo, temp | SDVO_ENABLE);
-       }
        for (i = 0; i < 2; i++)
                intel_wait_for_vblank(dev, intel_crtc->pipe);
 
@@ -2988,7 +2991,12 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob)
        }
 
        intel_encoder->compute_config = intel_sdvo_compute_config;
-       intel_encoder->disable = intel_disable_sdvo;
+       if (HAS_PCH_SPLIT(dev)) {
+               intel_encoder->disable = pch_disable_sdvo;
+               intel_encoder->post_disable = pch_post_disable_sdvo;
+       } else {
+               intel_encoder->disable = intel_disable_sdvo;
+       }
        intel_encoder->pre_enable = intel_sdvo_pre_enable;
        intel_encoder->enable = intel_enable_sdvo;
        intel_encoder->get_hw_state = intel_sdvo_get_hw_state;
index 693ce82819709f2dc1b0130c81834c39579cb45c..8831fc579adeb5c3ddc3df548bd78cb7df026d5f 100644 (file)
@@ -49,7 +49,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
                (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
                (bar << IOSF_BAR_SHIFT);
 
-       WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+       WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
 
        if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0, 5)) {
                DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n",
@@ -81,10 +81,10 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
                        SB_CRRDDA_NP, addr, &val);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return val;
 }
@@ -93,10 +93,10 @@ void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
                        SB_CRWRDA_NP, addr, &val);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 }
 
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
@@ -121,10 +121,10 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
                        SB_CRRDDA_NP, addr, &val);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        return val;
 }
@@ -213,7 +213,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
                   enum intel_sbi_destination destination)
 {
        u32 value = 0;
-       WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+       WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
 
        if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
                                100)) {
@@ -243,7 +243,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 {
        u32 tmp;
 
-       WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+       WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
 
        if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
                                100)) {
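
The dpio_lock -> sb_lock rename reflects that this mutex serializes all IOSF sideband traffic, not just DPIO accesses. A hedged caller sketch; PUNIT_REG_GPU_FREQ_STS is an existing punit register, and the nesting follows the WARN_ONs above (rps.hw_lock held by the caller, sb_lock taken inside the helper):

        mutex_lock(&dev_priv->rps.hw_lock);
        /* vlv_punit_read() takes and releases sb_lock internally */
        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
        mutex_unlock(&dev_priv->rps.hw_lock);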
index f215e223aa4a9b11cd9a74d015dca7fd59b5f922..8193a35388d7a0fbed6a947650f4a6bc8895c807 100644 (file)
@@ -229,8 +229,8 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 
        if (intel_rotation_90_or_270(rotation)) {
                /* stride: Surface height in tiles */
-               tile_height = intel_tile_height(dev, fb->bits_per_pixel,
-                                                       fb->modifier[0]);
+               tile_height = intel_tile_height(dev, fb->pixel_format,
+                                               fb->modifier[0]);
                stride = DIV_ROUND_UP(fb->height, tile_height);
                plane_size = (src_w << 16) | src_h;
                x_offset = stride * tile_height - y - (src_h + 1);
@@ -282,7 +282,6 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force)
 
        I915_WRITE(PLANE_CTL(pipe, plane), 0);
 
-       /* Activate double buffered register update */
        I915_WRITE(PLANE_SURF(pipe, plane), 0);
        POSTING_READ(PLANE_SURF(pipe, plane));
 
@@ -339,7 +338,6 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
        struct drm_device *dev = dplane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(dplane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        int pipe = intel_plane->pipe;
        int plane = intel_plane->plane;
@@ -453,8 +451,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
        I915_WRITE(SPCNTR(pipe, plane), sprctl);
        I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
                   sprsurf_offset);
-
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       POSTING_READ(SPSURF(pipe, plane));
 }
 
 static void
@@ -463,21 +460,17 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force)
        struct drm_device *dev = dplane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(dplane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_plane->pipe;
        int plane = intel_plane->plane;
 
        I915_WRITE(SPCNTR(pipe, plane), 0);
 
-       /* Activate double buffered register update */
        I915_WRITE(SPSURF(pipe, plane), 0);
-
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       POSTING_READ(SPSURF(pipe, plane));
 
        intel_update_sprite_watermarks(dplane, crtc, 0, 0, 0, false, false);
 }
 
-
 static void
 ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
                 struct drm_framebuffer *fb,
@@ -489,7 +482,6 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
        struct drm_device *dev = plane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        enum pipe pipe = intel_plane->pipe;
        u32 sprctl, sprscale = 0;
@@ -599,8 +591,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
        I915_WRITE(SPRCTL(pipe), sprctl);
        I915_WRITE(SPRSURF(pipe),
                   i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
-
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       POSTING_READ(SPRSURF(pipe));
 }
 
 static void
@@ -609,17 +600,15 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force)
        struct drm_device *dev = plane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_plane->pipe;
 
        I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE);
        /* Can't leave the scaler enabled... */
        if (intel_plane->can_scale)
                I915_WRITE(SPRSCALE(pipe), 0);
-       /* Activate double buffered register update */
-       I915_WRITE(SPRSURF(pipe), 0);
 
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       I915_WRITE(SPRSURF(pipe), 0);
+       POSTING_READ(SPRSURF(pipe));
 }
 
 static void
@@ -633,7 +622,6 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
        struct drm_device *dev = plane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        int pipe = intel_plane->pipe;
        unsigned long dvssurf_offset, linear_offset;
@@ -730,8 +718,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
        I915_WRITE(DVSCNTR(pipe), dvscntr);
        I915_WRITE(DVSSURF(pipe),
                   i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
-
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       POSTING_READ(DVSSURF(pipe));
 }
 
 static void
@@ -740,17 +727,14 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force)
        struct drm_device *dev = plane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_plane->pipe;
 
        I915_WRITE(DVSCNTR(pipe), 0);
        /* Disable the scaler */
        I915_WRITE(DVSSCALE(pipe), 0);
 
-       /* Flush double buffered register updates */
        I915_WRITE(DVSSURF(pipe), 0);
-
-       intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+       POSTING_READ(DVSSURF(pipe));
 }
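
The removed intel_flush_primary_plane() calls went through the primary plane's registers to force the update out; a posting read of the sprite's own SURF register is sufficient, because the SURF write itself is what arms the double-buffered update at the next vblank. The resulting idiom, shown with the VLV sprite registers (surf_offset stands in for the computed surface address):

        /* Writing SPSURF latches all double-buffered sprite registers at
         * the next vblank; the posting read flushes the arming write. */
        I915_WRITE(SPSURF(pipe, plane), surf_offset);
        POSTING_READ(SPSURF(pipe, plane));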
 
 static int
@@ -770,6 +754,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
        const struct drm_rect *clip = &state->clip;
        int hscale, vscale;
        int max_scale, min_scale;
+       bool can_scale;
        int pixel_size;
        int ret;
 
@@ -794,18 +779,29 @@ intel_check_sprite_plane(struct drm_plane *plane,
                return -EINVAL;
        }
 
+       /* setup can_scale, min_scale, max_scale */
+       if (INTEL_INFO(dev)->gen >= 9) {
+               /* use scaler when colorkey is not required */
+               if (intel_plane->ckey.flags == I915_SET_COLORKEY_NONE) {
+                       can_scale = true;
+                       min_scale = 1;
+                       max_scale = skl_max_scale(intel_crtc, crtc_state);
+               } else {
+                       can_scale = false;
+                       min_scale = DRM_PLANE_HELPER_NO_SCALING;
+                       max_scale = DRM_PLANE_HELPER_NO_SCALING;
+               }
+       } else {
+               can_scale = intel_plane->can_scale;
+               max_scale = intel_plane->max_downscale << 16;
+               min_scale = intel_plane->can_scale ? 1 : (1 << 16);
+       }
+
        /*
         * FIXME the following code does a bunch of fuzzy adjustments to the
         * coordinates and sizes. We probably need some way to decide whether
         * more strict checking should be done instead.
         */
-       max_scale = intel_plane->max_downscale << 16;
-       min_scale = intel_plane->can_scale ? 1 : (1 << 16);
-
-       if (INTEL_INFO(dev)->gen >= 9) {
-               min_scale = 1;
-               max_scale = skl_max_scale(intel_crtc, crtc_state);
-       }
 
        drm_rect_rotate(src, fb->width << 16, fb->height << 16,
                        state->base.rotation);
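
min_scale and max_scale are src:dst ratios in 16.16 fixed point, so the assignments above decode as follows (a worked example, no new code):

        /* 16.16 scale limits:
         *   min_scale = 1        -> effectively unlimited upscaling
         *   min_scale = 1 << 16  -> 1:1 only (plane cannot scale)
         *   max_scale = 16 << 16 -> up to 16x downscale
         *                           (when max_downscale == 16)
         * e.g. a 1920-wide source on a 960-wide destination is a ratio of
         * (1920 << 16) / 960 == 2 << 16, which must be <= max_scale. */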
@@ -876,7 +872,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
                         * Must keep src and dst the
                         * same if we can't scale.
                         */
-                       if (!intel_plane->can_scale)
+                       if (!can_scale)
                                crtc_w &= ~1;
 
                        if (crtc_w == 0)
@@ -888,7 +884,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
        if (state->visible && (src_w != crtc_w || src_h != crtc_h)) {
                unsigned int width_bytes;
 
-               WARN_ON(!intel_plane->can_scale);
+               WARN_ON(!can_scale);
 
                /* FIXME interlacing min height is 6 */
 
@@ -1052,7 +1048,7 @@ int intel_plane_restore(struct drm_plane *plane)
                                       plane->state->src_w, plane->state->src_h);
 }
 
-static uint32_t ilk_plane_formats[] = {
+static const uint32_t ilk_plane_formats[] = {
        DRM_FORMAT_XRGB8888,
        DRM_FORMAT_YUYV,
        DRM_FORMAT_YVYU,
@@ -1060,7 +1056,7 @@ static uint32_t ilk_plane_formats[] = {
        DRM_FORMAT_VYUY,
 };
 
-static uint32_t snb_plane_formats[] = {
+static const uint32_t snb_plane_formats[] = {
        DRM_FORMAT_XBGR8888,
        DRM_FORMAT_XRGB8888,
        DRM_FORMAT_YUYV,
@@ -1069,7 +1065,7 @@ static uint32_t snb_plane_formats[] = {
        DRM_FORMAT_VYUY,
 };
 
-static uint32_t vlv_plane_formats[] = {
+static const uint32_t vlv_plane_formats[] = {
        DRM_FORMAT_RGB565,
        DRM_FORMAT_ABGR8888,
        DRM_FORMAT_ARGB8888,
index 3e3290c203c625d781f7dfacc13977c50c54d34b..ed173d30f1c05c49a4e275c7a2767d6b58a07894 100644 (file)
@@ -253,7 +253,7 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
 #define DP_VOLTAGE_MAX         DP_TRAIN_VOLTAGE_SWING_LEVEL_3
 #define DP_PRE_EMPHASIS_MAX    DP_TRAIN_PRE_EMPH_LEVEL_3
 
-static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
+static void dp_get_adjust_train(const u8 link_status[DP_LINK_STATUS_SIZE],
                                int lane_count,
                                u8 train_set[4])
 {
@@ -311,7 +311,7 @@ static int dp_get_max_dp_pix_clock(int link_rate,
 /***** radeon specific DP functions *****/
 
 int radeon_dp_get_max_link_rate(struct drm_connector *connector,
-                               u8 dpcd[DP_DPCD_SIZE])
+                               const u8 dpcd[DP_DPCD_SIZE])
 {
        int max_link_rate;
 
@@ -328,7 +328,7 @@ int radeon_dp_get_max_link_rate(struct drm_connector *connector,
  * if the max lane# < low rate lane# then use max lane# instead.
  */
 static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
-                                       u8 dpcd[DP_DPCD_SIZE],
+                                       const u8 dpcd[DP_DPCD_SIZE],
                                        int pix_clock)
 {
        int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
@@ -347,7 +347,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
 }
 
 static int radeon_dp_get_dp_link_clock(struct drm_connector *connector,
-                                      u8 dpcd[DP_DPCD_SIZE],
+                                      const u8 dpcd[DP_DPCD_SIZE],
                                       int pix_clock)
 {
        int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
index a0c35bbc85462587be95048bd996aecb8ec481e2..d3a22f21294879bd658e586734752907e3b07041 100644 (file)
@@ -174,6 +174,31 @@ int cik_get_allowed_info_register(struct radeon_device *rdev,
        }
 }
 
+/*
+ * Indirect register accessors
+ */
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+       WREG32(CIK_DIDT_IND_INDEX, (reg));
+       r = RREG32(CIK_DIDT_IND_DATA);
+       spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+       return r;
+}
+
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+       WREG32(CIK_DIDT_IND_INDEX, (reg));
+       WREG32(CIK_DIDT_IND_DATA, (v));
+       spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+}
+
 /* get temperature in millidegrees */
 int ci_get_temp(struct radeon_device *rdev)
 {
index 0ec5d53eb6fb2bbd69c9c21ae5a65028d3c818af..4e883fdc59d8a5edf35cf4bf9a254173a7a92391 100644 (file)
 
 #define KFD_CIK_SDMA_QUEUE_OFFSET              0x200
 
+#define SQ_IND_INDEX                                   0x8DE0
+#define SQ_CMD                                         0x8DEC
+#define SQ_IND_DATA                                    0x8DE4
+
+/*
+ * The TCP_WATCHx_xxxx offsets below are given in dwords; they are
+ * multiplied by 4 to convert them to byte offsets.
+ */
+#define TCP_WATCH0_ADDR_H                              (0x32A0*4)
+#define TCP_WATCH1_ADDR_H                              (0x32A3*4)
+#define TCP_WATCH2_ADDR_H                              (0x32A6*4)
+#define TCP_WATCH3_ADDR_H                              (0x32A9*4)
+#define TCP_WATCH0_ADDR_L                              (0x32A1*4)
+#define TCP_WATCH1_ADDR_L                              (0x32A4*4)
+#define TCP_WATCH2_ADDR_L                              (0x32A7*4)
+#define TCP_WATCH3_ADDR_L                              (0x32AA*4)
+#define TCP_WATCH0_CNTL                                        (0x32A2*4)
+#define TCP_WATCH1_CNTL                                        (0x32A5*4)
+#define TCP_WATCH2_CNTL                                        (0x32A8*4)
+#define TCP_WATCH3_CNTL                                        (0x32AB*4)
+
 #define CPC_INT_CNTL                                   0xC2D0
 
 #define CP_HQD_IQ_RPTR                                 0xC970u
-#define AQL_ENABLE                                     (1U << 0)
 #define SDMA0_RLC0_RB_CNTL                             0xD400u
 #define        SDMA_RB_VMID(x)                                 (x << 24)
 #define        SDMA0_RLC0_RB_BASE                              0xD404u
 #define        SDMA0_CNTL                                      0xD010
 #define        SDMA1_CNTL                                      0xD810
 
+enum {
+       MAX_TRAPID = 8,         /* 3 bits in the bitfield.  */
+       MAX_WATCH_ADDRESSES = 4
+};
+
+enum {
+       ADDRESS_WATCH_REG_ADDR_HI = 0,
+       ADDRESS_WATCH_REG_ADDR_LO,
+       ADDRESS_WATCH_REG_CNTL,
+       ADDRESS_WATCH_REG_MAX
+};
+
+enum {                         /* not defined in the CI/KV reg file */
+       ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
+       ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
+       ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
+       /* extend the mask to 26 bits in order to match the low address field */
+       ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
+       ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
+};
+
+union TCP_WATCH_CNTL_BITS {
+       struct {
+               uint32_t mask:24;
+               uint32_t vmid:4;
+               uint32_t atc:1;
+               uint32_t mode:2;
+               uint32_t valid:1;
+       } bitfields, bits;
+       uint32_t u32All;
+       signed int i32All;
+       float f32All;
+};
+
 #endif
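
The union gives named access to the fields of a TCP_WATCHx_CNTL word; a hedged sketch of composing one (the VMID is illustrative, and write_register() is the radeon_kfd helper whose users appear further below):

        union TCP_WATCH_CNTL_BITS cntl;

        cntl.u32All = 0;
        cntl.bitfields.mask  = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
        cntl.bitfields.vmid  = 8;       /* illustrative VMID */
        cntl.bitfields.atc   = 1;
        cntl.bitfields.valid = 1;       /* arm the watch point */
        write_register(kgd, TCP_WATCH0_CNTL, cntl.u32All);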
index b33ba3b0808bbd9b1755f14310a95cff150bffe1..391ff9d5d70668326cea4ff4a078eeec673198bd 100644 (file)
 #define VCE_CMD_IB_AUTO                0x00000005
 #define VCE_CMD_SEMAPHORE      0x00000006
 
-#define ATC_VMID0_PASID_MAPPING                                        0x339Cu
-#define        ATC_VMID_PASID_MAPPING_UPDATE_STATUS    0x3398u
-#define        ATC_VMID_PASID_MAPPING_VALID                            (1U << 31)
+#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS           0x3398u
+#define ATC_VMID0_PASID_MAPPING                                0x339Cu
+#define ATC_VMID_PASID_MAPPING_PASID_MASK              (0xFFFF)
+#define ATC_VMID_PASID_MAPPING_PASID_SHIFT             0
+#define ATC_VMID_PASID_MAPPING_VALID_MASK              (0x1 << 31)
+#define ATC_VMID_PASID_MAPPING_VALID_SHIFT             31
 
 #define ATC_VM_APERTURE0_CNTL                                  0x3310u
 #define        ATS_ACCESS_MODE_NEVER                                           0
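
With explicit mask/shift pairs, composing a mapping entry becomes mechanical; a worked sketch matching the radeon_kfd usage later in this section (VMID and PASID values are illustrative):

        /* Map VMID 4 to PASID 0x17 and mark the entry valid: */
        uint32_t v = (0x17 & ATC_VMID_PASID_MAPPING_PASID_MASK) |
                     ATC_VMID_PASID_MAPPING_VALID_MASK;
        /* the entry lives at ATC_VMID0_PASID_MAPPING + 4 * sizeof(uint32_t) */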
index 05e6d6ef596385ecab450bbd1831e942186021d2..5397bed26b86d4346b48fe994a2c1aa6ce834409 100644 (file)
 #include "evergreen_blit_shaders.h"
 #include "radeon_ucode.h"
 
+/*
+ * Indirect register accessors
+ */
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+       WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+       r = RREG32(EVERGREEN_CG_IND_DATA);
+       spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+       return r;
+}
+
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+       WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+       WREG32(EVERGREEN_CG_IND_DATA, (v));
+       spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+}
+
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+       WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+       r = RREG32(EVERGREEN_PIF_PHY0_DATA);
+       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+       return r;
+}
+
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+       WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+       WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
+       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+       WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+       r = RREG32(EVERGREEN_PIF_PHY1_DATA);
+       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+       return r;
+}
+
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+       WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+       WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
+       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
 static const u32 crtc_offsets[6] =
 {
        EVERGREEN_CRTC0_REGISTER_OFFSET,
index aba2f428c0a895380a4a3251e51c6484fba4b3be..75977d7e177eb3675b4a94d75b07cee257f07f74 100644 (file)
 #include "radeon_ucode.h"
 #include "clearstate_cayman.h"
 
+/*
+ * Indirect register accessors
+ */
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+       WREG32(TN_SMC_IND_INDEX_0, (reg));
+       r = RREG32(TN_SMC_IND_DATA_0);
+       spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+       return r;
+}
+
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+       WREG32(TN_SMC_IND_INDEX_0, (reg));
+       WREG32(TN_SMC_IND_DATA_0, (v));
+       spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+}
+
 static const u32 tn_rlc_save_restore_register_list[] =
 {
        0x98fc,
@@ -2041,6 +2066,25 @@ static int cayman_startup(struct radeon_device *rdev)
        if (r)
                rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
 
+       if (rdev->family == CHIP_ARUBA) {
+               r = radeon_vce_resume(rdev);
+               if (!r)
+                       r = vce_v1_0_resume(rdev);
+
+               if (!r)
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          TN_RING_TYPE_VCE1_INDEX);
+               if (!r)
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          TN_RING_TYPE_VCE2_INDEX);
+
+               if (r) {
+                       dev_err(rdev->dev, "VCE init error (%d).\n", r);
+                       rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+                       rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+               }
+       }
+
        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -2118,6 +2162,19 @@ static int cayman_startup(struct radeon_device *rdev)
                        DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
        }
 
+       ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+       if (ring->ring_size)
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+       ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+       if (ring->ring_size)
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+       if (!r)
+               r = vce_v1_0_init(rdev);
+       else if (r != -ENOENT)
+               DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2273,6 +2330,19 @@ int cayman_init(struct radeon_device *rdev)
                r600_ring_init(rdev, ring, 4096);
        }
 
+       if (rdev->family == CHIP_ARUBA) {
+               r = radeon_vce_init(rdev);
+               if (!r) {
+                       ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+                       ring->ring_obj = NULL;
+                       r600_ring_init(rdev, ring, 4096);
+
+                       ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+                       ring->ring_obj = NULL;
+                       r600_ring_init(rdev, ring, 4096);
+               }
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -2326,6 +2396,7 @@ void cayman_fini(struct radeon_device *rdev)
        radeon_irq_kms_fini(rdev);
        uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
+       radeon_vce_fini(rdev);
        cayman_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
@@ -2554,3 +2625,34 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
 }
+
+int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
+{
+       struct atom_clock_dividers dividers;
+       int r, i;
+
+       r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+                                          ecclk, false, &dividers);
+       if (r)
+               return r;
+
+       for (i = 0; i < 100; i++) {
+               if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS)
+                       break;
+               mdelay(10);
+       }
+       if (i == 100)
+               return -ETIMEDOUT;
+
+       WREG32_P(CG_ECLK_CNTL, dividers.post_div, ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK));
+
+       for (i = 0; i < 100; i++) {
+               if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS)
+                       break;
+               mdelay(10);
+       }
+       if (i == 100)
+               return -ETIMEDOUT;
+
+       return 0;
+}
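
tn_set_vce_clocks() polls the same status bit before and after reprogramming the divider. Factored into a hypothetical helper for clarity (eclk_wait_stable() is not part of the patch):

        static int eclk_wait_stable(struct radeon_device *rdev)
        {
                int i;

                /* ~1 s budget: up to 100 polls, 10 ms apart */
                for (i = 0; i < 100; i++) {
                        if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS)
                                return 0;
                        mdelay(10);
                }
                return -ETIMEDOUT;
        }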
index 3b290838918cfc3d04910616c25fd20df810fa89..47eb49b77d326ac88d2cfb3ec1de0c4632412257 100644 (file)
 
 #define DMIF_ADDR_CONFIG                               0xBD4
 
+/* Fusion VCE clocks */
+#define CG_ECLK_CNTL                                    0x620
+#       define ECLK_DIVIDER_MASK                        0x7f
+#       define ECLK_DIR_CNTL_EN                         (1 << 8)
+#define CG_ECLK_STATUS                                  0x624
+#       define ECLK_STATUS                              (1 << 0)
+
 /* DCE6 only */
 #define DMIF_ADDR_CALC                                 0xC00
 
index 04f2514f756453bfdc7275a52c65549e631cfc6c..238b13f045c180480ef4f49983bbc19b76016729 100644 (file)
@@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev)
        return 0;
 }
 
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
+{
+       unsigned long flags;
+       uint32_t ret;
+
+       spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+       writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+       ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+       spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+       return ret;
+}
+
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+       writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+       writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+       spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+}
+
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
 {
        if (reg < rdev->rio_mem_size)
index 08d68f3e13e9887ff7b06f18e899c34fd85ae31a..718b12b03b570de5687af46d97ef123dc130c961 100644 (file)
  *   tell. (Jerome Glisse)
  */
 
+/*
+ * Indirect register accessors
+ */
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+       unsigned long flags;
+       uint32_t r;
+
+       spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+       WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+       r = RREG32(RADEON_PCIE_DATA);
+       spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+       return r;
+}
+
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+       WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+       WREG32(RADEON_PCIE_DATA, (v));
+       spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+}
+
 /*
  * rv370,rv380 PCIE GART
  */
index 25b4ac967742c034372caa1dbf67d6476fd6bb92..d0ff93256bb04017945d75ac0bc0d1094de0ab06 100644 (file)
@@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev);
 extern int evergreen_rlc_resume(struct radeon_device *rdev);
 extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev);
 
+/*
+ * Indirect register accessors
+ */
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+       WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+       r = RREG32(R600_RCU_DATA);
+       spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+       return r;
+}
+
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+       WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+       WREG32(R600_RCU_DATA, (v));
+       spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+}
+
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
+{
+       unsigned long flags;
+       u32 r;
+
+       spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+       WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+       r = RREG32(R600_UVD_CTX_DATA);
+       spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+       return r;
+}
+
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+       WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+       WREG32(R600_UVD_CTX_DATA, (v));
+       spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+}
+
 /**
  * r600_get_allowed_info_register - fetch the register for the info ioctl
  *
index 46eb0fa75a614307286446a99d7c1c2037973ab4..4d2d0579fd4934524bf0764b384e04bb7eb8f317 100644 (file)
@@ -467,7 +467,6 @@ struct radeon_bo_va {
        /* protected by bo being reserved */
        struct list_head                bo_list;
        uint32_t                        flags;
-       uint64_t                        addr;
        struct radeon_fence             *last_pt_update;
        unsigned                        ref_count;
 
@@ -719,7 +718,7 @@ struct radeon_doorbell {
        resource_size_t         size;
        u32 __iomem             *ptr;
        u32                     num_doorbells;  /* Number of doorbells actually reserved for radeon. */
-       unsigned long           used[DIV_ROUND_UP(RADEON_MAX_DOORBELLS, BITS_PER_LONG)];
+       DECLARE_BITMAP(used, RADEON_MAX_DOORBELLS);
 };
 
 int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
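
DECLARE_BITMAP is the kernel's canonical way to size a bitmap, and is exactly what the open-coded array was reimplementing; from <linux/types.h> it expands to roughly:

        unsigned long used[BITS_TO_LONGS(RADEON_MAX_DOORBELLS)];
        /* where BITS_TO_LONGS(n) == DIV_ROUND_UP(n, BITS_PER_LONG) */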
@@ -941,6 +940,9 @@ struct radeon_vm {
        /* BOs freed, but not yet updated in the PT */
        struct list_head        freed;
 
+       /* BOs cleared in the PT */
+       struct list_head        cleared;
+
        /* contains the page directory */
        struct radeon_bo        *page_directory;
        unsigned                max_pde_used;
@@ -1709,8 +1711,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
  * VCE
  */
 #define RADEON_MAX_VCE_HANDLES 16
-#define RADEON_VCE_STACK_SIZE  (1024*1024)
-#define RADEON_VCE_HEAP_SIZE   (4*1024*1024)
 
 struct radeon_vce {
        struct radeon_bo        *vcpu_bo;
@@ -1721,6 +1721,7 @@ struct radeon_vce {
        struct drm_file         *filp[RADEON_MAX_VCE_HANDLES];
        unsigned                img_size[RADEON_MAX_VCE_HANDLES];
        struct delayed_work     idle_work;
+       uint32_t                keyselect;
 };
 
 int radeon_vce_init(struct radeon_device *rdev);
@@ -2435,6 +2436,7 @@ struct radeon_device {
        atomic64_t                      vram_usage;
        atomic64_t                      gtt_usage;
        atomic64_t                      num_bytes_moved;
+       atomic_t                        gpu_reset_counter;
        /* ACPI interface */
        struct radeon_atif              atif;
        struct radeon_atcs              atcs;
@@ -2472,38 +2474,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
 #define RADEON_MIN_MMIO_SIZE 0x10000
 
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
                                    bool always_indirect)
 {
        /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
        if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
                return readl(((void __iomem *)rdev->rmmio) + reg);
-       else {
-               unsigned long flags;
-               uint32_t ret;
-
-               spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
-               writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-               ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
-               spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-
-               return ret;
-       }
+       else
+               return r100_mm_rreg_slow(rdev, reg);
 }
-
 static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
                                bool always_indirect)
 {
        if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
                writel(v, ((void __iomem *)rdev->rmmio) + reg);
-       else {
-               unsigned long flags;
-
-               spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
-               writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-               writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
-               spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-       }
+       else
+               r100_mm_wreg_slow(rdev, reg, v);
 }
 
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
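
Keeping the common case inline matters because reg is almost always a compile-time constant below RADEON_MIN_MMIO_SIZE, so the branch folds away entirely; a hedged illustration (the offset is hypothetical):

        /* With a constant in-range reg this compiles down to one readl();
         * only the rare indirect case pays the out-of-line call. */
        u32 v = r100_mm_rreg(rdev, 0x1234, false);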
@@ -2579,6 +2567,13 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
                tmp_ |= ((val) & ~(mask));                      \
                WREG32_PLL(reg, tmp_);                          \
        } while (0)
+#define WREG32_SMC_P(reg, val, mask)                           \
+       do {                                                    \
+               uint32_t tmp_ = RREG32_SMC(reg);                \
+               tmp_ &= (mask);                                 \
+               tmp_ |= ((val) & ~(mask));                      \
+               WREG32_SMC(reg, tmp_);                          \
+       } while (0)
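
Like WREG32_P and WREG32_PLL_P above it, WREG32_SMC_P performs a read-modify-write in which the bits set in mask are preserved and the remaining bits come from val; a worked sketch (the register name is hypothetical):

        /* Replace only the low byte of an SMC register, keep the rest: */
        WREG32_SMC_P(SOME_SMC_REG, 0x12, ~0xff);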
 #define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
 #define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
 #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
@@ -2587,184 +2582,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
 #define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
 
 /*
- * Indirect registers accessor
+ * Indirect register accessors.
+ * These used to be inlined, but that inflates code size by ~65 kbytes.
+ * Since each accessor performs a pair of MMIO ops inside a
+ * spin_lock_irqsave/spin_unlock_irqrestore region, the call+ret
+ * overhead is almost negligible: MMIO and locking each cost several
+ * dozen cycles at best, while call+ret is ~5 cycles.
  */
-static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-       unsigned long flags;
-       uint32_t r;
-
-       spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
-       WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
-       r = RREG32(RADEON_PCIE_DATA);
-       spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
-       return r;
-}
-
-static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
-       WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
-       WREG32(RADEON_PCIE_DATA, (v));
-       spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
-}
-
-static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->smc_idx_lock, flags);
-       WREG32(TN_SMC_IND_INDEX_0, (reg));
-       r = RREG32(TN_SMC_IND_DATA_0);
-       spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
-       return r;
-}
-
-static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->smc_idx_lock, flags);
-       WREG32(TN_SMC_IND_INDEX_0, (reg));
-       WREG32(TN_SMC_IND_DATA_0, (v));
-       spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
-}
-
-static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
-       WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
-       r = RREG32(R600_RCU_DATA);
-       spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
-       return r;
-}
-
-static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
-       WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
-       WREG32(R600_RCU_DATA, (v));
-       spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
-}
-
-static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->cg_idx_lock, flags);
-       WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
-       r = RREG32(EVERGREEN_CG_IND_DATA);
-       spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
-       return r;
-}
-
-static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->cg_idx_lock, flags);
-       WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
-       WREG32(EVERGREEN_CG_IND_DATA, (v));
-       spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-       WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
-       r = RREG32(EVERGREEN_PIF_PHY0_DATA);
-       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-       return r;
-}
-
-static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-       WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
-       WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
-       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-       WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
-       r = RREG32(EVERGREEN_PIF_PHY1_DATA);
-       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-       return r;
-}
-
-static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-       WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
-       WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
-       spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
-       WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
-       r = RREG32(R600_UVD_CTX_DATA);
-       spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
-       return r;
-}
-
-static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
-       WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
-       WREG32(R600_UVD_CTX_DATA, (v));
-       spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
-}
-
-
-static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
-{
-       unsigned long flags;
-       u32 r;
-
-       spin_lock_irqsave(&rdev->didt_idx_lock, flags);
-       WREG32(CIK_DIDT_IND_INDEX, (reg));
-       r = RREG32(CIK_DIDT_IND_DATA);
-       spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
-       return r;
-}
-
-static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rdev->didt_idx_lock, flags);
-       WREG32(CIK_DIDT_IND_INDEX, (reg));
-       WREG32(CIK_DIDT_IND_DATA, (v));
-       spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
-}
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 void r100_pll_errata_after_index(struct radeon_device *rdev);
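
Every accessor declared above has the same index/data shape, differing only in the register pair and the per-block spinlock; a generic sketch (BLOCK_INDEX, BLOCK_DATA and block_idx_lock are placeholders, not real names):

        u32 block_rreg(struct radeon_device *rdev, u32 reg)
        {
                unsigned long flags;
                u32 r;

                /* serialize the index/data pair against concurrent users */
                spin_lock_irqsave(&rdev->block_idx_lock, flags);
                WREG32(BLOCK_INDEX, reg);       /* select the register */
                r = RREG32(BLOCK_DATA);         /* then read it */
                spin_unlock_irqrestore(&rdev->block_idx_lock, flags);
                return r;
        }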
 
index 8dbf5083c4ff795498e619d798890bcaa4dca1e1..f2421bc3e901904d8fbe92deffb9e98d033de62b 100644 (file)
@@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = {
        },
 };
 
+static struct radeon_asic_ring trinity_vce_ring = {
+       .ib_execute = &radeon_vce_ib_execute,
+       .emit_fence = &radeon_vce_fence_emit,
+       .emit_semaphore = &radeon_vce_semaphore_emit,
+       .cs_parse = &radeon_vce_cs_parse,
+       .ring_test = &radeon_vce_ring_test,
+       .ib_test = &radeon_vce_ib_test,
+       .is_lockup = &radeon_ring_test_lockup,
+       .get_rptr = &vce_v1_0_get_rptr,
+       .get_wptr = &vce_v1_0_get_wptr,
+       .set_wptr = &vce_v1_0_set_wptr,
+};
+
 static struct radeon_asic trinity_asic = {
        .init = &cayman_init,
        .fini = &cayman_fini,
@@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = {
                [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
                [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
                [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+               [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+               [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
        },
        .irq = {
                .set = &evergreen_irq_set,
@@ -1838,6 +1853,7 @@ static struct radeon_asic trinity_asic = {
                .set_pcie_lanes = NULL,
                .set_clock_gating = NULL,
                .set_uvd_clocks = &sumo_set_uvd_clocks,
+               .set_vce_clocks = &tn_set_vce_clocks,
                .get_temperature = &tn_get_temp,
        },
        .dpm = {
@@ -1929,6 +1945,8 @@ static struct radeon_asic si_asic = {
                [R600_RING_TYPE_DMA_INDEX] = &si_dma_ring,
                [CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring,
                [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+               [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+               [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
        },
        .irq = {
                .set = &si_irq_set,
@@ -1973,6 +1991,7 @@ static struct radeon_asic si_asic = {
                .set_pcie_lanes = &r600_set_pcie_lanes,
                .set_clock_gating = NULL,
                .set_uvd_clocks = &si_set_uvd_clocks,
+               .set_vce_clocks = &si_set_vce_clocks,
                .get_temperature = &si_get_temp,
        },
        .dpm = {
@@ -2436,6 +2455,8 @@ int radeon_asic_init(struct radeon_device *rdev)
                /* set num crtcs */
                rdev->num_crtc = 4;
                rdev->has_uvd = true;
+               rdev->cg_flags =
+                       RADEON_CG_SUPPORT_VCE_MGCG;
                break;
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
index a3ca8cd305c5c21541bae20820dbe3366f416706..e0aa33262eac62126e73984292e2658b310305ba 100644 (file)
@@ -694,6 +694,7 @@ int trinity_dpm_force_performance_level(struct radeon_device *rdev,
 void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable);
 u32 trinity_dpm_get_current_sclk(struct radeon_device *rdev);
 u32 trinity_dpm_get_current_mclk(struct radeon_device *rdev);
+int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -745,6 +746,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
 u32 si_get_xclk(struct radeon_device *rdev);
 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
 int si_get_temp(struct radeon_device *rdev);
 int si_get_allowed_info_register(struct radeon_device *rdev,
                                 u32 reg, u32 *val);
@@ -970,10 +972,14 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
                           struct radeon_ring *ring);
 void vce_v1_0_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring);
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data);
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev);
+int vce_v1_0_resume(struct radeon_device *rdev);
 int vce_v1_0_init(struct radeon_device *rdev);
 int vce_v1_0_start(struct radeon_device *rdev);
 
 /* vce v2.0 */
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev);
 int vce_v2_0_resume(struct radeon_device *rdev);
 
 #endif
index dcb779647c570cea3fd0a29fbae41be53dcbb7df..abbc154b1bff1aa7c84537f238c424327f5ac518 100644 (file)
@@ -242,6 +242,13 @@ static struct radeon_audio_funcs dce6_dp_funcs = {
        .dpms = evergreen_dp_enable,
 };
 
+static void radeon_audio_enable(struct radeon_device *rdev,
+                               struct r600_audio_pin *pin, u8 enable_mask)
+{
+       if (rdev->audio.funcs->enable)
+               rdev->audio.funcs->enable(rdev, pin, enable_mask);
+}
+
 static void radeon_audio_interface_init(struct radeon_device *rdev)
 {
        if (ASIC_IS_DCE6(rdev)) {
@@ -307,7 +314,7 @@ int radeon_audio_init(struct radeon_device *rdev)
 
        /* disable audio.  it will be set up later */
        for (i = 0; i < rdev->audio.num_pins; i++)
-               radeon_audio_enable(rdev, &rdev->audio.pin[i], false);
+               radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
 
        return 0;
 }
@@ -443,13 +450,6 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder)
                radeon_encoder->audio->select_pin(encoder);
 }
 
-void radeon_audio_enable(struct radeon_device *rdev,
-       struct r600_audio_pin *pin, u8 enable_mask)
-{
-       if (rdev->audio.funcs->enable)
-               rdev->audio.funcs->enable(rdev, pin, enable_mask);
-}
-
 void radeon_audio_detect(struct drm_connector *connector,
                         enum drm_connector_status status)
 {
@@ -505,7 +505,7 @@ void radeon_audio_fini(struct radeon_device *rdev)
                return;
 
        for (i = 0; i < rdev->audio.num_pins; i++)
-               radeon_audio_enable(rdev, &rdev->audio.pin[i], false);
+               radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
 
        rdev->audio.enabled = false;
 }
index c92d059ab204dd395f248e44fc3be33a84ec7005..8438304f7139549c9ab785ec3f1decd50ac3254a 100644 (file)
@@ -74,8 +74,6 @@ u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev,
 void radeon_audio_endpoint_wreg(struct radeon_device *rdev,
        u32 offset,     u32 reg, u32 v);
 struct r600_audio_pin *radeon_audio_get_pin(struct drm_encoder *encoder);
-void radeon_audio_enable(struct radeon_device *rdev,
-       struct r600_audio_pin *pin, u8 enable_mask);
 void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
        struct drm_display_mode *mode);
index b7ca4c51462120fab3ab146dd74f653e8bcb91cb..13e207e0dff04168c63b2c0e8acd3895e51938ce 100644 (file)
@@ -1725,6 +1725,8 @@ int radeon_gpu_reset(struct radeon_device *rdev)
                return 0;
        }
 
+       atomic_inc(&rdev->gpu_reset_counter);
+
        radeon_save_bios_scratch_regs(rdev);
        /* block TTM */
        resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
index 7d620d4b3f314d77407fbdbbf72c01435f1e4196..5751446677d382428846b14fc6d37d908bb582b9 100644 (file)
  *            CS to GPU on >= r600
  *   2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command parsing support
  *   2.42.0 - Add VCE/VUI (Video Usability Information) support
+ *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       42
+#define KMS_DRIVER_MINOR       43
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
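
A hedged userspace-side sketch of querying the counter that motivates this bump, assuming the RADEON_INFO_GPU_RESET_COUNTER request added in this series and the standard libdrm command helpers:

        #include <stdint.h>
        #include <xf86drm.h>
        #include <radeon_drm.h>

        uint32_t counter = 0;
        struct drm_radeon_info info = {
                .request = RADEON_INFO_GPU_RESET_COUNTER,
                .value = (uintptr_t)&counter,
        };

        /* returns 0 on success; counter increments on every GPU reset */
        int ret = drmCommandWriteRead(fd, DRM_RADEON_INFO,
                                      &info, sizeof(info));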
index 7162c935371c66a3abcf3fcf3734c1d234e3bb37..1162bfa464f3036192854f4d0f3f363eb1ee8cff 100644 (file)
@@ -79,10 +79,12 @@ static void radeon_hotplug_work_func(struct work_struct *work)
        struct drm_mode_config *mode_config = &dev->mode_config;
        struct drm_connector *connector;
 
+       mutex_lock(&mode_config->mutex);
        if (mode_config->num_connector) {
                list_for_each_entry(connector, &mode_config->connector_list, head)
                        radeon_connector_hotplug(connector);
        }
+       mutex_unlock(&mode_config->mutex);
        /* Just fire off a uevent and let userspace tell us what to do */
        drm_helper_hpd_irq_event(dev);
 }
@@ -143,7 +145,13 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
  */
 int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
 {
-       dev->max_vblank_count = 0x001fffff;
+       struct radeon_device *rdev = dev->dev_private;
+
+       if (ASIC_IS_AVIVO(rdev))
+               dev->max_vblank_count = 0x00ffffff;
+       else
+               dev->max_vblank_count = 0x001fffff;
+
        return 0;
 }
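
max_vblank_count has to match the real hardware counter width because the DRM core masks counter differences with it across wraparound; a sketch of the arithmetic this enables (assuming the core's masking behaviour):

        /* 24-bit AVIVO counter wrapping from 0x00fffffe to 0x000001:
         *   (0x000001 - 0x00fffffe) & 0x00ffffff == 3
         * i.e. three vblanks elapsed, despite the wrap. */
        diff = (cur_vblank - last_vblank) & dev->max_vblank_count;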
 
index 813a416db5386121dc55c006eb7a215f7e1f8c7f..e476c331f3fa6e3c91d4a5203f0aab8829028dfa 100644 (file)
 
 #define CIK_PIPE_PER_MEC       (4)
 
+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
+       TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
+       TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
+       TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
+       TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
+};
+
 struct kgd_mem {
        struct radeon_bo *bo;
        uint64_t gpu_addr;
@@ -79,6 +86,23 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int timeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       uint32_t cntl_val,
+                                       uint32_t addr_hi,
+                                       uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+                                       uint32_t gfx_index_val,
+                                       uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+                                                       uint8_t vmid);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 
 static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
@@ -96,6 +120,13 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+       .address_watch_disable = kgd_address_watch_disable,
+       .address_watch_execute = kgd_address_watch_execute,
+       .wave_control_execute = kgd_wave_control_execute,
+       .address_watch_get_offset = kgd_address_watch_get_offset,
+       .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
+       .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
+       .write_vmid_invalidate_request = write_vmid_invalidate_request,
        .get_fw_version = get_fw_version
 };
 
@@ -372,8 +403,8 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
         * the SW cleared it.
         * So the protocol is to always wait & clear.
         */
-       uint32_t pasid_mapping = (pasid == 0) ? 0 :
-                               (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
+       uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+                                       ATC_VMID_PASID_MAPPING_VALID_MASK;
 
        write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
                        pasid_mapping);
@@ -665,6 +696,122 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
        return 0;
 }
 
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+       union TCP_WATCH_CNTL_BITS cntl;
+       unsigned int i;
+
+       cntl.u32All = 0;
+
+       cntl.bitfields.valid = 0;
+       cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+       cntl.bitfields.atc = 1;
+
+       /* Turn off all watch points */
+       for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
+               write_register(kgd,
+                               watchRegs[i * ADDRESS_WATCH_REG_MAX +
+                                       ADDRESS_WATCH_REG_CNTL],
+                               cntl.u32All);
+
+       return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       uint32_t cntl_val,
+                                       uint32_t addr_hi,
+                                       uint32_t addr_lo)
+{
+       union TCP_WATCH_CNTL_BITS cntl;
+
+       cntl.u32All = cntl_val;
+
+       /* Turning off this watch point until we set all the registers */
+       cntl.bitfields.valid = 0;
+       write_register(kgd,
+                       watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+                               ADDRESS_WATCH_REG_CNTL],
+                       cntl.u32All);
+
+       write_register(kgd,
+                       watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+                               ADDRESS_WATCH_REG_ADDR_HI],
+                       addr_hi);
+
+       write_register(kgd,
+                       watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+                               ADDRESS_WATCH_REG_ADDR_LO],
+                       addr_lo);
+
+       /* Enable the watch point */
+       cntl.bitfields.valid = 1;
+
+       write_register(kgd,
+                       watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+                               ADDRESS_WATCH_REG_CNTL],
+                       cntl.u32All);
+
+       return 0;
+}
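
Together with kgd_address_watch_disable() above, the programming model is: drop the VALID bit, rewrite the address pair, then re-arm. A hypothetical call through the kfd2kgd table (the real call sites are in the new amdkfd dbgdev files, not in this hunk; `addr` is a made-up input, and the cntl field names are those used in kgd_address_watch_disable() above):

    union TCP_WATCH_CNTL_BITS cntl = { .u32All = 0 };

    cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
    cntl.bitfields.atc = 1;
    cntl.bitfields.valid = 1;   /* cleared and re-set inside the helper anyway */
    kfd2kgd.address_watch_execute(kgd, 0 /* watch point */, cntl.u32All,
                                  upper_32_bits(addr), lower_32_bits(addr));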
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+                                       uint32_t gfx_index_val,
+                                       uint32_t sq_cmd)
+{
+       struct radeon_device *rdev = get_radeon_device(kgd);
+       uint32_t data;
+
+       mutex_lock(&rdev->grbm_idx_mutex);
+
+       write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
+       write_register(kgd, SQ_CMD, sq_cmd);
+
+       /* Restore the GRBM_GFX_INDEX register */
+
+       data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+               SE_BROADCAST_WRITES;
+
+       write_register(kgd, GRBM_GFX_INDEX, data);
+
+       mutex_unlock(&rdev->grbm_idx_mutex);
+
+       return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+                                       unsigned int watch_point_id,
+                                       unsigned int reg_offset)
+{
+       return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
+{
+       uint32_t reg;
+       struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+       reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
+       return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+                                                       uint8_t vmid)
+{
+       uint32_t reg;
+       struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+       reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
+       return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+       struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+       WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
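The three query/invalidate helpers compose naturally into a PASID-to-VMID reverse lookup on the KFD side; a hedged sketch (`pasid` is a hypothetical input, and the 8..15 VMID range reserved for amdkfd is an assumption from the KFD split elsewhere in the driver):

    uint8_t vmid;

    for (vmid = 8; vmid < 16; vmid++) {
            if (get_atc_vmid_pasid_mapping_valid(kgd, vmid) &&
                get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid)
                    write_vmid_invalidate_request(kgd, vmid);
    }
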
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
        struct radeon_device *rdev = (struct radeon_device *) kgd;
index 7b2a7335cc5d557eafa6864d50cb6ebc9cdfb5ff..9632e886ddc3a85a79bdb300c02db326ebd57d1e 100644 (file)
@@ -576,6 +576,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                if (radeon_get_allowed_info_register(rdev, *value, value))
                        return -EINVAL;
                break;
+       case RADEON_INFO_GPU_RESET_COUNTER:
+               *value = atomic_read(&rdev->gpu_reset_counter);
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
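
From userspace the new counter is reachable through the existing info ioctl; a hypothetical query (`fd` is an already-open render node, and it is assumed the matching RADEON_INFO_GPU_RESET_COUNTER define lands in the uapi radeon_drm.h):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <drm/radeon_drm.h>

    uint32_t counter = 0;
    struct drm_radeon_info info = {
            .request = RADEON_INFO_GPU_RESET_COUNTER,
            .value = (uint64_t)(uintptr_t)&counter,
    };

    if (ioctl(fd, DRM_IOCTL_RADEON_INFO, &info) == 0)
            printf("GPU reset counter: %u\n", counter);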
index fa91a17b81b69c715d162b8c92dd66a4a6add914..6de5459316b53c60b41449c833b15dab31c0b45a 100644 (file)
@@ -754,7 +754,7 @@ extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
 extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder,
                                    struct drm_connector *connector);
 int radeon_dp_get_max_link_rate(struct drm_connector *connector,
-                               u8 *dpcd);
+                               const u8 *dpcd);
 extern void radeon_dp_set_rx_power_state(struct drm_connector *connector,
                                         u8 power_state);
 extern void radeon_dp_aux_init(struct radeon_connector *radeon_connector);
index 0de5711ac508842b2a1d70550c755f0f85cd6d71..574f62bbd215bcf58c125861f7718aaabba7c423 100644 (file)
 #define VCE_IDLE_TIMEOUT_MS    1000
 
 /* Firmware Names */
+#define FIRMWARE_TAHITI        "radeon/TAHITI_vce.bin"
 #define FIRMWARE_BONAIRE       "radeon/BONAIRE_vce.bin"
 
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
 
 static void radeon_vce_idle_work_handler(struct work_struct *work);
@@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev)
        INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
 
        switch (rdev->family) {
+       case CHIP_TAHITI:
+       case CHIP_PITCAIRN:
+       case CHIP_VERDE:
+       case CHIP_OLAND:
+       case CHIP_ARUBA:
+               fw_name = FIRMWARE_TAHITI;
+               break;
+
        case CHIP_BONAIRE:
        case CHIP_KAVERI:
        case CHIP_KABINI:
@@ -118,13 +128,17 @@ int radeon_vce_init(struct radeon_device *rdev)
        rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
 
        /* we can only work with this fw version for now */
-       if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8)))
+       if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) &&
+           (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) &&
+           (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8))))
                return -EINVAL;
 
        /* allocate firmware, stack and heap BO */
 
-       size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
-              RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
+       if (rdev->family < CHIP_BONAIRE)
+               size = vce_v1_0_bo_size(rdev);
+       else
+               size = vce_v2_0_bo_size(rdev);
        r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
                             RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
                             &rdev->vce.vcpu_bo);
@@ -225,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev)
                return r;
        }
 
-       memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
+       memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
+       if (rdev->family < CHIP_BONAIRE)
+               r = vce_v1_0_load_fw(rdev, cpu_addr);
+       else
+               memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
 
        radeon_bo_kunmap(rdev->vce.vcpu_bo);
 
        radeon_bo_unreserve(rdev->vce.vcpu_bo);
 
-       return 0;
+       return r;
 }
 
 /**
index de42fc4a22b869296ff44c85c859678c6155ddd7..9739ded91b7a6d5a5b422c018f793bff728c98c6 100644 (file)
@@ -331,7 +331,6 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
        bo_va->it.start = 0;
        bo_va->it.last = 0;
        bo_va->flags = 0;
-       bo_va->addr = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
        INIT_LIST_HEAD(&bo_va->vm_status);
@@ -491,9 +490,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
        }
 
        if (bo_va->it.start || bo_va->it.last) {
-               if (bo_va->addr) {
+               spin_lock(&vm->status_lock);
+               if (list_empty(&bo_va->vm_status)) {
                        /* add a clone of the bo_va to clear the old address */
                        struct radeon_bo_va *tmp;
+                       spin_unlock(&vm->status_lock);
                        tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
                        if (!tmp) {
                                mutex_unlock(&vm->mutex);
@@ -502,14 +503,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                        tmp->it.start = bo_va->it.start;
                        tmp->it.last = bo_va->it.last;
                        tmp->vm = vm;
-                       tmp->addr = bo_va->addr;
                        tmp->bo = radeon_bo_ref(bo_va->bo);
                        spin_lock(&vm->status_lock);
                        list_add(&tmp->vm_status, &vm->freed);
-                       spin_unlock(&vm->status_lock);
-
-                       bo_va->addr = 0;
                }
+               spin_unlock(&vm->status_lock);
 
                interval_tree_remove(&bo_va->it, &vm->va);
                bo_va->it.start = 0;
@@ -520,10 +518,12 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                bo_va->it.start = soffset;
                bo_va->it.last = eoffset - 1;
                interval_tree_insert(&bo_va->it, &vm->va);
+               spin_lock(&vm->status_lock);
+               list_add(&bo_va->vm_status, &vm->cleared);
+               spin_unlock(&vm->status_lock);
        }
 
        bo_va->flags = flags;
-       bo_va->addr = 0;
 
        soffset >>= radeon_vm_block_size;
        eoffset >>= radeon_vm_block_size;
@@ -921,7 +921,16 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
        }
 
        spin_lock(&vm->status_lock);
-       list_del_init(&bo_va->vm_status);
+       if (mem) {
+               if (list_empty(&bo_va->vm_status)) {
+                       spin_unlock(&vm->status_lock);
+                       return 0;
+               }
+               list_del_init(&bo_va->vm_status);
+       } else {
+               list_del(&bo_va->vm_status);
+               list_add(&bo_va->vm_status, &vm->cleared);
+       }
        spin_unlock(&vm->status_lock);
 
        bo_va->flags &= ~RADEON_VM_PAGE_VALID;
@@ -947,10 +956,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
                addr = 0;
        }
 
-       if (addr == bo_va->addr)
-               return 0;
-       bo_va->addr = addr;
-
        trace_radeon_vm_bo_update(bo_va);
 
        nptes = bo_va->it.last - bo_va->it.start + 1;
@@ -1038,7 +1043,7 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
                          struct radeon_vm *vm)
 {
        struct radeon_bo_va *bo_va;
-       int r;
+       int r = 0;
 
        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->freed)) {
@@ -1049,14 +1054,15 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
                r = radeon_vm_bo_update(rdev, bo_va, NULL);
                radeon_bo_unref(&bo_va->bo);
                radeon_fence_unref(&bo_va->last_pt_update);
+               spin_lock(&vm->status_lock);
+               list_del(&bo_va->vm_status);
                kfree(bo_va);
                if (r)
-                       return r;
+                       break;
 
-               spin_lock(&vm->status_lock);
        }
        spin_unlock(&vm->status_lock);
-       return 0;
+       return r;
 
 }
 
@@ -1114,14 +1120,14 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
        mutex_lock(&vm->mutex);
        if (bo_va->it.start || bo_va->it.last)
                interval_tree_remove(&bo_va->it, &vm->va);
-       spin_lock(&vm->status_lock);
-       list_del(&bo_va->vm_status);
 
-       if (bo_va->addr) {
+       spin_lock(&vm->status_lock);
+       if (list_empty(&bo_va->vm_status)) {
                bo_va->bo = radeon_bo_ref(bo_va->bo);
                list_add(&bo_va->vm_status, &vm->freed);
        } else {
                radeon_fence_unref(&bo_va->last_pt_update);
+               list_del(&bo_va->vm_status);
                kfree(bo_va);
        }
        spin_unlock(&vm->status_lock);
@@ -1144,12 +1150,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
        struct radeon_bo_va *bo_va;
 
        list_for_each_entry(bo_va, &bo->va, bo_list) {
-               if (bo_va->addr) {
-                       spin_lock(&bo_va->vm->status_lock);
-                       list_del(&bo_va->vm_status);
+               spin_lock(&bo_va->vm->status_lock);
+               if (list_empty(&bo_va->vm_status))
                        list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-                       spin_unlock(&bo_va->vm->status_lock);
-               }
+               spin_unlock(&bo_va->vm->status_lock);
        }
 }
 
@@ -1179,6 +1183,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
        spin_lock_init(&vm->status_lock);
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);
+       INIT_LIST_HEAD(&vm->cleared);
 
        pd_size = radeon_vm_directory_size(rdev);
        pd_entries = radeon_vm_num_pdes(rdev);
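
Taken together, the radeon_vm.c hunks replace the old bo_va->addr bookkeeping with list membership: under vm->status_lock, a bo_va is either on no status list (its mapping is live in the page tables), on vm->cleared (address assigned but nothing written to the page tables yet), on vm->invalidated (live mapping whose backing BO moved and must be rewritten), or on vm->freed (mapping removed, page-table entries still to be cleared). A one-line helper expressing the invariant the new checks rely on (our sketch, not in the patch):

    /* caller must hold vm->status_lock */
    static bool bo_va_mapped_in_pts(struct radeon_bo_va *bo_va)
    {
            return list_empty(&bo_va->vm_status);
    }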
index 5326f753e10760ec04e27701e2eef4674139494f..34c3739c87cf48a67af47d3e38795f7b4565d765 100644 (file)
@@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev)
                        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
        }
 
+       r = radeon_vce_resume(rdev);
+       if (!r) {
+               r = vce_v1_0_resume(rdev);
+               if (!r)
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          TN_RING_TYPE_VCE1_INDEX);
+               if (!r)
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          TN_RING_TYPE_VCE2_INDEX);
+       }
+       if (r) {
+               dev_err(rdev->dev, "VCE init error (%d).\n", r);
+               rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+               rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+       }
+
        /* Enable IRQ */
        if (!rdev->irq.installed) {
                r = radeon_irq_kms_init(rdev);
@@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev)
                }
        }
 
+       r = -ENOENT;
+
+       ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+       if (ring->ring_size)
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+                                    VCE_CMD_NO_OP);
+
+       ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+       if (ring->ring_size)
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+                                    VCE_CMD_NO_OP);
+
+       if (!r)
+               r = vce_v1_0_init(rdev);
+       else if (r != -ENOENT)
+               DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev)
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_suspend(rdev);
+               radeon_vce_suspend(rdev);
        }
        si_fini_pg(rdev);
        si_fini_cg(rdev);
@@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev)
                }
        }
 
+       r = radeon_vce_init(rdev);
+       if (!r) {
+               ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+               ring->ring_obj = NULL;
+               r600_ring_init(rdev, ring, 4096);
+
+               ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+               ring->ring_obj = NULL;
+               r600_ring_init(rdev, ring, 4096);
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev)
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_fini(rdev);
+               radeon_vce_fini(rdev);
        }
        si_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
@@ -7675,3 +7721,124 @@ static void si_program_aspm(struct radeon_device *rdev)
                }
        }
 }
+
+int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
+{
+       unsigned i;
+
+       /* make sure UPLL_CTLREQ is deasserted */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+       mdelay(10);
+
+       /* assert UPLL_CTLREQ */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+       /* wait for CTLACK and CTLACK2 to get asserted */
+       for (i = 0; i < 100; ++i) {
+               uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+               if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+                       break;
+               mdelay(10);
+       }
+
+       /* deassert UPLL_CTLREQ */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+       if (i == 100) {
+               DRM_ERROR("Timeout setting VCE clocks!\n");
+               return -ETIMEDOUT;
+       }
+
+       return 0;
+}
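
si_set_vce_clocks() below leans entirely on WREG32_SMC_P(); for orientation, it is a masked read-modify-write over the SMC indirect register space, presumably shaped like the driver's other _P helpers (a sketch under that assumption; note the callers pass ~FIELD_MASK, i.e. the mask names the bits to preserve):

    #define WREG32_SMC_P(reg, val, mask)                    \
            do {                                            \
                    u32 tmp_ = RREG32_SMC(reg);             \
                    tmp_ &= (mask);                         \
                    tmp_ |= ((val) & ~(mask));              \
                    WREG32_SMC(reg, tmp_);                  \
            } while (0)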
+
+int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
+{
+       unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
+       int r;
+
+       /* bypass evclk and ecclk with bclk */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+                    EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
+                    ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+       /* put PLL in bypass mode */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
+                    ~VCEPLL_BYPASS_EN_MASK);
+
+       if (!evclk || !ecclk) {
+               /* keep the bypass mode, put PLL to sleep */
+               WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+                            ~VCEPLL_SLEEP_MASK);
+               return 0;
+       }
+
+       r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
+                                         16384, 0x03FFFFFF, 0, 128, 5,
+                                         &fb_div, &evclk_div, &ecclk_div);
+       if (r)
+               return r;
+
+       /* set RESET_ANTI_MUX to 0 */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+       /* set VCO_MODE to 1 */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
+                    ~VCEPLL_VCO_MODE_MASK);
+
+       /* toggle VCEPLL_SLEEP to 1 then back to 0 */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+                    ~VCEPLL_SLEEP_MASK);
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
+
+       /* deassert VCEPLL_RESET */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+       mdelay(1);
+
+       r = si_vce_send_vcepll_ctlreq(rdev);
+       if (r)
+               return r;
+
+       /* assert VCEPLL_RESET again */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
+
+       /* disable spread spectrum. */
+       WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+       /* set feedback divider */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
+
+       /* set ref divider to 0 */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
+
+       /* set PDIV_A and PDIV_B */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+                    VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
+                    ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
+
+       /* give the PLL some time to settle */
+       mdelay(15);
+
+       /* deassert PLL_RESET */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+       mdelay(15);
+
+       /* switch from bypass mode to normal mode */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
+
+       r = si_vce_send_vcepll_ctlreq(rdev);
+       if (r)
+               return r;
+
+       /* switch VCLK and DCLK selection */
+       WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+                    EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
+                    ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+       mdelay(100);
+
+       return 0;
+}
index ff8b83f5e929a851943e61e76fc3d45b2967a2a3..1dbdf3230daed2b4c829e3ed5b9800a007691fea 100644 (file)
@@ -1740,6 +1740,7 @@ struct ni_power_info *ni_get_pi(struct radeon_device *rdev);
 struct ni_ps *ni_get_ps(struct radeon_ps *rps);
 
 extern int si_mc_load_microcode(struct radeon_device *rdev);
+extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 
 static int si_populate_voltage_value(struct radeon_device *rdev,
                                     const struct atom_voltage_table *table,
@@ -2928,6 +2929,56 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
        { 0, 0, 0, 0 },
 };
 
+static u16 si_get_lower_of_leakage_and_vce_voltage(struct radeon_device *rdev,
+                                                  u16 vce_voltage)
+{
+       u16 highest_leakage = 0;
+       struct si_power_info *si_pi = si_get_pi(rdev);
+       int i;
+
+       for (i = 0; i < si_pi->leakage_voltage.count; i++) {
+               if (highest_leakage < si_pi->leakage_voltage.entries[i].voltage)
+                       highest_leakage = si_pi->leakage_voltage.entries[i].voltage;
+       }
+
+       if (si_pi->leakage_voltage.count && (highest_leakage < vce_voltage))
+               return highest_leakage;
+
+       return vce_voltage;
+}
+
+static int si_get_vce_clock_voltage(struct radeon_device *rdev,
+                                   u32 evclk, u32 ecclk, u16 *voltage)
+{
+       u32 i;
+       int ret = -EINVAL;
+       struct radeon_vce_clock_voltage_dependency_table *table =
+               &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+       if (((evclk == 0) && (ecclk == 0)) ||
+           (table && (table->count == 0))) {
+               *voltage = 0;
+               return 0;
+       }
+
+       for (i = 0; i < table->count; i++) {
+               if ((evclk <= table->entries[i].evclk) &&
+                   (ecclk <= table->entries[i].ecclk)) {
+                       *voltage = table->entries[i].v;
+                       ret = 0;
+                       break;
+               }
+       }
+
+       /* if no match return the highest voltage */
+       if (ret)
+               *voltage = table->entries[table->count - 1].v;
+
+       *voltage = si_get_lower_of_leakage_and_vce_voltage(rdev, *voltage);
+
+       return ret;
+}
+
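Worked example with a hypothetical two-entry table, entries[] = {(evclk 40000, ecclk 40000, v 900), (evclk 80000, ecclk 80000, v 1000)}: a request of evclk = ecclk = 50000 matches the second entry and yields 1000; a request of 100000 matches nothing, so the loop falls through, the function reports the last (highest) entry's 1000 and returns -EINVAL. Either way the leakage clamp above is applied to the result before it reaches the caller.
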
 static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                                        struct radeon_ps *rps)
 {
@@ -2936,7 +2987,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
        bool disable_mclk_switching = false;
        bool disable_sclk_switching = false;
        u32 mclk, sclk;
-       u16 vddc, vddci;
+       u16 vddc, vddci, min_vce_voltage = 0;
        u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
        u32 max_sclk = 0, max_mclk = 0;
        int i;
@@ -2955,6 +3006,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                ++p;
        }
 
+       if (rps->vce_active) {
+               rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+               rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+               si_get_vce_clock_voltage(rdev, rps->evclk, rps->ecclk,
+                                        &min_vce_voltage);
+       } else {
+               rps->evclk = 0;
+               rps->ecclk = 0;
+       }
+
        if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
            ni_dpm_vblank_too_short(rdev))
                disable_mclk_switching = true;
@@ -3035,6 +3096,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                vddc = ps->performance_levels[0].vddc;
        }
 
+       if (rps->vce_active) {
+               if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+                       sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+               if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk)
+                       mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk;
+       }
+
        /* adjusted low state */
        ps->performance_levels[0].sclk = sclk;
        ps->performance_levels[0].mclk = mclk;
@@ -3084,6 +3152,8 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                                               &ps->performance_levels[i]);
 
        for (i = 0; i < ps->performance_level_count; i++) {
+               if (ps->performance_levels[i].vddc < min_vce_voltage)
+                       ps->performance_levels[i].vddc = min_vce_voltage;
                btc_apply_voltage_dependency_rules(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
                                                   ps->performance_levels[i].sclk,
                                                   max_limits->vddc,  &ps->performance_levels[i].vddc);
@@ -3110,7 +3180,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                if (ps->performance_levels[i].vddc > rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.vddc)
                        ps->dc_compatible = false;
        }
-
 }
 
 #if 0
@@ -5859,6 +5928,21 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev,
        }
 }
 
+static void si_set_vce_clock(struct radeon_device *rdev,
+                            struct radeon_ps *new_rps,
+                            struct radeon_ps *old_rps)
+{
+       if ((old_rps->evclk != new_rps->evclk) ||
+           (old_rps->ecclk != new_rps->ecclk)) {
+               /* turn the clocks on when encoding, off otherwise */
+               if (new_rps->evclk || new_rps->ecclk)
+                       vce_v1_0_enable_mgcg(rdev, false);
+               else
+                       vce_v1_0_enable_mgcg(rdev, true);
+               radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk);
+       }
+}
+
 void si_dpm_setup_asic(struct radeon_device *rdev)
 {
        int r;
@@ -6547,6 +6631,7 @@ int si_dpm_set_power_state(struct radeon_device *rdev)
                return ret;
        }
        ni_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
+       si_set_vce_clock(rdev, new_ps, old_ps);
        if (eg_pi->pcie_performance_request)
                si_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps);
        ret = si_set_power_state_conditionally_enable_ulv(rdev, new_ps);
@@ -6793,6 +6878,21 @@ static int si_parse_power_table(struct radeon_device *rdev)
                power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
        }
        rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+       /* fill in the vce power states */
+       for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+               u32 sclk, mclk;
+               clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+               clock_info = (union pplib_clock_info *)
+                       &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+               sclk = le16_to_cpu(clock_info->si.usEngineClockLow);
+               sclk |= clock_info->si.ucEngineClockHigh << 16;
+               mclk = le16_to_cpu(clock_info->si.usMemoryClockLow);
+               mclk |= clock_info->si.ucMemoryClockHigh << 16;
+               rdev->pm.dpm.vce_states[i].sclk = sclk;
+               rdev->pm.dpm.vce_states[i].mclk = mclk;
+       }
+
        return 0;
 }
 
@@ -6837,10 +6937,11 @@ int si_dpm_init(struct radeon_device *rdev)
        if (ret)
                return ret;
 
-       ret = si_parse_power_table(rdev);
+       ret = r600_parse_extended_power_table(rdev);
        if (ret)
                return ret;
-       ret = r600_parse_extended_power_table(rdev);
+
+       ret = si_parse_power_table(rdev);
        if (ret)
                return ret;
 
index 3afac301398388315f78d6cb7cb62e84e7314581..4c4a7218a3bd63c11f7f9c8d6b96583bef27368b 100644 (file)
 #define VCE_VCPU_CACHE_SIZE1                           0x20030
 #define VCE_VCPU_CACHE_OFFSET2                         0x20034
 #define VCE_VCPU_CACHE_SIZE2                           0x20038
+#define VCE_VCPU_SCRATCH7                              0x200dc
 #define VCE_SOFT_RESET                                 0x20120
 #define        VCE_ECPU_SOFT_RESET                     (1 << 0)
 #define        VCE_FME_SOFT_RESET                      (1 << 2)
 #define VCE_RB_RPTR                                    0x2018c
 #define VCE_RB_WPTR                                    0x20190
 #define VCE_CLOCK_GATING_A                             0x202f8
+#      define CGC_DYN_CLOCK_MODE                       (1 << 16)
 #define VCE_CLOCK_GATING_B                             0x202fc
 #define VCE_UENC_CLOCK_GATING                          0x205bc
 #define VCE_UENC_REG_CLOCK_GATING                      0x205c0
 #define VCE_CMD_IB_AUTO                                        0x00000005
 #define VCE_CMD_SEMAPHORE                              0x00000006
 
+/* discrete vce clocks */
+#define        CG_VCEPLL_FUNC_CNTL                             0xc0030600
+#      define VCEPLL_RESET_MASK                        0x00000001
+#      define VCEPLL_SLEEP_MASK                        0x00000002
+#      define VCEPLL_BYPASS_EN_MASK                    0x00000004
+#      define VCEPLL_CTLREQ_MASK                       0x00000008
+#      define VCEPLL_VCO_MODE_MASK                     0x00000600
+#      define VCEPLL_REF_DIV_MASK                      0x003F0000
+#      define VCEPLL_CTLACK_MASK                       0x40000000
+#      define VCEPLL_CTLACK2_MASK                      0x80000000
+#define        CG_VCEPLL_FUNC_CNTL_2                           0xc0030601
+#      define VCEPLL_PDIV_A(x)                         ((x) << 0)
+#      define VCEPLL_PDIV_A_MASK                       0x0000007F
+#      define VCEPLL_PDIV_B(x)                         ((x) << 8)
+#      define VCEPLL_PDIV_B_MASK                       0x00007F00
+#      define EVCLK_SRC_SEL(x)                         ((x) << 20)
+#      define EVCLK_SRC_SEL_MASK                       0x01F00000
+#      define ECCLK_SRC_SEL(x)                         ((x) << 25)
+#      define ECCLK_SRC_SEL_MASK                       0x3E000000
+#define        CG_VCEPLL_FUNC_CNTL_3                           0xc0030602
+#      define VCEPLL_FB_DIV(x)                         ((x) << 0)
+#      define VCEPLL_FB_DIV_MASK                       0x01FFFFFF
+#define        CG_VCEPLL_FUNC_CNTL_4                           0xc0030603
+#define        CG_VCEPLL_FUNC_CNTL_5                           0xc0030604
+#define        CG_VCEPLL_SPREAD_SPECTRUM                       0xc0030606
+#      define VCEPLL_SSEN_MASK                         0x00000001
+
 #endif
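
A quick consistency check between these new fields and the values si_set_vce_clocks() writes (illustrative only, not part of the patch): EVCLK_SRC_SEL(16) = 0x01000000 and ECCLK_SRC_SEL(16) = 0x20000000 both fall inside their masks, which compile-time asserts would capture as:

    BUILD_BUG_ON(EVCLK_SRC_SEL(16) & ~EVCLK_SRC_SEL_MASK);
    BUILD_BUG_ON(ECCLK_SRC_SEL(16) & ~ECCLK_SRC_SEL_MASK);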
index a5b02c575d775b7b1a4409142507470e7f3fc415..d34bfcdab9be30b477c190891e71c26e6ab11bb3 100644 (file)
@@ -336,6 +336,7 @@ static const u32 trinity_override_mgpg_sequences[] =
        0x00000204, 0x00000000,
 };
 
+extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 static void trinity_program_clk_gating_hw_sequence(struct radeon_device *rdev,
                                                   const u32 *seq, u32 count);
 static void trinity_override_dynamic_mg_powergating(struct radeon_device *rdev);
@@ -985,6 +986,21 @@ static void trinity_set_uvd_clock_after_set_eng_clock(struct radeon_device *rdev
        trinity_setup_uvd_clocks(rdev, new_rps, old_rps);
 }
 
+static void trinity_set_vce_clock(struct radeon_device *rdev,
+                                 struct radeon_ps *new_rps,
+                                 struct radeon_ps *old_rps)
+{
+       if ((old_rps->evclk != new_rps->evclk) ||
+           (old_rps->ecclk != new_rps->ecclk)) {
+               /* turn the clocks on when encoding, off otherwise */
+               if (new_rps->evclk || new_rps->ecclk)
+                       vce_v1_0_enable_mgcg(rdev, false);
+               else
+                       vce_v1_0_enable_mgcg(rdev, true);
+               radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk);
+       }
+}
+
 static void trinity_program_ttt(struct radeon_device *rdev)
 {
        struct trinity_power_info *pi = trinity_get_pi(rdev);
@@ -1246,6 +1262,7 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev)
                trinity_force_level_0(rdev);
                trinity_unforce_levels(rdev);
                trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
+               trinity_set_vce_clock(rdev, new_ps, old_ps);
        }
        trinity_release_mutex(rdev);
 
@@ -1483,7 +1500,35 @@ static void trinity_adjust_uvd_state(struct radeon_device *rdev,
        }
 }
 
+static int trinity_get_vce_clock_voltage(struct radeon_device *rdev,
+                                        u32 evclk, u32 ecclk, u16 *voltage)
+{
+       u32 i;
+       int ret = -EINVAL;
+       struct radeon_vce_clock_voltage_dependency_table *table =
+               &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+       if (((evclk == 0) && (ecclk == 0)) ||
+           (table && (table->count == 0))) {
+               *voltage = 0;
+               return 0;
+       }
+
+       for (i = 0; i < table->count; i++) {
+               if ((evclk <= table->entries[i].evclk) &&
+                   (ecclk <= table->entries[i].ecclk)) {
+                       *voltage = table->entries[i].v;
+                       ret = 0;
+                       break;
+               }
+       }
+
+       /* if no match return the highest voltage */
+       if (ret)
+               *voltage = table->entries[table->count - 1].v;
 
+       return ret;
+}
 
 static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
                                             struct radeon_ps *new_rps,
@@ -1496,6 +1541,7 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
        u32 min_sclk = pi->sys_info.min_sclk; /* XXX check against disp reqs */
        u32 sclk_in_sr = pi->sys_info.min_sclk; /* ??? */
        u32 i;
+       u16 min_vce_voltage;
        bool force_high;
        u32 num_active_displays = rdev->pm.dpm.new_active_crtc_count;
 
@@ -1504,6 +1550,14 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
 
        trinity_adjust_uvd_state(rdev, new_rps);
 
+       if (new_rps->vce_active) {
+               new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+               new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+       } else {
+               new_rps->evclk = 0;
+               new_rps->ecclk = 0;
+       }
+
        for (i = 0; i < ps->num_levels; i++) {
                if (ps->levels[i].vddc_index < min_voltage)
                        ps->levels[i].vddc_index = min_voltage;
@@ -1512,6 +1566,17 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
                        ps->levels[i].sclk =
                                trinity_get_valid_engine_clock(rdev, min_sclk);
 
+               /* patch in vce limits */
+               if (new_rps->vce_active) {
+                       /* sclk */
+                       if (ps->levels[i].sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+                               ps->levels[i].sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+                       /* vddc */
+                       trinity_get_vce_clock_voltage(rdev, new_rps->evclk, new_rps->ecclk, &min_vce_voltage);
+                       if (ps->levels[i].vddc_index < min_vce_voltage)
+                               ps->levels[i].vddc_index = min_vce_voltage;
+               }
+
                ps->levels[i].ds_divider_index =
                        sumo_get_sleep_divider_id_from_clock(rdev, ps->levels[i].sclk, sclk_in_sr);
 
@@ -1733,6 +1798,19 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
                power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
        }
        rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+       /* fill in the vce power states */
+       for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+               u32 sclk;
+               clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+               clock_info = (union pplib_clock_info *)
+                       &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+               sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow);
+               sclk |= clock_info->sumo.ucEngineClockHigh << 16;
+               rdev->pm.dpm.vce_states[i].sclk = sclk;
+               rdev->pm.dpm.vce_states[i].mclk = 0;
+       }
+
        return 0;
 }
 
@@ -1914,6 +1992,10 @@ int trinity_dpm_init(struct radeon_device *rdev)
        if (ret)
                return ret;
 
+       ret = r600_parse_extended_power_table(rdev);
+       if (ret)
+               return ret;
+
        ret = trinity_parse_power_table(rdev);
        if (ret)
                return ret;
@@ -2000,6 +2082,7 @@ void trinity_dpm_fini(struct radeon_device *rdev)
        }
        kfree(rdev->pm.dpm.ps);
        kfree(rdev->pm.dpm.priv);
+       r600_free_extended_power_table(rdev);
 }
 
 u32 trinity_dpm_get_sclk(struct radeon_device *rdev, bool low)
index b44d9c842f7b3a76fe1dc3562ffc79f0c4e18e6c..07a0d378e122b1b206b8aceb1a97234254d00e74 100644 (file)
 #include "radeon_asic.h"
 #include "sid.h"
 
+#define VCE_V1_0_FW_SIZE       (256 * 1024)
+#define VCE_V1_0_STACK_SIZE    (64 * 1024)
+#define VCE_V1_0_DATA_SIZE     (7808 * (RADEON_MAX_VCE_HANDLES + 1))
+
+struct vce_v1_0_fw_signature {
+       int32_t off;
+       uint32_t len;
+       int32_t num;
+       struct {
+               uint32_t chip_id;
+               uint32_t keyselect;
+               uint32_t nonce[4];
+               uint32_t sigval[4];
+       } val[8];
+};
+
 /**
  * vce_v1_0_get_rptr - get read pointer
  *
@@ -82,6 +99,186 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev,
                WREG32(VCE_RB_WPTR2, ring->wptr);
 }
 
+void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable)
+{
+       u32 tmp;
+
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) {
+               tmp = RREG32(VCE_CLOCK_GATING_A);
+               tmp |= CGC_DYN_CLOCK_MODE;
+               WREG32(VCE_CLOCK_GATING_A, tmp);
+
+               tmp = RREG32(VCE_UENC_CLOCK_GATING);
+               tmp &= ~0x1ff000;
+               tmp |= 0xff800000;
+               WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+               tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+               tmp &= ~0x3ff;
+               WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+       } else {
+               tmp = RREG32(VCE_CLOCK_GATING_A);
+               tmp &= ~CGC_DYN_CLOCK_MODE;
+               WREG32(VCE_CLOCK_GATING_A, tmp);
+
+               tmp = RREG32(VCE_UENC_CLOCK_GATING);
+               tmp |= 0x1ff000;
+               tmp &= ~0xff800000;
+               WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+               tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+               tmp |= 0x3ff;
+               WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+       }
+}
+
+static void vce_v1_0_init_cg(struct radeon_device *rdev)
+{
+       u32 tmp;
+
+       tmp = RREG32(VCE_CLOCK_GATING_A);
+       tmp |= CGC_DYN_CLOCK_MODE;
+       WREG32(VCE_CLOCK_GATING_A, tmp);
+
+       tmp = RREG32(VCE_CLOCK_GATING_B);
+       tmp |= 0x1e;
+       tmp &= ~0xe100e1;
+       WREG32(VCE_CLOCK_GATING_B, tmp);
+
+       tmp = RREG32(VCE_UENC_CLOCK_GATING);
+       tmp &= ~0xff9ff000;
+       WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+       tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+       tmp &= ~0x3ff;
+       WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+}
+
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
+{
+       struct vce_v1_0_fw_signature *sign = (void *)rdev->vce_fw->data;
+       uint32_t chip_id;
+       int i;
+
+       switch (rdev->family) {
+       case CHIP_TAHITI:
+               chip_id = 0x01000014;
+               break;
+       case CHIP_VERDE:
+               chip_id = 0x01000015;
+               break;
+       case CHIP_PITCAIRN:
+       case CHIP_OLAND:
+               chip_id = 0x01000016;
+               break;
+       case CHIP_ARUBA:
+               chip_id = 0x01000017;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       for (i = 0; i < sign->num; ++i) {
+               if (sign->val[i].chip_id == chip_id)
+                       break;
+       }
+
+       if (i == sign->num)
+               return -EINVAL;
+
+       data += (256 - 64) / 4;
+       data[0] = sign->val[i].nonce[0];
+       data[1] = sign->val[i].nonce[1];
+       data[2] = sign->val[i].nonce[2];
+       data[3] = sign->val[i].nonce[3];
+       data[4] = sign->len + 64;
+
+       memset(&data[5], 0, 44);
+       memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
+
+       data += data[4] / 4;
+       data[0] = sign->val[i].sigval[0];
+       data[1] = sign->val[i].sigval[1];
+       data[2] = sign->val[i].sigval[2];
+       data[3] = sign->val[i].sigval[3];
+
+       rdev->vce.keyselect = sign->val[i].keyselect;
+
+       return 0;
+}
+
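For orientation, the offsets above imply the following layout at the start of the VCE BO after loading (our reconstruction from this function; byte offsets, not authoritative):

      0 .. 191 : skipped (data += (256 - 64) / 4)
    192 .. 207 : nonce[0..3] for the matching chip_id
    208 .. 211 : total length, sign->len + 64
    212 .. 255 : zeroed (44-byte memset)
    256 .. ... : firmware body copied from past the signature header,
                 with the four sigval words at byte 192 + (sign->len + 64).
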
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev)
+{
+       WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size);
+       return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE;
+}
+
+int vce_v1_0_resume(struct radeon_device *rdev)
+{
+       uint64_t addr = rdev->vce.gpu_addr;
+       uint32_t size;
+       int i;
+
+       WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16));
+       WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+       WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+       WREG32(VCE_CLOCK_GATING_B, 0);
+
+       WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+       WREG32(VCE_LMI_CTRL, 0x00398000);
+       WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+       WREG32(VCE_LMI_SWAP_CNTL, 0);
+       WREG32(VCE_LMI_SWAP_CNTL1, 0);
+       WREG32(VCE_LMI_VM_CTRL, 0);
+
+       WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES);
+
+       addr += 256;
+       size = VCE_V1_0_FW_SIZE;
+       WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
+       WREG32(VCE_VCPU_CACHE_SIZE0, size);
+
+       addr += size;
+       size = VCE_V1_0_STACK_SIZE;
+       WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
+       WREG32(VCE_VCPU_CACHE_SIZE1, size);
+
+       addr += size;
+       size = VCE_V1_0_DATA_SIZE;
+       WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
+       WREG32(VCE_VCPU_CACHE_SIZE2, size);
+
+       WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100);
+
+       WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect);
+
+       for (i = 0; i < 10; ++i) {
+               mdelay(10);
+               if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE)
+                       break;
+       }
+
+       if (i == 10)
+               return -ETIMEDOUT;
+
+       if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS))
+               return -EINVAL;
+
+       for (i = 0; i < 10; ++i) {
+               mdelay(10);
+               if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY))
+                       break;
+       }
+
+       if (i == 10)
+               return -ETIMEDOUT;
+
+       vce_v1_0_init_cg(rdev);
+
+       return 0;
+}
+
 /**
  * vce_v1_0_start - start VCE block
  *
index fbbe78fbd087ae7c147a43b925f0ac0401c86466..cdeaab7c7b1eaa6f8e4b8e723f6470f55e3a6237 100644 (file)
 #include "radeon_asic.h"
 #include "cikd.h"
 
+#define VCE_V2_0_FW_SIZE       (256 * 1024)
+#define VCE_V2_0_STACK_SIZE    (64 * 1024)
+#define VCE_V2_0_DATA_SIZE     (23552 * RADEON_MAX_VCE_HANDLES)
+
 static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated)
 {
        u32 tmp;
@@ -140,6 +144,12 @@ static void vce_v2_0_init_cg(struct radeon_device *rdev)
        WREG32(VCE_CLOCK_GATING_B, tmp);
 }
 
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev)
+{
+       WARN_ON(rdev->vce_fw->size > VCE_V2_0_FW_SIZE);
+       return VCE_V2_0_FW_SIZE + VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE;
+}
+
 int vce_v2_0_resume(struct radeon_device *rdev)
 {
        uint64_t addr = rdev->vce.gpu_addr;
@@ -159,17 +169,17 @@ int vce_v2_0_resume(struct radeon_device *rdev)
        WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8);
 
        addr &= 0xff;
-       size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size);
+       size = VCE_V2_0_FW_SIZE;
        WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
        WREG32(VCE_VCPU_CACHE_SIZE0, size);
 
        addr += size;
-       size = RADEON_VCE_STACK_SIZE;
+       size = VCE_V2_0_STACK_SIZE;
        WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
        WREG32(VCE_VCPU_CACHE_SIZE1, size);
 
        addr += size;
-       size = RADEON_VCE_HEAP_SIZE;
+       size = VCE_V2_0_DATA_SIZE;
        WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
        WREG32(VCE_VCPU_CACHE_SIZE2, size);
 
index 7d0b8ef9bea21ca04a7fc678f3913dc1768615fd..e6a32c4e40402ec8cb7ed1d29158988ec07f1353 100644 (file)
@@ -195,26 +195,27 @@ void rcar_du_crtc_route_output(struct drm_crtc *crtc,
 
 static unsigned int plane_zpos(struct rcar_du_plane *plane)
 {
-       return to_rcar_du_plane_state(plane->plane.state)->zpos;
+       return to_rcar_plane_state(plane->plane.state)->zpos;
 }
 
 static const struct rcar_du_format_info *
 plane_format(struct rcar_du_plane *plane)
 {
-       return to_rcar_du_plane_state(plane->plane.state)->format;
+       return to_rcar_plane_state(plane->plane.state)->format;
 }
 
 static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
 {
        struct rcar_du_plane *planes[RCAR_DU_NUM_HW_PLANES];
        unsigned int num_planes = 0;
+       unsigned int dptsr_planes;
+       unsigned int hwplanes = 0;
        unsigned int prio = 0;
        unsigned int i;
-       u32 dptsr = 0;
        u32 dspr = 0;
 
-       for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
-               struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
+       for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) {
+               struct rcar_du_plane *plane = &rcrtc->group->planes[i];
                unsigned int j;
 
                if (plane->plane.state->crtc != &rcrtc->crtc)
@@ -234,41 +235,45 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
        for (i = 0; i < num_planes; ++i) {
                struct rcar_du_plane *plane = planes[i];
                struct drm_plane_state *state = plane->plane.state;
-               unsigned int index = to_rcar_du_plane_state(state)->hwindex;
+               unsigned int index = to_rcar_plane_state(state)->hwindex;
 
                prio -= 4;
                dspr |= (index + 1) << prio;
-               dptsr |= DPTSR_PnDK(index) |  DPTSR_PnTS(index);
+               hwplanes |= 1 << index;
 
                if (plane_format(plane)->planes == 2) {
                        index = (index + 1) % 8;
 
                        prio -= 4;
                        dspr |= (index + 1) << prio;
-                       dptsr |= DPTSR_PnDK(index) |  DPTSR_PnTS(index);
+                       hwplanes |= 1 << index;
                }
        }
 
-       /* Select display timing and dot clock generator 2 for planes associated
-        * with superposition controller 2.
+       /* Update the association of planes with display timings and dot
+        * clock generators.
+        *
+        * Updating the DPTSR register requires restarting the CRTC group,
+        * resulting in visible flicker. To mitigate the issue, only update
+        * the association when required by the enabled planes; planes being
+        * disabled keep their current association.
+        */
-       if (rcrtc->index % 2) {
-               /* The DPTSR register is updated when the display controller is
-                * stopped. We thus need to restart the DU. Once again, sorry
-                * for the flicker. One way to mitigate the issue would be to
-                * pre-associate planes with CRTCs (either with a fixed 4/4
-                * split, or through a module parameter). Flicker would then
-                * occur only if we need to break the pre-association.
-                */
-               mutex_lock(&rcrtc->group->lock);
-               if (rcar_du_group_read(rcrtc->group, DPTSR) != dptsr) {
-                       rcar_du_group_write(rcrtc->group, DPTSR, dptsr);
-                       if (rcrtc->group->used_crtcs)
-                               rcar_du_group_restart(rcrtc->group);
-               }
-               mutex_unlock(&rcrtc->group->lock);
+       mutex_lock(&rcrtc->group->lock);
+
+       dptsr_planes = rcrtc->index % 2 ? rcrtc->group->dptsr_planes | hwplanes
+                    : rcrtc->group->dptsr_planes & ~hwplanes;
+
+       if (dptsr_planes != rcrtc->group->dptsr_planes) {
+               rcar_du_group_write(rcrtc->group, DPTSR,
+                                   (dptsr_planes << 16) | dptsr_planes);
+               rcrtc->group->dptsr_planes = dptsr_planes;
+
+               if (rcrtc->group->used_crtcs)
+                       rcar_du_group_restart(rcrtc->group);
        }
 
+       mutex_unlock(&rcrtc->group->lock);
+
        rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR,
                            dspr);
 }
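
The single (dptsr_planes << 16) | dptsr_planes write folds the removed per-plane DPTSR_PnDK(index) | DPTSR_PnTS(index) accumulation into one mask: bit n set means plane n is driven by dot clock and display timing generator 2, mirrored into both halves of DPTSR (which macro owned which half is inferred from the old code, so treat the exact split as an assumption):

    u32 dptsr = (dptsr_planes << 16) | dptsr_planes;  /* PnDK | PnTS per plane */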
@@ -427,8 +432,8 @@ void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc)
        rcar_du_crtc_start(rcrtc);
 
        /* Commit the planes state. */
-       for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
-               struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
+       for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) {
+               struct rcar_du_plane *plane = &rcrtc->group->planes[i];
 
                if (plane->plane.state->crtc != &rcrtc->crtc)
                        continue;
@@ -592,7 +597,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
        rcrtc->enabled = false;
 
        ret = drm_crtc_init_with_planes(rcdu->ddev, crtc,
-                                       &rgrp->planes.planes[index % 2].plane,
+                                       &rgrp->planes[index % 2].plane,
                                        NULL, &crtc_funcs);
        if (ret < 0)
                return ret;
index 5d9aa9b33769eb225ea3919d2fc7f06842988f6f..4b95d9d08c4991ad03f159a2c6c54086c781592f 100644 (file)
 
 struct rcar_du_group;
 
+/**
+ * struct rcar_du_crtc - the CRTC, representing a DU superposition processor
+ * @crtc: base DRM CRTC
+ * @clock: the CRTC functional clock
+ * @extclock: external pixel dot clock (optional)
+ * @mmio_offset: offset of the CRTC registers in the DU MMIO block
+ * @index: CRTC software and hardware index
+ * @started: whether the CRTC has been started and is running
+ * @event: event to post when the pending page flip completes
+ * @flip_wait: wait queue used to signal page flip completion
+ * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC
+ * @enabled: whether the CRTC is enabled, used to control system resume
+ * @group: CRTC group this CRTC belongs to
+ */
 struct rcar_du_crtc {
        struct drm_crtc crtc;
 
index c7c538dd2e683045e7bfefcfc4e198f9352f35bb..9f34fc86436a4ef6b6f744f050de1aef84c4f6fa 100644 (file)
@@ -83,6 +83,12 @@ struct rcar_du_device {
 
        struct rcar_du_group groups[RCAR_DU_MAX_GROUPS];
 
+       struct {
+               struct drm_property *alpha;
+               struct drm_property *colorkey;
+               struct drm_property *zpos;
+       } props;
+
        unsigned int dpad0_source;
        struct rcar_du_lvdsenc *lvds[RCAR_DU_MAX_LVDS];
 
index 1bdc0ee0c2483c936b8fffd31948c42fe74e8455..7fd39a7d91c8b2fd411c4b2d27b4a1eabc4e9051 100644 (file)
@@ -85,6 +85,12 @@ static void rcar_du_group_setup(struct rcar_du_group *rgrp)
         * superposition 0 to DU0 pins. DU1 pins will be configured dynamically.
         */
        rcar_du_group_write(rgrp, DORCR, DORCR_PG1D_DS1 | DORCR_DPRS);
+
+       /* Apply planes to CRTCs association. */
+       mutex_lock(&rgrp->lock);
+       rcar_du_group_write(rgrp, DPTSR, (rgrp->dptsr_planes << 16) |
+                           rgrp->dptsr_planes);
+       mutex_unlock(&rgrp->lock);
 }
 
 /*
index ed36433fbe84a55d2fbd3e822d9681251f596d3b..7b414b31c3be7a823eec70b903a0f6b13ff301a5 100644 (file)
@@ -25,9 +25,11 @@ struct rcar_du_device;
  * @dev: the DU device
  * @mmio_offset: registers offset in the device memory map
  * @index: group index
+ * @num_crtcs: number of CRTCs in this group (1 or 2)
  * @use_count: number of users of the group (rcar_du_group_(get|put))
  * @used_crtcs: number of CRTCs currently in use
- * @lock: protects the DPTSR register
+ * @lock: protects the dptsr_planes field and the DPTSR register
+ * @dptsr_planes: bitmask of planes driven by dot-clock and timing generator 1
  * @planes: planes handled by the group
  */
 struct rcar_du_group {
@@ -35,12 +37,14 @@ struct rcar_du_group {
        unsigned int mmio_offset;
        unsigned int index;
 
+       unsigned int num_crtcs;
        unsigned int use_count;
        unsigned int used_crtcs;
 
        struct mutex lock;
+       unsigned int dptsr_planes;
 
-       struct rcar_du_planes planes;
+       struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES];
 };
 
 u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg);
index 93117f159a3ba668c59db1fd6466afd732c4675d..20859aae882e71869d7cb11bcffee657fa29da89 100644 (file)
@@ -221,7 +221,7 @@ static bool rcar_du_plane_needs_realloc(struct rcar_du_plane *plane,
 {
        const struct rcar_du_format_info *cur_format;
 
-       cur_format = to_rcar_du_plane_state(plane->plane.state)->format;
+       cur_format = to_rcar_plane_state(plane->plane.state)->format;
 
        /* Lowering the number of planes doesn't strictly require reallocation
         * as the extra hardware plane will be freed when committing, but doing
@@ -284,14 +284,19 @@ static int rcar_du_atomic_check(struct drm_device *dev,
                        continue;
 
                plane = to_rcar_plane(state->planes[i]);
-               plane_state = to_rcar_du_plane_state(state->plane_states[i]);
+               plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+               dev_dbg(rcdu->dev, "%s: checking plane (%u,%u)\n", __func__,
+                       plane->group->index, plane - plane->group->planes);
 
                /* If the plane is being disabled we don't need to go through
                 * the full reallocation procedure. Just mark the hardware
                 * plane(s) as freed.
                 */
                if (!plane_state->format) {
-                       index = plane - plane->group->planes.planes;
+                       dev_dbg(rcdu->dev, "%s: plane is being disabled\n",
+                               __func__);
+                       index = plane - plane->group->planes;
                        group_freed_planes[plane->group->index] |= 1 << index;
                        plane_state->hwindex = -1;
                        continue;
@@ -301,10 +306,12 @@ static int rcar_du_atomic_check(struct drm_device *dev,
                 * mark the hardware plane(s) as free.
                 */
                if (rcar_du_plane_needs_realloc(plane, plane_state)) {
+                       dev_dbg(rcdu->dev, "%s: plane needs reallocation\n",
+                               __func__);
                        groups |= 1 << plane->group->index;
                        needs_realloc = true;
 
-                       index = plane - plane->group->planes.planes;
+                       index = plane - plane->group->planes;
                        group_freed_planes[plane->group->index] |= 1 << index;
                        plane_state->hwindex = -1;
                }
@@ -326,8 +333,11 @@ static int rcar_du_atomic_check(struct drm_device *dev,
                struct rcar_du_group *group = &rcdu->groups[index];
                unsigned int used_planes = 0;
 
+               dev_dbg(rcdu->dev, "%s: finding free planes for group %u\n",
+                       __func__, index);
+
                for (i = 0; i < RCAR_DU_NUM_KMS_PLANES; ++i) {
-                       struct rcar_du_plane *plane = &group->planes.planes[i];
+                       struct rcar_du_plane *plane = &group->planes[i];
                        struct rcar_du_plane_state *plane_state;
                        struct drm_plane_state *s;
 
@@ -342,28 +352,49 @@ static int rcar_du_atomic_check(struct drm_device *dev,
                         * above. Use the local freed planes list to check for
                         * that condition instead.
                         */
-                       if (group_freed_planes[index] & (1 << i))
+                       if (group_freed_planes[index] & (1 << i)) {
+                               dev_dbg(rcdu->dev,
+                                       "%s: plane (%u,%u) has been freed, skipping\n",
+                                       __func__, plane->group->index,
+                                       plane - plane->group->planes);
                                continue;
+                       }
 
-                       plane_state = to_rcar_du_plane_state(plane->plane.state);
+                       plane_state = to_rcar_plane_state(plane->plane.state);
                        used_planes |= rcar_du_plane_hwmask(plane_state);
+
+                       dev_dbg(rcdu->dev,
+                               "%s: plane (%u,%u) uses %u hwplanes (index %d)\n",
+                               __func__, plane->group->index,
+                               plane - plane->group->planes,
+                               plane_state->format ?
+                               plane_state->format->planes : 0,
+                               plane_state->hwindex);
                }
 
                group_free_planes[index] = 0xff & ~used_planes;
                groups &= ~(1 << index);
+
+               dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+                       __func__, index, group_free_planes[index]);
        }
 
        /* Reallocate hardware planes for each plane that needs it. */
        for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
                struct rcar_du_plane_state *plane_state;
                struct rcar_du_plane *plane;
+               unsigned int crtc_planes;
+               unsigned int free;
                int idx;
 
                if (!state->planes[i])
                        continue;
 
                plane = to_rcar_plane(state->planes[i]);
-               plane_state = to_rcar_du_plane_state(state->plane_states[i]);
+               plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+               dev_dbg(rcdu->dev, "%s: allocating plane (%u,%u)\n", __func__,
+                       plane->group->index, plane - plane->group->planes);
 
                /* Skip planes that are being disabled or don't need to be
                 * reallocated.
@@ -372,18 +403,38 @@ static int rcar_du_atomic_check(struct drm_device *dev,
                    !rcar_du_plane_needs_realloc(plane, plane_state))
                        continue;
 
+               /* Try to allocate the plane from the free planes currently
+                * associated with the target CRTC to avoid restarting the CRTC
+                * group and thus minimize flicker. If it fails, fall back to
+                * allocating from all free planes.
+                */
+               crtc_planes = to_rcar_crtc(plane_state->state.crtc)->index % 2
+                           ? plane->group->dptsr_planes
+                           : ~plane->group->dptsr_planes;
+               free = group_free_planes[plane->group->index];
+
                idx = rcar_du_plane_hwalloc(plane_state->format->planes,
-                                       group_free_planes[plane->group->index]);
+                                           free & crtc_planes);
+               if (idx < 0)
+                       idx = rcar_du_plane_hwalloc(plane_state->format->planes,
+                                                   free);
                if (idx < 0) {
                        dev_dbg(rcdu->dev, "%s: no available hardware plane\n",
                                __func__);
                        return idx;
                }
 
+               dev_dbg(rcdu->dev, "%s: allocated %u hwplanes (index %u)\n",
+                       __func__, plane_state->format->planes, idx);
+
                plane_state->hwindex = idx;
 
                group_free_planes[plane->group->index] &=
                        ~rcar_du_plane_hwmask(plane_state);
+
+               dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+                       __func__, plane->group->index,
+                       group_free_planes[plane->group->index]);
        }
 
        return 0;
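The reallocation loop above implements a "preferred set first, then any free plane" policy. A self-contained sketch of the same idea, with find_free_run() standing in for rcar_du_plane_hwalloc() (the helper name, its exact search strategy, and the fixed 8-plane assumption are illustrative only):

/* Return the lowest start index of num consecutive set bits in mask
 * among 8 hardware planes, or -ENOSPC if no such run exists.
 */
static int find_free_run(unsigned int num, unsigned int mask)
{
        unsigned int i;

        for (i = 0; i + num <= 8; ++i) {
                unsigned int run = ((1U << num) - 1) << i;

                if ((mask & run) == run)
                        return i;
        }

        return -ENOSPC;
}

/* Try planes already routed to the target CRTC first to avoid a group
 * restart (and thus flicker), then fall back to any free plane.
 */
static int alloc_hwplanes(unsigned int num, unsigned int free,
                          unsigned int crtc_planes)
{
        int idx = find_free_run(num, free & crtc_planes);

        return idx >= 0 ? idx : find_free_run(num, free);
}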
@@ -648,6 +699,31 @@ static int rcar_du_encoders_init(struct rcar_du_device *rcdu)
        return num_encoders;
 }
 
+static int rcar_du_properties_init(struct rcar_du_device *rcdu)
+{
+       rcdu->props.alpha =
+               drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
+       if (rcdu->props.alpha == NULL)
+               return -ENOMEM;
+
+       /* The color key is expressed as an RGB888 triplet stored in a 32-bit
+        * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
+        * or enable source color keying (1).
+        */
+       rcdu->props.colorkey =
+               drm_property_create_range(rcdu->ddev, 0, "colorkey",
+                                         0, 0x01ffffff);
+       if (rcdu->props.colorkey == NULL)
+               return -ENOMEM;
+
+       rcdu->props.zpos =
+               drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7);
+       if (rcdu->props.zpos == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
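To make the colorkey encoding above concrete: red 0xff, green 0x00, blue 0x80 with keying enabled packs to 0x01ff0080. A hedged helper (make_colorkey() and COLORKEY_ENABLE are illustrative names, not driver API):

#define COLORKEY_ENABLE (1U << 24)      /* illustrative name for bit 24 */

/* Pack an RGB888 key as described above: low 24 bits in XRGB8888
 * layout, bit 24 set to enable source color keying.
 */
static u32 make_colorkey(u8 r, u8 g, u8 b, bool enable)
{
        u32 key = ((u32)r << 16) | ((u32)g << 8) | (u32)b;

        return enable ? key | COLORKEY_ENABLE : key;
}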
+
 int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 {
        static const unsigned int mmio_offsets[] = {
@@ -672,6 +748,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 
        rcdu->num_crtcs = rcdu->info->num_crtcs;
 
+       ret = rcar_du_properties_init(rcdu);
+       if (ret < 0)
+               return ret;
+
        /* Initialize the groups. */
        num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2);
 
@@ -683,6 +763,13 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
                rgrp->dev = rcdu;
                rgrp->mmio_offset = mmio_offsets[i];
                rgrp->index = i;
+               rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
+
+               /* If we have more than one CRTC in this group, pre-associate
+                * planes 0-3 with CRTC 0 and planes 4-7 with CRTC 1 to
+                * minimize flicker when the association is changed.
+                */
+               rgrp->dptsr_planes = rgrp->num_crtcs > 1 ? 0xf0 : 0;
 
                ret = rcar_du_planes_init(rgrp);
                if (ret < 0)
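For reference, the 0xf0 initializer above is a per-plane bitmask: set bits 4-7 route planes 4-7 to timing generator 1 (the group's second CRTC), while clear bits 0-3 leave planes 0-3 on the first. This matches the crtc_planes selection in rcar_du_atomic_check() earlier in this diff. An illustrative accessor, not part of the patch:

/* A set bit in dptsr_planes means the plane is driven by timing
 * generator 1, i.e. the second CRTC of the group.
 */
static bool rcar_du_plane_on_second_crtc(const struct rcar_du_group *rgrp,
                                         unsigned int plane_index)
{
        return rgrp->dptsr_planes & (1U << plane_index);
}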
index 210e5c3fd9820d30f3347ce01fb4ecbb8b9f5fe5..3e30d84b798f283f469c1687b13f49ed2fb649a1 100644 (file)
@@ -45,7 +45,7 @@ static void rcar_du_plane_write(struct rcar_du_group *rgrp,
 static void rcar_du_plane_setup_fb(struct rcar_du_plane *plane)
 {
        struct rcar_du_plane_state *state =
-               to_rcar_du_plane_state(plane->plane.state);
+               to_rcar_plane_state(plane->plane.state);
        struct drm_framebuffer *fb = plane->plane.state->fb;
        struct rcar_du_group *rgrp = plane->group;
        unsigned int src_x = state->state.src_x >> 16;
@@ -109,7 +109,7 @@ static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane,
                                     unsigned int index)
 {
        struct rcar_du_plane_state *state =
-               to_rcar_du_plane_state(plane->plane.state);
+               to_rcar_plane_state(plane->plane.state);
        struct rcar_du_group *rgrp = plane->group;
        u32 colorkey;
        u32 pnmr;
@@ -172,7 +172,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
                                  unsigned int index)
 {
        struct rcar_du_plane_state *state =
-               to_rcar_du_plane_state(plane->plane.state);
+               to_rcar_plane_state(plane->plane.state);
        struct rcar_du_group *rgrp = plane->group;
        u32 ddcr2 = PnDDCR2_CODE;
        u32 ddcr4;
@@ -222,7 +222,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
 void rcar_du_plane_setup(struct rcar_du_plane *plane)
 {
        struct rcar_du_plane_state *state =
-               to_rcar_du_plane_state(plane->plane.state);
+               to_rcar_plane_state(plane->plane.state);
 
        __rcar_du_plane_setup(plane, state->hwindex);
        if (state->format->planes == 2)
@@ -234,7 +234,7 @@ void rcar_du_plane_setup(struct rcar_du_plane *plane)
 static int rcar_du_plane_atomic_check(struct drm_plane *plane,
                                      struct drm_plane_state *state)
 {
-       struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state);
+       struct rcar_du_plane_state *rstate = to_rcar_plane_state(state);
        struct rcar_du_plane *rplane = to_rcar_plane(plane);
        struct rcar_du_device *rcdu = rplane->group->dev;
 
@@ -302,7 +302,7 @@ rcar_du_plane_atomic_duplicate_state(struct drm_plane *plane)
        struct rcar_du_plane_state *state;
        struct rcar_du_plane_state *copy;
 
-       state = to_rcar_du_plane_state(plane->state);
+       state = to_rcar_plane_state(plane->state);
        copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
        if (copy == NULL)
                return NULL;
@@ -319,7 +319,7 @@ static void rcar_du_plane_atomic_destroy_state(struct drm_plane *plane,
        if (state->fb)
                drm_framebuffer_unreference(state->fb);
 
-       kfree(to_rcar_du_plane_state(state));
+       kfree(to_rcar_plane_state(state));
 }
 
 static int rcar_du_plane_atomic_set_property(struct drm_plane *plane,
@@ -327,15 +327,14 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane,
                                             struct drm_property *property,
                                             uint64_t val)
 {
-       struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state);
-       struct rcar_du_plane *rplane = to_rcar_plane(plane);
-       struct rcar_du_group *rgrp = rplane->group;
+       struct rcar_du_plane_state *rstate = to_rcar_plane_state(state);
+       struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
 
-       if (property == rgrp->planes.alpha)
+       if (property == rcdu->props.alpha)
                rstate->alpha = val;
-       else if (property == rgrp->planes.colorkey)
+       else if (property == rcdu->props.colorkey)
                rstate->colorkey = val;
-       else if (property == rgrp->planes.zpos)
+       else if (property == rcdu->props.zpos)
                rstate->zpos = val;
        else
                return -EINVAL;
@@ -349,14 +348,13 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane,
 {
        const struct rcar_du_plane_state *rstate =
                container_of(state, const struct rcar_du_plane_state, state);
-       struct rcar_du_plane *rplane = to_rcar_plane(plane);
-       struct rcar_du_group *rgrp = rplane->group;
+       struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
 
-       if (property == rgrp->planes.alpha)
+       if (property == rcdu->props.alpha)
                *val = rstate->alpha;
-       else if (property == rgrp->planes.colorkey)
+       else if (property == rcdu->props.colorkey)
                *val = rstate->colorkey;
-       else if (property == rgrp->planes.zpos)
+       else if (property == rcdu->props.zpos)
                *val = rstate->zpos;
        else
                return -EINVAL;
@@ -391,47 +389,24 @@ static const uint32_t formats[] = {
 
 int rcar_du_planes_init(struct rcar_du_group *rgrp)
 {
-       struct rcar_du_planes *planes = &rgrp->planes;
        struct rcar_du_device *rcdu = rgrp->dev;
        unsigned int num_planes;
-       unsigned int num_crtcs;
        unsigned int crtcs;
        unsigned int i;
        int ret;
 
-       planes->alpha =
-               drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
-       if (planes->alpha == NULL)
-               return -ENOMEM;
-
-       /* The color key is expressed as an RGB888 triplet stored in a 32-bit
-        * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
-        * or enable source color keying (1).
-        */
-       planes->colorkey =
-               drm_property_create_range(rcdu->ddev, 0, "colorkey",
-                                         0, 0x01ffffff);
-       if (planes->colorkey == NULL)
-               return -ENOMEM;
-
-       planes->zpos =
-               drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7);
-       if (planes->zpos == NULL)
-               return -ENOMEM;
-
-        /* Create one primary plane per in this group CRTC and seven overlay
+        /* Create one primary plane per CRTC in this group and seven overlay
          * planes.
          */
-       num_crtcs = min(rcdu->num_crtcs - 2 * rgrp->index, 2U);
-       num_planes = num_crtcs + 7;
+       num_planes = rgrp->num_crtcs + 7;
 
        crtcs = ((1 << rcdu->num_crtcs) - 1) & (3 << (2 * rgrp->index));
 
        for (i = 0; i < num_planes; ++i) {
-               enum drm_plane_type type = i < num_crtcs
+               enum drm_plane_type type = i < rgrp->num_crtcs
                                         ? DRM_PLANE_TYPE_PRIMARY
                                         : DRM_PLANE_TYPE_OVERLAY;
-               struct rcar_du_plane *plane = &planes->planes[i];
+               struct rcar_du_plane *plane = &rgrp->planes[i];
 
                plane->group = rgrp;
 
@@ -448,12 +423,12 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp)
                        continue;
 
                drm_object_attach_property(&plane->plane.base,
-                                          planes->alpha, 255);
+                                          rcdu->props.alpha, 255);
                drm_object_attach_property(&plane->plane.base,
-                                          planes->colorkey,
+                                          rcdu->props.colorkey,
                                           RCAR_DU_COLORKEY_NONE);
                drm_object_attach_property(&plane->plane.base,
-                                          planes->zpos, 1);
+                                          rcdu->props.zpos, 1);
        }
 
        return 0;
index abff0ebeb195f4cba972d8e00237cdf496e89108..9732bff1911ba15f8d45e158350fb866fcd6274d 100644 (file)
@@ -38,19 +38,20 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane)
        return container_of(plane, struct rcar_du_plane, plane);
 }
 
-struct rcar_du_planes {
-       struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES];
-
-       struct drm_property *alpha;
-       struct drm_property *colorkey;
-       struct drm_property *zpos;
-};
-
+/**
+ * struct rcar_du_plane_state - Driver-specific plane state
+ * @state: base DRM plane state
+ * @format: information about the pixel format used by the plane
+ * @hwindex: 0-based hardware plane index, -1 means unused
+ * @alpha: value of the plane alpha property
+ * @colorkey: value of the plane colorkey property
+ * @zpos: value of the plane zpos property
+ */
 struct rcar_du_plane_state {
        struct drm_plane_state state;
 
        const struct rcar_du_format_info *format;
-       int hwindex;            /* 0-based, -1 means unused */
+       int hwindex;
 
        unsigned int alpha;
        unsigned int colorkey;
@@ -58,7 +59,7 @@ struct rcar_du_plane_state {
 };
 
 static inline struct rcar_du_plane_state *
-to_rcar_du_plane_state(struct drm_plane_state *state)
+to_rcar_plane_state(struct drm_plane_state *state)
 {
        return container_of(state, struct rcar_du_plane_state, state);
 }
index 01e1d27eb078396cd97dc2f3a51cbda8dec95a9f..3077f1554099932f6f477841182fb4eb278c60ea 100644 (file)
@@ -342,9 +342,12 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
        d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size,
                                           &d_page->dma,
                                           pool->gfp_flags);
-       if (d_page->vaddr)
-               d_page->p = virt_to_page(d_page->vaddr);
-       else {
+       if (d_page->vaddr) {
+               if (is_vmalloc_addr(d_page->vaddr))
+                       d_page->p = vmalloc_to_page(d_page->vaddr);
+               else
+                       d_page->p = virt_to_page(d_page->vaddr);
+       } else {
                kfree(d_page);
                d_page = NULL;
        }
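The change above is needed because virt_to_page() is only valid for addresses in the kernel's linear mapping, while dma_alloc_coherent() may return a vmalloc-range address on some configurations; vmalloc_to_page() walks the page tables instead. A hedged restatement of the dispatch as a standalone helper:

/* Resolve a coherent-DMA virtual address to its struct page, handling
 * both linear-map and vmalloc-range addresses (mirrors the fix above).
 */
static struct page *vaddr_to_page(void *vaddr)
{
        if (is_vmalloc_addr(vaddr))
                return vmalloc_to_page(vaddr);

        return virt_to_page(vaddr);
}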
index bd0d644d2a038f352e360b9af5eb9b606d97c032..17c445612e0107646f54d407d70169465ff12776 100644 (file)
        INTEL_SKL_GT2_IDS(info), \
        INTEL_SKL_GT3_IDS(info)
 
-
 #define INTEL_BXT_IDS(info) \
        INTEL_VGA_DEVICE(0x0A84, info), \
-       INTEL_VGA_DEVICE(0x0A85, info), \
-       INTEL_VGA_DEVICE(0x0A86, info), \
-       INTEL_VGA_DEVICE(0x0A87, info)
+       INTEL_VGA_DEVICE(0x1A84, info), \
+       INTEL_VGA_DEVICE(0x5A84, info)
 
 #endif /* _I915_PCIIDS_H */
index 4851d660243cee73d3995e671d5798ecc98d5749..6e1a2ed116cb1410958f61631a7dc30839652738 100644 (file)
@@ -171,8 +171,12 @@ typedef struct _drm_i915_sarea {
 #define I915_BOX_TEXTURE_LOAD  0x8
 #define I915_BOX_LOST_CONTEXT  0x10
 
-/* I915 specific ioctls
- * The device specific ioctl range is 0x40 to 0x79.
+/*
+ * i915 specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END),
+ * i.e. [0x40, 0xa0) (0xa0 is excluded). The numbers below are defined as
+ * offsets against DRM_COMMAND_BASE and should lie in [0x0, 0x60).
  */
 #define DRM_I915_INIT          0x00
 #define DRM_I915_FLUSH         0x01
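As a worked example of the numbering rule above (DRM_COMMAND_BASE is 0x40 in the DRM core; I915_IOCTL_NR() is a hypothetical macro for illustration only):

/* The final ioctl number is the per-driver offset added to
 * DRM_COMMAND_BASE, so offsets in [0x0, 0x60) map to numbers in
 * [0x40, 0xa0), staying below DRM_COMMAND_END.
 */
#define I915_IOCTL_NR(offset)   (DRM_COMMAND_BASE + (offset))

/* I915_IOCTL_NR(DRM_I915_INIT)  == 0x40 + 0x00 == 0x40 */
/* I915_IOCTL_NR(DRM_I915_FLUSH) == 0x40 + 0x01 == 0x41 */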
index 871e73f99a4d7aa13b4cd6f5bc8969421c2a9301..573cb86a3d6e8b0ef7fee5dfb2003b9e2f50ef71 100644 (file)
@@ -1038,6 +1038,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_CURRENT_GPU_SCLK   0x22
 #define RADEON_INFO_CURRENT_GPU_MCLK   0x23
 #define RADEON_INFO_READ_REG           0x24
+#define RADEON_INFO_GPU_RESET_COUNTER  0x25
 
 struct drm_radeon_info {
        uint32_t                request;
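A hedged userspace sketch of querying the new RADEON_INFO_GPU_RESET_COUNTER request (assumes an open DRM fd and the libdrm headers; the kernel writes the counter through the pointer packed into value):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/radeon_drm.h>

/* Query the GPU reset counter; returns 0 on success, -1 with errno set
 * on failure. Illustrative wrapper, not from this patch.
 */
static int radeon_gpu_reset_counter(int fd, uint32_t *counter)
{
        struct drm_radeon_info info = {
                .request = RADEON_INFO_GPU_RESET_COUNTER,
                .value = (uint64_t)(uintptr_t)counter,
        };

        return ioctl(fd, DRM_IOCTL_RADEON_INFO, &info);
}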
index 4ca35a8f9891dae2638207c13903c0f9b55bfa61..d6833426fdef15479a0c2f3fe6145a6d5b8b2e15 100644 (file)
@@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args {
        uint32_t pad;
 };
 
+#define MAX_ALLOWED_NUM_POINTS    100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE  128
+
+struct kfd_ioctl_dbg_register_args {
+       uint32_t gpu_id;                /* to KFD */
+       uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_unregister_args {
+       uint32_t gpu_id;                /* to KFD */
+       uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_address_watch_args {
+       uint64_t content_ptr;           /* a pointer to the actual content */
+       uint32_t gpu_id;                /* to KFD */
+       uint32_t buf_size_in_bytes;     /* including gpu_id and buf_size */
+};
+
+struct kfd_ioctl_dbg_wave_control_args {
+       uint64_t content_ptr;           /* a pointer to the actual content */
+       uint32_t gpu_id;                /* to KFD */
+       uint32_t buf_size_in_bytes;     /* including gpu_id and buf_size */
+};
+
 /* Matching HSA_EVENTTYPE */
 #define KFD_IOC_EVENT_SIGNAL                   0
 #define KFD_IOC_EVENT_NODECHANGE               1
@@ -198,7 +224,8 @@ struct kfd_event_data {
 };
 
 struct kfd_ioctl_wait_events_args {
-       uint64_t events_ptr;            /* to KFD */
+       uint64_t events_ptr;            /* points to a struct
+                                          kfd_event_data array, to KFD */
        uint32_t num_events;            /* to KFD */
        uint32_t wait_for_all;          /* to KFD */
        uint32_t timeout;               /* to KFD */
@@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args {
 #define AMDKFD_IOC_WAIT_EVENTS                 \
                AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
 
+#define AMDKFD_IOC_DBG_REGISTER                        \
+               AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+
+#define AMDKFD_IOC_DBG_UNREGISTER              \
+               AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH           \
+               AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+
+#define AMDKFD_IOC_DBG_WAVE_CONTROL            \
+               AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
 #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x0D
+#define AMDKFD_COMMAND_END             0x11
 
 #endif
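A minimal userspace sketch of invoking the new debugger registration ioctl (assumes an open /dev/kfd descriptor and a valid topology gpu_id; kfd_dbg_register() is an illustrative wrapper, not part of the patch):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Attach the calling process to the KFD debugger for one GPU; returns
 * 0 on success, -1 with errno set on failure.
 */
static int kfd_dbg_register(int kfd_fd, uint32_t gpu_id)
{
        struct kfd_ioctl_dbg_register_args args;

        memset(&args, 0, sizeof(args));
        args.gpu_id = gpu_id;           /* to KFD */

        return ioctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &args);
}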