drivers/gpu/drm/xe/xe_device.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2021 Intel Corporation
   4  */
   5
   6 #include "xe_device.h"
   7
   8 #include <linux/units.h>
   9
  10 #include <drm/drm_aperture.h>
  11 #include <drm/drm_atomic_helper.h>
  12 #include <drm/drm_gem_ttm_helper.h>
  13 #include <drm/drm_ioctl.h>
  14 #include <drm/drm_managed.h>
  15 #include <drm/drm_print.h>
  16 #include <drm/xe_drm.h>
  17
  18 #include "regs/xe_regs.h"
  19 #include "xe_bo.h"
  20 #include "xe_debugfs.h"
  21 #include "xe_dma_buf.h"
  22 #include "xe_drm_client.h"
  23 #include "xe_drv.h"
  24 #include "xe_exec_queue.h"
  25 #include "xe_exec.h"
  26 #include "xe_gt.h"
  27 #include "xe_irq.h"
  28 #include "xe_mmio.h"
  29 #include "xe_module.h"
  30 #include "xe_pat.h"
  31 #include "xe_pcode.h"
  32 #include "xe_pm.h"
  33 #include "xe_query.h"
  34 #include "xe_tile.h"
  35 #include "xe_ttm_stolen_mgr.h"
  36 #include "xe_ttm_sys_mgr.h"
  37 #include "xe_vm.h"
  38 #include "xe_vm_madvise.h"
  39 #include "xe_wait_user_fence.h"
  40 #include "xe_hwmon.h"
  41
  42 #ifdef CONFIG_LOCKDEP
  43 struct lockdep_map xe_device_mem_access_lockdep_map = {
  44         .name = "xe_device_mem_access_lockdep_map"
  45 };
  46 #endif
  47
  48 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
  49 {
  50         struct xe_device *xe = to_xe_device(dev);
  51         struct xe_drm_client *client;
  52         struct xe_file *xef;
  53         int ret = -ENOMEM;
  54
  55         xef = kzalloc(sizeof(*xef), GFP_KERNEL);
  56         if (!xef)
  57                 return ret;
  58
  59         client = xe_drm_client_alloc();
  60         if (!client) {
  61                 kfree(xef);
  62                 return ret;
  63         }
  64
  65         xef->drm = file;
  66         xef->client = client;
  67         xef->xe = xe;
  68
  69         mutex_init(&xef->vm.lock);
  70         xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
  71
  72         mutex_init(&xef->exec_queue.lock);
  73         xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
  74
  75         file->driver_priv = xef;
  76         return 0;
  77 }
  78
  79 static void device_kill_persistent_exec_queues(struct xe_device *xe,
  80                                                struct xe_file *xef);
  81
  82 static void xe_file_close(struct drm_device *dev, struct drm_file *file)
  83 {
  84         struct xe_device *xe = to_xe_device(dev);
  85         struct xe_file *xef = file->driver_priv;
  86         struct xe_vm *vm;
  87         struct xe_exec_queue *q;
  88         unsigned long idx;
  89
  90         mutex_lock(&xef->exec_queue.lock);
  91         xa_for_each(&xef->exec_queue.xa, idx, q) {
  92                 xe_exec_queue_kill(q);
  93                 xe_exec_queue_put(q);
  94         }
  95         mutex_unlock(&xef->exec_queue.lock);
  96         xa_destroy(&xef->exec_queue.xa);
  97         mutex_destroy(&xef->exec_queue.lock);
  98         device_kill_persistent_exec_queues(xe, xef);
  99
 100         mutex_lock(&xef->vm.lock);
 101         xa_for_each(&xef->vm.xa, idx, vm)
 102                 xe_vm_close_and_put(vm);
 103         mutex_unlock(&xef->vm.lock);
 104         xa_destroy(&xef->vm.xa);
 105         mutex_destroy(&xef->vm.lock);
 106
 107         xe_drm_client_put(xef->client);
 108         kfree(xef);
 109 }
 110
 111 static const struct drm_ioctl_desc xe_ioctls[] = {
 112         DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
 113         DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
 114         DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
 115                           DRM_RENDER_ALLOW),
 116         DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
 117         DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 118         DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
 119         DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
 120                           DRM_RENDER_ALLOW),
 121         DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
 122                           DRM_RENDER_ALLOW),
 123         DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 124                           DRM_RENDER_ALLOW),
 125         DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
 126         DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
 127                           DRM_RENDER_ALLOW),
 128         DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
 129                           DRM_RENDER_ALLOW),
 130         DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
 131 };
 132
 133 static const struct file_operations xe_driver_fops = {
 134         .owner = THIS_MODULE,
 135         .open = drm_open,
 136         .release = drm_release_noglobal,
 137         .unlocked_ioctl = drm_ioctl,
 138         .mmap = drm_gem_mmap,
 139         .poll = drm_poll,
 140         .read = drm_read,
 141         .compat_ioctl = drm_compat_ioctl,
 142         .llseek = noop_llseek,
 143 #ifdef CONFIG_PROC_FS
 144         .show_fdinfo = drm_show_fdinfo,
 145 #endif
 146 };
 147
 148 static void xe_driver_release(struct drm_device *dev)
 149 {
 150         struct xe_device *xe = to_xe_device(dev);
 151
 152         pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
 153 }
 154
 155 static struct drm_driver driver = {
 156         /* Don't use MTRRs here; the Xserver or userspace app should
 157          * deal with them for Intel hardware.
 158          */
 159         .driver_features =
 160             DRIVER_GEM |
 161             DRIVER_RENDER | DRIVER_SYNCOBJ |
 162             DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
 163         .open = xe_file_open,
 164         .postclose = xe_file_close,
 165
 166         .gem_prime_import = xe_gem_prime_import,
 167
 168         .dumb_create = xe_bo_dumb_create,
 169         .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
 170 #ifdef CONFIG_PROC_FS
 171         .show_fdinfo = xe_drm_client_fdinfo,
 172 #endif
 173         .release = &xe_driver_release,
 174
 175         .ioctls = xe_ioctls,
 176         .num_ioctls = ARRAY_SIZE(xe_ioctls),
 177         .fops = &xe_driver_fops,
 178         .name = DRIVER_NAME,
 179         .desc = DRIVER_DESC,
 180         .date = DRIVER_DATE,
 181         .major = DRIVER_MAJOR,
 182         .minor = DRIVER_MINOR,
 183         .patchlevel = DRIVER_PATCHLEVEL,
 184 };
 185
 186 static void xe_device_destroy(struct drm_device *dev, void *dummy)
 187 {
 188         struct xe_device *xe = to_xe_device(dev);
 189
 190         if (xe->ordered_wq)
 191                 destroy_workqueue(xe->ordered_wq);
 192
 193         ttm_device_fini(&xe->ttm);
 194 }
 195
 196 struct xe_device *xe_device_create(struct pci_dev *pdev,
 197                                    const struct pci_device_id *ent)
 198 {
 199         struct xe_device *xe;
 200         int err;
 201
 202         err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
 203         if (err)
 204                 return ERR_PTR(err);
 205
 206         xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
 207         if (IS_ERR(xe))
 208                 return xe;
 209
 210         err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
 211                               xe->drm.anon_inode->i_mapping,
 212                               xe->drm.vma_offset_manager, false, false);
 213         if (WARN_ON(err))
 214                 goto err_put;
 215
 216         err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
 217         if (err)
 218                 goto err_put;
 219
 220         xe->info.devid = pdev->device;
 221         xe->info.revid = pdev->revision;
 222         xe->info.force_execlist = force_execlist;
 223
 224         spin_lock_init(&xe->irq.lock);
 225
 226         init_waitqueue_head(&xe->ufence_wq);
 227
 228         drmm_mutex_init(&xe->drm, &xe->usm.lock);
 229         xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
 230
 231         drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
 232         INIT_LIST_HEAD(&xe->persistent_engines.list);
 233
 234         spin_lock_init(&xe->pinned.lock);
 235         INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
 236         INIT_LIST_HEAD(&xe->pinned.external_vram);
 237         INIT_LIST_HEAD(&xe->pinned.evicted);
 238
 239         xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
 240         if (!xe->ordered_wq) {
 241                 drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n");
 242                 err = -ENOMEM;
 243                 goto err_put;
 244         }
 245
 246         drmm_mutex_init(&xe->drm, &xe->sb_lock);
 247         xe->enabled_irq_mask = ~0;
 248
 249         return xe;
 250
 251 err_put:
 252         drm_dev_put(&xe->drm);
 253
 254         return ERR_PTR(err);
 255 }
 256
/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
 270 static void xe_driver_flr(struct xe_device *xe)
 271 {
 272         const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
 273         struct xe_gt *gt = xe_root_mmio_gt(xe);
 274         int ret;
 275
 276         if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
 277                 drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
 278                 return;
 279         }
 280
 281         drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
 282
 283         /*
 284          * Make sure any pending FLR requests have cleared by waiting for the
 285          * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
 286          * to make sure it's not still set from a prior attempt (it's a write to
 287          * clear bit).
 288          * Note that we should never be in a situation where a previous attempt
 289          * is still pending (unless the HW is totally dead), but better to be
 290          * safe in case something unexpected happens
 291          */
 292         ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
 293         if (ret) {
 294                 drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
 295                 return;
 296         }
 297         xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
 298
 299         /* Trigger the actual Driver-FLR */
 300         xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
 301
 302         /* Wait for hardware teardown to complete */
 303         ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
 304         if (ret) {
 305                 drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
 306                 return;
 307         }
 308
 309         /* Wait for hardware/firmware re-init to complete */
 310         ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
 311                              flr_timeout, NULL, false);
 312         if (ret) {
 313                 drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
 314                 return;
 315         }
 316
 317         /* Clear sticky completion status */
 318         xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
 319 }
 320
 321 static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
 322 {
 323         struct xe_device *xe = arg;
 324
 325         if (xe->needs_flr_on_fini)
 326                 xe_driver_flr(xe);
 327 }
 328
 329 static void xe_device_sanitize(struct drm_device *drm, void *arg)
 330 {
 331         struct xe_device *xe = arg;
 332         struct xe_gt *gt;
 333         u8 id;
 334
 335         for_each_gt(gt, xe, id)
 336                 xe_gt_sanitize(gt);
 337 }
 338
 339 int xe_device_probe(struct xe_device *xe)
 340 {
 341         struct xe_tile *tile;
 342         struct xe_gt *gt;
 343         int err;
 344         u8 id;
 345
 346         xe_pat_init_early(xe);
 347
 348         xe->info.mem_region_mask = 1;
 349
 350         for_each_tile(tile, xe, id) {
 351                 err = xe_tile_alloc(tile);
 352                 if (err)
 353                         return err;
 354         }
 355
 356         err = xe_mmio_init(xe);
 357         if (err)
 358                 return err;
 359
 360         err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
 361         if (err)
 362                 return err;
 363
 364         for_each_gt(gt, xe, id) {
 365                 err = xe_pcode_probe(gt);
 366                 if (err)
 367                         return err;
 368         }
 369
 370         err = xe_irq_install(xe);
 371         if (err)
 372                 return err;
 373
 374         for_each_gt(gt, xe, id) {
 375                 err = xe_gt_init_early(gt);
 376                 if (err)
 377                         goto err_irq_shutdown;
 378         }
 379
 380         err = xe_mmio_probe_vram(xe);
 381         if (err)
 382                 goto err_irq_shutdown;
 383
 384         xe_ttm_sys_mgr_init(xe);
 385
 386         for_each_tile(tile, xe, id) {
 387                 err = xe_tile_init_noalloc(tile);
 388                 if (err)
 389                         goto err_irq_shutdown;
 390         }
 391
 392         /* Allocate and map stolen after potential VRAM resize */
 393         xe_ttm_stolen_mgr_init(xe);
 394
 395         for_each_gt(gt, xe, id) {
 396                 err = xe_gt_init(gt);
 397                 if (err)
 398                         goto err_irq_shutdown;
 399         }
 400
 401         xe_heci_gsc_init(xe);
 402
 403         err = drm_dev_register(&xe->drm, 0);
 404         if (err)
 405                 goto err_irq_shutdown;
 406
 407         xe_debugfs_register(xe);
 408
 409         xe_pmu_register(&xe->pmu);
 410
 411         xe_hwmon_register(xe);
 412
 413         err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
 414         if (err)
 415                 return err;
 416
 417         return 0;
 418
 419 err_irq_shutdown:
 420         xe_irq_shutdown(xe);
 421         return err;
 422 }
 423
 424 void xe_device_remove(struct xe_device *xe)
 425 {
 426         xe_heci_gsc_fini(xe);
 427
 428         xe_irq_shutdown(xe);
 429 }
 430
 431 void xe_device_shutdown(struct xe_device *xe)
 432 {
 433 }
 434
 435 void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
 436 {
 437         mutex_lock(&xe->persistent_engines.lock);
 438         list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
 439         mutex_unlock(&xe->persistent_engines.lock);
 440 }
 441
 442 void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
 443                                              struct xe_exec_queue *q)
 444 {
 445         mutex_lock(&xe->persistent_engines.lock);
 446         if (!list_empty(&q->persistent.link))
 447                 list_del(&q->persistent.link);
 448         mutex_unlock(&xe->persistent_engines.lock);
 449 }
 450
 451 static void device_kill_persistent_exec_queues(struct xe_device *xe,
 452                                                struct xe_file *xef)
 453 {
 454         struct xe_exec_queue *q, *next;
 455
 456         mutex_lock(&xe->persistent_engines.lock);
 457         list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
 458                                  persistent.link)
 459                 if (q->persistent.xef == xef) {
 460                         xe_exec_queue_kill(q);
 461                         list_del_init(&q->persistent.link);
 462                 }
 463         mutex_unlock(&xe->persistent_engines.lock);
 464 }
 465
 466 void xe_device_wmb(struct xe_device *xe)
 467 {
 468         struct xe_gt *gt = xe_root_mmio_gt(xe);
 469
 470         wmb();
 471         if (IS_DGFX(xe))
 472                 xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
 473 }
 474
 475 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 476 {
 477         return xe_device_has_flat_ccs(xe) ?
 478                 DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
 479 }
 480
 481 bool xe_device_mem_access_ongoing(struct xe_device *xe)
 482 {
 483         if (xe_pm_read_callback_task(xe) != NULL)
 484                 return true;
 485
 486         return atomic_read(&xe->mem_access.ref);
 487 }
 488
 489 void xe_device_assert_mem_access(struct xe_device *xe)
 490 {
 491         XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
 492 }
 493
 494 bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
 495 {
 496         bool active;
 497
 498         if (xe_pm_read_callback_task(xe) == current)
 499                 return true;
 500
 501         active = xe_pm_runtime_get_if_active(xe);
 502         if (active) {
 503                 int ref = atomic_inc_return(&xe->mem_access.ref);
 504
 505                 xe_assert(xe, ref != S32_MAX);
 506         }
 507
 508         return active;
 509 }
 510
 511 void xe_device_mem_access_get(struct xe_device *xe)
 512 {
 513         int ref;
 514
 515         /*
 516          * This looks racy, but should be fine since the pm_callback_task only
 517          * transitions from NULL -> current (and back to NULL again), during the
 518          * runtime_resume() or runtime_suspend() callbacks, for which there can
 519          * only be a single one running for our device. We only need to prevent
 520          * recursively calling the runtime_get or runtime_put from those
 521          * callbacks, as well as preventing triggering any access_ongoing
 522          * asserts.
 523          */
 524         if (xe_pm_read_callback_task(xe) == current)
 525                 return;
 526
 527         /*
 528          * Since the resume here is synchronous it can be quite easy to deadlock
 529          * if we are not careful. Also in practice it might be quite timing
 530          * sensitive to ever see the 0 -> 1 transition with the callers locks
 531          * held, so deadlocks might exist but are hard for lockdep to ever see.
 532          * With this in mind, help lockdep learn about the potentially scary
 533          * stuff that can happen inside the runtime_resume callback by acquiring
 534          * a dummy lock (it doesn't protect anything and gets compiled out on
 535          * non-debug builds).  Lockdep then only needs to see the
 536          * mem_access_lockdep_map -> runtime_resume callback once, and then can
 537          * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
 538          * For example if the (callers_locks) are ever grabbed in the
 539          * runtime_resume callback, lockdep should give us a nice splat.
 540          */
 541         lock_map_acquire(&xe_device_mem_access_lockdep_map);
 542         lock_map_release(&xe_device_mem_access_lockdep_map);
 543
 544         xe_pm_runtime_get(xe);
 545         ref = atomic_inc_return(&xe->mem_access.ref);
 546
 547         xe_assert(xe, ref != S32_MAX);
 548
 549 }
 550
 551 void xe_device_mem_access_put(struct xe_device *xe)
 552 {
 553         int ref;
 554
 555         if (xe_pm_read_callback_task(xe) == current)
 556                 return;
 557
 558         ref = atomic_dec_return(&xe->mem_access.ref);
 559         xe_pm_runtime_put(xe);
 560
 561         xe_assert(xe, ref >= 0);
 562 }