Commit | Line | Data |
---|---|---|
dd08ebf6 MB |
1 | // SPDX-License-Identifier: MIT |
2 | /* | |
3 | * Copyright © 2020 Intel Corporation | |
4 | */ | |
ea9f879d | 5 | |
dd08ebf6 MB |
6 | #include "xe_migrate.h" |
7 | ||
8cb49012 | 8 | #include <linux/bitfield.h> |
ea9f879d LDM |
9 | #include <linux/sizes.h> |
10 | ||
11 | #include <drm/drm_managed.h> | |
12 | #include <drm/ttm/ttm_tt.h> | |
13 | #include <drm/xe_drm.h> | |
14 | ||
a043fbab | 15 | #include "generated/xe_wa_oob.h" |
0134f130 | 16 | #include "instructions/xe_mi_commands.h" |
63955b3b | 17 | #include "regs/xe_gpu_commands.h" |
7cba3396 | 18 | #include "tests/xe_test.h" |
c73acc1e | 19 | #include "xe_assert.h" |
dd08ebf6 MB |
20 | #include "xe_bb.h" |
21 | #include "xe_bo.h" | |
c22a4ed0 | 22 | #include "xe_exec_queue.h" |
dd08ebf6 MB |
23 | #include "xe_ggtt.h" |
24 | #include "xe_gt.h" | |
25 | #include "xe_hw_engine.h" | |
26 | #include "xe_lrc.h" | |
27 | #include "xe_map.h" | |
28 | #include "xe_mocs.h" | |
29 | #include "xe_pt.h" | |
30 | #include "xe_res_cursor.h" | |
31 | #include "xe_sched_job.h" | |
32 | #include "xe_sync.h" | |
33 | #include "xe_trace.h" | |
34 | #include "xe_vm.h" | |
a043fbab | 35 | #include "xe_wa.h" |
dd08ebf6 | 36 | |
e9d285ff TH |
37 | /** |
38 | * struct xe_migrate - migrate context. | |
39 | */ | |
dd08ebf6 | 40 | struct xe_migrate { |
9b9529ce FD |
41 | /** @q: Default exec queue used for migration */ |
42 | struct xe_exec_queue *q; | |
08dea767 MR |
43 | /** @tile: Backpointer to the tile this struct xe_migrate belongs to. */ |
44 | struct xe_tile *tile; | |
e9d285ff | 45 | /** @job_mutex: Timeline mutex for @q. */ |
dd08ebf6 | 46 | struct mutex job_mutex; |
e9d285ff | 47 | /** @pt_bo: Page-table buffer object. */ |
dd08ebf6 | 48 | struct xe_bo *pt_bo; |
e9d285ff | 49 | /** @batch_base_ofs: VM offset of the migration batch buffer */ |
dd08ebf6 | 50 | u64 batch_base_ofs; |
e9d285ff | 51 | /** @usm_batch_base_ofs: VM offset of the usm batch buffer */ |
dd08ebf6 | 52 | u64 usm_batch_base_ofs; |
9116eabb HPG |
53 | /** @cleared_mem_ofs: VM offset of the NULL-mapped (reads back as zero) memory used as a clear source for CCS metadata. */ |
54 | u64 cleared_mem_ofs; | |
e9d285ff TH |
55 | /** |
56 | * @fence: dma-fence representing the last migration job batch. | |
57 | * Protected by @job_mutex. | |
58 | */ | |
dd08ebf6 | 59 | struct dma_fence *fence; |
e9d285ff TH |
60 | /** |
61 | * @vm_update_sa: For integrated, used to suballocate page-tables | |
62 | * out of the pt_bo. | |
63 | */ | |
dd08ebf6 MB |
64 | struct drm_suballoc_manager vm_update_sa; |
65 | }; | |
66 | ||
67 | #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ | |
09427526 | 68 | #define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */ |
dd08ebf6 MB |
69 | #define NUM_KERNEL_PDE 17 |
70 | #define NUM_PT_SLOTS 32 | |
09427526 | 71 | #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M |
dd08ebf6 | 72 | |
e9d285ff | 73 | /** |
08dea767 MR |
74 | * xe_tile_migrate_engine() - Get this tile's migrate engine. |
75 | * @tile: The tile. | |
e9d285ff | 76 | * |
08dea767 | 77 | * Returns the default migrate engine of this tile. |
e9d285ff TH |
78 | * TODO: Perhaps this function is slightly misplaced, and even unneeded? |
79 | * | |
80 | * Return: The default migrate engine | |
81 | */ | |
9b9529ce | 82 | struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile) |
dd08ebf6 | 83 | { |
9b9529ce | 84 | return tile->migrate->q; |
dd08ebf6 MB |
85 | } |
86 | ||
87 | static void xe_migrate_fini(struct drm_device *dev, void *arg) | |
88 | { | |
89 | struct xe_migrate *m = arg; | |
dd08ebf6 | 90 | |
d00e9cc2 | 91 | xe_vm_lock(m->q->vm, false); |
dd08ebf6 | 92 | xe_bo_unpin(m->pt_bo); |
d00e9cc2 | 93 | xe_vm_unlock(m->q->vm); |
dd08ebf6 MB |
94 | |
95 | dma_fence_put(m->fence); | |
dd08ebf6 MB |
96 | xe_bo_put(m->pt_bo); |
97 | drm_suballoc_manager_fini(&m->vm_update_sa); | |
98 | mutex_destroy(&m->job_mutex); | |
9b9529ce FD |
99 | xe_vm_close_and_put(m->q->vm); |
100 | xe_exec_queue_put(m->q); | |
dd08ebf6 MB |
101 | } |
102 | ||
103 | static u64 xe_migrate_vm_addr(u64 slot, u32 level) | |
104 | { | |
99fea682 | 105 | XE_WARN_ON(slot >= NUM_PT_SLOTS); |
dd08ebf6 MB |
106 | |
107 | /* First slot is reserved for mapping of PT bo and bb, start from 1 */ | |
108 | return (slot + 1ULL) << xe_pt_shift(level + 1); | |
109 | } | |
110 | ||
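A minimal worked example of the slot addressing above, assuming xe_pt_shift(level) evaluates to 12 + 9 * level (4 KiB pages, 512-entry tables), so one level-0 PT slot spans 2 MiB:

```c
#include <stdio.h>

/* Assumed: xe_pt_shift(1) == 21, i.e. a level-0 PT covers 2 MiB. */
static unsigned long long migrate_vm_addr(unsigned long long slot)
{
	/* Slot 0 is reserved for the PT bo / batch-buffer mapping, hence +1. */
	return (slot + 1) << 21;
}

int main(void)
{
	printf("slot 0  -> %#llx\n", migrate_vm_addr(0));  /* 0x200000  (2 MiB)  */
	printf("slot 31 -> %#llx\n", migrate_vm_addr(31)); /* 0x4000000 (64 MiB) */
	return 0;
}
```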
d9e85dd5 | 111 | static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr) |
dd08ebf6 | 112 | { |
d9e85dd5 DK |
113 | /* |
114 | * Remove the DPA to get a correct offset into identity table for the | |
115 | * migrate offset | |
116 | */ | |
117 | addr -= xe->mem.vram.dpa_base; | |
dd08ebf6 MB |
118 | return addr + (256ULL << xe_pt_shift(2)); |
119 | } | |
120 | ||
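The function above rebases a VRAM device physical address into the identity map; a small sketch of that arithmetic, assuming xe_pt_shift(2) == 30 so the identity map begins at 256 GiB (matching the "Identity map the entire vram at 256GiB offset" comment further down):

```c
#include <stdio.h>

#define IDENTITY_BASE	(256ULL << 30)	/* 256 GiB */

static unsigned long long migrate_vram_ofs(unsigned long long addr,
					   unsigned long long dpa_base)
{
	/* Strip the device physical address base, then rebase at 256 GiB. */
	return (addr - dpa_base) + IDENTITY_BASE;
}

int main(void)
{
	/* A page 16 MiB into VRAM lands 16 MiB above the 256 GiB mark. */
	printf("%#llx\n", migrate_vram_ofs(0x101000000ULL, 0x100000000ULL));
	return 0;
}
```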
08dea767 | 121 | static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, |
dd08ebf6 MB |
122 | struct xe_vm *vm) |
123 | { | |
08dea767 | 124 | struct xe_device *xe = tile_to_xe(tile); |
e814389f | 125 | u16 pat_index = xe->pat.idx[XE_CACHE_WB]; |
08dea767 | 126 | u8 id = tile->id; |
dd08ebf6 MB |
127 | u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; |
128 | u32 map_ofs, level, i; | |
876611c2 | 129 | struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; |
dd08ebf6 | 130 | u64 entry; |
dd08ebf6 MB |
131 | |
132 | /* Can't bump NUM_PT_SLOTS too high */ | |
58e19acf | 133 | BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE); |
dd08ebf6 | 134 | /* Must be a multiple of 64K to support all platforms */ |
58e19acf | 135 | BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K); |
dd08ebf6 MB |
136 | /* And one slot reserved for the 4KiB page table updates */ |
137 | BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); | |
138 | ||
139 | /* Need to be sure everything fits in the first PT, or create more */ | |
c73acc1e | 140 | xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); |
dd08ebf6 | 141 | |
876611c2 | 142 | bo = xe_bo_create_pin_map(vm->xe, tile, vm, |
58e19acf | 143 | num_entries * XE_PAGE_SIZE, |
dd08ebf6 | 144 | ttm_bo_type_kernel, |
876611c2 | 145 | XE_BO_CREATE_VRAM_IF_DGFX(tile) | |
dd08ebf6 MB |
146 | XE_BO_CREATE_PINNED_BIT); |
147 | if (IS_ERR(bo)) | |
148 | return PTR_ERR(bo); | |
149 | ||
e814389f | 150 | entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index); |
dd08ebf6 MB |
151 | xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); |
152 | ||
58e19acf | 153 | map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; |
dd08ebf6 MB |
154 | |
155 | /* Map the entire BO in our level 0 pt */ | |
156 | for (i = 0, level = 0; i < num_entries; level++) { | |
0e5e77bd | 157 | entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE, |
e814389f | 158 | pat_index, 0); |
dd08ebf6 MB |
159 | |
160 | xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); | |
161 | ||
0d39b6da | 162 | if (vm->flags & XE_VM_FLAG_64K) |
dd08ebf6 MB |
163 | i += 16; |
164 | else | |
165 | i += 1; | |
166 | } | |
167 | ||
168 | if (!IS_DGFX(xe)) { | |
dd08ebf6 | 169 | /* Write out batch too */ |
58e19acf | 170 | m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; |
5a92da34 | 171 | if (xe->info.has_usm) { |
a682b6a4 BW |
172 | batch = tile->primary_gt->usm.bb_pool->bo; |
173 | m->usm_batch_base_ofs = m->batch_base_ofs; | |
174 | } | |
175 | ||
dd08ebf6 | 176 | for (i = 0; i < batch->size; |
0d39b6da | 177 | i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : |
58e19acf | 178 | XE_PAGE_SIZE) { |
0e5e77bd | 179 | entry = vm->pt_ops->pte_encode_bo(batch, i, |
e814389f | 180 | pat_index, 0); |
dd08ebf6 MB |
181 | |
182 | xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, | |
183 | entry); | |
184 | level++; | |
185 | } | |
186 | } else { | |
937b4be7 | 187 | u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); |
dd08ebf6 | 188 | |
d9e85dd5 | 189 | m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); |
dd08ebf6 | 190 | |
5a92da34 | 191 | if (xe->info.has_usm) { |
f6929e80 | 192 | batch = tile->primary_gt->usm.bb_pool->bo; |
937b4be7 | 193 | batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); |
d9e85dd5 | 194 | m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); |
dd08ebf6 MB |
195 | } |
196 | } | |
197 | ||
198 | for (level = 1; level < num_level; level++) { | |
199 | u32 flags = 0; | |
200 | ||
0d39b6da | 201 | if (vm->flags & XE_VM_FLAG_64K && level == 1) |
58e19acf | 202 | flags = XE_PDE_64K; |
dd08ebf6 | 203 | |
0e5e77bd | 204 | entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * |
e814389f | 205 | XE_PAGE_SIZE, pat_index); |
58e19acf | 206 | xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, |
dd08ebf6 MB |
207 | entry | flags); |
208 | } | |
209 | ||
210 | /* Write PDE's that point to our BO. */ | |
211 | for (i = 0; i < num_entries - num_level; i++) { | |
0e5e77bd | 212 | entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, |
e814389f | 213 | pat_index); |
dd08ebf6 | 214 | |
58e19acf | 215 | xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + |
dd08ebf6 MB |
216 | (i + 1) * 8, u64, entry); |
217 | } | |
218 | ||
9116eabb HPG |
219 | /* Set up a 1GiB NULL mapping at 255GiB offset. */ |
220 | level = 2; | |
221 | xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, | |
222 | vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | |
223 | | XE_PTE_NULL); | |
224 | m->cleared_mem_ofs = (255ULL << xe_pt_shift(level)); | |
225 | ||
dd08ebf6 MB |
226 | /* Identity map the entire vram at 256GiB offset */ |
227 | if (IS_DGFX(xe)) { | |
228 | u64 pos, ofs, flags; | |
229 | ||
230 | level = 2; | |
58e19acf | 231 | ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; |
e814389f | 232 | flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, |
fcd75139 | 233 | true, 0); |
dd08ebf6 MB |
234 | |
235 | /* | |
236 | * Use 1GB pages; it doesn't matter that the physical amount of |
237 | * vram is smaller, as long as we never access the excess range. |
238 | */ | |
d9e85dd5 DK |
239 | for (pos = xe->mem.vram.dpa_base; |
240 | pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base; | |
241 | pos += SZ_1G, ofs += 8) | |
dd08ebf6 MB |
242 | xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); |
243 | } | |
244 | ||
245 | /* | |
246 | * Example layout created above, with root level = 3: | |
247 | * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's | |
248 | * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's | |
249 | * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's | |
250 | * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2] | |
251 | * | |
252 | * This makes the lowest part of the VM point to the pagetables. | |
253 | * Hence the lowest 2M of the VM points to itself; with a few writes |
254 | * and flushes, the other parts of the VM can be used for copying and |
255 | * clearing. |
256 | * | |
257 | * For performance, the kernel reserves PDE's, so about 20 are left | |
258 | * for async VM updates. | |
259 | * | |
260 | * To make it easier to work, each scratch PT is put in slot (1 + PT #) | |
261 | * everywhere, this allows lockless updates to scratch pages by using | |
262 | * the different addresses in VM. | |
263 | */ | |
264 | #define NUM_VMUSA_UNIT_PER_PAGE 32 | |
58e19acf | 265 | #define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) |
dd08ebf6 MB |
266 | #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) |
267 | drm_suballoc_manager_init(&m->vm_update_sa, | |
58e19acf | 268 | (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * |
dd08ebf6 MB |
269 | NUM_VMUSA_UNIT_PER_PAGE, 0); |
270 | ||
271 | m->pt_bo = bo; | |
272 | return 0; | |
273 | } | |
274 | ||
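To make the suballocator sizing above concrete, a sketch that replays the arithmetic with hypothetical values (a 4-level VM is assumed; the defines are copied from this file):

```c
#include <stdio.h>

#define XE_PAGE_SIZE		4096
#define NUM_PT_SLOTS		32
#define NUM_KERNEL_PDE		17
#define NUM_VMUSA_UNIT_PER_PAGE	32

int main(void)
{
	unsigned int num_entries = NUM_PT_SLOTS, num_level = 4; /* assumed */
	unsigned int map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
	unsigned int unit_size = XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE;
	unsigned int sa_units = (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
				NUM_VMUSA_UNIT_PER_PAGE;

	/* 128-byte units of 16 qword PTE writes each; 352 units here. */
	printf("unit %u bytes, %u qwords, %u units\n",
	       unit_size, unit_size / 8, sa_units);
	return 0;
}
```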
a043fbab NV |
275 | /* |
276 | * Due to workaround 16017236439, odd instance hardware copy engines are | |
277 | * faster than even instance ones. | |
278 | * This function returns the mask of all fast copy engines plus the |
279 | * reserved copy engine, to be used as the logical mask for the migrate engine. |
280 | * Including the reserved copy engine is required to avoid deadlocks caused by |
281 | * migrate jobs servicing faults getting stuck behind the job that faulted. |
282 | */ | |
283 | static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) | |
284 | { | |
285 | u32 logical_mask = 0; | |
286 | struct xe_hw_engine *hwe; | |
287 | enum xe_hw_engine_id id; | |
288 | ||
289 | for_each_hw_engine(hwe, gt, id) { | |
290 | if (hwe->class != XE_ENGINE_CLASS_COPY) | |
291 | continue; | |
292 | ||
293 | if (!XE_WA(gt, 16017236439) || | |
294 | xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) | |
295 | logical_mask |= BIT(hwe->logical_instance); | |
296 | } | |
297 | ||
298 | return logical_mask; | |
299 | } | |
300 | ||
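A toy illustration of the mask this produces (hypothetical engine list, not real hardware data): with the workaround active, the reserved USM engine and every odd-instance copy engine are selected.

```c
#include <stdio.h>

/* Hypothetical copy engines: instance, logical instance, reserved-for-USM. */
struct eng { unsigned int inst, logical; int usm; };

int main(void)
{
	struct eng copy[] = { {0, 0, 1}, {1, 1, 0}, {2, 2, 0}, {3, 3, 0} };
	unsigned int mask = 0, i, wa_16017236439 = 1;

	for (i = 0; i < 4; i++)
		if (!wa_16017236439 || copy[i].usm || (copy[i].inst & 1))
			mask |= 1u << copy[i].logical;

	printf("logical mask: %#x\n", mask);	/* 0xb: BCS0, BCS1, BCS3 */
	return 0;
}
```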
e9d285ff TH |
301 | /** |
302 | * xe_migrate_init() - Initialize a migrate context | |
08dea767 | 303 | * @tile: Back-pointer to the tile we're initializing for. |
e9d285ff TH |
304 | * |
305 | * Return: Pointer to a migrate context on success. Error pointer on error. | |
306 | */ | |
08dea767 | 307 | struct xe_migrate *xe_migrate_init(struct xe_tile *tile) |
dd08ebf6 | 308 | { |
08dea767 | 309 | struct xe_device *xe = tile_to_xe(tile); |
f6929e80 | 310 | struct xe_gt *primary_gt = tile->primary_gt; |
dd08ebf6 MB |
311 | struct xe_migrate *m; |
312 | struct xe_vm *vm; | |
dd08ebf6 MB |
313 | int err; |
314 | ||
dd08ebf6 MB |
315 | m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); |
316 | if (!m) | |
317 | return ERR_PTR(-ENOMEM); | |
318 | ||
08dea767 | 319 | m->tile = tile; |
dd08ebf6 MB |
320 | |
321 | /* Special layout, prepared below.. */ | |
322 | vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | | |
08dea767 | 323 | XE_VM_FLAG_SET_TILE_ID(tile)); |
dd08ebf6 MB |
324 | if (IS_ERR(vm)) |
325 | return ERR_CAST(vm); | |
326 | ||
d00e9cc2 | 327 | xe_vm_lock(vm, false); |
08dea767 | 328 | err = xe_migrate_prepare_vm(tile, m, vm); |
d00e9cc2 | 329 | xe_vm_unlock(vm); |
dd08ebf6 MB |
330 | if (err) { |
331 | xe_vm_close_and_put(vm); | |
332 | return ERR_PTR(err); | |
333 | } | |
334 | ||
5a92da34 | 335 | if (xe->info.has_usm) { |
08dea767 | 336 | struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, |
dd08ebf6 | 337 | XE_ENGINE_CLASS_COPY, |
08dea767 | 338 | primary_gt->usm.reserved_bcs_instance, |
dd08ebf6 | 339 | false); |
a043fbab NV |
340 | u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt); |
341 | ||
342 | if (!hwe || !logical_mask) | |
dd08ebf6 MB |
343 | return ERR_PTR(-EINVAL); |
344 | ||
a043fbab | 345 | m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, |
923e4238 | 346 | EXEC_QUEUE_FLAG_KERNEL | |
a8004af3 BW |
347 | EXEC_QUEUE_FLAG_PERMANENT | |
348 | EXEC_QUEUE_FLAG_HIGH_PRIORITY); | |
dd08ebf6 | 349 | } else { |
9b9529ce FD |
350 | m->q = xe_exec_queue_create_class(xe, primary_gt, vm, |
351 | XE_ENGINE_CLASS_COPY, | |
923e4238 DCS |
352 | EXEC_QUEUE_FLAG_KERNEL | |
353 | EXEC_QUEUE_FLAG_PERMANENT); | |
dd08ebf6 | 354 | } |
9b9529ce | 355 | if (IS_ERR(m->q)) { |
dd08ebf6 | 356 | xe_vm_close_and_put(vm); |
9b9529ce | 357 | return ERR_CAST(m->q); |
dd08ebf6 MB |
358 | } |
359 | ||
360 | mutex_init(&m->job_mutex); | |
361 | ||
362 | err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); | |
363 | if (err) | |
364 | return ERR_PTR(err); | |
365 | ||
366 | return m; | |
367 | } | |
368 | ||
09427526 HPG |
369 | static u64 max_mem_transfer_per_pass(struct xe_device *xe) |
370 | { | |
371 | if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe)) | |
372 | return MAX_CCS_LIMITED_TRANSFER; | |
373 | ||
374 | return MAX_PREEMPTDISABLE_TRANSFER; | |
375 | } | |
376 | ||
377 | static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur) | |
dd08ebf6 MB |
378 | { |
379 | /* | |
380 | * For VRAM we use identity mapped pages so we are limited to current | |
381 | * cursor size. For system we program the pages ourselves so we have no | |
382 | * such limitation. | |
383 | */ | |
09427526 | 384 | return min_t(u64, max_mem_transfer_per_pass(xe), |
dd08ebf6 MB |
385 | mem_type_is_vram(cur->mem_type) ? cur->size : |
386 | cur->remaining); | |
387 | } | |
388 | ||
389 | static u32 pte_update_size(struct xe_migrate *m, | |
390 | bool is_vram, | |
c33a7219 | 391 | struct ttm_resource *res, |
dd08ebf6 MB |
392 | struct xe_res_cursor *cur, |
393 | u64 *L0, u64 *L0_ofs, u32 *L0_pt, | |
394 | u32 cmd_size, u32 pt_ofs, u32 avail_pts) | |
395 | { | |
396 | u32 cmds = 0; | |
397 | ||
398 | *L0_pt = pt_ofs; | |
399 | if (!is_vram) { | |
400 | /* Clip L0 to available size */ | |
401 | u64 size = min(*L0, (u64)avail_pts * SZ_2M); | |
58e19acf | 402 | u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); |
dd08ebf6 MB |
403 | |
404 | *L0 = size; | |
405 | *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); | |
406 | ||
407 | /* MI_STORE_DATA_IMM */ | |
408 | cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); | |
409 | ||
410 | /* PDE qwords */ | |
411 | cmds += num_4k_pages * 2; | |
412 | ||
413 | /* Each chunk has a single blit command */ | |
414 | cmds += cmd_size; | |
415 | } else { | |
416 | /* Offset into identity map. */ | |
d9e85dd5 DK |
417 | *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), |
418 | cur->start + vram_region_gpu_offset(res)); | |
dd08ebf6 MB |
419 | cmds += cmd_size; |
420 | } | |
421 | ||
422 | return cmds; | |
423 | } | |
424 | ||
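For a sense of scale, the dword count this returns for one fully used 2 MiB system-memory chunk (the arithmetic mirrors the function above; EMIT_COPY_DW of 10 is taken as cmd_size):

```c
#include <stdio.h>

#define XE_PAGE_SIZE	4096
#define SDI_MAX_QWORDS	0x1ff	/* qwords per MI_STORE_DATA_IMM */

int main(void)
{
	unsigned long long size = 2ULL << 20;	/* one 2 MiB level-0 slot */
	unsigned int num_4k_pages = (size + XE_PAGE_SIZE - 1) / XE_PAGE_SIZE;
	unsigned int cmds = 0, cmd_size = 10;	/* EMIT_COPY_DW */

	cmds += 3 * ((num_4k_pages + SDI_MAX_QWORDS - 1) / SDI_MAX_QWORDS);
	cmds += num_4k_pages * 2;	/* one PTE qword per 4 KiB page */
	cmds += cmd_size;		/* the single blit command */

	printf("%u dwords\n", cmds);	/* 512 pages -> 6 + 1024 + 10 = 1040 */
	return 0;
}
```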
425 | static void emit_pte(struct xe_migrate *m, | |
426 | struct xe_bb *bb, u32 at_pt, | |
65ef8dba | 427 | bool is_vram, bool is_comp_pte, |
dd08ebf6 MB |
428 | struct xe_res_cursor *cur, |
429 | u32 size, struct xe_bo *bo) | |
430 | { | |
65ef8dba HPG |
431 | struct xe_device *xe = tile_to_xe(m->tile); |
432 | ||
433 | u16 pat_index; | |
dd08ebf6 | 434 | u32 ptes; |
58e19acf | 435 | u64 ofs = at_pt * XE_PAGE_SIZE; |
dd08ebf6 MB |
436 | u64 cur_ofs; |
437 | ||
65ef8dba HPG |
438 | /* Indirect access needs the compression-enabled, uncached PAT index */ |
439 | if (GRAPHICS_VERx100(xe) >= 2000) | |
440 | pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : | |
441 | xe->pat.idx[XE_CACHE_NONE]; | |
442 | else | |
443 | pat_index = xe->pat.idx[XE_CACHE_WB]; | |
444 | ||
dd08ebf6 MB |
445 | /* |
446 | * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently | |
447 | * we're only emitting VRAM PTEs during sanity tests, so when | |
448 | * that's moved to a Kunit test, we should condition VRAM PTEs | |
449 | * on running tests. | |
450 | */ | |
451 | ||
58e19acf | 452 | ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); |
dd08ebf6 MB |
453 | |
454 | while (ptes) { | |
455 | u32 chunk = min(0x1ffU, ptes); | |
456 | ||
14a1e6a4 | 457 | bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); |
dd08ebf6 MB |
458 | bb->cs[bb->len++] = ofs; |
459 | bb->cs[bb->len++] = 0; | |
460 | ||
461 | cur_ofs = ofs; | |
462 | ofs += chunk * 8; | |
463 | ptes -= chunk; | |
464 | ||
465 | while (chunk--) { | |
23c8495e LDM |
466 | u64 addr, flags = 0; |
467 | bool devmem = false; | |
dd08ebf6 | 468 | |
e89b384c | 469 | addr = xe_res_dma(cur) & PAGE_MASK; |
dd08ebf6 | 470 | if (is_vram) { |
dd08ebf6 | 471 | /* Is this a 64K PTE entry? */ |
9b9529ce | 472 | if ((m->q->vm->flags & XE_VM_FLAG_64K) && |
dd08ebf6 | 473 | !(cur_ofs & (16 * 8 - 1))) { |
c73acc1e | 474 | xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); |
23c8495e | 475 | flags |= XE_PTE_PS64; |
dd08ebf6 MB |
476 | } |
477 | ||
fb31517c | 478 | addr += vram_region_gpu_offset(bo->ttm.resource); |
23c8495e | 479 | devmem = true; |
dd08ebf6 | 480 | } |
23c8495e | 481 | |
fcd75139 | 482 | addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, |
e814389f | 483 | addr, pat_index, |
23c8495e | 484 | 0, devmem, flags); |
dd08ebf6 MB |
485 | bb->cs[bb->len++] = lower_32_bits(addr); |
486 | bb->cs[bb->len++] = upper_32_bits(addr); | |
487 | ||
72e8d73b | 488 | xe_res_next(cur, min_t(u32, size, PAGE_SIZE)); |
dd08ebf6 MB |
489 | cur_ofs += 8; |
490 | } | |
491 | } | |
492 | } | |
493 | ||
494 | #define EMIT_COPY_CCS_DW 5 | |
495 | static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, | |
496 | u64 dst_ofs, bool dst_is_indirect, | |
497 | u64 src_ofs, bool src_is_indirect, | |
498 | u32 size) | |
499 | { | |
30603b5b | 500 | struct xe_device *xe = gt_to_xe(gt); |
dd08ebf6 MB |
501 | u32 *cs = bb->cs + bb->len; |
502 | u32 num_ccs_blks; | |
9cca4902 HPG |
503 | u32 num_pages; |
504 | u32 ccs_copy_size; | |
30603b5b | 505 | u32 mocs; |
dd08ebf6 | 506 | |
9cca4902 HPG |
507 | if (GRAPHICS_VERx100(xe) >= 2000) { |
508 | num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); | |
509 | xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1)); | |
30603b5b | 510 | |
9cca4902 | 511 | ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1); |
30603b5b | 512 | mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index); |
9cca4902 HPG |
513 | |
514 | } else { | |
515 | num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), | |
516 | NUM_CCS_BYTES_PER_BLOCK); | |
517 | xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1)); | |
518 | ||
519 | ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1); | |
30603b5b | 520 | mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index); |
9cca4902 | 521 | } |
30603b5b | 522 | |
dd08ebf6 MB |
523 | *cs++ = XY_CTRL_SURF_COPY_BLT | |
524 | (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | | |
525 | (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | | |
9cca4902 | 526 | ccs_copy_size; |
dd08ebf6 | 527 | *cs++ = lower_32_bits(src_ofs); |
30603b5b | 528 | *cs++ = upper_32_bits(src_ofs) | mocs; |
dd08ebf6 | 529 | *cs++ = lower_32_bits(dst_ofs); |
30603b5b | 530 | *cs++ = upper_32_bits(dst_ofs) | mocs; |
dd08ebf6 MB |
531 | |
532 | bb->len = cs - bb->cs; | |
533 | } | |
534 | ||
535 | #define EMIT_COPY_DW 10 | |
536 | static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, | |
537 | u64 src_ofs, u64 dst_ofs, unsigned int size, | |
3e8e7ee6 | 538 | unsigned int pitch) |
dd08ebf6 | 539 | { |
4bdd8c2e | 540 | struct xe_device *xe = gt_to_xe(gt); |
30603b5b HK |
541 | u32 mocs = 0; |
542 | u32 tile_y = 0; | |
4bdd8c2e | 543 | |
c73acc1e FD |
544 | xe_gt_assert(gt, size / pitch <= S16_MAX); |
545 | xe_gt_assert(gt, pitch / 4 <= S16_MAX); | |
546 | xe_gt_assert(gt, pitch <= U16_MAX); | |
dd08ebf6 | 547 | |
30603b5b HK |
548 | if (GRAPHICS_VER(xe) >= 20) |
549 | mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index); | |
550 | ||
4bdd8c2e | 551 | if (GRAPHICS_VERx100(xe) >= 1250) |
30603b5b HK |
552 | tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4; |
553 | ||
554 | bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); | |
555 | bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs; | |
dd08ebf6 MB |
556 | bb->cs[bb->len++] = 0; |
557 | bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; | |
558 | bb->cs[bb->len++] = lower_32_bits(dst_ofs); | |
559 | bb->cs[bb->len++] = upper_32_bits(dst_ofs); | |
560 | bb->cs[bb->len++] = 0; | |
30603b5b | 561 | bb->cs[bb->len++] = pitch | mocs; |
dd08ebf6 MB |
562 | bb->cs[bb->len++] = lower_32_bits(src_ofs); |
563 | bb->cs[bb->len++] = upper_32_bits(src_ofs); | |
564 | } | |
565 | ||
566 | static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, | |
567 | enum dma_resv_usage usage) | |
568 | { | |
569 | return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); | |
570 | } | |
571 | ||
572 | static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) | |
573 | { | |
574 | return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; | |
575 | } | |
576 | ||
577 | static u32 xe_migrate_ccs_copy(struct xe_migrate *m, | |
578 | struct xe_bb *bb, | |
266c8588 HPG |
579 | u64 src_ofs, bool src_is_indirect, |
580 | u64 dst_ofs, bool dst_is_indirect, u32 dst_size, | |
dd08ebf6 MB |
581 | u64 ccs_ofs, bool copy_ccs) |
582 | { | |
f6929e80 | 583 | struct xe_gt *gt = m->tile->primary_gt; |
dd08ebf6 MB |
584 | u32 flush_flags = 0; |
585 | ||
266c8588 | 586 | if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) { |
dd08ebf6 | 587 | /* |
a2f9f4ff MA |
588 | * If the src is already in vram, then it should already |
589 | * have been cleared by us, or has been populated by the | |
590 | * user. Make sure we copy the CCS aux state as-is. | |
591 | * | |
592 | * Otherwise if the bo doesn't have any CCS metadata attached, | |
593 | * we still need to clear it for security reasons. | |
dd08ebf6 | 594 | */ |
266c8588 | 595 | u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs; |
a2f9f4ff MA |
596 | |
597 | emit_copy_ccs(gt, bb, | |
598 | dst_ofs, true, | |
266c8588 | 599 | ccs_src_ofs, src_is_indirect, dst_size); |
a2f9f4ff | 600 | |
dd08ebf6 MB |
601 | flush_flags = MI_FLUSH_DW_CCS; |
602 | } else if (copy_ccs) { | |
266c8588 | 603 | if (!src_is_indirect) |
dd08ebf6 | 604 | src_ofs = ccs_ofs; |
266c8588 | 605 | else if (!dst_is_indirect) |
dd08ebf6 MB |
606 | dst_ofs = ccs_ofs; |
607 | ||
266c8588 | 608 | xe_gt_assert(gt, src_is_indirect || dst_is_indirect); |
dd08ebf6 | 609 | |
266c8588 HPG |
610 | emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs, |
611 | src_is_indirect, dst_size); | |
612 | if (dst_is_indirect) | |
dd08ebf6 MB |
613 | flush_flags = MI_FLUSH_DW_CCS; |
614 | } | |
615 | ||
616 | return flush_flags; | |
617 | } | |
618 | ||
e9d285ff TH |
619 | /** |
620 | * xe_migrate_copy() - Copy content of TTM resources. | |
621 | * @m: The migration context. | |
3690a01b TH |
622 | * @src_bo: The buffer object @src is currently bound to. |
623 | * @dst_bo: If copying between resources created for the same bo, set this to | |
624 | * the same value as @src_bo. If copying between buffer objects, set it to | |
625 | * the buffer object @dst is currently bound to. | |
e9d285ff TH |
626 | * @src: The source TTM resource. |
627 | * @dst: The dst TTM resource. | |
266c8588 | 628 | * @copy_only_ccs: If true, copy only CCS metadata |
e9d285ff TH |
629 | * |
630 | * Copies the contents of @src to @dst: On flat CCS devices, | |
631 | * the CCS metadata is copied as well if needed, or if not present, | |
632 | * the CCS metadata of @dst is cleared for security reasons. | |
e9d285ff TH |
633 | * |
634 | * Return: Pointer to a dma_fence representing the last copy batch, or | |
635 | * an error pointer on failure. If there is a failure, any copy operation | |
636 | * started by the function call has been synced. | |
637 | */ | |
dd08ebf6 | 638 | struct dma_fence *xe_migrate_copy(struct xe_migrate *m, |
3690a01b TH |
639 | struct xe_bo *src_bo, |
640 | struct xe_bo *dst_bo, | |
dd08ebf6 | 641 | struct ttm_resource *src, |
266c8588 HPG |
642 | struct ttm_resource *dst, |
643 | bool copy_only_ccs) | |
dd08ebf6 | 644 | { |
f6929e80 | 645 | struct xe_gt *gt = m->tile->primary_gt; |
dd08ebf6 MB |
646 | struct xe_device *xe = gt_to_xe(gt); |
647 | struct dma_fence *fence = NULL; | |
3690a01b | 648 | u64 size = src_bo->size; |
dd08ebf6 MB |
649 | struct xe_res_cursor src_it, dst_it, ccs_it; |
650 | u64 src_L0_ofs, dst_L0_ofs; | |
651 | u32 src_L0_pt, dst_L0_pt; | |
652 | u64 src_L0, dst_L0; | |
653 | int pass = 0; | |
654 | int err; | |
266c8588 HPG |
655 | bool src_is_pltt = src->mem_type == XE_PL_TT; |
656 | bool dst_is_pltt = dst->mem_type == XE_PL_TT; | |
dd08ebf6 MB |
657 | bool src_is_vram = mem_type_is_vram(src->mem_type); |
658 | bool dst_is_vram = mem_type_is_vram(dst->mem_type); | |
3690a01b TH |
659 | bool copy_ccs = xe_device_has_flat_ccs(xe) && |
660 | xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); | |
dd08ebf6 MB |
661 | bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); |
662 | ||
3690a01b TH |
663 | /* Copying CCS between two different BOs is not supported yet. */ |
664 | if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) | |
665 | return ERR_PTR(-EINVAL); | |
666 | ||
667 | if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) | |
668 | return ERR_PTR(-EINVAL); | |
669 | ||
dd08ebf6 | 670 | if (!src_is_vram) |
a21fe5ee | 671 | xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); |
dd08ebf6 | 672 | else |
e89b384c | 673 | xe_res_first(src, 0, size, &src_it); |
dd08ebf6 | 674 | if (!dst_is_vram) |
a21fe5ee | 675 | xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it); |
dd08ebf6 | 676 | else |
e89b384c | 677 | xe_res_first(dst, 0, size, &dst_it); |
dd08ebf6 MB |
678 | |
679 | if (copy_system_ccs) | |
a21fe5ee | 680 | xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), |
dd08ebf6 MB |
681 | PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), |
682 | &ccs_it); | |
683 | ||
684 | while (size) { | |
685 | u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ | |
686 | struct xe_sched_job *job; | |
687 | struct xe_bb *bb; | |
688 | u32 flush_flags; | |
689 | u32 update_idx; | |
690 | u64 ccs_ofs, ccs_size; | |
691 | u32 ccs_pt; | |
09427526 | 692 | |
5a92da34 | 693 | bool usm = xe->info.has_usm; |
09427526 | 694 | u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; |
dd08ebf6 | 695 | |
09427526 HPG |
696 | src_L0 = xe_migrate_res_sizes(xe, &src_it); |
697 | dst_L0 = xe_migrate_res_sizes(xe, &dst_it); | |
dd08ebf6 MB |
698 | |
699 | drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", | |
700 | pass++, src_L0, dst_L0); | |
701 | ||
702 | src_L0 = min(src_L0, dst_L0); | |
703 | ||
c33a7219 | 704 | batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0, |
dd08ebf6 | 705 | &src_L0_ofs, &src_L0_pt, 0, 0, |
09427526 | 706 | avail_pts); |
dd08ebf6 | 707 | |
c33a7219 | 708 | batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0, |
dd08ebf6 | 709 | &dst_L0_ofs, &dst_L0_pt, 0, |
09427526 | 710 | avail_pts, avail_pts); |
dd08ebf6 MB |
711 | |
712 | if (copy_system_ccs) { | |
713 | ccs_size = xe_device_ccs_bytes(xe, src_L0); | |
c33a7219 | 714 | batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size, |
dd08ebf6 | 715 | &ccs_ofs, &ccs_pt, 0, |
09427526 HPG |
716 | 2 * avail_pts, |
717 | avail_pts); | |
dd08ebf6 MB |
718 | } |
719 | ||
720 | /* Add copy commands size here */ | |
266c8588 HPG |
721 | batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + |
722 | ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0)); | |
dd08ebf6 MB |
723 | |
724 | bb = xe_bb_new(gt, batch_size, usm); | |
725 | if (IS_ERR(bb)) { | |
726 | err = PTR_ERR(bb); | |
727 | goto err_sync; | |
728 | } | |
729 | ||
dd08ebf6 | 730 | if (!src_is_vram) |
65ef8dba | 731 | emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, |
3690a01b | 732 | src_bo); |
dd08ebf6 MB |
733 | else |
734 | xe_res_next(&src_it, src_L0); | |
735 | ||
736 | if (!dst_is_vram) | |
65ef8dba | 737 | emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, |
3690a01b | 738 | dst_bo); |
dd08ebf6 MB |
739 | else |
740 | xe_res_next(&dst_it, src_L0); | |
741 | ||
742 | if (copy_system_ccs) | |
65ef8dba | 743 | emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); |
dd08ebf6 MB |
744 | |
745 | bb->cs[bb->len++] = MI_BATCH_BUFFER_END; | |
746 | update_idx = bb->len; | |
747 | ||
266c8588 HPG |
748 | if (!copy_only_ccs) |
749 | emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); | |
750 | ||
751 | flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, | |
752 | IS_DGFX(xe) ? src_is_vram : src_is_pltt, | |
753 | dst_L0_ofs, | |
754 | IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, | |
dd08ebf6 MB |
755 | src_L0, ccs_ofs, copy_ccs); |
756 | ||
757 | mutex_lock(&m->job_mutex); | |
9b9529ce | 758 | job = xe_bb_create_migration_job(m->q, bb, |
dd08ebf6 MB |
759 | xe_migrate_batch_base(m, usm), |
760 | update_idx); | |
761 | if (IS_ERR(job)) { | |
762 | err = PTR_ERR(job); | |
763 | goto err; | |
764 | } | |
765 | ||
766 | xe_sched_job_add_migrate_flush(job, flush_flags); | |
767 | if (!fence) { | |
3690a01b | 768 | err = job_add_deps(job, src_bo->ttm.base.resv, |
dd08ebf6 | 769 | DMA_RESV_USAGE_BOOKKEEP); |
3690a01b TH |
770 | if (!err && src_bo != dst_bo) |
771 | err = job_add_deps(job, dst_bo->ttm.base.resv, | |
772 | DMA_RESV_USAGE_BOOKKEEP); | |
dd08ebf6 MB |
773 | if (err) |
774 | goto err_job; | |
775 | } | |
776 | ||
777 | xe_sched_job_arm(job); | |
778 | dma_fence_put(fence); | |
779 | fence = dma_fence_get(&job->drm.s_fence->finished); | |
780 | xe_sched_job_push(job); | |
781 | ||
782 | dma_fence_put(m->fence); | |
783 | m->fence = dma_fence_get(fence); | |
784 | ||
785 | mutex_unlock(&m->job_mutex); | |
786 | ||
787 | xe_bb_free(bb, fence); | |
788 | size -= src_L0; | |
789 | continue; | |
790 | ||
791 | err_job: | |
792 | xe_sched_job_put(job); | |
793 | err: | |
794 | mutex_unlock(&m->job_mutex); | |
795 | xe_bb_free(bb, NULL); | |
796 | ||
797 | err_sync: | |
e9d285ff | 798 | /* Sync partial copy if any. FIXME: under job_mutex? */ |
dd08ebf6 MB |
799 | if (fence) { |
800 | dma_fence_wait(fence, false); | |
801 | dma_fence_put(fence); | |
802 | } | |
803 | ||
804 | return ERR_PTR(err); | |
805 | } | |
806 | ||
807 | return fence; | |
808 | } | |
809 | ||
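A minimal, hypothetical caller sketch (not the driver's actual move path): it assumes the bo is already locked and both resources populated, and simply waits for the returned fence before tearing anything down.

```c
/* Hypothetical helper, for illustration only. */
static int example_blit_bo(struct xe_migrate *m, struct xe_bo *bo,
			   struct ttm_resource *old_res,
			   struct ttm_resource *new_res)
{
	struct dma_fence *fence;

	/* Copy the main surface and, where applicable, CCS metadata. */
	fence = xe_migrate_copy(m, bo, bo, old_res, new_res, false);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Block until the last copy batch has completed. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}
```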
11a2407e BV |
810 | static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, |
811 | u32 size, u32 pitch) | |
dd08ebf6 | 812 | { |
30603b5b | 813 | struct xe_device *xe = gt_to_xe(gt); |
11a2407e | 814 | u32 *cs = bb->cs + bb->len; |
11a2407e BV |
815 | u32 len = PVC_MEM_SET_CMD_LEN_DW; |
816 | ||
c690f0e6 | 817 | *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2); |
11a2407e BV |
818 | *cs++ = pitch - 1; |
819 | *cs++ = (size / pitch) - 1; | |
820 | *cs++ = pitch - 1; | |
821 | *cs++ = lower_32_bits(src_ofs); | |
822 | *cs++ = upper_32_bits(src_ofs); | |
30603b5b HK |
823 | if (GRAPHICS_VERx100(xe) >= 2000) |
824 | *cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); | |
825 | else | |
826 | *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); | |
11a2407e | 827 | |
c73acc1e | 828 | xe_gt_assert(gt, cs - bb->cs == len + bb->len); |
11a2407e BV |
829 | |
830 | bb->len += len; | |
831 | } | |
832 | ||
833 | static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, | |
834 | u64 src_ofs, u32 size, u32 pitch, bool is_vram) | |
835 | { | |
836 | struct xe_device *xe = gt_to_xe(gt); | |
dd08ebf6 MB |
837 | u32 *cs = bb->cs + bb->len; |
838 | u32 len = XY_FAST_COLOR_BLT_DW; | |
dd08ebf6 | 839 | |
11a2407e | 840 | if (GRAPHICS_VERx100(xe) < 1250) |
dd08ebf6 MB |
841 | len = 11; |
842 | ||
843 | *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | | |
844 | (len - 2); | |
30603b5b HK |
845 | if (GRAPHICS_VERx100(xe) >= 2000) |
846 | *cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) | | |
847 | (pitch - 1); | |
848 | else | |
849 | *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) | | |
850 | (pitch - 1); | |
dd08ebf6 MB |
851 | *cs++ = 0; |
852 | *cs++ = (size / pitch) << 16 | pitch / 4; | |
853 | *cs++ = lower_32_bits(src_ofs); | |
854 | *cs++ = upper_32_bits(src_ofs); | |
855 | *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; | |
11a2407e | 856 | *cs++ = 0; |
dd08ebf6 MB |
857 | *cs++ = 0; |
858 | *cs++ = 0; | |
859 | *cs++ = 0; | |
860 | ||
861 | if (len > 11) { | |
862 | *cs++ = 0; | |
863 | *cs++ = 0; | |
864 | *cs++ = 0; | |
865 | *cs++ = 0; | |
866 | *cs++ = 0; | |
867 | } | |
868 | ||
c73acc1e | 869 | xe_gt_assert(gt, cs - bb->cs == len + bb->len); |
11a2407e | 870 | |
dd08ebf6 | 871 | bb->len += len; |
11a2407e BV |
872 | } |
873 | ||
1951dad5 | 874 | static bool has_service_copy_support(struct xe_gt *gt) |
11a2407e | 875 | { |
1951dad5 MR |
876 | /* |
877 | * What we care about is whether the architecture was designed with | |
878 | * service copy functionality (specifically the new MEM_SET / MEM_COPY | |
879 | * instructions) so check the architectural engine list rather than the | |
880 | * actual list since these instructions are usable on BCS0 even if | |
881 | * all of the actual service copy engines (BCS1-BCS8) have been fused | |
882 | * off. | |
883 | */ | |
884 | return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8, | |
885 | XE_HW_ENGINE_BCS1); | |
886 | } | |
887 | ||
888 | static u32 emit_clear_cmd_len(struct xe_gt *gt) | |
889 | { | |
890 | if (has_service_copy_support(gt)) | |
11a2407e BV |
891 | return PVC_MEM_SET_CMD_LEN_DW; |
892 | else | |
893 | return XY_FAST_COLOR_BLT_DW; | |
894 | } | |
895 | ||
1951dad5 MR |
896 | static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, |
897 | u32 size, u32 pitch, bool is_vram) | |
11a2407e | 898 | { |
1951dad5 | 899 | if (has_service_copy_support(gt)) |
11a2407e | 900 | emit_clear_link_copy(gt, bb, src_ofs, size, pitch); |
1951dad5 | 901 | else |
11a2407e BV |
902 | emit_clear_main_copy(gt, bb, src_ofs, size, pitch, |
903 | is_vram); | |
dd08ebf6 MB |
904 | } |
905 | ||
e9d285ff TH |
906 | /** |
907 | * xe_migrate_clear() - Clear content of a TTM resource. |
908 | * @m: The migration context. | |
909 | * @bo: The buffer object @dst is currently bound to. | |
910 | * @dst: The dst TTM resource to be cleared. | |
e9d285ff | 911 | * |
11a2407e BV |
912 | * Clear the contents of @dst to zero. On flat CCS devices, |
913 | * the CCS metadata is cleared to zero as well on VRAM destinations. | |
e9d285ff TH |
914 | * TODO: Eliminate the @bo argument. |
915 | * | |
916 | * Return: Pointer to a dma_fence representing the last clear batch, or | |
917 | * an error pointer on failure. If there is a failure, any clear operation | |
918 | * started by the function call has been synced. | |
919 | */ | |
dd08ebf6 MB |
920 | struct dma_fence *xe_migrate_clear(struct xe_migrate *m, |
921 | struct xe_bo *bo, | |
11a2407e | 922 | struct ttm_resource *dst) |
dd08ebf6 MB |
923 | { |
924 | bool clear_vram = mem_type_is_vram(dst->mem_type); | |
f6929e80 | 925 | struct xe_gt *gt = m->tile->primary_gt; |
dd08ebf6 | 926 | struct xe_device *xe = gt_to_xe(gt); |
266c8588 | 927 | bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; |
dd08ebf6 MB |
928 | struct dma_fence *fence = NULL; |
929 | u64 size = bo->size; | |
930 | struct xe_res_cursor src_it; | |
931 | struct ttm_resource *src = dst; | |
932 | int err; | |
933 | int pass = 0; | |
934 | ||
935 | if (!clear_vram) | |
a21fe5ee | 936 | xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); |
dd08ebf6 MB |
937 | else |
938 | xe_res_first(src, 0, bo->size, &src_it); | |
939 | ||
940 | while (size) { | |
941 | u64 clear_L0_ofs; | |
942 | u32 clear_L0_pt; | |
943 | u32 flush_flags = 0; | |
944 | u64 clear_L0; | |
945 | struct xe_sched_job *job; | |
946 | struct xe_bb *bb; | |
947 | u32 batch_size, update_idx; | |
09427526 | 948 | |
5a92da34 | 949 | bool usm = xe->info.has_usm; |
09427526 HPG |
950 | u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; |
951 | ||
952 | clear_L0 = xe_migrate_res_sizes(xe, &src_it); | |
dd08ebf6 | 953 | |
dd08ebf6 MB |
954 | drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); |
955 | ||
956 | /* Calculate final sizes and batch size.. */ | |
957 | batch_size = 2 + | |
c33a7219 | 958 | pte_update_size(m, clear_vram, src, &src_it, |
dd08ebf6 | 959 | &clear_L0, &clear_L0_ofs, &clear_L0_pt, |
266c8588 | 960 | clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, |
09427526 | 961 | avail_pts); |
266c8588 HPG |
962 | |
963 | if (xe_device_has_flat_ccs(xe)) | |
dd08ebf6 MB |
964 | batch_size += EMIT_COPY_CCS_DW; |
965 | ||
966 | /* Clear commands */ | |
967 | ||
968 | if (WARN_ON_ONCE(!clear_L0)) | |
969 | break; | |
970 | ||
971 | bb = xe_bb_new(gt, batch_size, usm); | |
972 | if (IS_ERR(bb)) { | |
973 | err = PTR_ERR(bb); | |
974 | goto err_sync; | |
975 | } | |
976 | ||
977 | size -= clear_L0; | |
dd08ebf6 MB |
978 | /* Preemption is enabled again by the ring ops. */ |
979 | if (!clear_vram) { | |
65ef8dba | 980 | emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, |
dd08ebf6 MB |
981 | bo); |
982 | } else { | |
983 | xe_res_next(&src_it, clear_L0); | |
984 | } | |
985 | bb->cs[bb->len++] = MI_BATCH_BUFFER_END; | |
986 | update_idx = bb->len; | |
987 | ||
266c8588 HPG |
988 | if (!clear_system_ccs) |
989 | emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); | |
990 | ||
991 | if (xe_device_has_flat_ccs(xe)) { | |
dd08ebf6 | 992 | emit_copy_ccs(gt, bb, clear_L0_ofs, true, |
9116eabb | 993 | m->cleared_mem_ofs, false, clear_L0); |
dd08ebf6 MB |
994 | flush_flags = MI_FLUSH_DW_CCS; |
995 | } | |
996 | ||
997 | mutex_lock(&m->job_mutex); | |
9b9529ce | 998 | job = xe_bb_create_migration_job(m->q, bb, |
dd08ebf6 MB |
999 | xe_migrate_batch_base(m, usm), |
1000 | update_idx); | |
1001 | if (IS_ERR(job)) { | |
1002 | err = PTR_ERR(job); | |
1003 | goto err; | |
1004 | } | |
1005 | ||
1006 | xe_sched_job_add_migrate_flush(job, flush_flags); | |
a667cf56 MA |
1007 | if (!fence) { |
1008 | /* | |
1009 | * There can't be anything userspace related at this | |
1010 | * point, so we just need to respect any potential move | |
1011 | * fences, which are always tracked as | |
1012 | * DMA_RESV_USAGE_KERNEL. | |
1013 | */ | |
1014 | err = job_add_deps(job, bo->ttm.base.resv, | |
1015 | DMA_RESV_USAGE_KERNEL); | |
1016 | if (err) | |
1017 | goto err_job; | |
1018 | } | |
dd08ebf6 MB |
1019 | |
1020 | xe_sched_job_arm(job); | |
1021 | dma_fence_put(fence); | |
1022 | fence = dma_fence_get(&job->drm.s_fence->finished); | |
1023 | xe_sched_job_push(job); | |
1024 | ||
1025 | dma_fence_put(m->fence); | |
1026 | m->fence = dma_fence_get(fence); | |
1027 | ||
1028 | mutex_unlock(&m->job_mutex); | |
1029 | ||
1030 | xe_bb_free(bb, fence); | |
1031 | continue; | |
1032 | ||
a667cf56 MA |
1033 | err_job: |
1034 | xe_sched_job_put(job); | |
dd08ebf6 MB |
1035 | err: |
1036 | mutex_unlock(&m->job_mutex); | |
1037 | xe_bb_free(bb, NULL); | |
1038 | err_sync: | |
e9d285ff | 1039 | /* Sync partial copies if any. FIXME: job_mutex? */ |
dd08ebf6 MB |
1040 | if (fence) { |
1041 | dma_fence_wait(m->fence, false); | |
1042 | dma_fence_put(fence); | |
1043 | } | |
1044 | ||
1045 | return ERR_PTR(err); | |
1046 | } | |
1047 | ||
266c8588 HPG |
1048 | if (clear_system_ccs) |
1049 | bo->ccs_cleared = true; | |
1050 | ||
dd08ebf6 MB |
1051 | return fence; |
1052 | } | |
1053 | ||
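In the same spirit, a hypothetical sketch of clearing a newly allocated bo with xe_migrate_clear(); unlike the copy example above it hands the fence back to the caller so the clear can stay asynchronous.

```c
/* Hypothetical helper, for illustration only. */
static struct dma_fence *example_clear_bo(struct xe_migrate *m,
					  struct xe_bo *bo)
{
	/*
	 * Zero the bo's current (VRAM or system) resource; on flat-CCS
	 * parts the CCS state is cleared as well.
	 */
	return xe_migrate_clear(m, bo, bo->ttm.resource);
}
```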
876611c2 | 1054 | static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, |
dd08ebf6 MB |
1055 | const struct xe_vm_pgtable_update *update, |
1056 | struct xe_migrate_pt_update *pt_update) | |
1057 | { | |
1058 | const struct xe_migrate_pt_update_ops *ops = pt_update->ops; | |
1059 | u32 chunk; | |
1060 | u32 ofs = update->ofs, size = update->qwords; | |
1061 | ||
1062 | /* | |
1063 | * If we have 512 entries (max), we would populate it ourselves, | |
1064 | * and update the PDE above it to the new pointer. | |
1065 | * The only time this can happen is if we have to update the top |
1066 | * PDE. This requires a BO that is almost vm->size big. | |
1067 | * | |
1068 | * This shouldn't be possible in practice.. might change when 16K | |
c73acc1e | 1069 | * pages are used. Hence the assert. |
dd08ebf6 | 1070 | */ |
c73acc1e | 1071 | xe_tile_assert(tile, update->qwords <= 0x1ff); |
d9e85dd5 DK |
1072 | if (!ppgtt_ofs) |
1073 | ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), | |
1074 | xe_bo_addr(update->pt_bo, 0, | |
937b4be7 | 1075 | XE_PAGE_SIZE)); |
dd08ebf6 MB |
1076 | |
1077 | do { | |
1078 | u64 addr = ppgtt_ofs + ofs * 8; | |
3e8e7ee6 | 1079 | |
dd08ebf6 MB |
1080 | chunk = min(update->qwords, 0x1ffU); |
1081 | ||
1082 | /* Ensure populatefn can do memset64 by aligning bb->cs */ | |
1083 | if (!(bb->len & 1)) | |
1084 | bb->cs[bb->len++] = MI_NOOP; | |
1085 | ||
14a1e6a4 | 1086 | bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); |
dd08ebf6 MB |
1087 | bb->cs[bb->len++] = lower_32_bits(addr); |
1088 | bb->cs[bb->len++] = upper_32_bits(addr); | |
876611c2 | 1089 | ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk, |
dd08ebf6 MB |
1090 | update); |
1091 | ||
1092 | bb->len += chunk * 2; | |
1093 | ofs += chunk; | |
1094 | size -= chunk; | |
1095 | } while (size); | |
1096 | } | |
1097 | ||
1098 | struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) | |
1099 | { | |
9b9529ce | 1100 | return xe_vm_get(m->q->vm); |
dd08ebf6 MB |
1101 | } |
1102 | ||
7cba3396 TH |
1103 | #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) |
1104 | struct migrate_test_params { | |
1105 | struct xe_test_priv base; | |
1106 | bool force_gpu; | |
1107 | }; | |
1108 | ||
1109 | #define to_migrate_test_params(_priv) \ | |
1110 | container_of(_priv, struct migrate_test_params, base) | |
1111 | #endif | |
1112 | ||
dd08ebf6 MB |
1113 | static struct dma_fence * |
1114 | xe_migrate_update_pgtables_cpu(struct xe_migrate *m, | |
1115 | struct xe_vm *vm, struct xe_bo *bo, | |
1116 | const struct xe_vm_pgtable_update *updates, | |
1117 | u32 num_updates, bool wait_vm, | |
1118 | struct xe_migrate_pt_update *pt_update) | |
1119 | { | |
7cba3396 TH |
1120 | XE_TEST_DECLARE(struct migrate_test_params *test = |
1121 | to_migrate_test_params | |
1122 | (xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));) | |
dd08ebf6 MB |
1123 | const struct xe_migrate_pt_update_ops *ops = pt_update->ops; |
1124 | struct dma_fence *fence; | |
1125 | int err; | |
1126 | u32 i; | |
1127 | ||
7cba3396 TH |
1128 | if (XE_TEST_ONLY(test && test->force_gpu)) |
1129 | return ERR_PTR(-ETIME); | |
1130 | ||
fc1cc680 TH |
1131 | if (bo && !dma_resv_test_signaled(bo->ttm.base.resv, |
1132 | DMA_RESV_USAGE_KERNEL)) | |
1133 | return ERR_PTR(-ETIME); | |
1134 | ||
b06d47be | 1135 | if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm), |
fc1cc680 TH |
1136 | DMA_RESV_USAGE_BOOKKEEP)) |
1137 | return ERR_PTR(-ETIME); | |
dd08ebf6 MB |
1138 | |
1139 | if (ops->pre_commit) { | |
fd84041d | 1140 | pt_update->job = NULL; |
dd08ebf6 MB |
1141 | err = ops->pre_commit(pt_update); |
1142 | if (err) | |
1143 | return ERR_PTR(err); | |
1144 | } | |
1145 | for (i = 0; i < num_updates; i++) { | |
1146 | const struct xe_vm_pgtable_update *update = &updates[i]; | |
1147 | ||
08dea767 | 1148 | ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL, |
dd08ebf6 MB |
1149 | update->ofs, update->qwords, update); |
1150 | } | |
1151 | ||
fc1cc680 TH |
1152 | if (vm) { |
1153 | trace_xe_vm_cpu_bind(vm); | |
1154 | xe_device_wmb(vm->xe); | |
1155 | } | |
dd08ebf6 MB |
1156 | |
1157 | fence = dma_fence_get_stub(); | |
1158 | ||
1159 | return fence; | |
1160 | } | |
1161 | ||
eb9702ad MB |
1162 | static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q, |
1163 | struct xe_sync_entry *syncs, u32 num_syncs) | |
dd08ebf6 | 1164 | { |
eb9702ad | 1165 | struct dma_fence *fence; |
dd08ebf6 MB |
1166 | int i; |
1167 | ||
1168 | for (i = 0; i < num_syncs; i++) { | |
eb9702ad | 1169 | fence = syncs[i].fence; |
dd08ebf6 MB |
1170 | |
1171 | if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, | |
1172 | &fence->flags)) | |
1173 | return false; | |
1174 | } | |
eb9702ad MB |
1175 | if (q) { |
1176 | fence = xe_exec_queue_last_fence_get(q, vm); | |
1177 | if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) | |
1178 | return false; | |
1179 | } | |
dd08ebf6 MB |
1180 | |
1181 | return true; | |
1182 | } | |
1183 | ||
e9d285ff TH |
1184 | /** |
1185 | * xe_migrate_update_pgtables() - Pipelined page-table update | |
1186 | * @m: The migrate context. | |
1187 | * @vm: The vm we'll be updating. | |
1188 | * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr. | |
9b9529ce | 1189 | * @q: The exec queue to be used for the update or NULL if the default |
e9d285ff TH |
1190 | * migration engine is to be used. |
1191 | * @updates: An array of update descriptors. | |
1192 | * @num_updates: Number of descriptors in @updates. | |
1193 | * @syncs: Array of xe_sync_entry to await before updating. Note that waits | |
1194 | * will block the engine timeline. | |
1195 | * @num_syncs: Number of entries in @syncs. | |
1196 | * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains | |
1197 | * pointers to callback functions and, if subclassed, private arguments to | |
1198 | * those. | |
1199 | * | |
1200 | * Perform a pipelined page-table update. The update descriptors are typically | |
1201 | * built under the same lock critical section as a call to this function. If | |
1202 | * using the default engine for the updates, they will be performed in the | |
1203 | * order they grab the job_mutex. If different engines are used, external | |
1204 | * synchronization is needed for overlapping updates to maintain page-table | |
1205 | * consistency. Note that the meaning of "overlapping" is that the updates |
1206 | * touch the same page-table, which might be a higher-level page-directory. | |
1207 | * If no pipelining is needed, then updates may be performed by the cpu. | |
1208 | * | |
1209 | * Return: A dma_fence that, when signaled, indicates the update completion. | |
1210 | */ | |
dd08ebf6 MB |
1211 | struct dma_fence * |
1212 | xe_migrate_update_pgtables(struct xe_migrate *m, | |
1213 | struct xe_vm *vm, | |
1214 | struct xe_bo *bo, | |
9b9529ce | 1215 | struct xe_exec_queue *q, |
dd08ebf6 MB |
1216 | const struct xe_vm_pgtable_update *updates, |
1217 | u32 num_updates, | |
1218 | struct xe_sync_entry *syncs, u32 num_syncs, | |
1219 | struct xe_migrate_pt_update *pt_update) | |
1220 | { | |
1221 | const struct xe_migrate_pt_update_ops *ops = pt_update->ops; | |
08dea767 | 1222 | struct xe_tile *tile = m->tile; |
f6929e80 | 1223 | struct xe_gt *gt = tile->primary_gt; |
08dea767 | 1224 | struct xe_device *xe = tile_to_xe(tile); |
dd08ebf6 MB |
1225 | struct xe_sched_job *job; |
1226 | struct dma_fence *fence; | |
1227 | struct drm_suballoc *sa_bo = NULL; | |
1228 | struct xe_vma *vma = pt_update->vma; | |
1229 | struct xe_bb *bb; | |
1230 | u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; | |
1231 | u64 addr; | |
1232 | int err = 0; | |
5a92da34 | 1233 | bool usm = !q && xe->info.has_usm; |
b06d47be MB |
1234 | bool first_munmap_rebind = vma && |
1235 | vma->gpuva.flags & XE_VMA_FIRST_REBIND; | |
9b9529ce | 1236 | struct xe_exec_queue *q_override = !q ? m->q : q; |
e814389f | 1237 | u16 pat_index = xe->pat.idx[XE_CACHE_WB]; |
dd08ebf6 MB |
1238 | |
1239 | /* Use the CPU if no in syncs and engine is idle */ | |
eb9702ad | 1240 | if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { |
dd08ebf6 MB |
1241 | fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, |
1242 | num_updates, | |
1243 | first_munmap_rebind, | |
1244 | pt_update); | |
1245 | if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN)) | |
1246 | return fence; | |
1247 | } | |
1248 | ||
1249 | /* fixed + PTE entries */ | |
1250 | if (IS_DGFX(xe)) | |
1251 | batch_size = 2; | |
1252 | else | |
1253 | batch_size = 6 + num_updates * 2; | |
1254 | ||
1255 | for (i = 0; i < num_updates; i++) { | |
1256 | u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); | |
1257 | ||
1258 | /* align noop + MI_STORE_DATA_IMM cmd prefix */ | |
1259 | batch_size += 4 * num_cmds + updates[i].qwords * 2; | |
1260 | } | |
1261 | ||
1262 | /* | |
1263 | * XXX: Create temp bo to copy from, if batch_size becomes too big? | |
1264 | * | |
1265 | * Worst case: Sum(2 * (each lower level page size) + (top level page size)) | |
1266 | * Should be reasonably bound.. | |
1267 | */ | |
c73acc1e | 1268 | xe_tile_assert(tile, batch_size < SZ_128K); |
dd08ebf6 | 1269 | |
5a92da34 | 1270 | bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm); |
dd08ebf6 MB |
1271 | if (IS_ERR(bb)) |
1272 | return ERR_CAST(bb); | |
1273 | ||
1274 | /* For sysmem PTE's, need to map them in our hole.. */ | |
1275 | if (!IS_DGFX(xe)) { | |
1276 | ppgtt_ofs = NUM_KERNEL_PDE - 1; | |
9b9529ce | 1277 | if (q) { |
c73acc1e | 1278 | xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT); |
dd08ebf6 MB |
1279 | |
1280 | sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, | |
1281 | GFP_KERNEL, true, 0); | |
1282 | if (IS_ERR(sa_bo)) { | |
1283 | err = PTR_ERR(sa_bo); | |
1284 | goto err; | |
1285 | } | |
1286 | ||
1287 | ppgtt_ofs = NUM_KERNEL_PDE + | |
1288 | (drm_suballoc_soffset(sa_bo) / | |
1289 | NUM_VMUSA_UNIT_PER_PAGE); | |
1290 | page_ofs = (drm_suballoc_soffset(sa_bo) % | |
1291 | NUM_VMUSA_UNIT_PER_PAGE) * | |
1292 | VM_SA_UPDATE_UNIT_SIZE; | |
1293 | } | |
1294 | ||
dd08ebf6 | 1295 | /* Map our PT's to gtt */ |
14a1e6a4 | 1296 | bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates); |
58e19acf | 1297 | bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; |
dd08ebf6 MB |
1298 | bb->cs[bb->len++] = 0; /* upper_32_bits */ |
1299 | ||
1300 | for (i = 0; i < num_updates; i++) { | |
1301 | struct xe_bo *pt_bo = updates[i].pt_bo; | |
1302 | ||
c73acc1e | 1303 | xe_tile_assert(tile, pt_bo->size == SZ_4K); |
dd08ebf6 | 1304 | |
e814389f | 1305 | addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0); |
dd08ebf6 MB |
1306 | bb->cs[bb->len++] = lower_32_bits(addr); |
1307 | bb->cs[bb->len++] = upper_32_bits(addr); | |
1308 | } | |
1309 | ||
1310 | bb->cs[bb->len++] = MI_BATCH_BUFFER_END; | |
1311 | update_idx = bb->len; | |
1312 | ||
1313 | addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + | |
58e19acf | 1314 | (page_ofs / sizeof(u64)) * XE_PAGE_SIZE; |
dd08ebf6 | 1315 | for (i = 0; i < num_updates; i++) |
876611c2 | 1316 | write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE, |
dd08ebf6 MB |
1317 | &updates[i], pt_update); |
1318 | } else { | |
1319 | /* phys pages, no preamble required */ | |
1320 | bb->cs[bb->len++] = MI_BATCH_BUFFER_END; | |
1321 | update_idx = bb->len; | |
1322 | ||
dd08ebf6 | 1323 | for (i = 0; i < num_updates; i++) |
876611c2 | 1324 | write_pgtable(tile, bb, 0, &updates[i], pt_update); |
dd08ebf6 MB |
1325 | } |
1326 | ||
9b9529ce | 1327 | if (!q) |
dd08ebf6 MB |
1328 | mutex_lock(&m->job_mutex); |
1329 | ||
9b9529ce | 1330 | job = xe_bb_create_migration_job(q ?: m->q, bb, |
dd08ebf6 MB |
1331 | xe_migrate_batch_base(m, usm), |
1332 | update_idx); | |
1333 | if (IS_ERR(job)) { | |
1334 | err = PTR_ERR(job); | |
1335 | goto err_bb; | |
1336 | } | |
1337 | ||
1338 | /* Wait on BO move */ | |
1339 | if (bo) { | |
1340 | err = job_add_deps(job, bo->ttm.base.resv, | |
1341 | DMA_RESV_USAGE_KERNEL); | |
1342 | if (err) | |
1343 | goto err_job; | |
1344 | } | |
1345 | ||
1346 | /* | |
1347 | * Munmap style VM unbind, need to wait for all jobs to be complete / | |
1348 | * trigger preempts before moving forward | |
1349 | */ | |
1350 | if (first_munmap_rebind) { | |
b06d47be | 1351 | err = job_add_deps(job, xe_vm_resv(vm), |
dd08ebf6 MB |
1352 | DMA_RESV_USAGE_BOOKKEEP); |
1353 | if (err) | |
1354 | goto err_job; | |
1355 | } | |
1356 | ||
eb9702ad | 1357 | err = xe_sched_job_last_fence_add_dep(job, vm); |
dd08ebf6 MB |
1358 | for (i = 0; !err && i < num_syncs; i++) |
1359 | err = xe_sync_entry_add_deps(&syncs[i], job); | |
1360 | ||
1361 | if (err) | |
1362 | goto err_job; | |
1363 | ||
1364 | if (ops->pre_commit) { | |
fd84041d | 1365 | pt_update->job = job; |
dd08ebf6 MB |
1366 | err = ops->pre_commit(pt_update); |
1367 | if (err) | |
1368 | goto err_job; | |
1369 | } | |
1370 | xe_sched_job_arm(job); | |
1371 | fence = dma_fence_get(&job->drm.s_fence->finished); | |
1372 | xe_sched_job_push(job); | |
1373 | ||
9b9529ce | 1374 | if (!q) |
dd08ebf6 MB |
1375 | mutex_unlock(&m->job_mutex); |
1376 | ||
1377 | xe_bb_free(bb, fence); | |
1378 | drm_suballoc_free(sa_bo, fence); | |
1379 | ||
1380 | return fence; | |
1381 | ||
1382 | err_job: | |
1383 | xe_sched_job_put(job); | |
1384 | err_bb: | |
9b9529ce | 1385 | if (!q) |
dd08ebf6 MB |
1386 | mutex_unlock(&m->job_mutex); |
1387 | xe_bb_free(bb, NULL); | |
1388 | err: | |
1389 | drm_suballoc_free(sa_bo, NULL); | |
1390 | return ERR_PTR(err); | |
1391 | } | |
1392 | ||
e9d285ff TH |
1393 | /** |
1394 | * xe_migrate_wait() - Complete all operations using the xe_migrate context | |
1395 | * @m: Migrate context to wait for. | |
1396 | * | |
1397 | * Waits until the GPU no longer uses the migrate context's default engine | |
1398 | * or its page-table objects. FIXME: What about separate page-table update | |
1399 | * engines? | |
1400 | */ | |
dd08ebf6 MB |
1401 | void xe_migrate_wait(struct xe_migrate *m) |
1402 | { | |
1403 | if (m->fence) | |
1404 | dma_fence_wait(m->fence, false); | |
1405 | } | |
1406 | ||
1407 | #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) | |
1408 | #include "tests/xe_migrate.c" | |
1409 | #endif |