// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "xe_migrate.h"

#include <linux/bitfield.h>
#include <linux/sizes.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>

#include "generated/xe_wa_oob.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
#include "tests/xe_test.h"
#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_exec_queue.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * struct xe_migrate - migrate context.
 */
struct xe_migrate {
	/** @q: Default exec queue used for migration */
	struct xe_exec_queue *q;
	/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
	struct xe_tile *tile;
	/** @job_mutex: Timeline mutex for @q. */
	struct mutex job_mutex;
	/** @pt_bo: Page-table buffer object. */
	struct xe_bo *pt_bo;
	/** @batch_base_ofs: VM offset of the migration batch buffer */
	u64 batch_base_ofs;
	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
	u64 usm_batch_base_ofs;
	/** @cleared_mem_ofs: VM offset of the pre-cleared, NULL-mapped region. */
	u64 cleared_mem_ofs;
	/**
	 * @fence: dma-fence representing the last migration job batch.
	 * Protected by @job_mutex.
	 */
	struct dma_fence *fence;
	/**
	 * @vm_update_sa: For integrated, used to suballocate page-tables
	 * out of the pt_bo.
	 */
	struct drm_suballoc_manager vm_update_sa;
	/** @min_chunk_size: For dgfx, the minimum chunk size */
	u64 min_chunk_size;
};

#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
#define NUM_KERNEL_PDE 17
#define NUM_PT_SLOTS 32
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M

/**
 * xe_tile_migrate_engine() - Get this tile's migrate engine.
 * @tile: The tile.
 *
 * Returns the default migrate engine of this tile.
 * TODO: Perhaps this function is slightly misplaced, and even unneeded?
 *
 * Return: The default migrate engine
 */
struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
{
	return tile->migrate->q;
}

static void xe_migrate_fini(struct drm_device *dev, void *arg)
{
	struct xe_migrate *m = arg;

	xe_vm_lock(m->q->vm, false);
	xe_bo_unpin(m->pt_bo);
	xe_vm_unlock(m->q->vm);

	dma_fence_put(m->fence);
	xe_bo_put(m->pt_bo);
	drm_suballoc_manager_fini(&m->vm_update_sa);
	mutex_destroy(&m->job_mutex);
	xe_vm_close_and_put(m->q->vm);
	xe_exec_queue_put(m->q);
}

static u64 xe_migrate_vm_addr(u64 slot, u32 level)
{
	XE_WARN_ON(slot >= NUM_PT_SLOTS);

	/* First slot is reserved for mapping of PT bo and bb, start from 1 */
	return (slot + 1ULL) << xe_pt_shift(level + 1);
}
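
/*
 * Illustrative arithmetic (an editorial note, not from the original code):
 * assuming 4KiB page tables with 9 bits per level, xe_pt_shift(1) == 21,
 * so at level 0 slot 0 maps at 2MiB, slot 1 at 4MiB, and slot 31 at 64MiB.
 */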

static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
{
	/*
	 * Remove the DPA to get a correct offset into identity table for the
	 * migrate offset
	 */
	addr -= xe->mem.vram.dpa_base;
	return addr + (256ULL << xe_pt_shift(2));
}
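
/*
 * Illustrative arithmetic (editorial note): xe_pt_shift(2) is the shift of a
 * level-2, 1GiB entry, so 256ULL << xe_pt_shift(2) equals the 256GiB VM
 * offset at which xe_migrate_prepare_vm() below builds the VRAM identity map.
 */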

static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
				 struct xe_vm *vm)
{
	struct xe_device *xe = tile_to_xe(tile);
	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
	u8 id = tile->id;
	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
	u32 map_ofs, level, i;
	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
	u64 entry;

	/* Can't bump NUM_PT_SLOTS too high */
	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
	/* Must be a multiple of 64K to support all platforms */
	BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K);
	/* And one slot reserved for the 4KiB page table updates */
	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));

	/* Need to be sure everything fits in the first PT, or create more */
	xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M);

	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
				  num_entries * XE_PAGE_SIZE,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				  XE_BO_CREATE_PINNED_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);

	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;

	/* Map the entire BO in our level 0 pt */
	for (i = 0, level = 0; i < num_entries; level++) {
		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
						  pat_index, 0);

		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);

		if (vm->flags & XE_VM_FLAG_64K)
			i += 16;
		else
			i += 1;
	}

	if (!IS_DGFX(xe)) {
		/* Write out batch too */
		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
		for (i = 0; i < batch->size;
		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
		     XE_PAGE_SIZE) {
			entry = vm->pt_ops->pte_encode_bo(batch, i,
							  pat_index, 0);

			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
				  entry);
			level++;
		}
		if (xe->info.has_usm) {
			xe_tile_assert(tile, batch->size == SZ_1M);

			batch = tile->primary_gt->usm.bb_pool->bo;
			m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
			xe_tile_assert(tile, batch->size == SZ_512K);

			for (i = 0; i < batch->size;
			     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
			     XE_PAGE_SIZE) {
				entry = vm->pt_ops->pte_encode_bo(batch, i,
								  pat_index, 0);

				xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
					  entry);
				level++;
			}
		}
	} else {
		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);

		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);

		if (xe->info.has_usm) {
			batch = tile->primary_gt->usm.bb_pool->bo;
			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
		}
	}

	for (level = 1; level < num_level; level++) {
		u32 flags = 0;

		if (vm->flags & XE_VM_FLAG_64K && level == 1)
			flags = XE_PDE_64K;

		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
						  XE_PAGE_SIZE, pat_index);
		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
			  entry | flags);
	}

	/* Write PDE's that point to our BO. */
	for (i = 0; i < num_entries - num_level; i++) {
		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
						  pat_index);

		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
			  (i + 1) * 8, u64, entry);
	}

	/* Set up a 1GiB NULL mapping at 255GiB offset. */
	level = 2;
	xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
		  vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0)
		  | XE_PTE_NULL);
	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));

	/* Identity map the entire vram at 256GiB offset */
	if (IS_DGFX(xe)) {
		u64 pos, ofs, flags;

		level = 2;
		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
		flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
						    true, 0);

		/*
		 * Use 1GB pages; it shouldn't matter that the physical amount
		 * of vram is smaller, as long as we never access the range
		 * beyond it.
		 */
		for (pos = xe->mem.vram.dpa_base;
		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
		     pos += SZ_1G, ofs += 8)
			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
	}

	/*
	 * Example layout created above, with root level = 3:
	 * [PT0...PT7]: kernel PTs for copy/clear; 64KiB or 4KiB PTEs
	 * [PT8]: Kernel PT for VM_BIND, 4KiB PTEs
	 * [PT9...PT28]: Userspace PTs for VM_BIND, 4KiB PTEs
	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
	 *
	 * This makes the lowest part of the VM point to the pagetables.
	 * Hence the lowest 2M in the vm should point to itself; with a few
	 * writes and flushes, other parts of the VM can then be used for
	 * copying and clearing.
	 *
	 * For performance, the kernel reserves PDE's, so about 20 are left
	 * for async VM updates.
	 *
	 * To make it easier to work, each scratch PT is put in slot (1 + PT #)
	 * everywhere; this allows lockless updates to scratch pages by using
	 * the different addresses in the VM.
	 */
#define NUM_VMUSA_UNIT_PER_PAGE	32
#define VM_SA_UPDATE_UNIT_SIZE		(XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
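	/*
	 * Worked example (editorial note): with 4KiB pages each suballocation
	 * unit is 4096 / 32 = 128 bytes, i.e. room for 16 qword PTE writes
	 * per unit.
	 */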
	drm_suballoc_manager_init(&m->vm_update_sa,
				  (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
				  NUM_VMUSA_UNIT_PER_PAGE, 0);

	m->pt_bo = bo;
	return 0;
}

/*
 * Due to workaround 16017236439, odd instance hardware copy engines are
 * faster than even instance ones.
 * This function returns the mask involving all fast copy engines and the
 * reserved copy engine to be used as logical mask for migrate engine.
 * Including the reserved copy engine is required to avoid deadlocks due to
 * migrate jobs servicing the faults getting stuck behind the job that
 * faulted.
 */
static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
{
	u32 logical_mask = 0;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		if (hwe->class != XE_ENGINE_CLASS_COPY)
			continue;

		if (!XE_WA(gt, 16017236439) ||
		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
			logical_mask |= BIT(hwe->logical_instance);
	}

	return logical_mask;
}

/**
 * xe_migrate_init() - Initialize a migrate context
 * @tile: Back-pointer to the tile we're initializing for.
 *
 * Return: Pointer to a migrate context on success. Error pointer on error.
 */
struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_gt *primary_gt = tile->primary_gt;
	struct xe_migrate *m;
	struct xe_vm *vm;
	int err;

	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
	if (!m)
		return ERR_PTR(-ENOMEM);

	m->tile = tile;

	/* Special layout, prepared below.. */
	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
			  XE_VM_FLAG_SET_TILE_ID(tile));
	if (IS_ERR(vm))
		return ERR_CAST(vm);

	xe_vm_lock(vm, false);
	err = xe_migrate_prepare_vm(tile, m, vm);
	xe_vm_unlock(vm);
	if (err) {
		xe_vm_close_and_put(vm);
		return ERR_PTR(err);
	}

	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
							   XE_ENGINE_CLASS_COPY,
							   primary_gt->usm.reserved_bcs_instance,
							   false);
		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);

		if (!hwe || !logical_mask)
			return ERR_PTR(-EINVAL);

		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
					    EXEC_QUEUE_FLAG_KERNEL |
					    EXEC_QUEUE_FLAG_PERMANENT |
					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
	} else {
		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
						  XE_ENGINE_CLASS_COPY,
						  EXEC_QUEUE_FLAG_KERNEL |
						  EXEC_QUEUE_FLAG_PERMANENT);
	}
	if (IS_ERR(m->q)) {
		xe_vm_close_and_put(vm);
		return ERR_CAST(m->q);
	}

	mutex_init(&m->job_mutex);

	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
	if (err)
		return ERR_PTR(err);

	if (IS_DGFX(xe)) {
		if (xe_device_has_flat_ccs(xe))
			/* min chunk size corresponds to 4K of CCS Metadata */
			m->min_chunk_size = SZ_4K * SZ_64K /
				xe_device_ccs_bytes(xe, SZ_64K);
		else
			/* Somewhat arbitrary to avoid a huge amount of blits */
			m->min_chunk_size = SZ_64K;
		m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);
		drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n",
			(unsigned long long)m->min_chunk_size);
	}

	return m;
}

static u64 max_mem_transfer_per_pass(struct xe_device *xe)
{
	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
		return MAX_CCS_LIMITED_TRANSFER;

	return MAX_PREEMPTDISABLE_TRANSFER;
}

static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
{
	struct xe_device *xe = tile_to_xe(m->tile);
	u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining);

	if (mem_type_is_vram(cur->mem_type)) {
		/*
		 * VRAM we want to blit in chunks with sizes aligned to
		 * min_chunk_size in order for the offset to CCS metadata to be
		 * page-aligned. If it's the last chunk it may be smaller.
		 *
		 * Another constraint is that we need to limit the blit to
		 * the VRAM block size, unless size is smaller than
		 * min_chunk_size.
		 */
		u64 chunk = max_t(u64, cur->size, m->min_chunk_size);

		size = min_t(u64, size, chunk);
		if (size > m->min_chunk_size)
			size = round_down(size, m->min_chunk_size);
	}

	return size;
}
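
/*
 * Worked example (editorial note, values assumed): with min_chunk_size = 64KiB
 * and a contiguous 200KiB VRAM block at the cursor, size is first capped to
 * 200KiB and then rounded down to 192KiB so the CCS offset stays aligned; the
 * 8KiB tail is picked up by a later, final pass where no rounding is applied.
 */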

static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
{
	/* If the chunk is not fragmented, allow identity map. */
	return cur->size >= size;
}

static u32 pte_update_size(struct xe_migrate *m,
			   bool is_vram,
			   struct ttm_resource *res,
			   struct xe_res_cursor *cur,
			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
{
	u32 cmds = 0;

	*L0_pt = pt_ofs;
	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
		/* Offset into identity map. */
		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
					      cur->start + vram_region_gpu_offset(res));
		cmds += cmd_size;
	} else {
		/* Clip L0 to available size */
		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);

		*L0 = size;
		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);

		/* MI_STORE_DATA_IMM */
		cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);

		/* PDE qwords */
		cmds += num_4k_pages * 2;

		/* Each chunk has a single blit command */
		cmds += cmd_size;
	}

	return cmds;
}
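
/*
 * Worked example (editorial note): for a non-identity 2MiB chunk,
 * num_4k_pages == 512, giving DIV_ROUND_UP(512, 0x1ff) == 2 MI_STORE_DATA_IMM
 * headers (3 dwords each) plus 512 PTE qwords (1024 dwords), plus cmd_size
 * dwords for the copy/clear command itself.
 */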

static void emit_pte(struct xe_migrate *m,
		     struct xe_bb *bb, u32 at_pt,
		     bool is_vram, bool is_comp_pte,
		     struct xe_res_cursor *cur,
		     u32 size, struct ttm_resource *res)
{
	struct xe_device *xe = tile_to_xe(m->tile);
	struct xe_vm *vm = m->q->vm;
	u16 pat_index;
	u32 ptes;
	u64 ofs = at_pt * XE_PAGE_SIZE;
	u64 cur_ofs;

	/* Indirect access needs a compression-enabled, uncached PAT index */
	if (GRAPHICS_VERx100(xe) >= 2000)
		pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] :
					  xe->pat.idx[XE_CACHE_WB];
	else
		pat_index = xe->pat.idx[XE_CACHE_WB];

	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);

	while (ptes) {
		u32 chunk = min(0x1ffU, ptes);

		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
		bb->cs[bb->len++] = ofs;
		bb->cs[bb->len++] = 0;

		cur_ofs = ofs;
		ofs += chunk * 8;
		ptes -= chunk;

		while (chunk--) {
			u64 addr, flags = 0;
			bool devmem = false;

			addr = xe_res_dma(cur) & PAGE_MASK;
			if (is_vram) {
				if (vm->flags & XE_VM_FLAG_64K) {
					u64 va = cur_ofs * XE_PAGE_SIZE / 8;

					xe_assert(xe, (va & (SZ_64K - 1)) ==
						  (addr & (SZ_64K - 1)));

					flags |= XE_PTE_PS64;
				}

				addr += vram_region_gpu_offset(res);
				devmem = true;
			}

			addr = vm->pt_ops->pte_encode_addr(m->tile->xe,
							   addr, pat_index,
							   0, devmem, flags);
			bb->cs[bb->len++] = lower_32_bits(addr);
			bb->cs[bb->len++] = upper_32_bits(addr);

			xe_res_next(cur, min_t(u32, size, PAGE_SIZE));
			cur_ofs += 8;
		}
	}
}

#define EMIT_COPY_CCS_DW 5
static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
			  u64 dst_ofs, bool dst_is_indirect,
			  u64 src_ofs, bool src_is_indirect,
			  u32 size)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 *cs = bb->cs + bb->len;
	u32 num_ccs_blks;
	u32 num_pages;
	u32 ccs_copy_size;
	u32 mocs;

	if (GRAPHICS_VERx100(xe) >= 2000) {
		num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
		xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1));

		ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1);
		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);

	} else {
		num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
					    NUM_CCS_BYTES_PER_BLOCK);
		xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1));

		ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1);
		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
	}

	*cs++ = XY_CTRL_SURF_COPY_BLT |
		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
		ccs_copy_size;
	*cs++ = lower_32_bits(src_ofs);
	*cs++ = upper_32_bits(src_ofs) | mocs;
	*cs++ = lower_32_bits(dst_ofs);
	*cs++ = upper_32_bits(dst_ofs) | mocs;

	bb->len = cs - bb->cs;
}

#define EMIT_COPY_DW 10
static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
		      u64 src_ofs, u64 dst_ofs, unsigned int size,
		      unsigned int pitch)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 mocs = 0;
	u32 tile_y = 0;

	xe_gt_assert(gt, size / pitch <= S16_MAX);
	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
	xe_gt_assert(gt, pitch <= U16_MAX);

	if (GRAPHICS_VER(xe) >= 20)
		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);

	if (GRAPHICS_VERx100(xe) >= 1250)
		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;

	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
	bb->cs[bb->len++] = 0;
	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
	bb->cs[bb->len++] = 0;
	bb->cs[bb->len++] = pitch | mocs;
	bb->cs[bb->len++] = lower_32_bits(src_ofs);
	bb->cs[bb->len++] = upper_32_bits(src_ofs);
}

static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
			enum dma_resv_usage usage)
{
	return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
}

static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
{
	return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
}

static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
			       struct xe_bb *bb,
			       u64 src_ofs, bool src_is_indirect,
			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
			       u64 ccs_ofs, bool copy_ccs)
{
	struct xe_gt *gt = m->tile->primary_gt;
	u32 flush_flags = 0;

	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
		/*
		 * If the src is already in vram, then it should already
		 * have been cleared by us, or has been populated by the
		 * user. Make sure we copy the CCS aux state as-is.
		 *
		 * Otherwise if the bo doesn't have any CCS metadata attached,
		 * we still need to clear it for security reasons.
		 */
		u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs;

		emit_copy_ccs(gt, bb,
			      dst_ofs, true,
			      ccs_src_ofs, src_is_indirect, dst_size);

		flush_flags = MI_FLUSH_DW_CCS;
	} else if (copy_ccs) {
		if (!src_is_indirect)
			src_ofs = ccs_ofs;
		else if (!dst_is_indirect)
			dst_ofs = ccs_ofs;

		xe_gt_assert(gt, src_is_indirect || dst_is_indirect);

		emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
			      src_is_indirect, dst_size);
		if (dst_is_indirect)
			flush_flags = MI_FLUSH_DW_CCS;
	}

	return flush_flags;
}

/**
 * xe_migrate_copy() - Copy content of TTM resources.
 * @m: The migration context.
 * @src_bo: The buffer object @src is currently bound to.
 * @dst_bo: If copying between resources created for the same bo, set this to
 * the same value as @src_bo. If copying between buffer objects, set it to
 * the buffer object @dst is currently bound to.
 * @src: The source TTM resource.
 * @dst: The dst TTM resource.
 * @copy_only_ccs: If true, copy only CCS metadata.
 *
 * Copies the contents of @src to @dst: On flat CCS devices,
 * the CCS metadata is copied as well if needed, or if not present,
 * the CCS metadata of @dst is cleared for security reasons.
 *
 * Return: Pointer to a dma_fence representing the last copy batch, or
 * an error pointer on failure. If there is a failure, any copy operation
 * started by the function call has been synced.
 */
struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
				  struct xe_bo *src_bo,
				  struct xe_bo *dst_bo,
				  struct ttm_resource *src,
				  struct ttm_resource *dst,
				  bool copy_only_ccs)
{
	struct xe_gt *gt = m->tile->primary_gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct dma_fence *fence = NULL;
	u64 size = src_bo->size;
	struct xe_res_cursor src_it, dst_it, ccs_it;
	u64 src_L0_ofs, dst_L0_ofs;
	u32 src_L0_pt, dst_L0_pt;
	u64 src_L0, dst_L0;
	int pass = 0;
	int err;
	bool src_is_pltt = src->mem_type == XE_PL_TT;
	bool dst_is_pltt = dst->mem_type == XE_PL_TT;
	bool src_is_vram = mem_type_is_vram(src->mem_type);
	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);

	/* Copying CCS between two different BOs is not supported yet. */
	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
		return ERR_PTR(-EINVAL);

	if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size))
		return ERR_PTR(-EINVAL);

	if (!src_is_vram)
		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
	else
		xe_res_first(src, 0, size, &src_it);
	if (!dst_is_vram)
		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
	else
		xe_res_first(dst, 0, size, &dst_it);

	if (copy_system_ccs)
		xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
				&ccs_it);

	while (size) {
		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
		struct xe_sched_job *job;
		struct xe_bb *bb;
		u32 flush_flags;
		u32 update_idx;
		u64 ccs_ofs, ccs_size;
		u32 ccs_pt;

		bool usm = xe->info.has_usm;
		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;

		src_L0 = xe_migrate_res_sizes(m, &src_it);
		dst_L0 = xe_migrate_res_sizes(m, &dst_it);

		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
			pass++, src_L0, dst_L0);

		src_L0 = min(src_L0, dst_L0);

		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
					      &src_L0_ofs, &src_L0_pt, 0, 0,
					      avail_pts);

		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
					      &dst_L0_ofs, &dst_L0_pt, 0,
					      avail_pts, avail_pts);

		if (copy_system_ccs) {
			ccs_size = xe_device_ccs_bytes(xe, src_L0);
			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
						      &ccs_ofs, &ccs_pt, 0,
						      2 * avail_pts,
						      avail_pts);
			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
		}

		/* Add copy commands size here */
		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));

		bb = xe_bb_new(gt, batch_size, usm);
		if (IS_ERR(bb)) {
			err = PTR_ERR(bb);
			goto err_sync;
		}

		if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
			xe_res_next(&src_it, src_L0);
		else
			emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
				 &src_it, src_L0, src);

		if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
			xe_res_next(&dst_it, src_L0);
		else
			emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
				 &dst_it, src_L0, dst);

		if (copy_system_ccs)
			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);

		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
		update_idx = bb->len;

		if (!copy_only_ccs)
			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);

		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
						  IS_DGFX(xe) ? src_is_vram : src_is_pltt,
						  dst_L0_ofs,
						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
						  src_L0, ccs_ofs, copy_ccs);

		mutex_lock(&m->job_mutex);
		job = xe_bb_create_migration_job(m->q, bb,
						 xe_migrate_batch_base(m, usm),
						 update_idx);
		if (IS_ERR(job)) {
			err = PTR_ERR(job);
			goto err;
		}

		xe_sched_job_add_migrate_flush(job, flush_flags);
		if (!fence) {
			err = job_add_deps(job, src_bo->ttm.base.resv,
					   DMA_RESV_USAGE_BOOKKEEP);
			if (!err && src_bo != dst_bo)
				err = job_add_deps(job, dst_bo->ttm.base.resv,
						   DMA_RESV_USAGE_BOOKKEEP);
			if (err)
				goto err_job;
		}

		xe_sched_job_arm(job);
		dma_fence_put(fence);
		fence = dma_fence_get(&job->drm.s_fence->finished);
		xe_sched_job_push(job);

		dma_fence_put(m->fence);
		m->fence = dma_fence_get(fence);

		mutex_unlock(&m->job_mutex);

		xe_bb_free(bb, fence);
		size -= src_L0;
		continue;

err_job:
		xe_sched_job_put(job);
err:
		mutex_unlock(&m->job_mutex);
		xe_bb_free(bb, NULL);

err_sync:
		/* Sync partial copy if any. FIXME: under job_mutex? */
		if (fence) {
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}

		return ERR_PTR(err);
	}

	return fence;
}
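
/*
 * Illustrative usage (editorial sketch, not part of this file; assumes the
 * caller holds the bo's reservation and both resources are populated, and
 * that "new_mem" names the destination resource):
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_migrate_copy(tile->migrate, bo, bo, bo->ttm.resource,
 *				new_mem, false);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */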

static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
				 u32 size, u32 pitch)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 *cs = bb->cs + bb->len;
	u32 len = PVC_MEM_SET_CMD_LEN_DW;

	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
	*cs++ = pitch - 1;
	*cs++ = (size / pitch) - 1;
	*cs++ = pitch - 1;
	*cs++ = lower_32_bits(src_ofs);
	*cs++ = upper_32_bits(src_ofs);
	if (GRAPHICS_VERx100(xe) >= 2000)
		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
	else
		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);

	xe_gt_assert(gt, cs - bb->cs == len + bb->len);

	bb->len += len;
}

static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 *cs = bb->cs + bb->len;
	u32 len = XY_FAST_COLOR_BLT_DW;

	if (GRAPHICS_VERx100(xe) < 1250)
		len = 11;

	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
		(len - 2);
	if (GRAPHICS_VERx100(xe) >= 2000)
		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
			(pitch - 1);
	else
		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
			(pitch - 1);
	*cs++ = 0;
	*cs++ = (size / pitch) << 16 | pitch / 4;
	*cs++ = lower_32_bits(src_ofs);
	*cs++ = upper_32_bits(src_ofs);
	*cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	if (len > 11) {
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
	}

	xe_gt_assert(gt, cs - bb->cs == len + bb->len);

	bb->len += len;
}

static bool has_service_copy_support(struct xe_gt *gt)
{
	/*
	 * What we care about is whether the architecture was designed with
	 * service copy functionality (specifically the new MEM_SET / MEM_COPY
	 * instructions) so check the architectural engine list rather than the
	 * actual list since these instructions are usable on BCS0 even if
	 * all of the actual service copy engines (BCS1-BCS8) have been fused
	 * off.
	 */
	return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
						XE_HW_ENGINE_BCS1);
}

static u32 emit_clear_cmd_len(struct xe_gt *gt)
{
	if (has_service_copy_support(gt))
		return PVC_MEM_SET_CMD_LEN_DW;
	else
		return XY_FAST_COLOR_BLT_DW;
}

static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
		       u32 size, u32 pitch, bool is_vram)
{
	if (has_service_copy_support(gt))
		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
	else
		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
				     is_vram);
}

/**
 * xe_migrate_clear() - Clear content of a TTM resource.
 * @m: The migration context.
 * @bo: The buffer object @dst is currently bound to.
 * @dst: The dst TTM resource to be cleared.
 *
 * Clear the contents of @dst to zero. On flat CCS devices,
 * the CCS metadata is cleared to zero as well on VRAM destinations.
 * TODO: Eliminate the @bo argument.
 *
 * Return: Pointer to a dma_fence representing the last clear batch, or
 * an error pointer on failure. If there is a failure, any clear operation
 * started by the function call has been synced.
 */
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
				   struct xe_bo *bo,
				   struct ttm_resource *dst)
{
	bool clear_vram = mem_type_is_vram(dst->mem_type);
	struct xe_gt *gt = m->tile->primary_gt;
	struct xe_device *xe = gt_to_xe(gt);
	bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
	struct dma_fence *fence = NULL;
	u64 size = bo->size;
	struct xe_res_cursor src_it;
	struct ttm_resource *src = dst;
	int err;
	int pass = 0;

	if (!clear_vram)
		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
	else
		xe_res_first(src, 0, bo->size, &src_it);

	while (size) {
		u64 clear_L0_ofs;
		u32 clear_L0_pt;
		u32 flush_flags = 0;
		u64 clear_L0;
		struct xe_sched_job *job;
		struct xe_bb *bb;
		u32 batch_size, update_idx;

		bool usm = xe->info.has_usm;
		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;

		clear_L0 = xe_migrate_res_sizes(m, &src_it);

		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);

		/* Calculate final sizes and batch size.. */
		batch_size = 2 +
			pte_update_size(m, clear_vram, src, &src_it,
					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
					avail_pts);

		if (xe_device_has_flat_ccs(xe))
			batch_size += EMIT_COPY_CCS_DW;

		/* Clear commands */

		if (WARN_ON_ONCE(!clear_L0))
			break;

		bb = xe_bb_new(gt, batch_size, usm);
		if (IS_ERR(bb)) {
			err = PTR_ERR(bb);
			goto err_sync;
		}

		size -= clear_L0;
		/* Preemption is enabled again by the ring ops. */
		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
			xe_res_next(&src_it, clear_L0);
		else
			emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs,
				 &src_it, clear_L0, dst);

		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
		update_idx = bb->len;

		if (!clear_system_ccs)
			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);

		if (xe_device_has_flat_ccs(xe)) {
			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
				      m->cleared_mem_ofs, false, clear_L0);
			flush_flags = MI_FLUSH_DW_CCS;
		}

		mutex_lock(&m->job_mutex);
		job = xe_bb_create_migration_job(m->q, bb,
						 xe_migrate_batch_base(m, usm),
						 update_idx);
		if (IS_ERR(job)) {
			err = PTR_ERR(job);
			goto err;
		}

		xe_sched_job_add_migrate_flush(job, flush_flags);
		if (!fence) {
			/*
			 * There can't be anything userspace related at this
			 * point, so we just need to respect any potential move
			 * fences, which are always tracked as
			 * DMA_RESV_USAGE_KERNEL.
			 */
			err = job_add_deps(job, bo->ttm.base.resv,
					   DMA_RESV_USAGE_KERNEL);
			if (err)
				goto err_job;
		}

		xe_sched_job_arm(job);
		dma_fence_put(fence);
		fence = dma_fence_get(&job->drm.s_fence->finished);
		xe_sched_job_push(job);

		dma_fence_put(m->fence);
		m->fence = dma_fence_get(fence);

		mutex_unlock(&m->job_mutex);

		xe_bb_free(bb, fence);
		continue;

err_job:
		xe_sched_job_put(job);
err:
		mutex_unlock(&m->job_mutex);
		xe_bb_free(bb, NULL);
err_sync:
		/* Sync partial copies if any. FIXME: job_mutex? */
		if (fence) {
			dma_fence_wait(m->fence, false);
			dma_fence_put(fence);
		}

		return ERR_PTR(err);
	}

	if (clear_system_ccs)
		bo->ccs_cleared = true;

	return fence;
}

static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
			  const struct xe_vm_pgtable_update *update,
			  struct xe_migrate_pt_update *pt_update)
{
	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
	u32 chunk;
	u32 ofs = update->ofs, size = update->qwords;

	/*
	 * If we have 512 entries (max), we would populate it ourselves,
	 * and update the PDE above it to the new pointer.
	 * The only time this can happen is if we have to update the top
	 * PDE. This requires a BO that is almost vm->size big.
	 *
	 * This shouldn't be possible in practice.. might change when 16K
	 * pages are used. Hence the assert.
	 */
	xe_tile_assert(tile, update->qwords <= 0x1ff);
	if (!ppgtt_ofs)
		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
						xe_bo_addr(update->pt_bo, 0,
							   XE_PAGE_SIZE));

	do {
		u64 addr = ppgtt_ofs + ofs * 8;

		chunk = min(update->qwords, 0x1ffU);

		/* Ensure populatefn can do memset64 by aligning bb->cs */
		if (!(bb->len & 1))
			bb->cs[bb->len++] = MI_NOOP;

		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
		bb->cs[bb->len++] = lower_32_bits(addr);
		bb->cs[bb->len++] = upper_32_bits(addr);
		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
			      update);

		bb->len += chunk * 2;
		ofs += chunk;
		size -= chunk;
	} while (size);
}

struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
{
	return xe_vm_get(m->q->vm);
}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
struct migrate_test_params {
	struct xe_test_priv base;
	bool force_gpu;
};

#define to_migrate_test_params(_priv) \
	container_of(_priv, struct migrate_test_params, base)
#endif

static struct dma_fence *
xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
			       struct xe_vm *vm, struct xe_bo *bo,
			       const struct xe_vm_pgtable_update *updates,
			       u32 num_updates, bool wait_vm,
			       struct xe_migrate_pt_update *pt_update)
{
	XE_TEST_DECLARE(struct migrate_test_params *test =
				to_migrate_test_params
				(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
	struct dma_fence *fence;
	int err;
	u32 i;

	if (XE_TEST_ONLY(test && test->force_gpu))
		return ERR_PTR(-ETIME);

	if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
					  DMA_RESV_USAGE_KERNEL))
		return ERR_PTR(-ETIME);

	if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
					       DMA_RESV_USAGE_BOOKKEEP))
		return ERR_PTR(-ETIME);

	if (ops->pre_commit) {
		pt_update->job = NULL;
		err = ops->pre_commit(pt_update);
		if (err)
			return ERR_PTR(err);
	}
	for (i = 0; i < num_updates; i++) {
		const struct xe_vm_pgtable_update *update = &updates[i];

		ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL,
			      update->ofs, update->qwords, update);
	}

	if (vm) {
		trace_xe_vm_cpu_bind(vm);
		xe_device_wmb(vm->xe);
	}

	fence = dma_fence_get_stub();

	return fence;
}

static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
			struct xe_sync_entry *syncs, u32 num_syncs)
{
	struct dma_fence *fence;
	int i;

	for (i = 0; i < num_syncs; i++) {
		fence = syncs[i].fence;

		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				       &fence->flags))
			return false;
	}
	if (q) {
		fence = xe_exec_queue_last_fence_get(q, vm);
		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
			dma_fence_put(fence);
			return false;
		}
		dma_fence_put(fence);
	}

	return true;
}

/**
 * xe_migrate_update_pgtables() - Pipelined page-table update
 * @m: The migrate context.
 * @vm: The vm we'll be updating.
 * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
 * @q: The exec queue to be used for the update or NULL if the default
 * migration engine is to be used.
 * @updates: An array of update descriptors.
 * @num_updates: Number of descriptors in @updates.
 * @syncs: Array of xe_sync_entry to await before updating. Note that waits
 * will block the engine timeline.
 * @num_syncs: Number of entries in @syncs.
 * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
 * pointers to callback functions and, if subclassed, private arguments to
 * those.
 *
 * Perform a pipelined page-table update. The update descriptors are typically
 * built under the same lock critical section as a call to this function. If
 * using the default engine for the updates, they will be performed in the
 * order they grab the job_mutex. If different engines are used, external
 * synchronization is needed for overlapping updates to maintain page-table
 * consistency. Note that the meaning of "overlapping" is that the updates
 * touch the same page-table, which might be a higher-level page-directory.
 * If no pipelining is needed, then updates may be performed by the cpu.
 *
 * Return: A dma_fence that, when signaled, indicates the update completion.
 */
struct dma_fence *
xe_migrate_update_pgtables(struct xe_migrate *m,
			   struct xe_vm *vm,
			   struct xe_bo *bo,
			   struct xe_exec_queue *q,
			   const struct xe_vm_pgtable_update *updates,
			   u32 num_updates,
			   struct xe_sync_entry *syncs, u32 num_syncs,
			   struct xe_migrate_pt_update *pt_update)
{
	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
	struct xe_tile *tile = m->tile;
	struct xe_gt *gt = tile->primary_gt;
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_sched_job *job;
	struct dma_fence *fence;
	struct drm_suballoc *sa_bo = NULL;
	struct xe_vma *vma = pt_update->vma;
	struct xe_bb *bb;
	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
	u64 addr;
	int err = 0;
	bool usm = !q && xe->info.has_usm;
	bool first_munmap_rebind = vma &&
		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
	struct xe_exec_queue *q_override = !q ? m->q : q;
	u16 pat_index = xe->pat.idx[XE_CACHE_WB];

	/* Use the CPU if no in syncs and engine is idle */
	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
		fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
						       num_updates,
						       first_munmap_rebind,
						       pt_update);
		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
			return fence;
	}

	/* fixed + PTE entries */
	if (IS_DGFX(xe))
		batch_size = 2;
	else
		batch_size = 6 + num_updates * 2;

	for (i = 0; i < num_updates; i++) {
		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);

		/* align noop + MI_STORE_DATA_IMM cmd prefix */
		batch_size += 4 * num_cmds + updates[i].qwords * 2;
	}

	/*
	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
	 *
	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
	 * Should be reasonably bound..
	 */
	xe_tile_assert(tile, batch_size < SZ_128K);

	bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm);
	if (IS_ERR(bb))
		return ERR_CAST(bb);

	/* For sysmem PTE's, need to map them in our hole.. */
	if (!IS_DGFX(xe)) {
		ppgtt_ofs = NUM_KERNEL_PDE - 1;
		if (q) {
			xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT);

			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
						 GFP_KERNEL, true, 0);
			if (IS_ERR(sa_bo)) {
				err = PTR_ERR(sa_bo);
				goto err;
			}

			ppgtt_ofs = NUM_KERNEL_PDE +
				(drm_suballoc_soffset(sa_bo) /
				 NUM_VMUSA_UNIT_PER_PAGE);
			page_ofs = (drm_suballoc_soffset(sa_bo) %
				    NUM_VMUSA_UNIT_PER_PAGE) *
				VM_SA_UPDATE_UNIT_SIZE;
		}

		/* Map our PT's to gtt */
		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
		bb->cs[bb->len++] = 0; /* upper_32_bits */

		for (i = 0; i < num_updates; i++) {
			struct xe_bo *pt_bo = updates[i].pt_bo;

			xe_tile_assert(tile, pt_bo->size == SZ_4K);

			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0);
			bb->cs[bb->len++] = lower_32_bits(addr);
			bb->cs[bb->len++] = upper_32_bits(addr);
		}

		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
		update_idx = bb->len;

		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
		for (i = 0; i < num_updates; i++)
			write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE,
				      &updates[i], pt_update);
	} else {
		/* phys pages, no preamble required */
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
		update_idx = bb->len;

		for (i = 0; i < num_updates; i++)
			write_pgtable(tile, bb, 0, &updates[i], pt_update);
	}

	if (!q)
		mutex_lock(&m->job_mutex);

	job = xe_bb_create_migration_job(q ?: m->q, bb,
					 xe_migrate_batch_base(m, usm),
					 update_idx);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto err_bb;
	}

	/* Wait on BO move */
	if (bo) {
		err = job_add_deps(job, bo->ttm.base.resv,
				   DMA_RESV_USAGE_KERNEL);
		if (err)
			goto err_job;
	}

	/*
	 * Munmap style VM unbind, need to wait for all jobs to be complete /
	 * trigger preempts before moving forward
	 */
	if (first_munmap_rebind) {
		err = job_add_deps(job, xe_vm_resv(vm),
				   DMA_RESV_USAGE_BOOKKEEP);
		if (err)
			goto err_job;
	}

	err = xe_sched_job_last_fence_add_dep(job, vm);
	for (i = 0; !err && i < num_syncs; i++)
		err = xe_sync_entry_add_deps(&syncs[i], job);

	if (err)
		goto err_job;

	if (ops->pre_commit) {
		pt_update->job = job;
		err = ops->pre_commit(pt_update);
		if (err)
			goto err_job;
	}
	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	if (!q)
		mutex_unlock(&m->job_mutex);

	xe_bb_free(bb, fence);
	drm_suballoc_free(sa_bo, fence);

	return fence;

err_job:
	xe_sched_job_put(job);
err_bb:
	if (!q)
		mutex_unlock(&m->job_mutex);
	xe_bb_free(bb, NULL);
err:
	drm_suballoc_free(sa_bo, NULL);
	return ERR_PTR(err);
}

/**
 * xe_migrate_wait() - Complete all operations using the xe_migrate context
 * @m: Migrate context to wait for.
 *
 * Waits until the GPU no longer uses the migrate context's default engine
 * or its page-table objects. FIXME: What about separate page-table update
 * engines?
 */
void xe_migrate_wait(struct xe_migrate *m)
{
	if (m->fence)
		dma_fence_wait(m->fence, false);
}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif