drm/i915: clear the shadow batch
[linux-block.git] / drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008,2010 Intel Corporation
 */

#include <linux/intel-iommu.h>
#include <linux/dma-resv.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drm_syncobj.h>

#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"

struct eb_vma {
	struct i915_vma *vma;
	unsigned int flags;

	/** This vma's place in the execbuf reservation list */
	struct drm_i915_gem_exec_object2 *exec;
	struct list_head bind_link;
	struct list_head reloc_link;

	struct hlist_node node;
	u32 handle;
};

enum {
	FORCE_CPU_RELOC = 1,
	FORCE_GTT_RELOC,
	FORCE_GPU_RELOC,
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};

#define __EXEC_OBJECT_HAS_PIN BIT(31)
#define __EXEC_OBJECT_HAS_FENCE BIT(30)
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_ENGINE_PINNED BIT(30)
#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)

#define __I915_EXEC_ILLEGAL_FLAGS \
	(__I915_EXEC_UNKNOWN_FLAGS | \
	 I915_EXEC_CONSTANTS_MASK | \
	 I915_EXEC_RESOURCE_STREAMER)

/* Catch emission of unexpected errors for CI! */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#undef EINVAL
#define EINVAL ({ \
	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
	22; \
})
#endif

/**
 * DOC: User command execution
 *
 * Userspace submits commands to be executed on the GPU as an instruction
 * stream within a GEM object we call a batchbuffer. These instructions may
 * refer to other GEM objects containing auxiliary state such as kernels,
 * samplers, render targets and even secondary batchbuffers. Userspace does
 * not know where in the GPU memory these objects reside and so before the
 * batchbuffer is passed to the GPU for execution, those addresses in the
 * batchbuffer and auxiliary objects are updated. This is known as relocation,
 * or patching. To try and avoid having to relocate each object on the next
 * execution, userspace is told the location of those objects in this pass,
 * but this remains just a hint as the kernel may choose a new location for
 * any object in the future.
 *
 * At the level of talking to the hardware, submitting a batchbuffer for the
 * GPU to execute is to add content to a buffer from which the HW
 * command streamer is reading.
 *
 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
 *    Execlists, this command is not placed on the same buffer as the
 *    remaining items.
 *
 * 2. Add a command to invalidate caches to the buffer.
 *
 * 3. Add a batchbuffer start command to the buffer; the start command is
 *    essentially a token together with the GPU address of the batchbuffer
 *    to be executed.
 *
 * 4. Add a pipeline flush to the buffer.
 *
 * 5. Add a memory write command to the buffer to record when the GPU
 *    is done executing the batchbuffer. The memory write writes the
 *    global sequence number of the request, ``i915_request::global_seqno``;
 *    the i915 driver uses the current value in the register to determine
 *    if the GPU has completed the batchbuffer.
 *
 * 6. Add a user interrupt command to the buffer. This command instructs
 *    the GPU to issue an interrupt when the command, pipeline flush and
 *    memory write are completed.
 *
 * 7. Inform the hardware of the additional commands added to the buffer
 *    (by updating the tail pointer).
 *
 * Processing an execbuf ioctl is conceptually split up into a few phases.
 *
 * 1. Validation - Ensure all the pointers, handles and flags are valid.
 * 2. Reservation - Assign GPU address space for every object
 * 3. Relocation - Update any addresses to point to the final locations
 * 4. Serialisation - Order the request with respect to its dependencies
 * 5. Construction - Construct a request to execute the batchbuffer
 * 6. Submission (at some point in the future execution)
 *
 * Reserving resources for the execbuf is the most complicated phase. We
 * neither want to have to migrate the object in the address space, nor do
 * we want to have to update any relocations pointing to this object. Ideally,
 * we want to leave the object where it is and for all the existing relocations
 * to match. If the object is given a new address, or if userspace thinks the
 * object is elsewhere, we have to parse all the relocation entries and update
 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 * all the target addresses in all of its objects match the value in the
 * relocation entries and that they all match the presumed offsets given by the
 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 * moved any buffers, all the relocation entries are valid and we can skip
 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 *
 *      The addresses written in the objects must match the corresponding
 *      reloc.presumed_offset which in turn must match the corresponding
 *      execobject.offset.
 *
 *      Any render targets written to in the batch must be flagged with
 *      EXEC_OBJECT_WRITE.
 *
 *      To avoid stalling, execobject.offset should match the current
 *      address of that object within the active context.
 *
 * The reservation is done in multiple phases. First we try and keep any
 * object already bound in its current location - so long as it meets the
 * constraints imposed by the new execbuffer. Any object left unbound after the
 * first pass is then fitted into any available idle space. If an object does
 * not fit, all objects are removed from the reservation and the process rerun
 * after sorting the objects into a priority order (more difficult to fit
 * objects are tried first). Failing that, the entire VM is cleared and we try
 * to fit the execbuf one last time before concluding that it simply will not
 * fit.
 *
 * A small complication to all of this is that we allow userspace not only to
 * specify an alignment and a size for the object in the address space, but
 * we also allow userspace to specify the exact offset. Such objects are
 * simpler to place (the location is known a priori) - all we have to do is
 * make sure the space is available.
 *
 * Once all the objects are in place, patching up the buried pointers to point
 * to the final locations is a fairly simple job of walking over the relocation
 * entry arrays, looking up the right address and rewriting the value into
 * the object. Simple! ... The relocation entries are stored in user memory
 * and so to access them we have to copy them into a local buffer. That copy
 * has to avoid taking any pagefaults as they may lead back to a GEM object
 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 * the relocation into multiple passes. First we try to do everything within an
 * atomic context (avoid the pagefaults) which requires that we never wait. If
 * we detect that we may wait, or if we need to fault, then we have to fallback
 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 * bells yet?) Dropping the mutex means that we lose all the state we have
 * built up so far for the execbuf and we must reset any global data. However,
 * we do leave the objects pinned in their final locations - which is a
 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 * allocate and copy all the relocation entries into a large array at our
 * leisure, reacquire the mutex, reclaim all the objects and other state and
 * then proceed to update any incorrect addresses with the objects.
 *
 * As we process the relocation entries, we maintain a record of whether the
 * object is being written to. Using NO_RELOC, we expect userspace to provide
 * this information instead. We also check whether we can skip the relocation
 * by comparing the expected value inside the relocation entry with the target's
 * final address. If they differ, we have to map the current object and rewrite
 * the 4 or 8 byte pointer within.
 *
 * Serialising an execbuf is quite simple according to the rules of the GEM
 * ABI. Execution within each context is ordered by the order of submission.
 * Writes to any GEM object are in order of submission and are exclusive. Reads
 * from a GEM object are unordered with respect to other reads, but ordered by
 * writes. A write submitted after a read cannot occur before the read, and
 * similarly any read submitted after a write cannot occur before the write.
 * Writes are ordered between engines such that only one write occurs at any
 * time (completing any reads beforehand) - using semaphores where available
 * and CPU serialisation otherwise. Other GEM accesses obey the same rules, any
 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 * reads before starting, and any read (either using set-domain or pread) must
 * flush all GPU writes before starting. (Note we only employ a barrier before,
 * we currently rely on userspace not concurrently starting a new execution
 * whilst reading or writing to an object. This may be an advantage or not
 * depending on how much you trust userspace not to shoot themselves in the
 * foot.) Serialisation may just result in the request being inserted into
 * a DAG awaiting its turn, but most simple is to wait on the CPU until
 * all dependencies are resolved.
 *
 * After all of that, it is just a matter of closing the request and handing
 * it to the hardware (well, leaving it in a queue to be executed). However,
 * we also offer the ability for batchbuffers to be run with elevated
 * privileges so that they access otherwise hidden registers. (Used to adjust
 * L3 cache etc.) Before any batch is given extra privileges we first must
 * check that it contains no nefarious instructions, we check that each
 * instruction is from our whitelist and all registers are also from an
 * allowed list. We first copy the user's batchbuffer to a shadow (so that
 * the user doesn't have access to it, either by the CPU or GPU as we scan
 * it) and then parse each instruction. If everything is ok, we set a flag
 * telling the hardware to run the batchbuffer in trusted mode, otherwise
 * the ioctl is rejected.
 */

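/*
 * Illustrative sketch only (not part of the driver): roughly how a userspace
 * submission honouring the NO_RELOC rules above might look. The structures
 * and flags come from the i915 uapi (i915_drm.h) and drmIoctl() from libdrm;
 * names such as ctx_id, batch_handle or presumed_* are placeholders.
 *
 *	struct drm_i915_gem_exec_object2 obj[2] = {};
 *	struct drm_i915_gem_execbuffer2 eb = {};
 *
 *	obj[0].handle = target_handle;
 *	obj[0].offset = presumed_target_offset;
 *	obj[0].flags  = EXEC_OBJECT_WRITE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 *	obj[1].handle = batch_handle;	(the batch is last unless BATCH_FIRST)
 *	obj[1].offset = presumed_batch_offset;
 *
 *	eb.buffers_ptr = (uintptr_t)obj;
 *	eb.buffer_count = 2;
 *	eb.batch_len = batch_bytes;
 *	eb.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT;
 *	eb.rsvd1 = ctx_id;
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);
 */
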
struct eb_fence {
	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
	struct dma_fence *dma_fence;
	u64 value;
	struct dma_fence_chain *chain_fence;
};

struct i915_execbuffer {
	struct drm_i915_private *i915; /** i915 backpointer */
	struct drm_file *file; /** per-file lookup tables and limits */
	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
	struct eb_vma *vma;

	struct intel_engine_cs *engine; /** engine to queue the request to */
	struct intel_context *context; /* logical state for the request */
	struct i915_gem_context *gem_context; /** caller's context */

	struct i915_request *request; /** our request to build */
	struct eb_vma *batch; /** identity of the batch obj/vma */
	struct i915_vma *trampoline; /** trampoline used for chaining */

	/** actual size of execobj[] as we may extend it for the cmdparser */
	unsigned int buffer_count;

	/** list of vma not yet bound during reservation phase */
	struct list_head unbound;

	/** list of vma that have execobj.relocation_count */
	struct list_head relocs;

	struct i915_gem_ww_ctx ww;

	/**
	 * Track the most recently used object for relocations, as we
	 * frequently have to perform multiple relocations within the same
	 * obj/page
	 */
	struct reloc_cache {
		struct drm_mm_node node; /** temporary GTT binding */
		unsigned long vaddr; /** Current kmap address */
		unsigned long page; /** Currently mapped page index */
		unsigned int gen; /** Cached value of INTEL_GEN */
		bool use_64bit_reloc : 1;
		bool has_llc : 1;
		bool has_fence : 1;
		bool needs_unfenced : 1;

		struct i915_request *rq;
		u32 *rq_cmd;
		unsigned int rq_size;
		struct intel_gt_buffer_pool_node *pool;
	} reloc_cache;

	struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
	struct intel_context *reloc_context;

	u64 invalid_flags; /** Set of execobj.flags that are invalid */
	u32 context_flags; /** Set of execobj.flags to insert from the ctx */

	u64 batch_len; /** Length of batch within object */
	u32 batch_start_offset; /** Location within object of batch */
	u32 batch_flags; /** Flags composed for emit_bb_start() */
	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */

	/**
	 * Indicate either the size of the hashtable used to resolve
	 * relocation handles, or if negative that we are using a direct
	 * index into the execobj[].
	 */
	int lut_size;
	struct hlist_head *buckets; /** ht for relocation handles */

	struct eb_fence *fences;
	unsigned long num_fences;
};

static int eb_parse(struct i915_execbuffer *eb);
static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
					  bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);

static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
	return intel_engine_requires_cmd_parser(eb->engine) ||
		(intel_engine_using_cmd_parser(eb->engine) &&
		 eb->args->batch_len);
}

static int eb_create(struct i915_execbuffer *eb)
{
	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
		unsigned int size = 1 + ilog2(eb->buffer_count);

		/*
		 * Without a 1:1 association between relocation handles and
		 * the execobject[] index, we instead create a hashtable.
		 * We size it dynamically based on available memory, starting
		 * first with 1:1 associative hash and scaling back until
		 * the allocation succeeds.
		 *
		 * Later on we use a positive lut_size to indicate we are
		 * using this hashtable, and a negative value to indicate a
		 * direct lookup.
		 */
		do {
			gfp_t flags;

			/* While we can still reduce the allocation size, don't
			 * raise a warning and allow the allocation to fail.
			 * On the last pass though, we want to try as hard
			 * as possible to perform the allocation and warn
			 * if it fails.
			 */
			flags = GFP_KERNEL;
			if (size > 1)
				flags |= __GFP_NORETRY | __GFP_NOWARN;

			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
					      flags);
			if (eb->buckets)
				break;
		} while (--size);

		if (unlikely(!size))
			return -ENOMEM;

		eb->lut_size = size;
	} else {
		eb->lut_size = -eb->buffer_count;
	}

	return 0;
}

static bool
eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
		 const struct i915_vma *vma,
		 unsigned int flags)
{
	if (vma->node.size < entry->pad_to_size)
		return true;

	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
		return true;

	if (flags & EXEC_OBJECT_PINNED &&
	    vma->node.start != entry->offset)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
	    (vma->node.start + vma->node.size + 4095) >> 32)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
	    !i915_vma_is_map_and_fenceable(vma))
		return true;

	return false;
}

static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
			unsigned int exec_flags)
{
	u64 pin_flags = 0;

	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
		pin_flags |= PIN_GLOBAL;

	/*
	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
	 * limit address to the first 4GBs for unflagged objects.
	 */
	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
		pin_flags |= PIN_ZONE_4G;

	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
		pin_flags |= PIN_MAPPABLE;

	if (exec_flags & EXEC_OBJECT_PINNED)
		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;

	return pin_flags;
}

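/*
 * Try to cheaply pin a vma without disturbing its neighbours: reuse the
 * current (or user supplied) offset with PIN_NOEVICT, falling back to any
 * free space only for objects that are not EXEC_OBJECT_PINNED, and grab a
 * fence register if the execobject asked for one. Returns false when the
 * vma must instead go through the full eb_reserve() path.
 */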
static inline bool
eb_pin_vma(struct i915_execbuffer *eb,
	   const struct drm_i915_gem_exec_object2 *entry,
	   struct eb_vma *ev)
{
	struct i915_vma *vma = ev->vma;
	u64 pin_flags;

	if (vma->node.size)
		pin_flags = vma->node.start;
	else
		pin_flags = entry->offset & PIN_OFFSET_MASK;

	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
		pin_flags |= PIN_GLOBAL;

	/* Attempt to reuse the current location if available */
	/* TODO: Add -EDEADLK handling here */
	if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
		if (entry->flags & EXEC_OBJECT_PINNED)
			return false;

		/* Failing that pick any _free_ space if suitable */
		if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
					     entry->pad_to_size,
					     entry->alignment,
					     eb_pin_flags(entry, ev->flags) |
					     PIN_USER | PIN_NOEVICT)))
			return false;
	}

	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
		if (unlikely(i915_vma_pin_fence(vma))) {
			i915_vma_unpin(vma);
			return false;
		}

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	ev->flags |= __EXEC_OBJECT_HAS_PIN;
	return !eb_vma_misplaced(entry, vma, ev->flags);
}

static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
		return;

	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
		__i915_vma_unpin_fence(ev->vma);

	__i915_vma_unpin(ev->vma);
	ev->flags &= ~__EXEC_OBJECT_RESERVED;
}

static int
eb_validate_vma(struct i915_execbuffer *eb,
		struct drm_i915_gem_exec_object2 *entry,
		struct i915_vma *vma)
{
	if (unlikely(entry->flags & eb->invalid_flags))
		return -EINVAL;

	if (unlikely(entry->alignment &&
		     !is_power_of_2_u64(entry->alignment)))
		return -EINVAL;

	/*
	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
	 * any non-page-aligned or non-canonical addresses.
	 */
	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
		return -EINVAL;

	/* pad_to_size was once a reserved field, so sanitize it */
	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
		if (unlikely(offset_in_page(entry->pad_to_size)))
			return -EINVAL;
	} else {
		entry->pad_to_size = 0;
	}
	/*
	 * From drm_mm perspective address space is continuous,
	 * so from this point we're always using non-canonical
	 * form internally.
	 */
	entry->offset = gen8_noncanonical_addr(entry->offset);

	if (!eb->reloc_cache.has_fence) {
		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
	} else {
		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
		     eb->reloc_cache.needs_unfenced) &&
		    i915_gem_object_is_tiled(vma->obj))
			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
	}

	if (!(entry->flags & EXEC_OBJECT_PINNED))
		entry->flags |= eb->context_flags;

	return 0;
}

static void
eb_add_vma(struct i915_execbuffer *eb,
	   unsigned int i, unsigned batch_idx,
	   struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
	struct eb_vma *ev = &eb->vma[i];

	ev->vma = vma;
	ev->exec = entry;
	ev->flags = entry->flags;

	if (eb->lut_size > 0) {
		ev->handle = entry->handle;
		hlist_add_head(&ev->node,
			       &eb->buckets[hash_32(entry->handle,
						    eb->lut_size)]);
	}

	if (entry->relocation_count)
		list_add_tail(&ev->reloc_link, &eb->relocs);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	if (i == batch_idx) {
		if (entry->relocation_count &&
		    !(ev->flags & EXEC_OBJECT_PINNED))
			ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
		if (eb->reloc_cache.has_fence)
			ev->flags |= EXEC_OBJECT_NEEDS_FENCE;

		eb->batch = ev;
	}
}

static inline int use_cpu_reloc(const struct reloc_cache *cache,
				const struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_has_struct_page(obj))
		return false;

	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
		return true;

	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
		return false;

	return (cache->has_llc ||
		obj->cache_dirty ||
		obj->cache_level != I915_CACHE_NONE);
}

static int eb_reserve_vma(struct i915_execbuffer *eb,
			  struct eb_vma *ev,
			  u64 pin_flags)
{
	struct drm_i915_gem_exec_object2 *entry = ev->exec;
	struct i915_vma *vma = ev->vma;
	int err;

	if (drm_mm_node_allocated(&vma->node) &&
	    eb_vma_misplaced(entry, vma, ev->flags)) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin_ww(vma, &eb->ww,
			      entry->pad_to_size, entry->alignment,
			      eb_pin_flags(entry, ev->flags) | pin_flags);
	if (err)
		return err;

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start | UPDATE;
		eb->args->flags |= __EXEC_HAS_RELOC;
	}

	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
		err = i915_vma_pin_fence(vma);
		if (unlikely(err)) {
			i915_vma_unpin(vma);
			return err;
		}

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	ev->flags |= __EXEC_OBJECT_HAS_PIN;
	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));

	return 0;
}

static int eb_reserve(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
	struct list_head last;
	struct eb_vma *ev;
	unsigned int i, pass;
	int err = 0;

	/*
	 * Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	pass = 0;
	do {
		list_for_each_entry(ev, &eb->unbound, bind_link) {
			err = eb_reserve_vma(eb, ev, pin_flags);
			if (err)
				break;
		}
		if (err != -ENOSPC)
			return err;

		/* Resort *all* the objects into priority order */
		INIT_LIST_HEAD(&eb->unbound);
		INIT_LIST_HEAD(&last);
		for (i = 0; i < count; i++) {
			unsigned int flags;

			ev = &eb->vma[i];
			flags = ev->flags;
			if (flags & EXEC_OBJECT_PINNED &&
			    flags & __EXEC_OBJECT_HAS_PIN)
				continue;

			eb_unreserve_vma(ev);

			if (flags & EXEC_OBJECT_PINNED)
				/* Pinned must have their slot */
				list_add(&ev->bind_link, &eb->unbound);
			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
				/* Map require the lowest 256MiB (aperture) */
				list_add_tail(&ev->bind_link, &eb->unbound);
			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
				/* Prioritise 4GiB region for restricted bo */
				list_add(&ev->bind_link, &last);
			else
				list_add_tail(&ev->bind_link, &last);
		}
		list_splice_tail(&last, &eb->unbound);

		switch (pass++) {
		case 0:
			break;

		case 1:
			/* Too fragmented, unbind everything and retry */
			mutex_lock(&eb->context->vm->mutex);
			err = i915_gem_evict_vm(eb->context->vm);
			mutex_unlock(&eb->context->vm->mutex);
			if (err)
				return err;
			break;

		default:
			return -ENOSPC;
		}

		pin_flags = PIN_USER;
	} while (1);
}

static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
{
	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
		return 0;
	else
		return eb->buffer_count - 1;
}

static int eb_select_context(struct i915_execbuffer *eb)
{
	struct i915_gem_context *ctx;

	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
	if (unlikely(!ctx))
		return -ENOENT;

	eb->gem_context = ctx;
	if (rcu_access_pointer(ctx->vm))
		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

	eb->context_flags = 0;
	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return 0;
}

static int __eb_add_lut(struct i915_execbuffer *eb,
			u32 handle, struct i915_vma *vma)
{
	struct i915_gem_context *ctx = eb->gem_context;
	struct i915_lut_handle *lut;
	int err;

	lut = i915_lut_handle_alloc();
	if (unlikely(!lut))
		return -ENOMEM;

	i915_vma_get(vma);
	if (!atomic_fetch_inc(&vma->open_count))
		i915_vma_reopen(vma);
	lut->handle = handle;
	lut->ctx = ctx;

	/* Check that the context hasn't been closed in the meantime */
	err = -EINTR;
	if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
		struct i915_address_space *vm = rcu_access_pointer(ctx->vm);

		if (unlikely(vm && vma->vm != vm))
			err = -EAGAIN; /* user racing with ctx set-vm */
		else if (likely(!i915_gem_context_is_closed(ctx)))
			err = radix_tree_insert(&ctx->handles_vma, handle, vma);
		else
			err = -ENOENT;
		if (err == 0) { /* And nor has this handle */
			struct drm_i915_gem_object *obj = vma->obj;

			spin_lock(&obj->lut_lock);
			if (idr_find(&eb->file->object_idr, handle) == obj) {
				list_add(&lut->obj_link, &obj->lut_list);
			} else {
				radix_tree_delete(&ctx->handles_vma, handle);
				err = -ENOENT;
			}
			spin_unlock(&obj->lut_lock);
		}
		mutex_unlock(&ctx->lut_mutex);
	}
	if (unlikely(err))
		goto err;

	return 0;

err:
	i915_vma_close(vma);
	i915_vma_put(vma);
	i915_lut_handle_free(lut);
	return err;
}

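/*
 * Translate a GEM handle into a vma for this execbuf: first consult the
 * per-context handles_vma radix tree under RCU; on a miss, look the object
 * up in the file's handle table, find (or create) its vma in the context's
 * VM and record it in the context LUT via __eb_add_lut(). The loop retries
 * when the LUT insert races with a concurrent set-vm or handle reuse.
 */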
static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
{
	struct i915_address_space *vm = eb->context->vm;

	do {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;
		int err;

		rcu_read_lock();
		vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
		if (likely(vma && vma->vm == vm))
			vma = i915_vma_tryget(vma);
		rcu_read_unlock();
		if (likely(vma))
			return vma;

		obj = i915_gem_object_lookup(eb->file, handle);
		if (unlikely(!obj))
			return ERR_PTR(-ENOENT);

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			i915_gem_object_put(obj);
			return vma;
		}

		err = __eb_add_lut(eb, handle, vma);
		if (likely(!err))
			return vma;

		i915_gem_object_put(obj);
		if (err != -EEXIST)
			return ERR_PTR(err);
	} while (1);
}

static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
	struct drm_i915_private *i915 = eb->i915;
	unsigned int batch = eb_batch_index(eb);
	unsigned int i;
	int err = 0;

	INIT_LIST_HEAD(&eb->relocs);

	for (i = 0; i < eb->buffer_count; i++) {
		struct i915_vma *vma;

		vma = eb_lookup_vma(eb, eb->exec[i].handle);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err;
		}

		err = eb_validate_vma(eb, &eb->exec[i], vma);
		if (unlikely(err)) {
			i915_vma_put(vma);
			goto err;
		}

		eb_add_vma(eb, i, batch, vma);
	}

	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
		drm_dbg(&i915->drm,
			"Attempting to use self-modifying batch buffer\n");
		return -EINVAL;
	}

	if (range_overflows_t(u64,
			      eb->batch_start_offset, eb->batch_len,
			      eb->batch->vma->size)) {
		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
		return -EINVAL;
	}

	if (eb->batch_len == 0)
		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
	if (unlikely(eb->batch_len == 0)) { /* impossible! */
		drm_dbg(&i915->drm, "Invalid batch length\n");
		return -EINVAL;
	}

	return 0;

err:
	eb->vma[i].vma = NULL;
	return err;
}

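/*
 * Lock every object in the execbuf under the ww acquire context and try a
 * cheap pin at its current address; anything that cannot be pinned in place
 * is unreserved and queued on eb->unbound for the full eb_reserve() pass.
 */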
static int eb_validate_vmas(struct i915_execbuffer *eb)
{
	unsigned int i;
	int err;

	INIT_LIST_HEAD(&eb->unbound);

	for (i = 0; i < eb->buffer_count; i++) {
		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
		struct eb_vma *ev = &eb->vma[i];
		struct i915_vma *vma = ev->vma;

		err = i915_gem_object_lock(vma->obj, &eb->ww);
		if (err)
			return err;

		if (eb_pin_vma(eb, entry, ev)) {
			if (entry->offset != vma->node.start) {
				entry->offset = vma->node.start | UPDATE;
				eb->args->flags |= __EXEC_HAS_RELOC;
			}
		} else {
			eb_unreserve_vma(ev);

			list_add_tail(&ev->bind_link, &eb->unbound);
			if (drm_mm_node_allocated(&vma->node)) {
				err = i915_vma_unbind(vma);
				if (err)
					return err;
			}
		}

		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
	}

	if (!list_empty(&eb->unbound))
		return eb_reserve(eb);

	return 0;
}

static struct eb_vma *
eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
{
	if (eb->lut_size < 0) {
		if (handle >= -eb->lut_size)
			return NULL;
		return &eb->vma[handle];
	} else {
		struct hlist_head *head;
		struct eb_vma *ev;

		head = &eb->buckets[hash_32(handle, eb->lut_size)];
		hlist_for_each_entry(ev, head, node) {
			if (ev->handle == handle)
				return ev;
		}
		return NULL;
	}
}

static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];
		struct i915_vma *vma = ev->vma;

		if (!vma)
			break;

		eb_unreserve_vma(ev);

		if (final)
			i915_vma_put(vma);
	}

	eb_unpin_engine(eb);
}

static void eb_destroy(const struct i915_execbuffer *eb)
{
	GEM_BUG_ON(eb->reloc_cache.rq);

	if (eb->lut_size > 0)
		kfree(eb->buckets);
}

static inline u64
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
		  const struct i915_vma *target)
{
	return gen8_canonical_addr((int)reloc->delta + target->node.start);
}

static void reloc_cache_clear(struct reloc_cache *cache)
{
	cache->rq = NULL;
	cache->rq_cmd = NULL;
	cache->pool = NULL;
	cache->rq_size = 0;
}

static void reloc_cache_init(struct reloc_cache *cache,
			     struct drm_i915_private *i915)
{
	cache->page = -1;
	cache->vaddr = 0;
	/* Must be a variable in the struct to allow GCC to unroll. */
	cache->gen = INTEL_GEN(i915);
	cache->has_llc = HAS_LLC(i915);
	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
	cache->has_fence = cache->gen < 4;
	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
	cache->node.flags = 0;
	reloc_cache_clear(cache);
}

static inline void *unmask_page(unsigned long p)
{
	return (void *)(uintptr_t)(p & PAGE_MASK);
}

static inline unsigned int unmask_flags(unsigned long p)
{
	return p & ~PAGE_MASK;
}

#define KMAP 0x4 /* after CLFLUSH_FLAGS */

static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
	struct drm_i915_private *i915 =
		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
	return &i915->ggtt;
}

static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
	if (!cache->pool)
		return;

	/*
	 * This is a bit nasty, normally we keep objects locked until the end
	 * of execbuffer, but we already submit this, and have to unlock before
	 * dropping the reference. Fortunately we can only hold 1 pool node at
	 * a time, so this should be harmless.
	 */
	i915_gem_ww_unlock_single(cache->pool->obj);
	intel_gt_buffer_pool_put(cache->pool);
	cache->pool = NULL;
}

static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
	struct drm_i915_gem_object *obj = cache->rq->batch->obj;

	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(cache->rq->engine->gt);

	i915_request_add(cache->rq);
	reloc_cache_put_pool(eb, cache);
	reloc_cache_clear(cache);

	eb->reloc_pool = NULL;
}

static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
	void *vaddr;

	if (cache->rq)
		reloc_gpu_flush(eb, cache);

	if (!cache->vaddr)
		return;

	vaddr = unmask_page(cache->vaddr);
	if (cache->vaddr & KMAP) {
		struct drm_i915_gem_object *obj =
			(struct drm_i915_gem_object *)cache->node.mm;
		if (cache->vaddr & CLFLUSH_AFTER)
			mb();

		kunmap_atomic(vaddr);
		i915_gem_object_finish_access(obj);
	} else {
		struct i915_ggtt *ggtt = cache_to_ggtt(cache);

		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
		io_mapping_unmap_atomic((void __iomem *)vaddr);

		if (drm_mm_node_allocated(&cache->node)) {
			ggtt->vm.clear_range(&ggtt->vm,
					     cache->node.start,
					     cache->node.size);
			mutex_lock(&ggtt->vm.mutex);
			drm_mm_remove_node(&cache->node);
			mutex_unlock(&ggtt->vm.mutex);
		} else {
			i915_vma_unpin((struct i915_vma *)cache->node.mm);
		}
	}

	cache->vaddr = 0;
	cache->page = -1;
}

static void *reloc_kmap(struct drm_i915_gem_object *obj,
			struct reloc_cache *cache,
			unsigned long pageno)
{
	void *vaddr;
	struct page *page;

	if (cache->vaddr) {
		kunmap_atomic(unmask_page(cache->vaddr));
	} else {
		unsigned int flushes;
		int err;

		err = i915_gem_object_prepare_write(obj, &flushes);
		if (err)
			return ERR_PTR(err);

		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);

		cache->vaddr = flushes | KMAP;
		cache->node.mm = (void *)obj;
		if (flushes)
			mb();
	}

	page = i915_gem_object_get_page(obj, pageno);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	vaddr = kmap_atomic(page);
	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
	cache->page = pageno;

	return vaddr;
}

static void *reloc_iomap(struct drm_i915_gem_object *obj,
			 struct i915_execbuffer *eb,
			 unsigned long page)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
	unsigned long offset;
	void *vaddr;

	if (cache->vaddr) {
		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
	} else {
		struct i915_vma *vma;
		int err;

		if (i915_gem_object_is_tiled(obj))
			return ERR_PTR(-EINVAL);

		if (use_cpu_reloc(cache, obj))
			return NULL;

		err = i915_gem_object_set_to_gtt_domain(obj, true);
		if (err)
			return ERR_PTR(err);

		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
						  PIN_MAPPABLE |
						  PIN_NONBLOCK /* NOWARN */ |
						  PIN_NOEVICT);
		if (vma == ERR_PTR(-EDEADLK))
			return vma;

		if (IS_ERR(vma)) {
			memset(&cache->node, 0, sizeof(cache->node));
			mutex_lock(&ggtt->vm.mutex);
			err = drm_mm_insert_node_in_range
				(&ggtt->vm.mm, &cache->node,
				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
				 0, ggtt->mappable_end,
				 DRM_MM_INSERT_LOW);
			mutex_unlock(&ggtt->vm.mutex);
			if (err) /* no inactive aperture space, use cpu reloc */
				return NULL;
		} else {
			cache->node.start = vma->node.start;
			cache->node.mm = (void *)vma;
		}
	}

	offset = cache->node.start;
	if (drm_mm_node_allocated(&cache->node)) {
		ggtt->vm.insert_page(&ggtt->vm,
				     i915_gem_object_get_dma_address(obj, page),
				     offset, I915_CACHE_NONE, 0);
	} else {
		offset += page << PAGE_SHIFT;
	}

	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
							 offset);
	cache->page = page;
	cache->vaddr = (unsigned long)vaddr;

	return vaddr;
}

static void *reloc_vaddr(struct drm_i915_gem_object *obj,
			 struct i915_execbuffer *eb,
			 unsigned long page)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	void *vaddr;

	if (cache->page == page) {
		vaddr = unmask_page(cache->vaddr);
	} else {
		vaddr = NULL;
		if ((cache->vaddr & KMAP) == 0)
			vaddr = reloc_iomap(obj, eb, page);
		if (!vaddr)
			vaddr = reloc_kmap(obj, cache, page);
	}

	return vaddr;
}

static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
		if (flushes & CLFLUSH_BEFORE) {
			clflushopt(addr);
			mb();
		}

		*addr = value;

		/*
		 * Writes to the same cacheline are serialised by the CPU
		 * (including clflush). On the write path, we only require
		 * that it hits memory in an orderly fashion and place
		 * mb barriers at the start and end of the relocation phase
		 * to ensure ordering of clflush wrt the system.
		 */
		if (flushes & CLFLUSH_AFTER)
			clflushopt(addr);
	} else
		*addr = value;
}

static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	assert_vma_held(vma);

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);
	obj->write_domain = 0;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

	return err;
}

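/*
 * Set up the GPU relocation path: grab a page from the gt buffer pool to
 * hold relocation commands, map it (WB on LLC platforms, WC otherwise) and
 * open a request, either on eb->context or on a separate context when the
 * current engine cannot relocate. A batchbuffer-start pointing at the pool
 * page is emitted into the request; the relocation commands themselves are
 * appended through reloc_gpu() and the request is only submitted from
 * reloc_gpu_flush().
 */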
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
			     struct intel_engine_cs *engine,
			     struct i915_vma *vma,
			     unsigned int len)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cmd;
	int err;

	if (!pool) {
		pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
		if (IS_ERR(pool))
			return PTR_ERR(pool);
	}
	eb->reloc_pool = NULL;

	err = i915_gem_object_lock(pool->obj, &eb->ww);
	if (err)
		goto err_pool;

	cmd = i915_gem_object_pin_map(pool->obj,
				      cache->has_llc ?
				      I915_MAP_FORCE_WB :
				      I915_MAP_FORCE_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err_pool;
	}

	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unmap;
	}

	err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
	if (err)
		goto err_unmap;

	if (engine == eb->context->engine) {
		rq = i915_request_create(eb->context);
	} else {
		struct intel_context *ce = eb->reloc_context;

		if (!ce) {
			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto err_unpin;
			}

			i915_vm_put(ce->vm);
			ce->vm = i915_vm_get(eb->context->vm);
			eb->reloc_context = ce;
		}

		err = intel_context_pin_ww(ce, &eb->ww);
		if (err)
			goto err_unpin;

		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	}
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = intel_gt_buffer_pool_mark_active(pool, rq);
	if (err)
		goto err_request;

	err = reloc_move_to_gpu(rq, vma);
	if (err)
		goto err_request;

	err = eb->engine->emit_bb_start(rq,
					batch->node.start, PAGE_SIZE,
					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
	if (err)
		goto skip_request;

	assert_vma_held(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	rq->batch = batch;
	i915_vma_unpin(batch);

	cache->rq = rq;
	cache->rq_cmd = cmd;
	cache->rq_size = 0;
	cache->pool = pool;

	/* Return with batch mapping (cmd) still pinned */
	return 0;

skip_request:
	i915_request_set_error_once(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(batch);
err_unmap:
	i915_gem_object_unpin_map(pool->obj);
err_pool:
	eb->reloc_pool = pool;
	return err;
}

static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
{
	return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
}

static u32 *reloc_gpu(struct i915_execbuffer *eb,
		      struct i915_vma *vma,
		      unsigned int len)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	u32 *cmd;

	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
		reloc_gpu_flush(eb, cache);

	if (unlikely(!cache->rq)) {
		int err;
		struct intel_engine_cs *engine = eb->engine;

		if (!reloc_can_use_engine(engine)) {
			engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
			if (!engine)
				return ERR_PTR(-ENODEV);
		}

		err = __reloc_gpu_alloc(eb, engine, vma, len);
		if (unlikely(err))
			return ERR_PTR(err);
	}

	cmd = cache->rq_cmd + cache->rq_size;
	cache->rq_size += len;

	return cmd;
}

static inline bool use_reloc_gpu(struct i915_vma *vma)
{
	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
		return true;

	if (DBG_FORCE_RELOC)
		return false;

	return !dma_resv_test_signaled_rcu(vma->resv, true);
}

static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
{
	struct page *page;
	unsigned long addr;

	GEM_BUG_ON(vma->pages != vma->obj->mm.pages);

	page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
	addr = PFN_PHYS(page_to_pfn(page));
	GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */

	return addr + offset_in_page(offset);
}

static int __reloc_entry_gpu(struct i915_execbuffer *eb,
			     struct i915_vma *vma,
			     u64 offset,
			     u64 target_addr)
{
	const unsigned int gen = eb->reloc_cache.gen;
	unsigned int len;
	u32 *batch;
	u64 addr;

	if (gen >= 8)
		len = offset & 7 ? 8 : 5;
	else if (gen >= 4)
		len = 4;
	else
		len = 3;

	batch = reloc_gpu(eb, vma, len);
	if (batch == ERR_PTR(-EDEADLK))
		return -EDEADLK;
	else if (IS_ERR(batch))
		return false;

	addr = gen8_canonical_addr(vma->node.start + offset);
	if (gen >= 8) {
		if (offset & 7) {
			*batch++ = MI_STORE_DWORD_IMM_GEN4;
			*batch++ = lower_32_bits(addr);
			*batch++ = upper_32_bits(addr);
			*batch++ = lower_32_bits(target_addr);

			addr = gen8_canonical_addr(addr + 4);

			*batch++ = MI_STORE_DWORD_IMM_GEN4;
			*batch++ = lower_32_bits(addr);
			*batch++ = upper_32_bits(addr);
			*batch++ = upper_32_bits(target_addr);
		} else {
			*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
			*batch++ = lower_32_bits(addr);
			*batch++ = upper_32_bits(addr);
			*batch++ = lower_32_bits(target_addr);
			*batch++ = upper_32_bits(target_addr);
		}
	} else if (gen >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = addr;
		*batch++ = target_addr;
	} else if (IS_I965G(eb->i915)) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = vma_phys_addr(vma, offset);
		*batch++ = target_addr;
	} else if (gen >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*batch++ = 0;
		*batch++ = addr;
		*batch++ = target_addr;
	} else if (gen >= 3 &&
		   !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*batch++ = addr;
		*batch++ = target_addr;
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = vma_phys_addr(vma, offset);
		*batch++ = target_addr;
	}

	return true;
}

static int reloc_entry_gpu(struct i915_execbuffer *eb,
			   struct i915_vma *vma,
			   u64 offset,
			   u64 target_addr)
{
	if (eb->reloc_cache.vaddr)
		return false;

	if (!use_reloc_gpu(vma))
		return false;

	return __reloc_entry_gpu(eb, vma, offset, target_addr);
}

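/*
 * Apply a single relocation: prefer patching via the GPU (so we do not have
 * to stall on a busy object), otherwise map the page with kmap or through
 * the GGTT aperture and write the lower 32 bits, repeating once for the
 * upper half on platforms using 64-bit relocations.
 */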
static u64
relocate_entry(struct i915_vma *vma,
	       const struct drm_i915_gem_relocation_entry *reloc,
	       struct i915_execbuffer *eb,
	       const struct i915_vma *target)
{
	u64 target_addr = relocation_target(reloc, target);
	u64 offset = reloc->offset;
	int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);

	if (reloc_gpu < 0)
		return reloc_gpu;

	if (!reloc_gpu) {
		bool wide = eb->reloc_cache.use_64bit_reloc;
		void *vaddr;

repeat:
		vaddr = reloc_vaddr(vma->obj, eb,
				    offset >> PAGE_SHIFT);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);

		GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
		clflush_write32(vaddr + offset_in_page(offset),
				lower_32_bits(target_addr),
				eb->reloc_cache.vaddr);

		if (wide) {
			offset += sizeof(u32);
			target_addr >>= 32;
			wide = false;
			goto repeat;
		}
	}

	return target->node.start | UPDATE;
}

static u64
eb_relocate_entry(struct i915_execbuffer *eb,
		  struct eb_vma *ev,
		  const struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_i915_private *i915 = eb->i915;
	struct eb_vma *target;
	int err;

	/* we already hold a reference to all valid objects */
	target = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(!target))
		return -ENOENT;

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		drm_dbg(&i915->drm, "reloc with multiple write domains: "
			"target %d offset %d "
			"read %08x write %08x",
			reloc->target_handle,
			(int) reloc->offset,
			reloc->read_domains,
			reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
			"target %d offset %d "
			"read %08x write %08x",
			reloc->target_handle,
			(int) reloc->offset,
			reloc->read_domains,
			reloc->write_domain);
		return -EINVAL;
	}

	if (reloc->write_domain) {
		target->flags |= EXEC_OBJECT_WRITE;

		/*
		 * Sandybridge PPGTT errata: We need a global gtt mapping
		 * for MI and pipe_control writes because the gpu doesn't
		 * properly redirect them through the ppgtt for non_secure
		 * batchbuffers.
		 */
		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
		    IS_GEN(eb->i915, 6)) {
			err = i915_vma_bind(target->vma,
					    target->vma->obj->cache_level,
					    PIN_GLOBAL, NULL);
			if (err)
				return err;
		}
	}

	/*
	 * If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (!DBG_FORCE_RELOC &&
	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		     ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
		drm_dbg(&i915->drm, "Relocation beyond object bounds: "
			"target %d offset %d size %d.\n",
			reloc->target_handle,
			(int)reloc->offset,
			(int)ev->vma->size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "
			"target %d offset %d.\n",
			reloc->target_handle,
			(int)reloc->offset);
		return -EINVAL;
	}

	/*
	 * If we write into the object, we need to force the synchronisation
	 * barrier, either with an asynchronous clflush or if we executed the
	 * patching using the GPU (though that should be serialised by the
	 * timeline). To be completely sure, and since we are required to
	 * do relocations we are already stalling, disable the user's opt
	 * out of our synchronisation.
	 */
	ev->flags &= ~EXEC_OBJECT_ASYNC;

	/* and update the user's relocation entry */
	return relocate_entry(ev->vma, reloc, eb, target->vma);
}

7d6236bb 1665static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
54cf91dc 1666{
1d83f442 1667#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
2889caa9 1668 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
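/* a 512-byte on-stack window, i.e. 16 relocation entries of 32 bytes each */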
7d6236bb 1669 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
e94f7856
CW
1670 struct drm_i915_gem_relocation_entry __user *urelocs =
1671 u64_to_user_ptr(entry->relocs_ptr);
1672 unsigned long remain = entry->relocation_count;
54cf91dc 1673
e94f7856 1674 if (unlikely(remain > N_RELOC(ULONG_MAX)))
2889caa9 1675 return -EINVAL;
ebc0808f 1676
2889caa9
CW
1677 /*
1678 * We must check that the entire relocation array is safe
1679 * to read. However, if the array is not writable the user loses
1680 * the updated relocation values.
1681 */
e94f7856 1682 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
2889caa9
CW
1683 return -EFAULT;
1684
1685 do {
1686 struct drm_i915_gem_relocation_entry *r = stack;
1687 unsigned int count =
e94f7856 1688 min_t(unsigned long, remain, ARRAY_SIZE(stack));
2889caa9 1689 unsigned int copied;
1d83f442 1690
2889caa9
CW
1691 /*
1692 * This is the fast path and we cannot handle a pagefault
ebc0808f
CW
1693 * whilst holding the struct mutex lest the user pass in the
 1694 * relocations contained within an mmapped bo. In such a case,
 1695 * the page fault handler would call i915_gem_fault() and we
 1696 * would try to acquire the struct mutex again. Obviously
1697 * this is bad and so lockdep complains vehemently.
1698 */
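/*
 * With pagefaults disabled, __copy_from_user_inatomic() returns the
 * number of bytes left uncopied; any fault aborts the chunk and we
 * fall back to the slow path via -EFAULT.
 */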
fd1500fc
ML
1699 pagefault_disable();
1700 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1701 pagefault_enable();
ad5d95e4
DA
1702 if (unlikely(copied)) {
1703 remain = -EFAULT;
1704 goto out;
1705 }
54cf91dc 1706
2889caa9 1707 remain -= count;
1d83f442 1708 do {
7d6236bb 1709 u64 offset = eb_relocate_entry(eb, ev, r);
54cf91dc 1710
2889caa9
CW
1711 if (likely(offset == 0)) {
1712 } else if ((s64)offset < 0) {
ad5d95e4
DA
1713 remain = (int)offset;
1714 goto out;
2889caa9
CW
1715 } else {
1716 /*
1717 * Note that reporting an error now
1718 * leaves everything in an inconsistent
1719 * state as we have *already* changed
1720 * the relocation value inside the
1721 * object. As we have not changed the
 1722 * reloc.presumed_offset and will not
 1723 * change the execobject.offset, on the
 1724 * next call we may not rewrite the value
 1725 * inside the object, leaving it
 1726 * dangling and causing a GPU hang, unless
 1727 * userspace dynamically rebuilds the
 1728 * relocations on each execbuf rather than
 1729 * presuming a static tree.
1730 *
1731 * We did previously check if the relocations
1732 * were writable (access_ok), an error now
1733 * would be a strange race with mprotect,
1734 * having already demonstrated that we
1735 * can read from this userspace address.
1736 */
1737 offset = gen8_canonical_addr(offset & ~UPDATE);
97a37c91
CW
1738 __put_user(offset,
1739 &urelocs[r - stack].presumed_offset);
1d83f442 1740 }
2889caa9
CW
1741 } while (r++, --count);
1742 urelocs += ARRAY_SIZE(stack);
1743 } while (remain);
ad5d95e4 1744out:
c43ce123 1745 reloc_cache_reset(&eb->reloc_cache, eb);
ad5d95e4 1746 return remain;
54cf91dc
CW
1747}
1748
fd1500fc
ML
1749static int
1750eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
54cf91dc 1751{
fd1500fc
ML
1752 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1753 struct drm_i915_gem_relocation_entry *relocs =
1754 u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1755 unsigned int i;
003d8b91
CW
1756 int err;
1757
fd1500fc
ML
1758 for (i = 0; i < entry->relocation_count; i++) {
1759 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
003d8b91 1760
fd1500fc
ML
1761 if ((s64)offset < 0) {
1762 err = (int)offset;
1763 goto err;
1764 }
ef398881 1765 }
fd1500fc
ML
1766 err = 0;
1767err:
c43ce123 1768 reloc_cache_reset(&eb->reloc_cache, eb);
fd1500fc
ML
1769 return err;
1770}
2889caa9 1771
fd1500fc
ML
1772static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1773{
1774 const char __user *addr, *end;
1775 unsigned long size;
1776 char __maybe_unused c;
2889caa9 1777
fd1500fc
ML
1778 size = entry->relocation_count;
1779 if (size == 0)
1780 return 0;
0e97fbb0 1781
fd1500fc
ML
1782 if (size > N_RELOC(ULONG_MAX))
1783 return -EINVAL;
2889caa9 1784
fd1500fc
ML
1785 addr = u64_to_user_ptr(entry->relocs_ptr);
1786 size *= sizeof(struct drm_i915_gem_relocation_entry);
1787 if (!access_ok(addr, size))
1788 return -EFAULT;
1789
1790 end = addr + size;
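/*
 * Read one byte per page (and the final byte) so the whole user
 * relocation array is faulted in and known readable before the
 * pagefault-disabled fast path or the bulk copy runs.
 */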
1791 for (; addr < end; addr += PAGE_SIZE) {
1792 int err = __get_user(c, addr);
1793 if (err)
1794 return err;
1795 }
1796 return __get_user(c, end - 1);
2889caa9
CW
1797}
1798
fd1500fc 1799static int eb_copy_relocations(const struct i915_execbuffer *eb)
2889caa9 1800{
fd1500fc 1801 struct drm_i915_gem_relocation_entry *relocs;
2889caa9
CW
1802 const unsigned int count = eb->buffer_count;
1803 unsigned int i;
fd1500fc 1804 int err;
54cf91dc 1805
2889caa9 1806 for (i = 0; i < count; i++) {
fd1500fc
ML
1807 const unsigned int nreloc = eb->exec[i].relocation_count;
1808 struct drm_i915_gem_relocation_entry __user *urelocs;
1809 unsigned long size;
1810 unsigned long copied;
6951e589 1811
fd1500fc
ML
1812 if (nreloc == 0)
1813 continue;
6951e589 1814
fd1500fc
ML
1815 err = check_relocations(&eb->exec[i]);
1816 if (err)
1817 goto err;
6951e589 1818
fd1500fc
ML
1819 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1820 size = nreloc * sizeof(*relocs);
6951e589 1821
fd1500fc
ML
1822 relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1823 if (!relocs) {
1824 err = -ENOMEM;
1825 goto err;
6951e589 1826 }
fd1500fc
ML
1827
1828 /* copy_from_user is limited to < 4GiB */
1829 copied = 0;
1830 do {
1831 unsigned int len =
1832 min_t(u64, BIT_ULL(31), size - copied);
1833
1834 if (__copy_from_user((char *)relocs + copied,
1835 (char __user *)urelocs + copied,
1836 len))
1837 goto end;
1838
1839 copied += len;
1840 } while (copied < size);
1841
1842 /*
1843 * As we do not update the known relocation offsets after
1844 * relocating (due to the complexities in lock handling),
1845 * we need to mark them as invalid now so that we force the
1846 * relocation processing next time. Just in case the target
1847 * object is evicted and then rebound into its old
1848 * presumed_offset before the next execbuffer - if that
1849 * happened we would make the mistake of assuming that the
1850 * relocations were valid.
1851 */
1852 if (!user_access_begin(urelocs, size))
1853 goto end;
1854
1855 for (copied = 0; copied < nreloc; copied++)
1856 unsafe_put_user(-1,
1857 &urelocs[copied].presumed_offset,
1858 end_user);
1859 user_access_end();
1860
1861 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1862 }
1863
1864 return 0;
1865
1866end_user:
1867 user_access_end();
1868end:
1869 kvfree(relocs);
1870 err = -EFAULT;
1871err:
1872 while (i--) {
1873 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1874 if (eb->exec[i].relocation_count)
1875 kvfree(relocs);
1876 }
1877 return err;
1878}
1879
1880static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1881{
1882 const unsigned int count = eb->buffer_count;
1883 unsigned int i;
1884
1885 for (i = 0; i < count; i++) {
1886 int err;
1887
1888 err = check_relocations(&eb->exec[i]);
1889 if (err)
1890 return err;
1891 }
1892
1893 return 0;
1894}
1895
2bf541ff
ML
1896static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
1897 struct i915_request *rq)
fd1500fc
ML
1898{
1899 bool have_copy = false;
1900 struct eb_vma *ev;
1901 int err = 0;
1902
1903repeat:
1904 if (signal_pending(current)) {
1905 err = -ERESTARTSYS;
1906 goto out;
6951e589 1907 }
fd1500fc 1908
c43ce123
ML
1909 /* We may process another execbuffer during the unlock... */
1910 eb_release_vmas(eb, false);
1911 i915_gem_ww_ctx_fini(&eb->ww);
1912
2bf541ff
ML
1913 if (rq) {
1914 /* nonblocking is always false */
1915 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
1916 MAX_SCHEDULE_TIMEOUT) < 0) {
1917 i915_request_put(rq);
1918 rq = NULL;
1919
1920 err = -EINTR;
1921 goto err_relock;
1922 }
1923
1924 i915_request_put(rq);
1925 rq = NULL;
1926 }
1927
fd1500fc
ML
1928 /*
 1929 * We take 3 passes through the slowpath.
1930 *
1931 * 1 - we try to just prefault all the user relocation entries and
1932 * then attempt to reuse the atomic pagefault disabled fast path again.
1933 *
 1934 * 2 - we copy the user entries to a local buffer here, outside of the
 1935 * pagefault-disabled section, and allow ourselves to wait upon any
 1936 * rendering before performing the relocations
1937 *
1938 * 3 - we already have a local copy of the relocation entries, but
1939 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1940 */
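/*
 * err carries the outcome of the previous attempt: the first pass
 * prefaults, a later failure without a local copy switches to copying
 * the entries, and once we do hold a copy we merely yield the CPU
 * before retrying.
 */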
1941 if (!err) {
1942 err = eb_prefault_relocations(eb);
1943 } else if (!have_copy) {
1944 err = eb_copy_relocations(eb);
1945 have_copy = err == 0;
1946 } else {
1947 cond_resched();
1948 err = 0;
1949 }
1950
2bf541ff
ML
1951 if (!err)
1952 flush_workqueue(eb->i915->mm.userptr_wq);
fd1500fc 1953
2bf541ff 1954err_relock:
c43ce123 1955 i915_gem_ww_ctx_init(&eb->ww, true);
fd1500fc
ML
1956 if (err)
1957 goto out;
1958
c43ce123
ML
1959 /* reacquire the objects */
1960repeat_validate:
2bf541ff
ML
1961 rq = eb_pin_engine(eb, false);
1962 if (IS_ERR(rq)) {
1963 err = PTR_ERR(rq);
47b08693 1964 rq = NULL;
2bf541ff
ML
1965 goto err;
1966 }
1967
1968 /* We didn't throttle, should be NULL */
1969 GEM_WARN_ON(rq);
1970
c43ce123 1971 err = eb_validate_vmas(eb);
fd1500fc 1972 if (err)
c43ce123
ML
1973 goto err;
1974
1975 GEM_BUG_ON(!eb->batch);
fd1500fc
ML
1976
1977 list_for_each_entry(ev, &eb->relocs, reloc_link) {
1978 if (!have_copy) {
1979 pagefault_disable();
1980 err = eb_relocate_vma(eb, ev);
1981 pagefault_enable();
1982 if (err)
1983 break;
1984 } else {
1985 err = eb_relocate_vma_slow(eb, ev);
1986 if (err)
1987 break;
1988 }
1989 }
1990
c43ce123
ML
1991 if (err == -EDEADLK)
1992 goto err;
1993
fd1500fc
ML
1994 if (err && !have_copy)
1995 goto repeat;
1996
1997 if (err)
1998 goto err;
1999
8e4ba491
ML
 2000 /* as the last step, parse the command buffer */
2001 err = eb_parse(eb);
2002 if (err)
2003 goto err;
2004
fd1500fc
ML
2005 /*
2006 * Leave the user relocations as are, this is the painfully slow path,
2007 * and we want to avoid the complication of dropping the lock whilst
2008 * having buffers reserved in the aperture and so causing spurious
2009 * ENOSPC for random operations.
2010 */
2011
2012err:
c43ce123
ML
2013 if (err == -EDEADLK) {
2014 eb_release_vmas(eb, false);
2015 err = i915_gem_ww_ctx_backoff(&eb->ww);
2016 if (!err)
2017 goto repeat_validate;
2018 }
2019
fd1500fc
ML
2020 if (err == -EAGAIN)
2021 goto repeat;
2022
2023out:
2024 if (have_copy) {
2025 const unsigned int count = eb->buffer_count;
2026 unsigned int i;
2027
2028 for (i = 0; i < count; i++) {
2029 const struct drm_i915_gem_exec_object2 *entry =
2030 &eb->exec[i];
2031 struct drm_i915_gem_relocation_entry *relocs;
2032
2033 if (!entry->relocation_count)
2034 continue;
2035
2036 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
2037 kvfree(relocs);
2038 }
2039 }
2040
2bf541ff
ML
2041 if (rq)
2042 i915_request_put(rq);
2043
fd1500fc
ML
2044 return err;
2045}
2046
8e4ba491 2047static int eb_relocate_parse(struct i915_execbuffer *eb)
54cf91dc 2048{
003d8b91 2049 int err;
2bf541ff
ML
2050 struct i915_request *rq = NULL;
2051 bool throttle = true;
003d8b91 2052
c43ce123 2053retry:
2bf541ff
ML
2054 rq = eb_pin_engine(eb, throttle);
2055 if (IS_ERR(rq)) {
2056 err = PTR_ERR(rq);
2057 rq = NULL;
2058 if (err != -EDEADLK)
2059 return err;
2060
2061 goto err;
2062 }
2063
2064 if (rq) {
2065 bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
2066
2067 /* Need to drop all locks now for throttling, take slowpath */
2068 err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
2069 if (err == -ETIME) {
2070 if (nonblock) {
2071 err = -EWOULDBLOCK;
2072 i915_request_put(rq);
2073 goto err;
2074 }
2075 goto slow;
2076 }
2077 i915_request_put(rq);
2078 rq = NULL;
2079 }
2080
2081 /* only throttle once, even if we didn't need to throttle */
2082 throttle = false;
2083
c43ce123
ML
2084 err = eb_validate_vmas(eb);
2085 if (err == -EAGAIN)
2086 goto slow;
2087 else if (err)
2088 goto err;
2889caa9
CW
2089
2090 /* The objects are in their final locations, apply the relocations. */
2091 if (eb->args->flags & __EXEC_HAS_RELOC) {
7d6236bb 2092 struct eb_vma *ev;
2889caa9 2093
7d6236bb 2094 list_for_each_entry(ev, &eb->relocs, reloc_link) {
7dc8f114
CW
2095 err = eb_relocate_vma(eb, ev);
2096 if (err)
fd1500fc 2097 break;
2889caa9 2098 }
fd1500fc 2099
c43ce123
ML
2100 if (err == -EDEADLK)
2101 goto err;
2102 else if (err)
2103 goto slow;
2104 }
2105
2106 if (!err)
2107 err = eb_parse(eb);
2108
2109err:
2110 if (err == -EDEADLK) {
2111 eb_release_vmas(eb, false);
2112 err = i915_gem_ww_ctx_backoff(&eb->ww);
2113 if (!err)
2114 goto retry;
2889caa9
CW
2115 }
2116
c43ce123
ML
2117 return err;
2118
2119slow:
2bf541ff 2120 err = eb_relocate_parse_slow(eb, rq);
c43ce123
ML
2121 if (err)
2122 /*
2123 * If the user expects the execobject.offset and
2124 * reloc.presumed_offset to be an exact match,
2125 * as for using NO_RELOC, then we cannot update
2126 * the execobject.offset until we have completed
2127 * relocation.
2128 */
2129 eb->args->flags &= ~__EXEC_HAS_RELOC;
2130
2131 return err;
2889caa9
CW
2132}
2133
2889caa9
CW
2134static int eb_move_to_gpu(struct i915_execbuffer *eb)
2135{
2136 const unsigned int count = eb->buffer_count;
c43ce123 2137 unsigned int i = count;
6951e589 2138 int err = 0;
6951e589
CW
2139
2140 while (i--) {
7d6236bb
CW
2141 struct eb_vma *ev = &eb->vma[i];
2142 struct i915_vma *vma = ev->vma;
2143 unsigned int flags = ev->flags;
27173f1f 2144 struct drm_i915_gem_object *obj = vma->obj;
03ade511 2145
6951e589
CW
2146 assert_vma_held(vma);
2147
c7c6e46f 2148 if (flags & EXEC_OBJECT_CAPTURE) {
e61e0f51 2149 struct i915_capture_list *capture;
b0fd47ad
CW
2150
2151 capture = kmalloc(sizeof(*capture), GFP_KERNEL);
6951e589
CW
2152 if (capture) {
2153 capture->next = eb->request->capture_list;
2154 capture->vma = vma;
2155 eb->request->capture_list = capture;
2156 }
b0fd47ad
CW
2157 }
2158
b8f55be6
CW
2159 /*
2160 * If the GPU is not _reading_ through the CPU cache, we need
2161 * to make sure that any writes (both previous GPU writes from
2162 * before a change in snooping levels and normal CPU writes)
2163 * caught in that cache are flushed to main memory.
2164 *
2165 * We want to say
2166 * obj->cache_dirty &&
2167 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2168 * but gcc's optimiser doesn't handle that as well and emits
2169 * two jumps instead of one. Maybe one day...
2170 */
2171 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
0f46daa1 2172 if (i915_gem_clflush_object(obj, 0))
c7c6e46f 2173 flags &= ~EXEC_OBJECT_ASYNC;
0f46daa1
CW
2174 }
2175
6951e589
CW
2176 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
2177 err = i915_request_await_object
2178 (eb->request, obj, flags & EXEC_OBJECT_WRITE);
2179 }
2889caa9 2180
6951e589
CW
2181 if (err == 0)
2182 err = i915_vma_move_to_active(vma, eb->request, flags);
c59a333f 2183 }
0f1dd022 2184
6951e589
CW
2185 if (unlikely(err))
2186 goto err_skip;
2187
dcd79934 2188 /* Unconditionally flush any chipset caches (for streaming writes). */
baea429d 2189 intel_gt_chipset_flush(eb->engine->gt);
2113184c 2190 return 0;
6951e589
CW
2191
2192err_skip:
36e191f0 2193 i915_request_set_error_once(eb->request, err);
6951e589 2194 return err;
54cf91dc
CW
2195}
2196
00aff3f6 2197static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
54cf91dc 2198{
650bc635 2199 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
00aff3f6 2200 return -EINVAL;
ed5982e6 2201
2f5945bc 2202 /* Kernel clipping was a DRI1 misfeature */
cda9edd0
LL
2203 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
2204 I915_EXEC_USE_EXTENSIONS))) {
cf6e7bac 2205 if (exec->num_cliprects || exec->cliprects_ptr)
00aff3f6 2206 return -EINVAL;
cf6e7bac 2207 }
2f5945bc
CW
2208
2209 if (exec->DR4 == 0xffffffff) {
2210 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
2211 exec->DR4 = 0;
2212 }
2213 if (exec->DR1 || exec->DR4)
00aff3f6 2214 return -EINVAL;
2f5945bc
CW
2215
2216 if ((exec->batch_start_offset | exec->batch_len) & 0x7)
00aff3f6 2217 return -EINVAL;
2f5945bc 2218
00aff3f6 2219 return 0;
54cf91dc
CW
2220}
2221
e61e0f51 2222static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
ae662d31 2223{
73dec95e
TU
2224 u32 *cs;
2225 int i;
ae662d31 2226
5a833995
CW
2227 if (!IS_GEN(rq->engine->i915, 7) || rq->engine->id != RCS0) {
2228 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
9d662da8
DV
2229 return -EINVAL;
2230 }
ae662d31 2231
e61e0f51 2232 cs = intel_ring_begin(rq, 4 * 2 + 2);
73dec95e
TU
2233 if (IS_ERR(cs))
2234 return PTR_ERR(cs);
ae662d31 2235
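/*
 * A single MI_LOAD_REGISTER_IMM(4) zeroes the four GEN7_SO_WRITE_OFFSET
 * registers; together with the trailing MI_NOOP this fills exactly the
 * 4 * 2 + 2 dwords reserved by intel_ring_begin() above.
 */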
2889caa9 2236 *cs++ = MI_LOAD_REGISTER_IMM(4);
ae662d31 2237 for (i = 0; i < 4; i++) {
73dec95e
TU
2238 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
2239 *cs++ = 0;
ae662d31 2240 }
2889caa9 2241 *cs++ = MI_NOOP;
e61e0f51 2242 intel_ring_advance(rq, cs);
ae662d31
EA
2243
2244 return 0;
2245}
2246
4f7af194 2247static struct i915_vma *
47b08693
ML
2248shadow_batch_pin(struct i915_execbuffer *eb,
2249 struct drm_i915_gem_object *obj,
32d94048
CW
2250 struct i915_address_space *vm,
2251 unsigned int flags)
4f7af194 2252{
b291ce0a 2253 struct i915_vma *vma;
b291ce0a 2254 int err;
4f7af194 2255
b291ce0a
CW
2256 vma = i915_vma_instance(obj, vm, NULL);
2257 if (IS_ERR(vma))
2258 return vma;
2259
47b08693 2260 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
b291ce0a
CW
2261 if (err)
2262 return ERR_PTR(err);
2263
2264 return vma;
4f7af194
JB
2265}
2266
686c7c35
CW
2267struct eb_parse_work {
2268 struct dma_fence_work base;
2269 struct intel_engine_cs *engine;
2270 struct i915_vma *batch;
2271 struct i915_vma *shadow;
2272 struct i915_vma *trampoline;
c60b93cd
CW
2273 unsigned long batch_offset;
2274 unsigned long batch_length;
686c7c35
CW
2275};
2276
2277static int __eb_parse(struct dma_fence_work *work)
2278{
2279 struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
2280
2281 return intel_engine_cmd_parser(pw->engine,
2282 pw->batch,
2283 pw->batch_offset,
2284 pw->batch_length,
2285 pw->shadow,
2286 pw->trampoline);
2287}
2288
36c8e356
CW
2289static void __eb_parse_release(struct dma_fence_work *work)
2290{
2291 struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
2292
2293 if (pw->trampoline)
2294 i915_active_release(&pw->trampoline->active);
2295 i915_active_release(&pw->shadow->active);
2296 i915_active_release(&pw->batch->active);
2297}
2298
686c7c35
CW
2299static const struct dma_fence_work_ops eb_parse_ops = {
2300 .name = "eb_parse",
2301 .work = __eb_parse,
36c8e356 2302 .release = __eb_parse_release,
686c7c35
CW
2303};
2304
57a78ca4
CW
2305static inline int
2306__parser_mark_active(struct i915_vma *vma,
2307 struct intel_timeline *tl,
2308 struct dma_fence *fence)
2309{
2310 struct intel_gt_buffer_pool_node *node = vma->private;
2311
5d934137 2312 return i915_active_ref(&node->active, tl->fence_context, fence);
57a78ca4
CW
2313}
2314
2315static int
2316parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
2317{
2318 int err;
2319
2320 mutex_lock(&tl->mutex);
2321
2322 err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
2323 if (err)
2324 goto unlock;
2325
2326 if (pw->trampoline) {
2327 err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
2328 if (err)
2329 goto unlock;
2330 }
2331
2332unlock:
2333 mutex_unlock(&tl->mutex);
2334 return err;
2335}
2336
686c7c35
CW
2337static int eb_parse_pipeline(struct i915_execbuffer *eb,
2338 struct i915_vma *shadow,
2339 struct i915_vma *trampoline)
2340{
2341 struct eb_parse_work *pw;
2342 int err;
2343
c60b93cd
CW
2344 GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
2345 GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
2346
686c7c35
CW
2347 pw = kzalloc(sizeof(*pw), GFP_KERNEL);
2348 if (!pw)
2349 return -ENOMEM;
2350
7d6236bb 2351 err = i915_active_acquire(&eb->batch->vma->active);
36c8e356
CW
2352 if (err)
2353 goto err_free;
2354
2355 err = i915_active_acquire(&shadow->active);
2356 if (err)
2357 goto err_batch;
2358
2359 if (trampoline) {
2360 err = i915_active_acquire(&trampoline->active);
2361 if (err)
2362 goto err_shadow;
2363 }
2364
686c7c35
CW
2365 dma_fence_work_init(&pw->base, &eb_parse_ops);
2366
2367 pw->engine = eb->engine;
7d6236bb 2368 pw->batch = eb->batch->vma;
686c7c35
CW
2369 pw->batch_offset = eb->batch_start_offset;
2370 pw->batch_length = eb->batch_len;
2371 pw->shadow = shadow;
2372 pw->trampoline = trampoline;
2373
57a78ca4
CW
2374 /* Mark active refs early for this worker, in case we get interrupted */
2375 err = parser_mark_active(pw, eb->context->timeline);
2376 if (err)
2377 goto err_commit;
2378
686c7c35
CW
2379 err = dma_resv_reserve_shared(pw->batch->resv, 1);
2380 if (err)
c43ce123 2381 goto err_commit;
686c7c35
CW
2382
2383 /* Wait for all writes (and relocs) into the batch to complete */
2384 err = i915_sw_fence_await_reservation(&pw->base.chain,
2385 pw->batch->resv, NULL, false,
2386 0, I915_FENCE_GFP);
2387 if (err < 0)
c43ce123 2388 goto err_commit;
686c7c35
CW
2389
2390 /* Keep the batch alive and unwritten as we parse */
2391 dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
2392
686c7c35 2393 /* Force execution to wait for completion of the parser */
686c7c35 2394 dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
686c7c35 2395
92581f9f 2396 dma_fence_work_commit_imm(&pw->base);
686c7c35
CW
2397 return 0;
2398
57a78ca4
CW
2399err_commit:
2400 i915_sw_fence_set_error_once(&pw->base.chain, err);
2401 dma_fence_work_commit_imm(&pw->base);
2402 return err;
2403
36c8e356
CW
2404err_shadow:
2405 i915_active_release(&shadow->active);
2406err_batch:
7d6236bb 2407 i915_active_release(&eb->batch->vma->active);
36c8e356 2408err_free:
686c7c35
CW
2409 kfree(pw);
2410 return err;
2411}
2412
47b08693
ML
2413static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2414{
2415 /*
2416 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2417 * batch" bit. Hence we need to pin secure batches into the global gtt.
 2418 * hsw should have this fixed, but bdw mucks it up again.
 */
2419 if (eb->batch_flags & I915_DISPATCH_SECURE)
2420 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
2421
2422 return NULL;
2423}
2424
51696691 2425static int eb_parse(struct i915_execbuffer *eb)
71745376 2426{
baa89ba3 2427 struct drm_i915_private *i915 = eb->i915;
c43ce123 2428 struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
47b08693 2429 struct i915_vma *shadow, *trampoline, *batch;
d5e87821 2430 unsigned long len;
2889caa9 2431 int err;
71745376 2432
47b08693
ML
2433 if (!eb_use_cmdparser(eb)) {
2434 batch = eb_dispatch_secure(eb, eb->batch->vma);
2435 if (IS_ERR(batch))
2436 return PTR_ERR(batch);
2437
2438 goto secure_batch;
2439 }
51696691 2440
32d94048
CW
2441 len = eb->batch_len;
2442 if (!CMDPARSER_USES_GGTT(eb->i915)) {
2443 /*
2444 * ppGTT backed shadow buffers must be mapped RO, to prevent
2445 * post-scan tampering
2446 */
2447 if (!eb->context->vm->has_read_only) {
baa89ba3
WK
2448 drm_dbg(&i915->drm,
2449 "Cannot prevent post-scan tampering without RO capable vm\n");
32d94048
CW
2450 return -EINVAL;
2451 }
2452 } else {
2453 len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
2454 }
d5e87821
CW
2455 if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */
2456 return -EINVAL;
32d94048 2457
c43ce123
ML
2458 if (!pool) {
2459 pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
2460 if (IS_ERR(pool))
2461 return PTR_ERR(pool);
2462 eb->batch_pool = pool;
2463 }
71745376 2464
c43ce123
ML
2465 err = i915_gem_object_lock(pool->obj, &eb->ww);
2466 if (err)
2467 goto err;
71745376 2468
47b08693 2469 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
32d94048
CW
2470 if (IS_ERR(shadow)) {
2471 err = PTR_ERR(shadow);
f8c08d8f 2472 goto err;
51696691 2473 }
32d94048 2474 i915_gem_object_set_readonly(shadow->obj);
57a78ca4 2475 shadow->private = pool;
32d94048
CW
2476
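/*
 * When the command parser requires the shadow to be executed from the
 * GGTT, the same pool object gets two bindings: the vma pinned above
 * in the context's vm is kept as the trampoline, while a second,
 * GGTT-bound vma below becomes the shadow and is dispatched as a
 * secure batch.
 */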
2477 trampoline = NULL;
2478 if (CMDPARSER_USES_GGTT(eb->i915)) {
2479 trampoline = shadow;
2480
47b08693 2481 shadow = shadow_batch_pin(eb, pool->obj,
32d94048
CW
2482 &eb->engine->gt->ggtt->vm,
2483 PIN_GLOBAL);
2484 if (IS_ERR(shadow)) {
2485 err = PTR_ERR(shadow);
2486 shadow = trampoline;
2487 goto err_shadow;
2488 }
57a78ca4 2489 shadow->private = pool;
32d94048
CW
2490
2491 eb->batch_flags |= I915_DISPATCH_SECURE;
2492 }
f8c08d8f 2493
47b08693
ML
2494 batch = eb_dispatch_secure(eb, shadow);
2495 if (IS_ERR(batch)) {
2496 err = PTR_ERR(batch);
2497 goto err_trampoline;
2498 }
2499
686c7c35 2500 err = eb_parse_pipeline(eb, shadow, trampoline);
32d94048 2501 if (err)
47b08693 2502 goto err_unpin_batch;
71745376 2503
7d6236bb 2504 eb->batch = &eb->vma[eb->buffer_count++];
47b08693
ML
2505 eb->batch->vma = i915_vma_get(shadow);
2506 eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
71745376 2507
32d94048 2508 eb->trampoline = trampoline;
4f7af194 2509 eb->batch_start_offset = 0;
4f7af194 2510
47b08693
ML
2511secure_batch:
2512 if (batch) {
2513 eb->batch = &eb->vma[eb->buffer_count++];
2514 eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
2515 eb->batch->vma = i915_vma_get(batch);
2516 }
51696691 2517 return 0;
b40d7378 2518
47b08693
ML
2519err_unpin_batch:
2520 if (batch)
2521 i915_vma_unpin(batch);
32d94048
CW
2522err_trampoline:
2523 if (trampoline)
2524 i915_vma_unpin(trampoline);
2525err_shadow:
2526 i915_vma_unpin(shadow);
b40d7378 2527err:
51696691 2528 return err;
71745376 2529}
5c6c6003 2530
7d6236bb 2531static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
78382593 2532{
2889caa9 2533 int err;
78382593 2534
2889caa9
CW
2535 err = eb_move_to_gpu(eb);
2536 if (err)
2537 return err;
78382593 2538
650bc635 2539 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
2889caa9
CW
2540 err = i915_reset_gen7_sol_offsets(eb->request);
2541 if (err)
2542 return err;
78382593
OM
2543 }
2544
85474441
CW
2545 /*
2546 * After we completed waiting for other engines (using HW semaphores)
2547 * then we can signal that this request/batch is ready to run. This
2548 * allows us to determine if the batch is still waiting on the GPU
2549 * or actually running by checking the breadcrumb.
2550 */
2551 if (eb->engine->emit_init_breadcrumb) {
2552 err = eb->engine->emit_init_breadcrumb(eb->request);
2553 if (err)
2554 return err;
2555 }
2556
2889caa9 2557 err = eb->engine->emit_bb_start(eb->request,
7d6236bb 2558 batch->node.start +
650bc635
CW
2559 eb->batch_start_offset,
2560 eb->batch_len,
2889caa9
CW
2561 eb->batch_flags);
2562 if (err)
2563 return err;
78382593 2564
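/*
 * The trampoline vma shares the shadow's backing store; dispatch
 * whatever the command parser wrote after the copied batch, hence
 * the additional eb->batch_len offset below.
 */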
32d94048
CW
2565 if (eb->trampoline) {
2566 GEM_BUG_ON(eb->batch_start_offset);
2567 err = eb->engine->emit_bb_start(eb->request,
2568 eb->trampoline->node.start +
2569 eb->batch_len,
2570 0, 0);
2571 if (err)
2572 return err;
2573 }
2574
9f3ccd40 2575 if (intel_context_nopreempt(eb->context))
e1c31fb5 2576 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
9cd20ef7 2577
2f5945bc 2578 return 0;
78382593
OM
2579}
2580
d5b2a3a4
CW
2581static int num_vcs_engines(const struct drm_i915_private *i915)
2582{
792592e7 2583 return hweight64(VDBOX_MASK(&i915->gt));
d5b2a3a4
CW
2584}
2585
204bcfef 2586/*
a8ebba75 2587 * Find one BSD ring to dispatch the corresponding BSD command.
c80ff16e 2588 * The engine index is returned.
a8ebba75 2589 */
de1add36 2590static unsigned int
c80ff16e
CW
2591gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2592 struct drm_file *file)
a8ebba75 2593{
a8ebba75
ZY
2594 struct drm_i915_file_private *file_priv = file->driver_priv;
2595
de1add36 2596 /* Check whether the file_priv has already selected one ring. */
6f633402 2597 if ((int)file_priv->bsd_engine < 0)
1a07e86c
CW
2598 file_priv->bsd_engine =
2599 get_random_int() % num_vcs_engines(dev_priv);
d23db88c 2600
c80ff16e 2601 return file_priv->bsd_engine;
d23db88c
CW
2602}
2603
5e2a0419 2604static const enum intel_engine_id user_ring_map[] = {
8a68d464
CW
2605 [I915_EXEC_DEFAULT] = RCS0,
2606 [I915_EXEC_RENDER] = RCS0,
2607 [I915_EXEC_BLT] = BCS0,
2608 [I915_EXEC_BSD] = VCS0,
2609 [I915_EXEC_VEBOX] = VECS0
de1add36
TU
2610};
2611
2bf541ff 2612static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
e5dadff4
CW
2613{
2614 struct intel_ring *ring = ce->ring;
2615 struct intel_timeline *tl = ce->timeline;
2616 struct i915_request *rq;
2617
2618 /*
2619 * Completely unscientific finger-in-the-air estimates for suitable
2620 * maximum user request size (to avoid blocking) and then backoff.
2621 */
2622 if (intel_ring_update_space(ring) >= PAGE_SIZE)
2623 return NULL;
2624
2625 /*
 2626 * Find a request after which, once waited upon, at least half the
 2627 * ring will be available. The hysteresis allows us to compete for the
2628 * shared ring and should mean that we sleep less often prior to
2629 * claiming our resources, but not so long that the ring completely
2630 * drains before we can submit our next request.
2631 */
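/*
 * __intel_ring_space(rq->postfix, ring->emit, ring->size) estimates how
 * much of the ring would be free once everything up to this request's
 * postfix has been consumed; stop at the first request that would leave
 * more than half the ring available.
 */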
2632 list_for_each_entry(rq, &tl->requests, link) {
2633 if (rq->ring != ring)
2634 continue;
2635
2636 if (__intel_ring_space(rq->postfix,
2637 ring->emit, ring->size) > ring->size / 2)
2638 break;
2639 }
2640 if (&rq->link == &tl->requests)
2641 return NULL; /* weird, we will check again later for real */
2642
2643 return i915_request_get(rq);
2644}
2645
2bf541ff 2646static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
e5dadff4 2647{
2bf541ff 2648 struct intel_context *ce = eb->context;
e5dadff4 2649 struct intel_timeline *tl;
2bf541ff 2650 struct i915_request *rq = NULL;
e5dadff4
CW
2651 int err;
2652
2bf541ff 2653 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
8f2a1057 2654
9f3ccd40 2655 if (unlikely(intel_context_is_banned(ce)))
2bf541ff 2656 return ERR_PTR(-EIO);
9f3ccd40 2657
8f2a1057
CW
2658 /*
2659 * Pinning the contexts may generate requests in order to acquire
2660 * GGTT space, so do this first before we reserve a seqno for
2661 * ourselves.
2662 */
47b08693 2663 err = intel_context_pin_ww(ce, &eb->ww);
fa9f6681 2664 if (err)
2bf541ff 2665 return ERR_PTR(err);
8f2a1057 2666
a4e57f90
CW
2667 /*
2668 * Take a local wakeref for preparing to dispatch the execbuf as
2669 * we expect to access the hardware fairly frequently in the
2670 * process, and require the engine to be kept awake between accesses.
2671 * Upon dispatch, we acquire another prolonged wakeref that we hold
2672 * until the timeline is idle, which in turn releases the wakeref
2673 * taken on the engine, and the parent device.
2674 */
e5dadff4
CW
2675 tl = intel_context_timeline_lock(ce);
2676 if (IS_ERR(tl)) {
2bf541ff
ML
2677 intel_context_unpin(ce);
2678 return ERR_CAST(tl);
e5dadff4 2679 }
a4e57f90
CW
2680
2681 intel_context_enter(ce);
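/*
 * eb_throttle() walks tl->requests, which is protected by the
 * timeline mutex taken via intel_context_timeline_lock() above.
 */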
2bf541ff
ML
2682 if (throttle)
2683 rq = eb_throttle(eb, ce);
e5dadff4
CW
2684 intel_context_timeline_unlock(tl);
2685
2bf541ff
ML
2686 eb->args->flags |= __EXEC_ENGINE_PINNED;
2687 return rq;
8f2a1057
CW
2688}
2689
e5dadff4 2690static void eb_unpin_engine(struct i915_execbuffer *eb)
8f2a1057 2691{
a4e57f90 2692 struct intel_context *ce = eb->context;
75d0a7f3 2693 struct intel_timeline *tl = ce->timeline;
a4e57f90 2694
2bf541ff
ML
2695 if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
2696 return;
2697
2698 eb->args->flags &= ~__EXEC_ENGINE_PINNED;
2699
a4e57f90
CW
2700 mutex_lock(&tl->mutex);
2701 intel_context_exit(ce);
2702 mutex_unlock(&tl->mutex);
2703
2850748e 2704 intel_context_unpin(ce);
8f2a1057 2705}
de1add36 2706
5e2a0419 2707static unsigned int
b49a7d51 2708eb_select_legacy_ring(struct i915_execbuffer *eb)
de1add36 2709{
8f2a1057 2710 struct drm_i915_private *i915 = eb->i915;
b49a7d51 2711 struct drm_i915_gem_execbuffer2 *args = eb->args;
de1add36 2712 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
de1add36 2713
5e2a0419
CW
2714 if (user_ring_id != I915_EXEC_BSD &&
2715 (args->flags & I915_EXEC_BSD_MASK)) {
baa89ba3
WK
2716 drm_dbg(&i915->drm,
2717 "execbuf with non bsd ring but with invalid "
2718 "bsd dispatch flags: %d\n", (int)(args->flags));
5e2a0419 2719 return -1;
de1add36
TU
2720 }
2721
d5b2a3a4 2722 if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
de1add36
TU
2723 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2724
2725 if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
b49a7d51 2726 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
de1add36
TU
2727 } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2728 bsd_idx <= I915_EXEC_BSD_RING2) {
d9da6aa0 2729 bsd_idx >>= I915_EXEC_BSD_SHIFT;
de1add36
TU
2730 bsd_idx--;
2731 } else {
baa89ba3
WK
2732 drm_dbg(&i915->drm,
2733 "execbuf with unknown bsd ring: %u\n",
2734 bsd_idx);
5e2a0419 2735 return -1;
de1add36
TU
2736 }
2737
5e2a0419 2738 return _VCS(bsd_idx);
de1add36
TU
2739 }
2740
5e2a0419 2741 if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
baa89ba3
WK
2742 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",
2743 user_ring_id);
5e2a0419 2744 return -1;
de1add36
TU
2745 }
2746
5e2a0419
CW
2747 return user_ring_map[user_ring_id];
2748}
2749
2750static int
2bf541ff 2751eb_select_engine(struct i915_execbuffer *eb)
5e2a0419
CW
2752{
2753 struct intel_context *ce;
2754 unsigned int idx;
2755 int err;
2756
976b55f0 2757 if (i915_gem_context_user_engines(eb->gem_context))
b49a7d51 2758 idx = eb->args->flags & I915_EXEC_RING_MASK;
976b55f0 2759 else
b49a7d51 2760 idx = eb_select_legacy_ring(eb);
5e2a0419
CW
2761
2762 ce = i915_gem_context_get_engine(eb->gem_context, idx);
2763 if (IS_ERR(ce))
2764 return PTR_ERR(ce);
2765
2bf541ff 2766 intel_gt_pm_get(ce->engine->gt);
5e2a0419 2767
2bf541ff
ML
2768 if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
2769 err = intel_context_alloc_state(ce);
2770 if (err)
2771 goto err;
2772 }
2773
2774 /*
2775 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2776 * EIO if the GPU is already wedged.
2777 */
2778 err = intel_gt_terminally_wedged(ce->engine->gt);
2779 if (err)
2780 goto err;
2781
2782 eb->context = ce;
2783 eb->engine = ce->engine;
2784
2785 /*
2786 * Make sure engine pool stays alive even if we call intel_context_put
 2787 * during ww handling. The pool is destroyed when the last pm reference
2788 * is dropped, which breaks our -EDEADLK handling.
2789 */
2790 return err;
2791
2792err:
2793 intel_gt_pm_put(ce->engine->gt);
2794 intel_context_put(ce);
5e2a0419 2795 return err;
de1add36
TU
2796}
2797
2bf541ff
ML
2798static void
2799eb_put_engine(struct i915_execbuffer *eb)
2800{
2801 intel_gt_pm_put(eb->engine->gt);
2802 intel_context_put(eb->context);
2803}
2804
cf6e7bac 2805static void
13149e8b 2806__free_fence_array(struct eb_fence *fences, unsigned int n)
cf6e7bac 2807{
13149e8b 2808 while (n--) {
cda9edd0 2809 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
13149e8b
LL
2810 dma_fence_put(fences[n].dma_fence);
2811 kfree(fences[n].chain_fence);
2812 }
cf6e7bac
JE
2813 kvfree(fences);
2814}
2815
cda9edd0 2816static int
13149e8b
LL
2817add_timeline_fence_array(struct i915_execbuffer *eb,
2818 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
cf6e7bac 2819{
13149e8b
LL
2820 struct drm_i915_gem_exec_fence __user *user_fences;
2821 u64 __user *user_values;
2822 struct eb_fence *f;
2823 u64 nfences;
2824 int err = 0;
cf6e7bac 2825
13149e8b
LL
2826 nfences = timeline_fences->fence_count;
2827 if (!nfences)
cda9edd0 2828 return 0;
cf6e7bac 2829
d710fc16
CW
2830 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2831 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2832 if (nfences > min_t(unsigned long,
13149e8b
LL
2833 ULONG_MAX / sizeof(*user_fences),
2834 SIZE_MAX / sizeof(*f)) - eb->num_fences)
cda9edd0 2835 return -EINVAL;
cf6e7bac 2836
13149e8b
LL
2837 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
2838 if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2839 return -EFAULT;
2840
2841 user_values = u64_to_user_ptr(timeline_fences->values_ptr);
2842 if (!access_ok(user_values, nfences * sizeof(*user_values)))
cda9edd0 2843 return -EFAULT;
cf6e7bac 2844
13149e8b
LL
2845 f = krealloc(eb->fences,
2846 (eb->num_fences + nfences) * sizeof(*f),
2847 __GFP_NOWARN | GFP_KERNEL);
2848 if (!f)
cda9edd0 2849 return -ENOMEM;
cf6e7bac 2850
13149e8b
LL
2851 eb->fences = f;
2852 f += eb->num_fences;
2853
2854 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2855 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2856
2857 while (nfences--) {
2858 struct drm_i915_gem_exec_fence user_fence;
cf6e7bac 2859 struct drm_syncobj *syncobj;
13149e8b
LL
2860 struct dma_fence *fence = NULL;
2861 u64 point;
2862
2863 if (__copy_from_user(&user_fence,
2864 user_fences++,
2865 sizeof(user_fence)))
2866 return -EFAULT;
2867
2868 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
2869 return -EINVAL;
2870
2871 if (__get_user(point, user_values++))
2872 return -EFAULT;
2873
2874 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2875 if (!syncobj) {
2876 DRM_DEBUG("Invalid syncobj handle provided\n");
2877 return -ENOENT;
2878 }
2879
2880 fence = drm_syncobj_fence_get(syncobj);
cf6e7bac 2881
13149e8b
LL
2882 if (!fence && user_fence.flags &&
2883 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2884 DRM_DEBUG("Syncobj handle has no fence\n");
2885 drm_syncobj_put(syncobj);
2886 return -EINVAL;
cf6e7bac
JE
2887 }
2888
13149e8b
LL
2889 if (fence)
2890 err = dma_fence_chain_find_seqno(&fence, point);
2891
2892 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2893 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
da1ea128 2894 dma_fence_put(fence);
13149e8b
LL
2895 drm_syncobj_put(syncobj);
2896 return err;
2897 }
2898
2899 /*
2900 * A point might have been signaled already and
2901 * garbage collected from the timeline. In this case
2902 * just ignore the point and carry on.
2903 */
2904 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2905 drm_syncobj_put(syncobj);
2906 continue;
2907 }
2908
2909 /*
2910 * For timeline syncobjs we need to preallocate chains for
2911 * later signaling.
2912 */
2913 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
2914 /*
2915 * Waiting and signaling the same point (when point !=
2916 * 0) would break the timeline.
2917 */
2918 if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
2919 DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
2920 dma_fence_put(fence);
2921 drm_syncobj_put(syncobj);
2922 return -EINVAL;
2923 }
2924
2925 f->chain_fence =
2926 kmalloc(sizeof(*f->chain_fence),
2927 GFP_KERNEL);
2928 if (!f->chain_fence) {
2929 drm_syncobj_put(syncobj);
2930 dma_fence_put(fence);
2931 return -ENOMEM;
2932 }
2933 } else {
2934 f->chain_fence = NULL;
ebcaa1ff
TU
2935 }
2936
13149e8b
LL
2937 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
2938 f->dma_fence = fence;
2939 f->value = point;
2940 f++;
2941 eb->num_fences++;
2942 }
2943
2944 return 0;
2945}
2946
2947static int add_fence_array(struct i915_execbuffer *eb)
2948{
2949 struct drm_i915_gem_execbuffer2 *args = eb->args;
2950 struct drm_i915_gem_exec_fence __user *user;
2951 unsigned long num_fences = args->num_cliprects;
2952 struct eb_fence *f;
2953
2954 if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2955 return 0;
2956
2957 if (!num_fences)
2958 return 0;
2959
2960 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2961 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2962 if (num_fences > min_t(unsigned long,
2963 ULONG_MAX / sizeof(*user),
2964 SIZE_MAX / sizeof(*f) - eb->num_fences))
2965 return -EINVAL;
2966
2967 user = u64_to_user_ptr(args->cliprects_ptr);
2968 if (!access_ok(user, num_fences * sizeof(*user)))
2969 return -EFAULT;
2970
2971 f = krealloc(eb->fences,
2972 (eb->num_fences + num_fences) * sizeof(*f),
2973 __GFP_NOWARN | GFP_KERNEL);
2974 if (!f)
2975 return -ENOMEM;
2976
2977 eb->fences = f;
2978 f += eb->num_fences;
2979 while (num_fences--) {
2980 struct drm_i915_gem_exec_fence user_fence;
2981 struct drm_syncobj *syncobj;
2982 struct dma_fence *fence = NULL;
2983
2984 if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
2985 return -EFAULT;
2986
2987 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
2988 return -EINVAL;
2989
2990 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
cf6e7bac
JE
2991 if (!syncobj) {
2992 DRM_DEBUG("Invalid syncobj handle provided\n");
13149e8b
LL
2993 return -ENOENT;
2994 }
2995
2996 if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
2997 fence = drm_syncobj_fence_get(syncobj);
2998 if (!fence) {
2999 DRM_DEBUG("Syncobj handle has no fence\n");
3000 drm_syncobj_put(syncobj);
3001 return -EINVAL;
3002 }
cf6e7bac
JE
3003 }
3004
ebcaa1ff
TU
3005 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
3006 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
3007
13149e8b
LL
3008 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
3009 f->dma_fence = fence;
3010 f->value = 0;
3011 f->chain_fence = NULL;
3012 f++;
3013 eb->num_fences++;
cf6e7bac
JE
3014 }
3015
cda9edd0 3016 return 0;
13149e8b 3017}
cf6e7bac 3018
13149e8b
LL
3019static void put_fence_array(struct eb_fence *fences, int num_fences)
3020{
3021 if (fences)
3022 __free_fence_array(fences, num_fences);
cf6e7bac
JE
3023}
3024
3025static int
cda9edd0 3026await_fence_array(struct i915_execbuffer *eb)
cf6e7bac 3027{
cf6e7bac
JE
3028 unsigned int n;
3029 int err;
3030
13149e8b 3031 for (n = 0; n < eb->num_fences; n++) {
cf6e7bac 3032 struct drm_syncobj *syncobj;
cf6e7bac
JE
3033 unsigned int flags;
3034
cda9edd0 3035 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
cf6e7bac 3036
13149e8b
LL
3037 if (!eb->fences[n].dma_fence)
3038 continue;
cf6e7bac 3039
13149e8b
LL
3040 err = i915_request_await_dma_fence(eb->request,
3041 eb->fences[n].dma_fence);
cf6e7bac
JE
3042 if (err < 0)
3043 return err;
3044 }
3045
3046 return 0;
3047}
3048
13149e8b 3049static void signal_fence_array(const struct i915_execbuffer *eb)
cf6e7bac 3050{
cf6e7bac
JE
3051 struct dma_fence * const fence = &eb->request->fence;
3052 unsigned int n;
3053
13149e8b 3054 for (n = 0; n < eb->num_fences; n++) {
cf6e7bac
JE
3055 struct drm_syncobj *syncobj;
3056 unsigned int flags;
3057
cda9edd0 3058 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
cf6e7bac
JE
3059 if (!(flags & I915_EXEC_FENCE_SIGNAL))
3060 continue;
3061
13149e8b
LL
3062 if (eb->fences[n].chain_fence) {
3063 drm_syncobj_add_point(syncobj,
3064 eb->fences[n].chain_fence,
3065 fence,
3066 eb->fences[n].value);
3067 /*
3068 * The chain's ownership is transferred to the
3069 * timeline.
3070 */
3071 eb->fences[n].chain_fence = NULL;
3072 } else {
3073 drm_syncobj_replace_fence(syncobj, fence);
3074 }
cf6e7bac
JE
3075 }
3076}
3077
13149e8b
LL
3078static int
3079parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
3080{
3081 struct i915_execbuffer *eb = data;
3082 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
3083
3084 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
3085 return -EFAULT;
3086
3087 return add_timeline_fence_array(eb, &timeline_fences);
3088}
3089
61231f6b
CW
3090static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3091{
3092 struct i915_request *rq, *rn;
3093
3094 list_for_each_entry_safe(rq, rn, &tl->requests, link)
3095 if (rq == end || !i915_request_retire(rq))
3096 break;
3097}
3098
ba38b79e 3099static int eb_request_add(struct i915_execbuffer *eb, int err)
61231f6b
CW
3100{
3101 struct i915_request *rq = eb->request;
3102 struct intel_timeline * const tl = i915_request_timeline(rq);
3103 struct i915_sched_attr attr = {};
3104 struct i915_request *prev;
3105
3106 lockdep_assert_held(&tl->mutex);
3107 lockdep_unpin_lock(&tl->mutex, rq->cookie);
3108
3109 trace_i915_request_add(rq);
3110
3111 prev = __i915_request_commit(rq);
3112
3113 /* Check that the context wasn't destroyed before submission */
207e4a71 3114 if (likely(!intel_context_is_closed(eb->context))) {
61231f6b 3115 attr = eb->gem_context->sched;
61231f6b
CW
3116 } else {
3117 /* Serialise with context_close via the add_to_timeline */
36e191f0
CW
3118 i915_request_set_error_once(rq, -ENOENT);
3119 __i915_request_skip(rq);
ba38b79e 3120 err = -ENOENT; /* override any transient errors */
61231f6b
CW
3121 }
3122
61231f6b 3123 __i915_request_queue(rq, &attr);
61231f6b
CW
3124
3125 /* Try to clean up the client's timeline after submitting the request */
3126 if (prev)
3127 retire_requests(tl, prev);
3128
3129 mutex_unlock(&tl->mutex);
ba38b79e
CW
3130
3131 return err;
61231f6b
CW
3132}
3133
cda9edd0 3134static const i915_user_extension_fn execbuf_extensions[] = {
13149e8b 3135 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
cda9edd0
LL
3136};
3137
3138static int
3139parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3140 struct i915_execbuffer *eb)
3141{
cda9edd0
LL
3142 if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
3143 return 0;
3144
3145 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3146 * have another flag also using it at the same time.
3147 */
3148 if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
3149 return -EINVAL;
3150
3151 if (args->num_cliprects != 0)
3152 return -EINVAL;
3153
3154 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
3155 execbuf_extensions,
3156 ARRAY_SIZE(execbuf_extensions),
3157 eb);
3158}
3159
54cf91dc 3160static int
650bc635 3161i915_gem_do_execbuffer(struct drm_device *dev,
54cf91dc
CW
3162 struct drm_file *file,
3163 struct drm_i915_gem_execbuffer2 *args,
cda9edd0 3164 struct drm_i915_gem_exec_object2 *exec)
54cf91dc 3165{
44157641 3166 struct drm_i915_private *i915 = to_i915(dev);
650bc635 3167 struct i915_execbuffer eb;
fec0445c
CW
3168 struct dma_fence *in_fence = NULL;
3169 struct sync_file *out_fence = NULL;
7d6236bb 3170 struct i915_vma *batch;
fec0445c 3171 int out_fence_fd = -1;
2889caa9 3172 int err;
432e58ed 3173
74c1c694 3174 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2889caa9
CW
3175 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
3176 ~__EXEC_OBJECT_UNKNOWN_FLAGS);
54cf91dc 3177
44157641 3178 eb.i915 = i915;
650bc635
CW
3179 eb.file = file;
3180 eb.args = args;
ad5d95e4 3181 if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2889caa9 3182 args->flags |= __EXEC_HAS_RELOC;
c7c6e46f 3183
650bc635 3184 eb.exec = exec;
8ae275c2
ML
3185 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3186 eb.vma[0].vma = NULL;
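/*
 * exec and eb.vma share a single allocation: the ioctl wrappers
 * allocate buffer_count + 2 slots of eb_element_size(), so there is
 * room after the exec2 entries for this eb_vma array plus the extra
 * entries eb_parse() may append for the shadow/secure batch.
 */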
c43ce123 3187 eb.reloc_pool = eb.batch_pool = NULL;
2bf541ff 3188 eb.reloc_context = NULL;
c7c6e46f 3189
2889caa9 3190 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
650bc635
CW
3191 reloc_cache_init(&eb.reloc_cache, eb.i915);
3192
2889caa9 3193 eb.buffer_count = args->buffer_count;
650bc635
CW
3194 eb.batch_start_offset = args->batch_start_offset;
3195 eb.batch_len = args->batch_len;
32d94048 3196 eb.trampoline = NULL;
650bc635 3197
cda9edd0 3198 eb.fences = NULL;
13149e8b 3199 eb.num_fences = 0;
cda9edd0 3200
2889caa9 3201 eb.batch_flags = 0;
d7d4eedd 3202 if (args->flags & I915_EXEC_SECURE) {
44157641
JB
3203 if (INTEL_GEN(i915) >= 11)
3204 return -ENODEV;
3205
3206 /* Return -EPERM to trigger fallback code on old binaries. */
3207 if (!HAS_SECURE_BATCHES(i915))
3208 return -EPERM;
3209
b3ac9f25 3210 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
44157641 3211 return -EPERM;
d7d4eedd 3212
2889caa9 3213 eb.batch_flags |= I915_DISPATCH_SECURE;
d7d4eedd 3214 }
b45305fc 3215 if (args->flags & I915_EXEC_IS_PINNED)
2889caa9 3216 eb.batch_flags |= I915_DISPATCH_PINNED;
54cf91dc 3217
13149e8b
LL
3218 err = parse_execbuf2_extensions(args, &eb);
3219 if (err)
3220 goto err_ext;
3221
3222 err = add_fence_array(&eb);
3223 if (err)
3224 goto err_ext;
3225
889333c7
CW
3226#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
3227 if (args->flags & IN_FENCES) {
3228 if ((args->flags & IN_FENCES) == IN_FENCES)
3229 return -EINVAL;
3230
fec0445c 3231 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
13149e8b
LL
3232 if (!in_fence) {
3233 err = -EINVAL;
3234 goto err_ext;
3235 }
fec0445c 3236 }
889333c7 3237#undef IN_FENCES
a88b6e4c 3238
fec0445c
CW
3239 if (args->flags & I915_EXEC_FENCE_OUT) {
3240 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
3241 if (out_fence_fd < 0) {
2889caa9 3242 err = out_fence_fd;
889333c7 3243 goto err_in_fence;
fec0445c
CW
3244 }
3245 }
3246
cda9edd0
LL
3247 err = eb_create(&eb);
3248 if (err)
13149e8b 3249 goto err_out_fence;
cda9edd0 3250
4d470f73 3251 GEM_BUG_ON(!eb.lut_size);
2889caa9 3252
1acfc104
CW
3253 err = eb_select_context(&eb);
3254 if (unlikely(err))
3255 goto err_destroy;
3256
2bf541ff 3257 err = eb_select_engine(&eb);
d6f328bf 3258 if (unlikely(err))
e5dadff4 3259 goto err_context;
d6f328bf 3260
c43ce123
ML
3261 err = eb_lookup_vmas(&eb);
3262 if (err) {
3263 eb_release_vmas(&eb, true);
3264 goto err_engine;
3265 }
3266
3267 i915_gem_ww_ctx_init(&eb.ww, true);
3268
8e4ba491 3269 err = eb_relocate_parse(&eb);
1f727d9e 3270 if (err) {
2889caa9
CW
3271 /*
3272 * If the user expects the execobject.offset and
3273 * reloc.presumed_offset to be an exact match,
3274 * as for using NO_RELOC, then we cannot update
3275 * the execobject.offset until we have completed
3276 * relocation.
3277 */
3278 args->flags &= ~__EXEC_HAS_RELOC;
2889caa9 3279 goto err_vma;
1f727d9e 3280 }
54cf91dc 3281
c43ce123 3282 ww_acquire_done(&eb.ww.ctx);
7d6236bb 3283
7d6236bb 3284 batch = eb.batch->vma;
d7d4eedd 3285
7dd4f672
CW
3286 /* All GPU relocation batches must be submitted prior to the user rq */
3287 GEM_BUG_ON(eb.reloc_cache.rq);
3288
0c8dac88 3289 /* Allocate a request for this batch buffer nice and early. */
8f2a1057 3290 eb.request = i915_request_create(eb.context);
650bc635 3291 if (IS_ERR(eb.request)) {
2889caa9 3292 err = PTR_ERR(eb.request);
47b08693 3293 goto err_vma;
26827088 3294 }
0c8dac88 3295
fec0445c 3296 if (in_fence) {
889333c7
CW
3297 if (args->flags & I915_EXEC_FENCE_SUBMIT)
3298 err = i915_request_await_execution(eb.request,
3299 in_fence,
3300 eb.engine->bond_execute);
3301 else
3302 err = i915_request_await_dma_fence(eb.request,
3303 in_fence);
a88b6e4c
CW
3304 if (err < 0)
3305 goto err_request;
3306 }
3307
13149e8b 3308 if (eb.fences) {
cda9edd0 3309 err = await_fence_array(&eb);
cf6e7bac
JE
3310 if (err)
3311 goto err_request;
3312 }
3313
fec0445c 3314 if (out_fence_fd != -1) {
650bc635 3315 out_fence = sync_file_create(&eb.request->fence);
fec0445c 3316 if (!out_fence) {
2889caa9 3317 err = -ENOMEM;
fec0445c
CW
3318 goto err_request;
3319 }
3320 }
3321
2889caa9
CW
3322 /*
3323 * Whilst this request exists, batch_obj will be on the
17f298cf
CW
3324 * active_list, and so will hold the active reference. Only when this
 3325 * request is retired will the batch_obj be moved onto the
3326 * inactive_list and lose its active reference. Hence we do not need
3327 * to explicitly hold another reference here.
3328 */
7d6236bb 3329 eb.request->batch = batch;
c43ce123
ML
3330 if (eb.batch_pool)
3331 intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
5f19e2bf 3332
e61e0f51 3333 trace_i915_request_queue(eb.request, eb.batch_flags);
7d6236bb 3334 err = eb_submit(&eb, batch);
aa9b7810 3335err_request:
e14177f1 3336 i915_request_get(eb.request);
ba38b79e 3337 err = eb_request_add(&eb, err);
c8659efa 3338
13149e8b 3339 if (eb.fences)
cda9edd0 3340 signal_fence_array(&eb);
cf6e7bac 3341
fec0445c 3342 if (out_fence) {
2889caa9 3343 if (err == 0) {
fec0445c 3344 fd_install(out_fence_fd, out_fence->file);
b6a88e4a 3345 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
fec0445c
CW
3346 args->rsvd2 |= (u64)out_fence_fd << 32;
3347 out_fence_fd = -1;
3348 } else {
3349 fput(out_fence->file);
3350 }
3351 }
e14177f1 3352 i915_request_put(eb.request);
54cf91dc 3353
2889caa9 3354err_vma:
c43ce123 3355 eb_release_vmas(&eb, true);
32d94048
CW
3356 if (eb.trampoline)
3357 i915_vma_unpin(eb.trampoline);
c43ce123
ML
3358 WARN_ON(err == -EDEADLK);
3359 i915_gem_ww_ctx_fini(&eb.ww);
3360
3361 if (eb.batch_pool)
3362 intel_gt_buffer_pool_put(eb.batch_pool);
3363 if (eb.reloc_pool)
3364 intel_gt_buffer_pool_put(eb.reloc_pool);
2bf541ff
ML
3365 if (eb.reloc_context)
3366 intel_context_put(eb.reloc_context);
c43ce123 3367err_engine:
2bf541ff 3368 eb_put_engine(&eb);
a4e57f90 3369err_context:
8f2a1057 3370 i915_gem_context_put(eb.gem_context);
1acfc104 3371err_destroy:
2889caa9 3372 eb_destroy(&eb);
4d470f73 3373err_out_fence:
fec0445c
CW
3374 if (out_fence_fd != -1)
3375 put_unused_fd(out_fence_fd);
4a04e371 3376err_in_fence:
fec0445c 3377 dma_fence_put(in_fence);
13149e8b
LL
3378err_ext:
3379 put_fence_array(eb.fences, eb.num_fences);
2889caa9 3380 return err;
54cf91dc
CW
3381}
3382
d710fc16
CW
3383static size_t eb_element_size(void)
3384{
8ae275c2 3385 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
d710fc16
CW
3386}
3387
3388static bool check_buffer_count(size_t count)
3389{
3390 const size_t sz = eb_element_size();
3391
3392 /*
3393 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3394 * array size (see eb_create()). Otherwise, we can accept an array as
3395 * large as can be addressed (though use large arrays at your peril)!
3396 */
3397
3398 return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
3399}
3400
54cf91dc
CW
3401/*
3402 * Legacy execbuffer just creates an exec2 list from the original exec object
3403 * list array and passes it to the real function.
3404 */
3405int
6a20fe7b
VS
3406i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
3407 struct drm_file *file)
54cf91dc 3408{
d0bf4582 3409 struct drm_i915_private *i915 = to_i915(dev);
54cf91dc
CW
3410 struct drm_i915_gem_execbuffer *args = data;
3411 struct drm_i915_gem_execbuffer2 exec2;
3412 struct drm_i915_gem_exec_object *exec_list = NULL;
3413 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
d710fc16 3414 const size_t count = args->buffer_count;
2889caa9
CW
3415 unsigned int i;
3416 int err;
54cf91dc 3417
d710fc16 3418 if (!check_buffer_count(count)) {
d0bf4582 3419 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
54cf91dc
CW
3420 return -EINVAL;
3421 }
3422
2889caa9
CW
3423 exec2.buffers_ptr = args->buffers_ptr;
3424 exec2.buffer_count = args->buffer_count;
3425 exec2.batch_start_offset = args->batch_start_offset;
3426 exec2.batch_len = args->batch_len;
3427 exec2.DR1 = args->DR1;
3428 exec2.DR4 = args->DR4;
3429 exec2.num_cliprects = args->num_cliprects;
3430 exec2.cliprects_ptr = args->cliprects_ptr;
3431 exec2.flags = I915_EXEC_RENDER;
3432 i915_execbuffer2_set_context_id(exec2, 0);
3433
00aff3f6
TU
3434 err = i915_gem_check_execbuffer(&exec2);
3435 if (err)
3436 return err;
2889caa9 3437
54cf91dc 3438 /* Copy in the exec list from userland */
d710fc16 3439 exec_list = kvmalloc_array(count, sizeof(*exec_list),
0ee931c4 3440 __GFP_NOWARN | GFP_KERNEL);
47b08693
ML
3441
3442 /* Allocate extra slots for use by the command parser */
3443 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
0ee931c4 3444 __GFP_NOWARN | GFP_KERNEL);
54cf91dc 3445 if (exec_list == NULL || exec2_list == NULL) {
d0bf4582
WK
3446 drm_dbg(&i915->drm,
3447 "Failed to allocate exec list for %d buffers\n",
3448 args->buffer_count);
2098105e
MH
3449 kvfree(exec_list);
3450 kvfree(exec2_list);
54cf91dc
CW
3451 return -ENOMEM;
3452 }
2889caa9 3453 err = copy_from_user(exec_list,
3ed605bc 3454 u64_to_user_ptr(args->buffers_ptr),
d710fc16 3455 sizeof(*exec_list) * count);
2889caa9 3456 if (err) {
d0bf4582
WK
3457 drm_dbg(&i915->drm, "copy %d exec entries failed %d\n",
3458 args->buffer_count, err);
2098105e
MH
3459 kvfree(exec_list);
3460 kvfree(exec2_list);
54cf91dc
CW
3461 return -EFAULT;
3462 }
3463
3464 for (i = 0; i < args->buffer_count; i++) {
3465 exec2_list[i].handle = exec_list[i].handle;
3466 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3467 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3468 exec2_list[i].alignment = exec_list[i].alignment;
3469 exec2_list[i].offset = exec_list[i].offset;
f0836b72 3470 if (INTEL_GEN(to_i915(dev)) < 4)
54cf91dc
CW
3471 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3472 else
3473 exec2_list[i].flags = 0;
3474 }
3475
cda9edd0 3476 err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
2889caa9 3477 if (exec2.flags & __EXEC_HAS_RELOC) {
9aab8bff 3478 struct drm_i915_gem_exec_object __user *user_exec_list =
3ed605bc 3479 u64_to_user_ptr(args->buffers_ptr);
9aab8bff 3480
54cf91dc 3481 /* Copy the new buffer offsets back to the user's exec list. */
9aab8bff 3482 for (i = 0; i < args->buffer_count; i++) {
2889caa9
CW
3483 if (!(exec2_list[i].offset & UPDATE))
3484 continue;
3485
934acce3 3486 exec2_list[i].offset =
2889caa9
CW
3487 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
3488 exec2_list[i].offset &= PIN_OFFSET_MASK;
3489 if (__copy_to_user(&user_exec_list[i].offset,
3490 &exec2_list[i].offset,
3491 sizeof(user_exec_list[i].offset)))
9aab8bff 3492 break;
54cf91dc
CW
3493 }
3494 }
3495
2098105e
MH
3496 kvfree(exec_list);
3497 kvfree(exec2_list);
2889caa9 3498 return err;
54cf91dc
CW
3499}
3500
3501int
6a20fe7b
VS
3502i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3503 struct drm_file *file)
54cf91dc 3504{
d0bf4582 3505 struct drm_i915_private *i915 = to_i915(dev);
54cf91dc 3506 struct drm_i915_gem_execbuffer2 *args = data;
2889caa9 3507 struct drm_i915_gem_exec_object2 *exec2_list;
d710fc16 3508 const size_t count = args->buffer_count;
2889caa9 3509 int err;
54cf91dc 3510
d710fc16 3511 if (!check_buffer_count(count)) {
d0bf4582 3512 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
54cf91dc
CW
3513 return -EINVAL;
3514 }
3515
00aff3f6
TU
3516 err = i915_gem_check_execbuffer(args);
3517 if (err)
3518 return err;
2889caa9 3519
47b08693
ML
3520 /* Allocate extra slots for use by the command parser */
3521 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
0ee931c4 3522 __GFP_NOWARN | GFP_KERNEL);
54cf91dc 3523 if (exec2_list == NULL) {
d0bf4582
WK
3524 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
3525 count);
54cf91dc
CW
3526 return -ENOMEM;
3527 }
2889caa9
CW
3528 if (copy_from_user(exec2_list,
3529 u64_to_user_ptr(args->buffers_ptr),
d710fc16 3530 sizeof(*exec2_list) * count)) {
d0bf4582 3531 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
2098105e 3532 kvfree(exec2_list);
54cf91dc
CW
3533 return -EFAULT;
3534 }
3535
cda9edd0 3536 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
2889caa9
CW
3537
3538 /*
3539 * Now that we have begun execution of the batchbuffer, we ignore
3540 * any new error after this point. Also given that we have already
3541 * updated the associated relocations, we try to write out the current
3542 * object locations irrespective of any error.
3543 */
3544 if (args->flags & __EXEC_HAS_RELOC) {
d593d992 3545 struct drm_i915_gem_exec_object2 __user *user_exec_list =
2889caa9
CW
3546 u64_to_user_ptr(args->buffers_ptr);
3547 unsigned int i;
9aab8bff 3548
2889caa9 3549 /* Copy the new buffer offsets back to the user's exec list. */
594cc251
LT
3550 /*
3551 * Note: count * sizeof(*user_exec_list) does not overflow,
3552 * because we checked 'count' in check_buffer_count().
3553 *
3554 * And this range already got effectively checked earlier
3555 * when we did the "copy_from_user()" above.
3556 */
b44f6873
CL
3557 if (!user_write_access_begin(user_exec_list,
3558 count * sizeof(*user_exec_list)))
8f4faed0 3559 goto end;
594cc251 3560
9aab8bff 3561 for (i = 0; i < args->buffer_count; i++) {
2889caa9
CW
3562 if (!(exec2_list[i].offset & UPDATE))
3563 continue;
3564
934acce3 3565 exec2_list[i].offset =
2889caa9
CW
3566 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
3567 unsafe_put_user(exec2_list[i].offset,
3568 &user_exec_list[i].offset,
3569 end_user);
54cf91dc 3570 }
2889caa9 3571end_user:
b44f6873 3572 user_write_access_end();
8f4faed0 3573end:;
54cf91dc
CW
3574 }
3575
2889caa9 3576 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2098105e 3577 kvfree(exec2_list);
2889caa9 3578 return err;
54cf91dc 3579}
e3d29130
CW
3580
3581#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3582#include "selftests/i915_gem_execbuffer.c"
3583#endif