/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/mman.h>
#include <linux/sizes.h>

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_trace.h"
#include "i915_vma.h"

static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr &&
	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 * it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on;
 * hiding the mmap call in a driver-private ioctl will break that. The i915
 * driver only does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		addr = -ENXIO;
		goto err;
	}

	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
		addr = -EINVAL;
		goto err;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (IS_ERR_VALUE(addr))
		goto err;

	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			addr = -EINTR;
			goto err;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
		if (IS_ERR_VALUE(addr))
			goto err;
	}
	i915_gem_object_put(obj);

	args->addr_ptr = (u64)addr;
	return 0;

err:
	i915_gem_object_put(obj);
	return addr;
}
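
/*
 * For illustration only, a minimal userspace sketch of driving this ioctl
 * through libdrm. This is not part of the driver; `fd` (an open DRM fd),
 * `handle` (a valid GEM handle) and `size` are assumed, and error handling
 * is elided:
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.size = size,
 *		.flags = I915_MMAP_WC,
 *	};
 *	void *ptr = NULL;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 */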

static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
 *     pagefault; swapin remains transparent.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous
 *    users to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 3;
}
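
/*
 * Userspace discovers this version number via getparam. A minimal,
 * illustrative sketch (not part of the driver), assuming an open DRM fd
 * and ignoring errors:
 *
 *	int value = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &value,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 */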

static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}
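
/*
 * Worked example: with 4KiB pages, MIN_CHUNK_PAGES is 256 (1MiB). A fault
 * at page_offset 1000 of an untiled 8MiB (2048-page) object gives
 * view.partial.offset = rounddown(1000, 256) = 768 and
 * view.partial.size = min(256, 2048 - 768) = 256, i.e. a 1MiB window
 * covering pages [768, 1024). For a tiled object, chunk is first rounded
 * up to a whole number of tile rows.
 */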

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process's address space.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *i915 = to_i915(dev);
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	struct i915_ggtt *ggtt = &i915->ggtt;
	bool write = area->vm_flags & VM_WRITE;
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	pgoff_t page_offset;
	int srcu;
	int ret;

	/* Sanity check that we allow writing into this object */
	if (i915_gem_object_is_readonly(obj) && write)
		return VM_FAULT_SIGBUS;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	wakeref = intel_runtime_pm_get(rpm);

	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
	if (ret)
		goto err_rpm;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_reset;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
		ret = -EFAULT;
		goto err_unlock;
	}

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONBLOCK /* NOWARN */ |
				       PIN_NOEVICT);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
		unsigned int flags;

		flags = PIN_MAPPABLE | PIN_NOSEARCH;
		if (view.type == I915_GGTT_VIEW_NORMAL)
			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */

		/*
		 * Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		if (IS_ERR(vma)) {
			flags = PIN_MAPPABLE;
			view.type = I915_GGTT_VIEW_PARTIAL;
			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		}
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_vma_pin_fence(vma);
	if (ret)
		goto err_unpin;

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->iomap);
	if (ret)
		goto err_fence;

	assert_rpm_wakelock_held(rpm);

	/* Mark as being mmapped into userspace for later revocation */
	mutex_lock(&i915->ggtt.vm.mutex);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));

	if (write) {
		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
		i915_vma_set_ggtt_write(vma);
		obj->mm.dirty = true;
	}
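
	/*
	 * On success (ret == 0) we deliberately fall through the err_*
	 * labels below: they drop the temporary pins and locks taken
	 * above, and the final switch maps ret == 0 to VM_FAULT_NOPAGE.
	 */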
err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_reset:
	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!intel_gt_is_wedged(ggtt->vm.gt))
			return VM_FAULT_SIGBUS;
		/* else, fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
	case -EFAULT:
	case -ENODEV: /* bad object, how did you get here! */
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
		return VM_FAULT_SIGBUS;
	}
}

void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	GEM_BUG_ON(!obj->userfault_count);

	obj->userfault_count = 0;
	list_del(&obj->userfault_link);
	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);

	for_each_ggtt_vma(vma, obj)
		i915_vma_unset_userfault(vma);
}

/**
 * i915_gem_object_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	intel_wakeref_t wakeref;

	/*
	 * Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 *
	 * Note that RPM complicates this somewhat by adding an additional
	 * requirement that operations to the GGTT be made while holding the
	 * RPM wakeref.
	 */
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	mutex_lock(&i915->ggtt.vm.mutex);

	if (!obj->userfault_count)
		goto out;

	__i915_gem_object_release_mmap(obj);

	/*
	 * Ensure that the CPU's PTEs are revoked and there are no outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied above by changing the PTE *should* be sufficient;
	 * an extra barrier here just provides us with a bit of paranoid
	 * documentation about our requirement to serialise memory writes
	 * before touching registers / GSM.
	 */
	wmb();

out:
	mutex_unlock(&i915->ggtt.vm.mutex);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static int create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int err;

	err = drm_gem_create_mmap_offset(&obj->base);
	if (likely(!err))
		return 0;

	/* Attempt to reap some mmap space from dead objects */
	do {
		err = i915_gem_wait_for_idle(i915,
					     I915_WAIT_INTERRUPTIBLE,
					     MAX_SCHEDULE_TIMEOUT);
		if (err)
			break;

		i915_gem_drain_freed_objects(i915);
		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			break;

	} while (flush_delayed_work(&i915->gem.retire_work));

	return err;
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  u32 handle,
		  u64 *offset)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;

	if (i915_gem_object_never_bind_ggtt(obj)) {
		ret = -ENODEV;
		goto out;
	}

	ret = create_mmap_offset(obj);
	if (ret == 0)
		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);

out:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: drm file
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}
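
/*
 * For illustration only, a minimal userspace sketch of the offset-then-mmap
 * flow this ioctl enables (not part of the driver; `fd`, `handle` and
 * `size` are assumed, error handling elided). Note the fake offset returned
 * by the ioctl is passed straight to mmap on the DRM fd:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, arg.offset);
 */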

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif