/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: this is ugly. Right now we use TTM to allocate VRAM and keep it
 * pinned while it is in use. We likely want to overhaul nouveau memory
 * management to be more page-like (not necessarily with the system page
 * size, but a bigger page size) at the lowest level, with some shim layer
 * on top providing the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)

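/*
 * Apertures the copy engine can target: GPU virtual address space,
 * local VRAM, or host (system) memory.
 */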
enum nouveau_aper {
	NOUVEAU_APER_VIRT,
	NOUVEAU_APER_VRAM,
	NOUVEAU_APER_HOST,
};

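/*
 * Signature of the channel-based copy used to move pages between apertures;
 * nvc0b5_migrate_copy() below implements it for Pascal and newer copy
 * engines.
 */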
typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
				      enum nouveau_aper, u64 dst_addr,
				      enum nouveau_aper, u64 src_addr);

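/*
 * A chunk is a 2MB slab of VRAM backed by a pinned buffer object and handed
 * out in PAGE_SIZE pages tracked by @bitmap; @callocated counts the pages
 * currently allocated from the chunk.
 */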
struct nouveau_dmem_chunk {
	struct list_head list;
	struct nouveau_bo *bo;
	struct nouveau_drm *drm;
	unsigned long pfn_first;
	unsigned long callocated;
	unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
	spinlock_t lock;
};

struct nouveau_dmem_migrate {
	nouveau_migrate_copy_t copy_func;
	struct nouveau_channel *chan;
};

struct nouveau_dmem {
	struct nouveau_drm *drm;
	struct dev_pagemap pagemap;
	struct nouveau_dmem_migrate migrate;
	struct list_head chunk_free;
	struct list_head chunk_full;
	struct list_head chunk_empty;
	struct mutex mutex;
};

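/* A device-private page's pgmap is embedded in its owning nouveau_dmem. */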
static inline struct nouveau_dmem *page_to_dmem(struct page *page)
{
	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
}

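/*
 * Return the physical VRAM address backing a device-private page: the
 * page's byte offset within its chunk plus the chunk bo's VRAM offset.
 */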
static unsigned long nouveau_dmem_page_addr(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

	return (idx << PAGE_SHIFT) + chunk->bo->bo.offset;
}

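/* dev_pagemap page_free callback: return the page to its chunk's bitmap. */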
static void nouveau_dmem_page_free(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

	/*
	 * FIXME:
	 *
	 * This is really a bad example; we need to overhaul nouveau memory
	 * management to be more page focused, which would also allow a
	 * lighter locking scheme to be used here.
	 */
	spin_lock(&chunk->lock);
	clear_bit(idx, chunk->bitmap);
	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME: when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&chunk->lock);
}

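/* Wait for a migration copy to complete, then drop the fence reference. */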
static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
	if (fence) {
		nouveau_fence_wait(*fence, true, false);
		nouveau_fence_unref(fence);
	} else {
		/*
		 * FIXME: wait for the channel to be IDLE before finalizing
		 * the hmem object.
		 */
	}
}

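/*
 * On a CPU fault, copy one page out of VRAM into a freshly allocated,
 * DMA-mapped system page and report it as the migration destination.
 */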
static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
		struct vm_fault *vmf, struct migrate_vma *args,
		dma_addr_t *dma_addr)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;

	spage = migrate_pfn_to_page(args->src[0]);
	if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
		return 0;

	dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
	if (!dpage)
		return VM_FAULT_SIGBUS;
	lock_page(dpage);

	*dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr))
		goto error_free_page;

	if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
			NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
		goto error_dma_unmap;

	args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	return 0;

error_dma_unmap:
	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
error_free_page:
	__free_page(dpage);
	return VM_FAULT_SIGBUS;
}

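/*
 * dev_pagemap migrate_to_ram callback: migrate a single faulting page back
 * to system memory.
 */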
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
	struct nouveau_drm *drm = dmem->drm;
	struct nouveau_fence *fence;
	unsigned long src = 0, dst = 0;
	dma_addr_t dma_addr = 0;
	vm_fault_t ret;
	struct migrate_vma args = {
		.vma = vmf->vma,
		.start = vmf->address,
		.end = vmf->address + PAGE_SIZE,
		.src = &src,
		.dst = &dst,
	};

	/*
	 * FIXME: what we really want is some heuristic to migrate more than
	 * just one page on a CPU fault. When such a fault happens it is very
	 * likely that the surrounding pages will be CPU faulted too.
	 */
	if (migrate_vma_setup(&args) < 0)
		return VM_FAULT_SIGBUS;
	if (!args.cpages)
		return 0;

	ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
	if (ret || dst == 0)
		goto done;

	nouveau_fence_new(dmem->migrate.chan, false, &fence);
	migrate_vma_pages(&args);
	nouveau_dmem_fence_done(&fence);
	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
done:
	migrate_vma_finalize(&args);
	return ret;
}

static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
	.page_free = nouveau_dmem_page_free,
	.migrate_to_ram = nouveau_dmem_migrate_to_ram,
};

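/*
 * Back one of the empty chunks with a pinned 2MB VRAM buffer object and
 * reset its allocation bitmap.
 */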
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return -EINVAL;

	mutex_lock(&drm->dmem->mutex);
	chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk == NULL) {
		mutex_unlock(&drm->dmem->mutex);
		return -ENOMEM;
	}

	list_del(&chunk->list);
	mutex_unlock(&drm->dmem->mutex);

	ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
			     TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
			     &chunk->bo);
	if (ret)
		goto out;

	ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
	if (ret) {
		nouveau_bo_ref(NULL, &chunk->bo);
		goto out;
	}

	bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
	spin_lock_init(&chunk->lock);

out:
	mutex_lock(&drm->dmem->mutex);
	if (chunk->bo)
		list_add(&chunk->list, &drm->dmem->chunk_empty);
	else
		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
	mutex_unlock(&drm->dmem->mutex);

	return ret;
}

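/*
 * Pick a chunk to allocate from: one with free pages if available, else an
 * empty chunk that already has backing VRAM. Called with dmem->mutex held.
 */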
static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk)
		return chunk;

	chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk && chunk->bo)
		return chunk;

	return NULL;
}

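/*
 * Allocate npages device pfns, backing additional chunks on demand. If the
 * pool cannot grow after a partial allocation, the pages claimed so far are
 * kept and 0 is returned.
 */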
static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
			 unsigned long npages,
			 unsigned long *pages)
{
	struct nouveau_dmem_chunk *chunk;
	unsigned long c;
	int ret;

	memset(pages, 0xff, npages * sizeof(*pages));

	mutex_lock(&drm->dmem->mutex);
	for (c = 0; c < npages;) {
		unsigned long i;

		chunk = nouveau_dmem_chunk_first_free_locked(drm);
		if (chunk == NULL) {
			mutex_unlock(&drm->dmem->mutex);
			ret = nouveau_dmem_chunk_alloc(drm);
			if (ret) {
				if (c)
					return 0;
				return ret;
			}
			mutex_lock(&drm->dmem->mutex);
			continue;
		}

		spin_lock(&chunk->lock);
		i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
		while (i < DMEM_CHUNK_NPAGES && c < npages) {
			pages[c] = chunk->pfn_first + i;
			set_bit(i, chunk->bitmap);
			chunk->callocated++;
			c++;

			i = find_next_zero_bit(chunk->bitmap,
					       DMEM_CHUNK_NPAGES, i);
		}
		spin_unlock(&chunk->lock);
	}
	mutex_unlock(&drm->dmem->mutex);

	return 0;
}

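/* Allocate a single device-private page, returned referenced and locked. */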
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
	unsigned long pfns[1];
	struct page *page;
	int ret;

	/* FIXME: stop all the mismatched API ... */
	ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
	if (ret)
		return NULL;

	page = pfn_to_page(pfns[0]);
	get_page(page);
	lock_page(page);
	return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
	unlock_page(page);
	put_page(page);
}

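/* Re-pin the buffer objects of all chunks on the free and full lists on resume. */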
void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunk_free, list) {
		ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
		/* FIXME: handle pin failure */
		WARN_ON(ret);
	}
	list_for_each_entry(chunk, &drm->dmem->chunk_full, list) {
		ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
		/* FIXME: handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
}

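/*
 * Unpin the buffer objects of all chunks on the free and full lists ahead
 * of suspend.
 */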
void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunk_free, list) {
		nouveau_bo_unpin(chunk->bo);
	}
	list_for_each_entry(chunk, &drm->dmem->chunk_full, list) {
		nouveau_bo_unpin(chunk->bo);
	}
	mutex_unlock(&drm->dmem->mutex);
}

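/*
 * Tear down the chunk pool. All device pages must already have been freed,
 * so only the chunk_empty list should still hold chunks.
 */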
void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk, *tmp;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);

	WARN_ON(!list_empty(&drm->dmem->chunk_free));
	WARN_ON(!list_empty(&drm->dmem->chunk_full));

	list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunk_empty, list) {
		if (chunk->bo) {
			nouveau_bo_unpin(chunk->bo);
			nouveau_bo_ref(NULL, &chunk->bo);
		}
		list_del(&chunk->list);
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
}

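/*
 * Program the copy engine to move npages PAGE_SIZE lines between the given
 * apertures: select physical source/destination modes where needed, then
 * emit the addresses, the pitch/line geometry and the LAUNCH_DMA kick-off.
 */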
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
		    enum nouveau_aper dst_aper, u64 dst_addr,
		    enum nouveau_aper src_aper, u64 src_addr)
{
	struct nouveau_channel *chan = drm->dmem->migrate.chan;
	u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
			 (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
			 (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
	int ret;

	ret = RING_SPACE(chan, 13);
	if (ret)
		return ret;

	if (src_aper != NOUVEAU_APER_VIRT) {
		switch (src_aper) {
		case NOUVEAU_APER_VRAM:
			BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
			break;
		case NOUVEAU_APER_HOST:
			BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
			break;
		default:
			return -EINVAL;
		}
		launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
	}

	if (dst_aper != NOUVEAU_APER_VIRT) {
		switch (dst_aper) {
		case NOUVEAU_APER_VRAM:
			BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
			break;
		case NOUVEAU_APER_HOST:
			BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
			break;
		default:
			return -EINVAL;
		}
		launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
	}

	BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
	OUT_RING (chan, upper_32_bits(src_addr));
	OUT_RING (chan, lower_32_bits(src_addr));
	OUT_RING (chan, upper_32_bits(dst_addr));
	OUT_RING (chan, lower_32_bits(dst_addr));
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, npages);
	BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
	OUT_RING (chan, launch_dma);
	return 0;
}

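/* Wire up the copy routine matching the device's copy-engine class. */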
static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
	switch (drm->ttm.copy.oclass) {
	case PASCAL_DMA_COPY_A:
	case PASCAL_DMA_COPY_B:
	case VOLTA_DMA_COPY_A:
	case TURING_DMA_COPY_A:
		drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
		drm->dmem->migrate.chan = drm->ttm.chan;
		return 0;
	default:
		break;
	}
	return -ENODEV;
}

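/*
 * Register all of the device's VRAM as MEMORY_DEVICE_PRIVATE memory and
 * pre-create the (initially unbacked) chunk descriptors covering it.
 */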
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
	struct device *device = drm->dev->dev;
	struct resource *res;
	unsigned long i, size, pfn_first;
	int ret;

	/* This only makes sense on PASCAL or newer. */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;

	drm->dmem->drm = drm;
	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunk_free);
	INIT_LIST_HEAD(&drm->dmem->chunk_full);
	INIT_LIST_HEAD(&drm->dmem->chunk_empty);

	size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

	/* Initialize migration dma helpers before registering memory */
	ret = nouveau_dmem_migrate_init(drm);
	if (ret)
		goto out_free;

	/*
	 * FIXME: we need some kind of policy to decide how much VRAM we
	 * want to register with HMM. For now just register everything, and
	 * later, if we want to do things like overcommit, we can revisit
	 * this.
	 */
	res = devm_request_free_mem_region(device, &iomem_resource, size);
	if (IS_ERR(res))
		goto out_free;
	drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	drm->dmem->pagemap.res = *res;
	drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
	if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
		goto out_free;

	pfn_first = res->start >> PAGE_SHIFT;
	for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
		struct nouveau_dmem_chunk *chunk;
		struct page *page;
		unsigned long j;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (chunk == NULL) {
			nouveau_dmem_fini(drm);
			return;
		}

		chunk->drm = drm;
		chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

		page = pfn_to_page(chunk->pfn_first);
		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
			page->zone_device_data = chunk;
	}

	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
	return;
out_free:
	kfree(drm->dmem);
	drm->dmem = NULL;
}

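/*
 * The inverse of the fault path: DMA-map one system page and copy it into a
 * freshly allocated device page, returning the destination migrate pfn.
 */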
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
		unsigned long src, dma_addr_t *dma_addr)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;

	spage = migrate_pfn_to_page(src);
	if (!spage || !(src & MIGRATE_PFN_MIGRATE))
		goto out;

	dpage = nouveau_dmem_page_alloc_locked(drm);
	if (!dpage)
		return 0;

	*dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr))
		goto out_free_page;

	if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM,
			nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST,
			*dma_addr))
		goto out_dma_unmap;

	return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;

out_dma_unmap:
	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
out_free_page:
	nouveau_dmem_page_free_locked(drm, dpage);
out:
	return 0;
}

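/*
 * Migrate one window of pages to VRAM: issue the copies, fence them, then
 * unmap the source pages.
 */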
static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
		struct migrate_vma *args, dma_addr_t *dma_addrs)
{
	struct nouveau_fence *fence;
	unsigned long addr = args->start, nr_dma = 0, i;

	for (i = 0; addr < args->end; i++) {
		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
				dma_addrs + nr_dma);
		if (args->dst[i])
			nr_dma++;
		addr += PAGE_SIZE;
	}

	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
	migrate_vma_pages(args);
	nouveau_dmem_fence_done(&fence);

	while (nr_dma--) {
		dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
				DMA_BIDIRECTIONAL);
	}
	/*
	 * FIXME optimization: update GPU page table to point to newly
	 * migrated memory.
	 */
	migrate_vma_finalize(args);
}

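/*
 * Migrate a range of a VMA into device memory, processing it in windows of
 * at most SG_MAX_SINGLE_ALLOC pages.
 */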
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			 struct vm_area_struct *vma,
			 unsigned long start,
			 unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
	dma_addr_t *dma_addrs;
	struct migrate_vma args = {
		.vma = vma,
		.start = start,
	};
	unsigned long c, i;
	int ret = -ENOMEM;

	args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
	if (!args.src)
		goto out;
	args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
	if (!args.dst)
		goto out_free_src;

	dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL);
	if (!dma_addrs)
		goto out_free_dst;

	for (i = 0; i < npages; i += c) {
		c = min(SG_MAX_SINGLE_ALLOC, npages - i);
		args.end = args.start + (c << PAGE_SHIFT);
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out_free_dma;

		if (args.cpages)
			nouveau_dmem_migrate_chunk(drm, &args, dma_addrs);
		args.start = args.end;
	}

	ret = 0;
out_free_dma:
	kfree(dma_addrs);
out_free_dst:
	kfree(args.dst);
out_free_src:
	kfree(args.src);
out:
	return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
	return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
}

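/*
 * Rewrite the HMM pfns of device-private pages so that they carry the
 * page's physical VRAM address rather than its system pfn.
 */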
void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
			 struct hmm_range *range)
{
	unsigned long i, npages;

	npages = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < npages; ++i) {
		struct page *page;
		uint64_t addr;

		page = hmm_device_entry_to_page(range, range->pfns[i]);
		if (page == NULL)
			continue;

		if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE]))
			continue;

		if (!nouveau_dmem_page(drm, page)) {
			WARN(1, "Some unknown device memory!\n");
			range->pfns[i] = 0;
			continue;
		}

		addr = nouveau_dmem_page_addr(page);
		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
	}
}