Commit | Line | Data |
---|---|---|
a11c3198 BS |
1 | /* |
2 | * Copyright 2010 Red Hat Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | * Authors: Ben Skeggs | |
23 | */ | |
24 | ||
25 | #include "drmP.h" | |
26 | #include "nouveau_drv.h" | |
27 | #include "nouveau_mm.h" | |
28 | #include "nouveau_vm.h" | |
29 | ||
30 | void | |
d5f42394 | 31 | nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_mem *node) |
a11c3198 BS |
32 | { |
33 | struct nouveau_vm *vm = vma->vm; | |
34 | struct nouveau_mm_node *r; | |
3ee01281 | 35 | int big = vma->node->type != vm->spg_shift; |
a11c3198 BS |
36 | u32 offset = vma->node->offset + (delta >> 12); |
37 | u32 bits = vma->node->type - 12; | |
38 | u32 pde = (offset >> vm->pgt_bits) - vm->fpde; | |
39 | u32 pte = (offset & ((1 << vm->pgt_bits) - 1)) >> bits; | |
40 | u32 max = 1 << (vm->pgt_bits - bits); | |
41 | u32 end, len; | |
42 | ||
d5f42394 | 43 | list_for_each_entry(r, &node->regions, rl_entry) { |
a11c3198 BS |
44 | u64 phys = (u64)r->offset << 12; |
45 | u32 num = r->length >> bits; | |
46 | ||
47 | while (num) { | |
3ee01281 | 48 | struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; |
a11c3198 BS |
49 | |
50 | end = (pte + num); | |
51 | if (unlikely(end >= max)) | |
52 | end = max; | |
53 | len = end - pte; | |
54 | ||
d5f42394 | 55 | vm->map(vma, pgt, node, pte, len, phys); |
a11c3198 BS |
56 | |
57 | num -= len; | |
58 | pte += len; | |
59 | if (unlikely(end >= max)) { | |
60 | pde++; | |
61 | pte = 0; | |
62 | } | |
63 | } | |
64 | } | |
65 | ||
66 | vm->flush(vm); | |
67 | } | |
68 | ||
69 | void | |
d5f42394 | 70 | nouveau_vm_map(struct nouveau_vma *vma, struct nouveau_mem *node) |
a11c3198 | 71 | { |
d5f42394 | 72 | nouveau_vm_map_at(vma, 0, node); |
a11c3198 BS |
73 | } |
74 | ||
75 | void | |
76 | nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length, | |
26c0c9e3 | 77 | struct nouveau_mem *mem, dma_addr_t *list) |
a11c3198 BS |
78 | { |
79 | struct nouveau_vm *vm = vma->vm; | |
3ee01281 | 80 | int big = vma->node->type != vm->spg_shift; |
a11c3198 BS |
81 | u32 offset = vma->node->offset + (delta >> 12); |
82 | u32 bits = vma->node->type - 12; | |
83 | u32 num = length >> vma->node->type; | |
84 | u32 pde = (offset >> vm->pgt_bits) - vm->fpde; | |
85 | u32 pte = (offset & ((1 << vm->pgt_bits) - 1)) >> bits; | |
86 | u32 max = 1 << (vm->pgt_bits - bits); | |
87 | u32 end, len; | |
88 | ||
89 | while (num) { | |
3ee01281 | 90 | struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; |
a11c3198 BS |
91 | |
92 | end = (pte + num); | |
93 | if (unlikely(end >= max)) | |
94 | end = max; | |
95 | len = end - pte; | |
96 | ||
26c0c9e3 | 97 | vm->map_sg(vma, pgt, mem, pte, len, list); |
a11c3198 BS |
98 | |
99 | num -= len; | |
100 | pte += len; | |
101 | list += len; | |
102 | if (unlikely(end >= max)) { | |
103 | pde++; | |
104 | pte = 0; | |
105 | } | |
106 | } | |
107 | ||
108 | vm->flush(vm); | |
109 | } | |
110 | ||
111 | void | |
112 | nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length) | |
113 | { | |
114 | struct nouveau_vm *vm = vma->vm; | |
3ee01281 | 115 | int big = vma->node->type != vm->spg_shift; |
a11c3198 BS |
116 | u32 offset = vma->node->offset + (delta >> 12); |
117 | u32 bits = vma->node->type - 12; | |
118 | u32 num = length >> vma->node->type; | |
119 | u32 pde = (offset >> vm->pgt_bits) - vm->fpde; | |
120 | u32 pte = (offset & ((1 << vm->pgt_bits) - 1)) >> bits; | |
121 | u32 max = 1 << (vm->pgt_bits - bits); | |
122 | u32 end, len; | |
123 | ||
124 | while (num) { | |
3ee01281 | 125 | struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; |
a11c3198 BS |
126 | |
127 | end = (pte + num); | |
128 | if (unlikely(end >= max)) | |
129 | end = max; | |
130 | len = end - pte; | |
131 | ||
132 | vm->unmap(pgt, pte, len); | |
133 | ||
134 | num -= len; | |
135 | pte += len; | |
136 | if (unlikely(end >= max)) { | |
137 | pde++; | |
138 | pte = 0; | |
139 | } | |
140 | } | |
141 | ||
142 | vm->flush(vm); | |
143 | } | |
144 | ||
145 | void | |
146 | nouveau_vm_unmap(struct nouveau_vma *vma) | |
147 | { | |
148 | nouveau_vm_unmap_at(vma, 0, (u64)vma->node->length << 12); | |
149 | } | |
150 | ||
151 | static void | |
3ee01281 | 152 | nouveau_vm_unmap_pgt(struct nouveau_vm *vm, int big, u32 fpde, u32 lpde) |
a11c3198 BS |
153 | { |
154 | struct nouveau_vm_pgd *vpgd; | |
155 | struct nouveau_vm_pgt *vpgt; | |
156 | struct nouveau_gpuobj *pgt; | |
157 | u32 pde; | |
158 | ||
159 | for (pde = fpde; pde <= lpde; pde++) { | |
160 | vpgt = &vm->pgt[pde - vm->fpde]; | |
3ee01281 | 161 | if (--vpgt->refcount[big]) |
a11c3198 BS |
162 | continue; |
163 | ||
3ee01281 BS |
164 | pgt = vpgt->obj[big]; |
165 | vpgt->obj[big] = NULL; | |
166 | ||
a11c3198 | 167 | list_for_each_entry(vpgd, &vm->pgd_list, head) { |
3ee01281 | 168 | vm->map_pgt(vpgd->obj, pde, vpgt->obj); |
a11c3198 BS |
169 | } |
170 | ||
a11c3198 BS |
171 | mutex_unlock(&vm->mm->mutex); |
172 | nouveau_gpuobj_ref(NULL, &pgt); | |
173 | mutex_lock(&vm->mm->mutex); | |
174 | } | |
175 | } | |
176 | ||
177 | static int | |
178 | nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type) | |
179 | { | |
180 | struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; | |
181 | struct nouveau_vm_pgd *vpgd; | |
182 | struct nouveau_gpuobj *pgt; | |
3ee01281 | 183 | int big = (type != vm->spg_shift); |
a11c3198 BS |
184 | u32 pgt_size; |
185 | int ret; | |
186 | ||
187 | pgt_size = (1 << (vm->pgt_bits + 12)) >> type; | |
188 | pgt_size *= 8; | |
189 | ||
190 | mutex_unlock(&vm->mm->mutex); | |
191 | ret = nouveau_gpuobj_new(vm->dev, NULL, pgt_size, 0x1000, | |
192 | NVOBJ_FLAG_ZERO_ALLOC, &pgt); | |
193 | mutex_lock(&vm->mm->mutex); | |
194 | if (unlikely(ret)) | |
195 | return ret; | |
196 | ||
197 | /* someone beat us to filling the PDE while we didn't have the lock */ | |
3ee01281 | 198 | if (unlikely(vpgt->refcount[big]++)) { |
a11c3198 BS |
199 | mutex_unlock(&vm->mm->mutex); |
200 | nouveau_gpuobj_ref(NULL, &pgt); | |
201 | mutex_lock(&vm->mm->mutex); | |
202 | return 0; | |
203 | } | |
204 | ||
3ee01281 | 205 | vpgt->obj[big] = pgt; |
a11c3198 | 206 | list_for_each_entry(vpgd, &vm->pgd_list, head) { |
3ee01281 | 207 | vm->map_pgt(vpgd->obj, pde, vpgt->obj); |
a11c3198 BS |
208 | } |
209 | ||
a11c3198 BS |
210 | return 0; |
211 | } | |
212 | ||
213 | int | |
214 | nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift, | |
215 | u32 access, struct nouveau_vma *vma) | |
216 | { | |
217 | u32 align = (1 << page_shift) >> 12; | |
218 | u32 msize = size >> 12; | |
219 | u32 fpde, lpde, pde; | |
220 | int ret; | |
221 | ||
222 | mutex_lock(&vm->mm->mutex); | |
223 | ret = nouveau_mm_get(vm->mm, page_shift, msize, 0, align, &vma->node); | |
224 | if (unlikely(ret != 0)) { | |
225 | mutex_unlock(&vm->mm->mutex); | |
226 | return ret; | |
227 | } | |
228 | ||
229 | fpde = (vma->node->offset >> vm->pgt_bits); | |
230 | lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits; | |
231 | for (pde = fpde; pde <= lpde; pde++) { | |
232 | struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; | |
3ee01281 | 233 | int big = (vma->node->type != vm->spg_shift); |
a11c3198 | 234 | |
3ee01281 BS |
235 | if (likely(vpgt->refcount[big])) { |
236 | vpgt->refcount[big]++; | |
a11c3198 BS |
237 | continue; |
238 | } | |
239 | ||
240 | ret = nouveau_vm_map_pgt(vm, pde, vma->node->type); | |
241 | if (ret) { | |
242 | if (pde != fpde) | |
3ee01281 | 243 | nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1); |
a11c3198 BS |
244 | nouveau_mm_put(vm->mm, vma->node); |
245 | mutex_unlock(&vm->mm->mutex); | |
246 | vma->node = NULL; | |
247 | return ret; | |
248 | } | |
249 | } | |
250 | mutex_unlock(&vm->mm->mutex); | |
251 | ||
252 | vma->vm = vm; | |
253 | vma->offset = (u64)vma->node->offset << 12; | |
254 | vma->access = access; | |
255 | return 0; | |
256 | } | |
257 | ||
258 | void | |
259 | nouveau_vm_put(struct nouveau_vma *vma) | |
260 | { | |
261 | struct nouveau_vm *vm = vma->vm; | |
262 | u32 fpde, lpde; | |
263 | ||
264 | if (unlikely(vma->node == NULL)) | |
265 | return; | |
266 | fpde = (vma->node->offset >> vm->pgt_bits); | |
267 | lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits; | |
268 | ||
269 | mutex_lock(&vm->mm->mutex); | |
3ee01281 | 270 | nouveau_vm_unmap_pgt(vm, vma->node->type != vm->spg_shift, fpde, lpde); |
a11c3198 BS |
271 | nouveau_mm_put(vm->mm, vma->node); |
272 | vma->node = NULL; | |
a11c3198 BS |
273 | mutex_unlock(&vm->mm->mutex); |
274 | } | |
275 | ||
276 | int | |
277 | nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset, | |
a11c3198 BS |
278 | struct nouveau_vm **pvm) |
279 | { | |
280 | struct drm_nouveau_private *dev_priv = dev->dev_private; | |
281 | struct nouveau_vm *vm; | |
282 | u64 mm_length = (offset + length) - mm_offset; | |
3ee01281 | 283 | u32 block, pgt_bits; |
a11c3198 BS |
284 | int ret; |
285 | ||
286 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); | |
287 | if (!vm) | |
288 | return -ENOMEM; | |
289 | ||
290 | if (dev_priv->card_type == NV_50) { | |
291 | vm->map_pgt = nv50_vm_map_pgt; | |
a11c3198 BS |
292 | vm->map = nv50_vm_map; |
293 | vm->map_sg = nv50_vm_map_sg; | |
294 | vm->unmap = nv50_vm_unmap; | |
295 | vm->flush = nv50_vm_flush; | |
3ee01281 BS |
296 | vm->spg_shift = 12; |
297 | vm->lpg_shift = 16; | |
4c74eb7f | 298 | |
3ee01281 | 299 | pgt_bits = 29; |
4c74eb7f BS |
300 | block = (1 << pgt_bits); |
301 | if (length < block) | |
302 | block = length; | |
303 | ||
304 | } else | |
305 | if (dev_priv->card_type == NV_C0) { | |
306 | vm->map_pgt = nvc0_vm_map_pgt; | |
307 | vm->map = nvc0_vm_map; | |
308 | vm->map_sg = nvc0_vm_map_sg; | |
309 | vm->unmap = nvc0_vm_unmap; | |
310 | vm->flush = nvc0_vm_flush; | |
311 | vm->spg_shift = 12; | |
312 | vm->lpg_shift = 17; | |
313 | pgt_bits = 27; | |
314 | ||
315 | /* Should be 4096 everywhere, this is a hack that's | |
316 | * currently necessary to avoid an elusive bug that | |
317 | * causes corruption when mixing small/large pages | |
318 | */ | |
319 | if (length < (1ULL << 40)) | |
320 | block = 4096; | |
321 | else { | |
322 | block = (1 << pgt_bits); | |
323 | if (length < block) | |
324 | block = length; | |
325 | } | |
a11c3198 BS |
326 | } else { |
327 | kfree(vm); | |
328 | return -ENOSYS; | |
329 | } | |
330 | ||
331 | vm->fpde = offset >> pgt_bits; | |
332 | vm->lpde = (offset + length - 1) >> pgt_bits; | |
333 | vm->pgt = kcalloc(vm->lpde - vm->fpde + 1, sizeof(*vm->pgt), GFP_KERNEL); | |
334 | if (!vm->pgt) { | |
335 | kfree(vm); | |
336 | return -ENOMEM; | |
337 | } | |
338 | ||
339 | INIT_LIST_HEAD(&vm->pgd_list); | |
340 | vm->dev = dev; | |
341 | vm->refcount = 1; | |
342 | vm->pgt_bits = pgt_bits - 12; | |
a11c3198 | 343 | |
a11c3198 BS |
344 | ret = nouveau_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12, |
345 | block >> 12); | |
346 | if (ret) { | |
347 | kfree(vm); | |
348 | return ret; | |
349 | } | |
350 | ||
351 | *pvm = vm; | |
352 | return 0; | |
353 | } | |
354 | ||
355 | static int | |
356 | nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd) | |
357 | { | |
358 | struct nouveau_vm_pgd *vpgd; | |
359 | int i; | |
360 | ||
361 | if (!pgd) | |
362 | return 0; | |
363 | ||
364 | vpgd = kzalloc(sizeof(*vpgd), GFP_KERNEL); | |
365 | if (!vpgd) | |
366 | return -ENOMEM; | |
367 | ||
368 | nouveau_gpuobj_ref(pgd, &vpgd->obj); | |
369 | ||
370 | mutex_lock(&vm->mm->mutex); | |
3ee01281 BS |
371 | for (i = vm->fpde; i <= vm->lpde; i++) |
372 | vm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj); | |
a11c3198 BS |
373 | list_add(&vpgd->head, &vm->pgd_list); |
374 | mutex_unlock(&vm->mm->mutex); | |
375 | return 0; | |
376 | } | |
377 | ||
378 | static void | |
379 | nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd) | |
380 | { | |
381 | struct nouveau_vm_pgd *vpgd, *tmp; | |
382 | ||
383 | if (!pgd) | |
384 | return; | |
385 | ||
386 | mutex_lock(&vm->mm->mutex); | |
387 | list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { | |
388 | if (vpgd->obj != pgd) | |
389 | continue; | |
390 | ||
391 | list_del(&vpgd->head); | |
392 | nouveau_gpuobj_ref(NULL, &vpgd->obj); | |
393 | kfree(vpgd); | |
394 | } | |
395 | mutex_unlock(&vm->mm->mutex); | |
396 | } | |
397 | ||
398 | static void | |
399 | nouveau_vm_del(struct nouveau_vm *vm) | |
400 | { | |
401 | struct nouveau_vm_pgd *vpgd, *tmp; | |
402 | ||
403 | list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { | |
404 | nouveau_vm_unlink(vm, vpgd->obj); | |
405 | } | |
406 | WARN_ON(nouveau_mm_fini(&vm->mm) != 0); | |
407 | ||
408 | kfree(vm->pgt); | |
409 | kfree(vm); | |
410 | } | |
411 | ||
412 | int | |
413 | nouveau_vm_ref(struct nouveau_vm *ref, struct nouveau_vm **ptr, | |
414 | struct nouveau_gpuobj *pgd) | |
415 | { | |
416 | struct nouveau_vm *vm; | |
417 | int ret; | |
418 | ||
419 | vm = ref; | |
420 | if (vm) { | |
421 | ret = nouveau_vm_link(vm, pgd); | |
422 | if (ret) | |
423 | return ret; | |
424 | ||
425 | vm->refcount++; | |
426 | } | |
427 | ||
428 | vm = *ptr; | |
429 | *ptr = ref; | |
430 | ||
431 | if (vm) { | |
432 | nouveau_vm_unlink(vm, pgd); | |
433 | ||
434 | if (--vm->refcount == 0) | |
435 | nouveau_vm_del(vm); | |
436 | } | |
437 | ||
438 | return 0; | |
439 | } |