Commit | Line | Data |
---|---|---|
d38ceaf9 AD |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the | |
7 | * "Software"), to deal in the Software without restriction, including | |
8 | * without limitation the rights to use, copy, modify, merge, publish, | |
9 | * distribute, sub license, and/or sell copies of the Software, and to | |
10 | * permit persons to whom the Software is furnished to do so, subject to | |
11 | * the following conditions: | |
12 | * | |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | |
20 | * | |
21 | * The above copyright notice and this permission notice (including the | |
22 | * next paragraph) shall be included in all copies or substantial portions | |
23 | * of the Software. | |
24 | * | |
25 | */ | |
26 | /* | |
27 | * Authors: | |
28 | * Christian König <christian.koenig@amd.com> | |
29 | */ | |
30 | ||
22f0463a CK |
31 | #include <linux/dma-fence-chain.h> |
32 | ||
d38ceaf9 AD |
33 | #include "amdgpu.h" |
34 | #include "amdgpu_trace.h" | |
d8d019cc | 35 | #include "amdgpu_amdkfd.h" |
d38ceaf9 | 36 | |
f91b3a69 CK |
37 | struct amdgpu_sync_entry { |
38 | struct hlist_node node; | |
f54d1867 | 39 | struct dma_fence *fence; |
f91b3a69 CK |
40 | }; |
41 | ||
257bf15a CK |
/* Slab cache backing every struct amdgpu_sync_entry allocation in this file. */
static struct kmem_cache *amdgpu_sync_slab;
43 | ||
d38ceaf9 AD |
44 | /** |
45 | * amdgpu_sync_create - zero init sync object | |
46 | * | |
47 | * @sync: sync object to initialize | |
48 | * | |
49 | * Just clear the sync object for now. | |
50 | */ | |
51 | void amdgpu_sync_create(struct amdgpu_sync *sync) | |
52 | { | |
f91b3a69 | 53 | hash_init(sync->fences); |
d38ceaf9 AD |
54 | sync->last_vm_update = NULL; |
55 | } | |
56 | ||
bcc634f4 CK |
57 | /** |
58 | * amdgpu_sync_same_dev - test if fence belong to us | |
59 | * | |
60 | * @adev: amdgpu device to use for the test | |
61 | * @f: fence to test | |
62 | * | |
63 | * Test if the fence was issued by us. | |
64 | */ | |
f54d1867 CW |
65 | static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, |
66 | struct dma_fence *f) | |
3c62338c | 67 | { |
1b1f42d8 | 68 | struct drm_sched_fence *s_fence = to_drm_sched_fence(f); |
3c62338c | 69 | |
4f839a24 CK |
70 | if (s_fence) { |
71 | struct amdgpu_ring *ring; | |
72 | ||
73 | ring = container_of(s_fence->sched, struct amdgpu_ring, sched); | |
74 | return ring->adev == adev; | |
75 | } | |
76 | ||
3c62338c CZ |
77 | return false; |
78 | } | |
79 | ||
bcc634f4 CK |
80 | /** |
81 | * amdgpu_sync_get_owner - extract the owner of a fence | |
82 | * | |
1b4940bc | 83 | * @f: fence get the owner from |
bcc634f4 CK |
84 | * |
85 | * Extract who originally created the fence. | |
86 | */ | |
f54d1867 | 87 | static void *amdgpu_sync_get_owner(struct dma_fence *f) |
3c62338c | 88 | { |
d8d019cc FK |
89 | struct drm_sched_fence *s_fence; |
90 | struct amdgpu_amdkfd_fence *kfd_fence; | |
91 | ||
92 | if (!f) | |
93 | return AMDGPU_FENCE_OWNER_UNDEFINED; | |
bcc634f4 | 94 | |
d8d019cc | 95 | s_fence = to_drm_sched_fence(f); |
3c62338c | 96 | if (s_fence) |
bcc634f4 | 97 | return s_fence->owner; |
336d1f5e | 98 | |
d8d019cc FK |
99 | kfd_fence = to_amdgpu_amdkfd_fence(f); |
100 | if (kfd_fence) | |
101 | return AMDGPU_FENCE_OWNER_KFD; | |
102 | ||
bcc634f4 | 103 | return AMDGPU_FENCE_OWNER_UNDEFINED; |
3c62338c CZ |
104 | } |
105 | ||
bcc634f4 CK |
/**
 * amdgpu_sync_keep_later - keep whichever fence is later
 *
 * @keep: slot holding the currently kept fence, may point to NULL
 * @fence: candidate fence
 *
 * Leaves @keep untouched when it already holds a fence that is later than
 * @fence. Otherwise the reference held in @keep is dropped and replaced by a
 * new reference to @fence.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	struct dma_fence *current_fence = *keep;

	if (current_fence && dma_fence_is_later(current_fence, fence))
		return;

	dma_fence_put(current_fence);
	*keep = dma_fence_get(fence);
}
123 | ||
832a902f CK |
124 | /** |
125 | * amdgpu_sync_add_later - add the fence to the hash | |
126 | * | |
127 | * @sync: sync object to add the fence to | |
128 | * @f: fence to add | |
129 | * | |
130 | * Tries to add the fence to an existing hash entry. Returns true when an entry | |
131 | * was found, false otherwise. | |
132 | */ | |
174b328b | 133 | static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) |
832a902f CK |
134 | { |
135 | struct amdgpu_sync_entry *e; | |
136 | ||
137 | hash_for_each_possible(sync->fences, e, node, f->context) { | |
138 | if (unlikely(e->fence->context != f->context)) | |
139 | continue; | |
140 | ||
141 | amdgpu_sync_keep_later(&e->fence, f); | |
142 | return true; | |
143 | } | |
144 | return false; | |
145 | } | |
146 | ||
d38ceaf9 | 147 | /** |
91e1a520 | 148 | * amdgpu_sync_fence - remember to sync to this fence |
d38ceaf9 AD |
149 | * |
150 | * @sync: sync object to add fence to | |
e095fc17 | 151 | * @f: fence to sync to |
d38ceaf9 | 152 | * |
e095fc17 | 153 | * Add the fence to the sync object. |
d38ceaf9 | 154 | */ |
174b328b | 155 | int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) |
d38ceaf9 | 156 | { |
f91b3a69 | 157 | struct amdgpu_sync_entry *e; |
d38ceaf9 | 158 | |
91e1a520 CK |
159 | if (!f) |
160 | return 0; | |
3c62338c | 161 | |
174b328b | 162 | if (amdgpu_sync_add_later(sync, f)) |
f91b3a69 | 163 | return 0; |
d38ceaf9 | 164 | |
257bf15a | 165 | e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); |
046c12c6 CK |
166 | if (!e) |
167 | return -ENOMEM; | |
d38ceaf9 | 168 | |
046c12c6 | 169 | hash_add(sync->fences, &e->node, f->context); |
f54d1867 | 170 | e->fence = dma_fence_get(f); |
91e1a520 | 171 | return 0; |
d38ceaf9 AD |
172 | } |
173 | ||
e095fc17 CK |
174 | /** |
175 | * amdgpu_sync_vm_fence - remember to sync to this VM fence | |
176 | * | |
e095fc17 CK |
177 | * @sync: sync object to add fence to |
178 | * @fence: the VM fence to add | |
179 | * | |
180 | * Add the fence to the sync object and remember it as VM update. | |
181 | */ | |
182 | int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) | |
183 | { | |
184 | if (!fence) | |
185 | return 0; | |
186 | ||
187 | amdgpu_sync_keep_later(&sync->last_vm_update, fence); | |
174b328b | 188 | return amdgpu_sync_fence(sync, fence); |
e095fc17 CK |
189 | } |
190 | ||
22f0463a CK |
191 | /* Determine based on the owner and mode if we should sync to a fence or not */ |
192 | static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, | |
193 | enum amdgpu_sync_mode mode, | |
194 | void *owner, struct dma_fence *f) | |
195 | { | |
196 | void *fence_owner = amdgpu_sync_get_owner(f); | |
197 | ||
198 | /* Always sync to moves, no matter what */ | |
199 | if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) | |
200 | return true; | |
201 | ||
202 | /* We only want to trigger KFD eviction fences on | |
203 | * evict or move jobs. Skip KFD fences otherwise. | |
204 | */ | |
205 | if (fence_owner == AMDGPU_FENCE_OWNER_KFD && | |
206 | owner != AMDGPU_FENCE_OWNER_UNDEFINED) | |
207 | return false; | |
208 | ||
209 | /* Never sync to VM updates either. */ | |
210 | if (fence_owner == AMDGPU_FENCE_OWNER_VM && | |
211 | owner != AMDGPU_FENCE_OWNER_UNDEFINED) | |
212 | return false; | |
213 | ||
214 | /* Ignore fences depending on the sync mode */ | |
215 | switch (mode) { | |
216 | case AMDGPU_SYNC_ALWAYS: | |
217 | return true; | |
218 | ||
219 | case AMDGPU_SYNC_NE_OWNER: | |
220 | if (amdgpu_sync_same_dev(adev, f) && | |
221 | fence_owner == owner) | |
222 | return false; | |
223 | break; | |
224 | ||
225 | case AMDGPU_SYNC_EQ_OWNER: | |
226 | if (amdgpu_sync_same_dev(adev, f) && | |
227 | fence_owner != owner) | |
228 | return false; | |
229 | break; | |
230 | ||
231 | case AMDGPU_SYNC_EXPLICIT: | |
232 | return false; | |
233 | } | |
234 | ||
235 | WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, | |
236 | "Adding eviction fence to sync obj"); | |
237 | return true; | |
238 | } | |
239 | ||
d38ceaf9 | 240 | /** |
2f4b9400 | 241 | * amdgpu_sync_resv - sync to a reservation object |
d38ceaf9 | 242 | * |
1b4940bc | 243 | * @adev: amdgpu device |
d38ceaf9 AD |
244 | * @sync: sync object to add fences from reservation object to |
245 | * @resv: reservation object with embedded fence | |
5d319660 CK |
246 | * @mode: how owner affects which fences we sync to |
247 | * @owner: owner of the planned job submission | |
d38ceaf9 | 248 | * |
2f4b9400 | 249 | * Sync to the fence |
d38ceaf9 | 250 | */ |
5d319660 CK |
251 | int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, |
252 | struct dma_resv *resv, enum amdgpu_sync_mode mode, | |
253 | void *owner) | |
d38ceaf9 | 254 | { |
930ca2a7 | 255 | struct dma_resv_iter cursor; |
f54d1867 | 256 | struct dma_fence *f; |
930ca2a7 | 257 | int r; |
d38ceaf9 | 258 | |
4b095304 JZ |
259 | if (resv == NULL) |
260 | return -EINVAL; | |
261 | ||
930ca2a7 CK |
262 | dma_resv_for_each_fence(&cursor, resv, true, f) { |
263 | dma_fence_chain_for_each(f, f) { | |
264 | struct dma_fence_chain *chain = to_dma_fence_chain(f); | |
265 | ||
266 | if (amdgpu_sync_test_fence(adev, mode, owner, chain ? | |
267 | chain->fence : f)) { | |
268 | r = amdgpu_sync_fence(sync, f); | |
269 | dma_fence_put(f); | |
270 | if (r) | |
271 | return r; | |
272 | break; | |
273 | } | |
4939d973 | 274 | } |
d38ceaf9 | 275 | } |
22f0463a | 276 | return 0; |
d38ceaf9 AD |
277 | } |
278 | ||
832a902f | 279 | /** |
1fbb2e92 | 280 | * amdgpu_sync_peek_fence - get the next fence not signaled yet |
832a902f CK |
281 | * |
282 | * @sync: the sync object | |
35420238 | 283 | * @ring: optional ring to use for test |
832a902f | 284 | * |
1fbb2e92 CK |
285 | * Returns the next fence not signaled yet without removing it from the sync |
286 | * object. | |
832a902f | 287 | */ |
f54d1867 CW |
288 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, |
289 | struct amdgpu_ring *ring) | |
832a902f CK |
290 | { |
291 | struct amdgpu_sync_entry *e; | |
292 | struct hlist_node *tmp; | |
293 | int i; | |
294 | ||
295 | hash_for_each_safe(sync->fences, i, tmp, e, node) { | |
f54d1867 | 296 | struct dma_fence *f = e->fence; |
1b1f42d8 | 297 | struct drm_sched_fence *s_fence = to_drm_sched_fence(f); |
35420238 | 298 | |
7a7c286d CZ |
299 | if (dma_fence_is_signaled(f)) { |
300 | hash_del(&e->node); | |
301 | dma_fence_put(f); | |
302 | kmem_cache_free(amdgpu_sync_slab, e); | |
303 | continue; | |
304 | } | |
35420238 CK |
305 | if (ring && s_fence) { |
306 | /* For fences from the same ring it is sufficient | |
307 | * when they are scheduled. | |
308 | */ | |
1fbb2e92 | 309 | if (s_fence->sched == &ring->sched) { |
f54d1867 | 310 | if (dma_fence_is_signaled(&s_fence->scheduled)) |
1fbb2e92 | 311 | continue; |
832a902f | 312 | |
1fbb2e92 CK |
313 | return &s_fence->scheduled; |
314 | } | |
832a902f CK |
315 | } |
316 | ||
1fbb2e92 | 317 | return f; |
832a902f CK |
318 | } |
319 | ||
1fbb2e92 | 320 | return NULL; |
832a902f CK |
321 | } |
322 | ||
0e9d239b CK |
323 | /** |
324 | * amdgpu_sync_get_fence - get the next fence from the sync object | |
325 | * | |
326 | * @sync: sync object to use | |
327 | * | |
328 | * Get and removes the next fence from the sync object not signaled yet. | |
329 | */ | |
174b328b | 330 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) |
e61235db CK |
331 | { |
332 | struct amdgpu_sync_entry *e; | |
333 | struct hlist_node *tmp; | |
f54d1867 | 334 | struct dma_fence *f; |
e61235db | 335 | int i; |
e61235db CK |
336 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
337 | ||
338 | f = e->fence; | |
339 | ||
340 | hash_del(&e->node); | |
257bf15a | 341 | kmem_cache_free(amdgpu_sync_slab, e); |
e61235db | 342 | |
f54d1867 | 343 | if (!dma_fence_is_signaled(f)) |
e61235db CK |
344 | return f; |
345 | ||
f54d1867 | 346 | dma_fence_put(f); |
e61235db CK |
347 | } |
348 | return NULL; | |
349 | } | |
350 | ||
3c728d3a FK |
351 | /** |
352 | * amdgpu_sync_clone - clone a sync object | |
353 | * | |
354 | * @source: sync object to clone | |
355 | * @clone: pointer to destination sync object | |
356 | * | |
357 | * Adds references to all unsignaled fences in @source to @clone. Also | |
358 | * removes signaled fences from @source while at it. | |
359 | */ | |
360 | int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) | |
361 | { | |
362 | struct amdgpu_sync_entry *e; | |
363 | struct hlist_node *tmp; | |
364 | struct dma_fence *f; | |
365 | int i, r; | |
366 | ||
367 | hash_for_each_safe(source->fences, i, tmp, e, node) { | |
368 | f = e->fence; | |
369 | if (!dma_fence_is_signaled(f)) { | |
174b328b | 370 | r = amdgpu_sync_fence(clone, f); |
3c728d3a FK |
371 | if (r) |
372 | return r; | |
373 | } else { | |
374 | hash_del(&e->node); | |
375 | dma_fence_put(f); | |
376 | kmem_cache_free(amdgpu_sync_slab, e); | |
377 | } | |
378 | } | |
379 | ||
380 | dma_fence_put(clone->last_vm_update); | |
381 | clone->last_vm_update = dma_fence_get(source->last_vm_update); | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
a6583af4 HK |
386 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) |
387 | { | |
388 | struct amdgpu_sync_entry *e; | |
389 | struct hlist_node *tmp; | |
390 | int i, r; | |
391 | ||
392 | hash_for_each_safe(sync->fences, i, tmp, e, node) { | |
393 | r = dma_fence_wait(e->fence, intr); | |
394 | if (r) | |
395 | return r; | |
396 | ||
397 | hash_del(&e->node); | |
398 | dma_fence_put(e->fence); | |
399 | kmem_cache_free(amdgpu_sync_slab, e); | |
400 | } | |
401 | ||
402 | return 0; | |
403 | } | |
404 | ||
d38ceaf9 AD |
405 | /** |
406 | * amdgpu_sync_free - free the sync object | |
407 | * | |
d38ceaf9 | 408 | * @sync: sync object to use |
d38ceaf9 | 409 | * |
2f4b9400 | 410 | * Free the sync object. |
d38ceaf9 | 411 | */ |
8a8f0b48 | 412 | void amdgpu_sync_free(struct amdgpu_sync *sync) |
d38ceaf9 | 413 | { |
f91b3a69 CK |
414 | struct amdgpu_sync_entry *e; |
415 | struct hlist_node *tmp; | |
d38ceaf9 AD |
416 | unsigned i; |
417 | ||
f91b3a69 CK |
418 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
419 | hash_del(&e->node); | |
f54d1867 | 420 | dma_fence_put(e->fence); |
257bf15a | 421 | kmem_cache_free(amdgpu_sync_slab, e); |
f91b3a69 CK |
422 | } |
423 | ||
f54d1867 | 424 | dma_fence_put(sync->last_vm_update); |
d38ceaf9 | 425 | } |
257bf15a CK |
426 | |
427 | /** | |
428 | * amdgpu_sync_init - init sync object subsystem | |
429 | * | |
430 | * Allocate the slab allocator. | |
431 | */ | |
432 | int amdgpu_sync_init(void) | |
433 | { | |
434 | amdgpu_sync_slab = kmem_cache_create( | |
435 | "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0, | |
436 | SLAB_HWCACHE_ALIGN, NULL); | |
437 | if (!amdgpu_sync_slab) | |
438 | return -ENOMEM; | |
439 | ||
440 | return 0; | |
441 | } | |
442 | ||
443 | /** | |
444 | * amdgpu_sync_fini - fini sync object subsystem | |
445 | * | |
446 | * Free the slab allocator. | |
447 | */ | |
448 | void amdgpu_sync_fini(void) | |
449 | { | |
450 | kmem_cache_destroy(amdgpu_sync_slab); | |
451 | } |