// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gt_tlb_invalidation.h"

#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_trace.h"

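/*
 * Upper bound on how long a single TLB invalidation may take before it is
 * flagged as timed out: HZ / 4 jiffies is 250ms regardless of CONFIG_HZ. Both
 * the fence timeout worker and the synchronous wait below use this value.
 */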
#define TLB_TIMEOUT	(HZ / 4)

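/*
 * Delayed work, re-armed while fences remain pending, that signals (with
 * -ETIME) any invalidation fence which has not been acknowledged by the GuC
 * within TLB_TIMEOUT of being sent.
 */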
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
	struct xe_gt *gt = container_of(work, struct xe_gt,
					tlb_invalidation.fence_tdr.work);
	struct xe_gt_tlb_invalidation_fence *fence, *next;

	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
	list_for_each_entry_safe(fence, next,
				 &gt->tlb_invalidation.pending_fences, link) {
		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
						    fence->invalidation_time);

		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
			break;

		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
			  fence->seqno, gt->tlb_invalidation.seqno_recv);

		list_del(&fence->link);
		fence->base.error = -ETIME;
		dma_fence_signal(&fence->base);
		dma_fence_put(&fence->base);
	}
	if (!list_empty(&gt->tlb_invalidation.pending_fences))
		queue_delayed_work(system_wq,
				   &gt->tlb_invalidation.fence_tdr,
				   TLB_TIMEOUT);
	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
}

/**
 * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
 * @gt: graphics tile
 *
 * Initialize GT TLB invalidation state. This is purely software initialization
 * and should be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
{
	gt->tlb_invalidation.seqno = 1;
	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
	spin_lock_init(&gt->tlb_invalidation.pending_lock);
	spin_lock_init(&gt->tlb_invalidation.lock);
	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
			  xe_gt_tlb_fence_timeout);

	return 0;
}

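/*
 * __invalidation_fence_signal() signals and drops a fence that was never added
 * to (or has already been removed from) the pending list;
 * invalidation_fence_signal() additionally unlinks the fence from
 * gt->tlb_invalidation.pending_fences first.
 */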
static void
__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
{
	trace_xe_gt_tlb_invalidation_fence_signal(fence);
	dma_fence_signal(&fence->base);
	dma_fence_put(&fence->base);
}

static void
invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
{
	list_del(&fence->link);
	__invalidation_fence_signal(fence);
}

/**
 * xe_gt_tlb_invalidation_reset - TLB invalidation reset
 * @gt: graphics tile
 *
 * Signal any pending invalidation fences; should be called during a GT reset.
 */
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{
	struct xe_gt_tlb_invalidation_fence *fence, *next;
	struct xe_guc *guc = &gt->uc.guc;
	int pending_seqno;

	/*
	 * CT channel is already disabled at this point. No new TLB requests
	 * can appear.
	 */

	mutex_lock(&gt->uc.guc.ct.lock);
	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
	/*
	 * We might have various kworkers waiting for TLB flushes to complete
	 * which are not tracked with an explicit TLB fence; however, at this
	 * stage those flushes will never complete since the CT is already
	 * disabled, so make sure we signal the waiters here under the
	 * assumption that we have completed a full GT reset.
	 */
	if (gt->tlb_invalidation.seqno == 1)
		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
	else
		pending_seqno = gt->tlb_invalidation.seqno - 1;
	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
	wake_up_all(&guc->ct.wq);

	list_for_each_entry_safe(fence, next,
				 &gt->tlb_invalidation.pending_fences, link)
		invalidation_fence_signal(fence);
	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
	mutex_unlock(&gt->uc.guc.ct.lock);
}

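/*
 * Seqnos are allocated in [1, TLB_INVALIDATION_SEQNO_MAX) and wrap around, so
 * "past" is decided with wraparound-safe arithmetic: differences larger than
 * half the seqno space are treated as having wrapped.
 */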
static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
{
	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);

	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
		return false;

	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
		return true;

	return seqno_recv >= seqno;
}

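/*
 * Allocate a seqno and send the invalidation H2G under ct.lock; on success
 * publish @fence (if any) on the pending list so the G2H handler or the
 * timeout worker will signal it later.
 */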
static int send_tlb_invalidation(struct xe_guc *guc,
				 struct xe_gt_tlb_invalidation_fence *fence,
				 u32 *action, int len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	int seqno;
	int ret;

	/*
	 * XXX: The seqno algorithm relies on TLB invalidations being processed
	 * in order, which they currently are; if that changes, the algorithm
	 * will need to be updated.
	 */

	mutex_lock(&guc->ct.lock);
	seqno = gt->tlb_invalidation.seqno;
	if (fence) {
		fence->seqno = seqno;
		trace_xe_gt_tlb_invalidation_fence_send(fence);
	}
	action[1] = seqno;
	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
				    G2H_LEN_DW_TLB_INVALIDATE, 1);
	if (!ret && fence) {
		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
		/*
		 * We haven't actually published the TLB fence via
		 * pending_fences yet, but in theory our seqno could have
		 * already been written as we acquired the pending_lock. In
		 * such a case we can just go ahead and signal the fence here.
		 */
		if (tlb_invalidation_seqno_past(gt, seqno)) {
			__invalidation_fence_signal(fence);
		} else {
			fence->invalidation_time = ktime_get();
			list_add_tail(&fence->link,
				      &gt->tlb_invalidation.pending_fences);

			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
				queue_delayed_work(system_wq,
						   &gt->tlb_invalidation.fence_tdr,
						   TLB_TIMEOUT);
		}
		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
	} else if (ret < 0 && fence) {
		__invalidation_fence_signal(fence);
	}
	if (!ret) {
		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
			TLB_INVALIDATION_SEQNO_MAX;
		if (!gt->tlb_invalidation.seqno)
			gt->tlb_invalidation.seqno = 1;
		ret = seqno;
	}
	mutex_unlock(&guc->ct.lock);

	return ret;
}

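/*
 * Build the invalidation-descriptor dword for an H2G TLB invalidation action:
 * the invalidation type combined with heavy mode and a cache flush.
 */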
#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
		XE_GUC_TLB_INVAL_FLUSH_CACHE)

/**
 * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
 * @gt: graphics tile
 *
 * Issue a TLB invalidation for the GuC. Completion of the TLB invalidation is
 * asynchronous; the caller can use the returned seqno with
 * xe_gt_tlb_invalidation_wait() to wait for completion.
 *
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
{
	u32 action[] = {
		XE_GUC_ACTION_TLB_INVALIDATION,
		0,  /* seqno, replaced in send_tlb_invalidation */
		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
	};

	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
				     ARRAY_SIZE(action));
}

/**
 * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
 * @gt: graphics tile
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion, can be NULL
 * @vma: VMA to invalidate
 *
 * Issue a range-based TLB invalidation if supported; if not, fall back to a
 * full TLB invalidation. Completion of the TLB invalidation is asynchronous
 * and the caller can either use the invalidation fence or the returned seqno
 * with xe_gt_tlb_invalidation_wait() to wait for completion.
 *
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
			       struct xe_gt_tlb_invalidation_fence *fence,
			       struct xe_vma *vma)
{
	struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN	7
	u32 action[MAX_TLB_INVALIDATION_LEN];
	int len = 0;

	xe_gt_assert(gt, vma);

	/* Execlists not supported */
	if (gt_to_xe(gt)->info.force_execlist) {
		if (fence)
			__invalidation_fence_signal(fence);

		return 0;
	}

	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
	if (!xe->info.has_range_tlb_invalidation) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 start = xe_vma_start(vma);
		u64 length = xe_vma_size(vma);
		u64 align, end;

		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate at a higher granularity if the start
		 * address is not aligned to the length: find a length large
		 * enough to create an address mask covering the required
		 * range.
		 */
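		/*
		 * Illustrative example (hypothetical addresses): an 8K VMA at
		 * 0x7000 is not 8K aligned and straddles 0x8000, so the loop
		 * below keeps doubling the length and re-aligning the start
		 * until one aligned power-of-two block covers the VMA, ending
		 * with a 64K invalidation at address 0.
		 */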
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(xe_vma_start(vma), align);
		end = ALIGN(xe_vma_end(vma), align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(xe_vma_start(vma), length);
		}

		/*
		 * The minimum invalidation size the hardware expects for a
		 * 2MB page is 16MB.
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(xe_vma_start(vma), length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = xe_vma_vm(vma)->usm.asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
}

/**
 * xe_gt_tlb_invalidation_wait - Wait for a TLB invalidation to complete
 * @gt: graphics tile
 * @seqno: seqno to wait on, as returned from xe_gt_tlb_invalidation_vma() or
 * xe_gt_tlb_invalidation_guc()
 *
 * Wait up to TLB_TIMEOUT (HZ / 4, i.e. 250ms) for a TLB invalidation to
 * complete; in practice the completion should always arrive well within this
 * window.
 *
 * Return: 0 on success, -ETIME on TLB invalidation timeout
 */
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
{
	struct xe_guc *guc = &gt->uc.guc;
	int ret;

	/* Execlists not supported */
	if (gt_to_xe(gt)->info.force_execlist)
		return 0;

	/*
	 * XXX: See above, this algorithm only works if seqnos are always in
	 * order.
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 tlb_invalidation_seqno_past(gt, seqno),
				 TLB_TIMEOUT);
	if (!ret) {
		struct drm_printer p = xe_gt_err_printer(gt);

		xe_gt_err(gt, "TLB invalidation timed out, seqno=%d, recv=%d\n",
			  seqno, gt->tlb_invalidation.seqno_recv);
		xe_guc_ct_print(&guc->ct, &p, true);
		return -ETIME;
	}

	return 0;
}

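/*
 * Typical caller pattern (illustrative sketch only, not taken from an actual
 * call site): issue a range invalidation for a VMA and wait synchronously.
 *
 *	int seqno = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
 *
 *	if (seqno <= 0)
 *		return seqno;
 *
 *	return xe_gt_tlb_invalidation_wait(gt, seqno);
 */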
/**
 * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
 * @guc: guc
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse the seqno of a completed TLB invalidation, wake any waiters on that
 * seqno, and signal any invalidation fences for it. The algorithm depends on
 * seqnos being received in order and asserts this assumption.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_gt_tlb_invalidation_fence *fence, *next;
	unsigned long flags;

	if (unlikely(len != 1))
		return -EPROTO;

	/*
	 * This can be run both directly from the IRQ handler and from
	 * process_g2h_msg(). Only one of them may process any individual CT
	 * message; however, the order in which they are processed here could
	 * result in skipping a seqno. To handle that we simply process all the
	 * seqnos from the last seqno_recv up to and including the one in
	 * msg[0]. The delta should be very small, so there shouldn't be many
	 * pending_fences we actually need to iterate over here.
	 *
	 * From the GuC's POV we expect the seqnos to always appear in order,
	 * so if we see something later in the timeline we can be sure that
	 * anything appearing earlier has already signalled; we just have yet
	 * to officially process the CT message, as when racing against
	 * process_g2h_msg().
	 */
	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
	if (tlb_invalidation_seqno_past(gt, msg[0])) {
		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
		return 0;
	}

	/*
	 * wake_up_all() and wait_event_timeout() already have the correct
	 * barriers.
	 */
	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
	wake_up_all(&guc->ct.wq);

	list_for_each_entry_safe(fence, next,
				 &gt->tlb_invalidation.pending_fences, link) {
		trace_xe_gt_tlb_invalidation_fence_recv(fence);

		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
			break;

		invalidation_fence_signal(fence);
	}

	if (!list_empty(&gt->tlb_invalidation.pending_fences))
		mod_delayed_work(system_wq,
				 &gt->tlb_invalidation.fence_tdr,
				 TLB_TIMEOUT);
	else
		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);

	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);

	return 0;
}