drm/i915: Add dev.i915.perf_stream_paranoid sysctl option
drivers/gpu/drm/i915/i915_perf.c
1/*
2 * Copyright © 2015-2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Robert Bragg <robert@sixbynine.org>
25 */
26
27#include <linux/anon_inodes.h>
 28#include <linux/sizes.h>
29
30#include "i915_drv.h"
31#include "i915_oa_hsw.h"
32
 33/* HW requires this to be a power of two, between 128k and 16M, though the
 34 * driver is currently designed assuming the largest 16M size is used, such
35 * that the overflow cases are unlikely in normal operation.
36 */
37#define OA_BUFFER_SIZE SZ_16M
38
39#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
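/* Worked example (assuming the 16M OA_BUFFER_SIZE defined above): the mask
 * keeps OA_TAKEN() correct across the wrap point. A head of 0xFFFF00 with a
 * tail that has wrapped around to 0x100 gives
 *
 *	(0x100 - 0xFFFF00) & 0xFFFFFF == 0x200
 *
 * i.e. 512 bytes available: 0x100 up to the end of the buffer plus 0x100 from
 * its start.
 */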
40
41/* There's a HW race condition between OA unit tail pointer register updates and
42 * writes to memory whereby the tail pointer can sometimes get ahead of what's
43 * been written out to the OA buffer so far.
44 *
45 * Although this can be observed explicitly by checking for a zeroed report-id
46 * field in tail reports, it seems preferable to account for this earlier e.g.
47 * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
48 * in this situation.
49 *
50 * To give time for the most recent reports to land before they may be copied to
51 * userspace, the driver operates as if the tail pointer effectively lags behind
52 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
53 * based on this constant in nanoseconds, the current OA sampling exponent
54 * and current report size.
55 *
56 * There is also a fallback check while reading to simply skip over reports with
57 * a zeroed report-id.
58 */
59#define OA_TAIL_MARGIN_NSEC 100000ULL
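/* Worked example (based on the margin calculation in i915_oa_stream_init()):
 * with a sampling exponent of 6 the period is 80ns * 2^7 = 10240ns, so for a
 * 256 byte report format the margin works out as
 *
 *	((100000 / 10240) + 1) * 256 = (9 + 1) * 256 = 2560 bytes
 *
 * i.e. roughly the last ten reports before the HW tail are treated as not yet
 * safely landed in memory.
 */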
60
61/* frequency for checking whether the OA unit has written new reports to the
62 * circular OA buffer...
63 */
64#define POLL_FREQUENCY 200
65#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
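/* E.g. 200Hz here corresponds to a POLL_PERIOD of 1000000000 / 200 =
 * 5000000ns, i.e. the hrtimer checks for new reports every 5ms.
 */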
66
67/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
68static int zero;
69static int one = 1;
70static u32 i915_perf_stream_paranoid = true;
71
72/* The maximum exponent the hardware accepts is 63 (essentially it selects one
73 * of the 64bit timestamp bits to trigger reports from) but there's currently
74 * no known use case for sampling as infrequently as once per 47 thousand years.
75 *
76 * Since the timestamps included in OA reports are only 32bits it seems
77 * reasonable to limit the OA exponent where it's still possible to account for
78 * overflow in OA report timestamps.
79 */
80#define OA_EXPONENT_MAX 31
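/* Worked example (assuming the 12.5MHz/80ns timestamp frequency set up in
 * i915_perf_init() for Haswell): an exponent selects a sampling period of
 * 80ns * 2^(exponent + 1), so OA_EXPONENT_MAX = 31 gives 80ns * 2^32, roughly
 * 343 seconds. That matches the span covered by a 32bit timestamp ticking at
 * 80ns, so no more than one timestamp wrap can occur between successive
 * reports and overflow can still be accounted for.
 */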
81
82#define INVALID_CTX_ID 0xffffffff
83
84
85/* XXX: beware if future OA HW adds new report formats that the current
86 * code assumes all reports have a power-of-two size and ~(size - 1) can
87 * be used as a mask to align the OA tail pointer.
88 */
89static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
90 [I915_OA_FORMAT_A13] = { 0, 64 },
91 [I915_OA_FORMAT_A29] = { 1, 128 },
92 [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
93 /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
94 [I915_OA_FORMAT_B4_C8] = { 4, 64 },
95 [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
96 [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
97 [I915_OA_FORMAT_C4_B8] = { 7, 64 },
98};
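/* Note on the sizes above: each one (64, 128 or 256 bytes) is a power of two
 * that divides the 16M buffer exactly, so ~(size - 1) can be used to align the
 * tail pointer to a report boundary and no report ever straddles the end of
 * the buffer (e.g. 16M / 256 = 65536 whole A45_B8_C8 reports). The 192 byte
 * A29_B8_C8 format satisfies neither assumption, hence its omission above.
 */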
99
100#define SAMPLE_OA_REPORT (1<<0)
101
102struct perf_open_properties {
103 u32 sample_flags;
104
105 u64 single_context:1;
106 u64 ctx_handle;
107
108 /* OA sampling state */
109 int metrics_set;
110 int oa_format;
111 bool oa_periodic;
112 int oa_period_exponent;
113};
114
115/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
116 *
117 * It's safe to read OA config state here unlocked, assuming that this is only
118 * called while the stream is enabled, while the global OA configuration can't
119 * be modified.
120 *
121 * Note: we don't lock around the head/tail reads even though there's the slim
122 * possibility of read() fop errors forcing a re-init of the OA buffer
123 * pointers. A race here could result in a false positive !empty status which
124 * is acceptable.
125 */
126static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
127{
128 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
129 u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
130 u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
131 u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
132 u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
133
134 return OA_TAKEN(tail, head) <
135 dev_priv->perf.oa.tail_margin + report_size;
136}
137
138/**
139 * Appends a status record to a userspace read() buffer.
140 */
141static int append_oa_status(struct i915_perf_stream *stream,
142 char __user *buf,
143 size_t count,
144 size_t *offset,
145 enum drm_i915_perf_record_type type)
146{
147 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
148
149 if ((count - *offset) < header.size)
150 return -ENOSPC;
151
152 if (copy_to_user(buf + *offset, &header, sizeof(header)))
153 return -EFAULT;
154
155 (*offset) += header.size;
156
157 return 0;
158}
159
160/**
161 * Copies single OA report into userspace read() buffer.
162 */
163static int append_oa_sample(struct i915_perf_stream *stream,
164 char __user *buf,
165 size_t count,
166 size_t *offset,
167 const u8 *report)
168{
169 struct drm_i915_private *dev_priv = stream->dev_priv;
170 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
171 struct drm_i915_perf_record_header header;
172 u32 sample_flags = stream->sample_flags;
173
174 header.type = DRM_I915_PERF_RECORD_SAMPLE;
175 header.pad = 0;
176 header.size = stream->sample_size;
177
178 if ((count - *offset) < header.size)
179 return -ENOSPC;
180
181 buf += *offset;
182 if (copy_to_user(buf, &header, sizeof(header)))
183 return -EFAULT;
184 buf += sizeof(header);
185
186 if (sample_flags & SAMPLE_OA_REPORT) {
187 if (copy_to_user(buf, report, report_size))
188 return -EFAULT;
189 }
190
191 (*offset) += header.size;
192
193 return 0;
194}
195
196/**
197 * Copies all buffered OA reports into userspace read() buffer.
198 * @stream: An i915-perf stream opened for OA metrics
199 * @buf: destination buffer given by userspace
200 * @count: the number of bytes userspace wants to read
201 * @offset: (inout): the current position for writing into @buf
202 * @head_ptr: (inout): the current oa buffer cpu read position
203 * @tail: the current oa buffer gpu write position
204 *
205 * Returns 0 on success, negative error code on failure.
206 *
207 * Notably any error condition resulting in a short read (-ENOSPC or
208 * -EFAULT) will be returned even though one or more records may
209 * have been successfully copied. In this case it's up to the caller
210 * to decide if the error should be squashed before returning to
211 * userspace.
212 *
213 * Note: reports are consumed from the head, and appended to the
214 * tail, so the head chases the tail?... If you think that's mad
215 * and back-to-front you're not alone, but this follows the
216 * Gen PRM naming convention.
217 */
218static int gen7_append_oa_reports(struct i915_perf_stream *stream,
219 char __user *buf,
220 size_t count,
221 size_t *offset,
222 u32 *head_ptr,
223 u32 tail)
224{
225 struct drm_i915_private *dev_priv = stream->dev_priv;
226 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
227 u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
228 int tail_margin = dev_priv->perf.oa.tail_margin;
229 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
230 u32 mask = (OA_BUFFER_SIZE - 1);
231 u32 head;
232 u32 taken;
233 int ret = 0;
234
235 if (WARN_ON(!stream->enabled))
236 return -EIO;
237
238 head = *head_ptr - gtt_offset;
239 tail -= gtt_offset;
240
241 /* The OA unit is expected to wrap the tail pointer according to the OA
242 * buffer size and since we should never write a misaligned head
243 * pointer we don't expect to read one back either...
244 */
245 if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
246 head % report_size) {
247 DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
248 head, tail);
249 dev_priv->perf.oa.ops.oa_disable(dev_priv);
250 dev_priv->perf.oa.ops.oa_enable(dev_priv);
251 *head_ptr = I915_READ(GEN7_OASTATUS2) &
252 GEN7_OASTATUS2_HEAD_MASK;
253 return -EIO;
254 }
255
256
257 /* The tail pointer increases in 64 byte increments, not in report_size
258 * steps...
259 */
260 tail &= ~(report_size - 1);
261
262 /* Move the tail pointer back by the current tail_margin to account for
263 * the possibility that the latest reports may not have really landed
264 * in memory yet...
265 */
266
267 if (OA_TAKEN(tail, head) < report_size + tail_margin)
268 return -EAGAIN;
269
270 tail -= tail_margin;
271 tail &= mask;
272
273 for (/* none */;
274 (taken = OA_TAKEN(tail, head));
275 head = (head + report_size) & mask) {
276 u8 *report = oa_buf_base + head;
277 u32 *report32 = (void *)report;
278
279 /* All the report sizes factor neatly into the buffer
280 * size so we never expect to see a report split
281 * between the beginning and end of the buffer.
282 *
283 * Given the initial alignment check a misalignment
284 * here would imply a driver bug that would result
285 * in an overrun.
286 */
287 if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
288 DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
289 break;
290 }
291
292 /* The report-ID field for periodic samples includes
293 * some undocumented flags related to what triggered
294 * the report and is never expected to be zero so we
295 * can check that the report isn't invalid before
296 * copying it to userspace...
297 */
298 if (report32[0] == 0) {
299 DRM_ERROR("Skipping spurious, invalid OA report\n");
300 continue;
301 }
302
303 ret = append_oa_sample(stream, buf, count, offset, report);
304 if (ret)
305 break;
306
307 /* The above report-id field sanity check is based on
308 * the assumption that the OA buffer is initially
309 * zeroed and we reset the field after copying so the
310 * check is still meaningful once old reports start
311 * being overwritten.
312 */
313 report32[0] = 0;
314 }
315
316 *head_ptr = gtt_offset + head;
317
318 return ret;
319}
320
321static int gen7_oa_read(struct i915_perf_stream *stream,
322 char __user *buf,
323 size_t count,
324 size_t *offset)
325{
326 struct drm_i915_private *dev_priv = stream->dev_priv;
327 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
328 u32 oastatus2;
329 u32 oastatus1;
330 u32 head;
331 u32 tail;
332 int ret;
333
334 if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
335 return -EIO;
336
337 oastatus2 = I915_READ(GEN7_OASTATUS2);
338 oastatus1 = I915_READ(GEN7_OASTATUS1);
339
340 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
341 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
342
343 /* XXX: On Haswell we don't have a safe way to clear oastatus1
344 * bits while the OA unit is enabled (while the tail pointer
345 * may be updated asynchronously) so we ignore status bits
346 * that have already been reported to userspace.
347 */
348 oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
349
350 /* We treat OABUFFER_OVERFLOW as a significant error:
351 *
352 * - The status can be interpreted to mean that the buffer is
353 * currently full (with a higher precedence than OA_TAKEN()
354 * which will start to report a near-empty buffer after an
355 * overflow) but it's awkward that we can't clear the status
356 * on Haswell, so without a reset we won't be able to catch
357 * the state again.
358 *
359 * - Since it also implies the HW has started overwriting old
360 * reports it may also affect our sanity checks for invalid
361 * reports when copying to userspace that assume new reports
362 * are being written to cleared memory.
363 *
364 * - In the future we may want to introduce a flight recorder
365 * mode where the driver will automatically maintain a safe
366 * guard band between head/tail, avoiding this overflow
367 * condition, but we avoid the added driver complexity for
368 * now.
369 */
370 if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
371 ret = append_oa_status(stream, buf, count, offset,
372 DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
373 if (ret)
374 return ret;
375
376 DRM_ERROR("OA buffer overflow: force restart\n");
377
378 dev_priv->perf.oa.ops.oa_disable(dev_priv);
379 dev_priv->perf.oa.ops.oa_enable(dev_priv);
380
381 oastatus2 = I915_READ(GEN7_OASTATUS2);
382 oastatus1 = I915_READ(GEN7_OASTATUS1);
383
384 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
385 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
386 }
387
388 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
389 ret = append_oa_status(stream, buf, count, offset,
390 DRM_I915_PERF_RECORD_OA_REPORT_LOST);
391 if (ret)
392 return ret;
393 dev_priv->perf.oa.gen7_latched_oastatus1 |=
394 GEN7_OASTATUS1_REPORT_LOST;
395 }
396
397 ret = gen7_append_oa_reports(stream, buf, count, offset,
398 &head, tail);
399
400 /* All the report sizes are a power of two and the
401 * head should always be incremented by some multiple
402 * of the report size.
403 *
404 * A warning here, but notably if we later read back a
405 * misaligned pointer we will treat that as a bug since
406 * it could lead to a buffer overrun.
407 */
408 WARN_ONCE(head & (report_size - 1),
409 "i915: Writing misaligned OA head pointer");
410
411 /* Note: we update the head pointer here even if an error
412 * was returned since the error may represent a short read
 413 * where some reports were successfully copied.
414 */
415 I915_WRITE(GEN7_OASTATUS2,
416 ((head & GEN7_OASTATUS2_HEAD_MASK) |
417 OA_MEM_SELECT_GGTT));
418
419 return ret;
420}
421
422static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
423{
424 struct drm_i915_private *dev_priv = stream->dev_priv;
425
426 /* We would wait indefinitely if periodic sampling is not enabled */
427 if (!dev_priv->perf.oa.periodic)
428 return -EIO;
429
430 /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
431 * just performs mmio reads of the OA buffer head + tail pointers and
432 * it's assumed we're handling some operation that implies the stream
433 * can't be destroyed until completion (such as a read()) that ensures
434 * the device + OA buffer can't disappear
435 */
436 return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
437 !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
438}
439
440static void i915_oa_poll_wait(struct i915_perf_stream *stream,
441 struct file *file,
442 poll_table *wait)
443{
444 struct drm_i915_private *dev_priv = stream->dev_priv;
445
446 poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
447}
448
449static int i915_oa_read(struct i915_perf_stream *stream,
450 char __user *buf,
451 size_t count,
452 size_t *offset)
453{
454 struct drm_i915_private *dev_priv = stream->dev_priv;
455
456 return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
457}
458
459/* Determine the render context hw id, and ensure it remains fixed for the
460 * lifetime of the stream. This ensures that we don't have to worry about
461 * updating the context ID in OACONTROL on the fly.
462 */
463static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
464{
465 struct drm_i915_private *dev_priv = stream->dev_priv;
466 struct i915_vma *vma;
467 int ret;
468
469 ret = i915_mutex_lock_interruptible(&dev_priv->drm);
470 if (ret)
471 return ret;
472
473 /* As the ID is the gtt offset of the context's vma we pin
474 * the vma to ensure the ID remains fixed.
475 *
476 * NB: implied RCS engine...
477 */
478 vma = i915_gem_context_pin_legacy(stream->ctx, 0);
479 if (IS_ERR(vma)) {
480 ret = PTR_ERR(vma);
481 goto unlock;
482 }
483
484 dev_priv->perf.oa.pinned_rcs_vma = vma;
485
486 /* Explicitly track the ID (instead of calling i915_ggtt_offset()
487 * on the fly) considering the difference with gen8+ and
488 * execlists
489 */
490 dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
491
492unlock:
493 mutex_unlock(&dev_priv->drm.struct_mutex);
494
495 return ret;
496}
497
498static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
499{
500 struct drm_i915_private *dev_priv = stream->dev_priv;
501
502 mutex_lock(&dev_priv->drm.struct_mutex);
503
504 i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
505 dev_priv->perf.oa.pinned_rcs_vma = NULL;
506
507 dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
508
509 mutex_unlock(&dev_priv->drm.struct_mutex);
510}
511
512static void
513free_oa_buffer(struct drm_i915_private *i915)
514{
515 mutex_lock(&i915->drm.struct_mutex);
516
517 i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
518 i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
519 i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
520
521 i915->perf.oa.oa_buffer.vma = NULL;
522 i915->perf.oa.oa_buffer.vaddr = NULL;
523
524 mutex_unlock(&i915->drm.struct_mutex);
525}
526
527static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
528{
529 struct drm_i915_private *dev_priv = stream->dev_priv;
530
531 BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
532
533 dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
534
535 free_oa_buffer(dev_priv);
536
537 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
538 intel_runtime_pm_put(dev_priv);
539
540 if (stream->ctx)
541 oa_put_render_ctx_id(stream);
542
543 dev_priv->perf.oa.exclusive_stream = NULL;
544}
545
546static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
547{
548 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
549
550 /* Pre-DevBDW: OABUFFER must be set with counters off,
551 * before OASTATUS1, but after OASTATUS2
552 */
553 I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
554 I915_WRITE(GEN7_OABUFFER, gtt_offset);
555 I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
556
557 /* On Haswell we have to track which OASTATUS1 flags we've
558 * already seen since they can't be cleared while periodic
559 * sampling is enabled.
560 */
561 dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
562
563 /* NB: although the OA buffer will initially be allocated
564 * zeroed via shmfs (and so this memset is redundant when
565 * first allocating), we may re-init the OA buffer, either
566 * when re-enabling a stream or in error/reset paths.
567 *
568 * The reason we clear the buffer for each re-init is for the
569 * sanity check in gen7_append_oa_reports() that looks at the
570 * report-id field to make sure it's non-zero which relies on
571 * the assumption that new reports are being written to zeroed
572 * memory...
573 */
574 memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
575
576 /* Maybe make ->pollin per-stream state if we support multiple
577 * concurrent streams in the future.
578 */
579 dev_priv->perf.oa.pollin = false;
580}
581
582static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
583{
584 struct drm_i915_gem_object *bo;
585 struct i915_vma *vma;
586 int ret;
587
588 if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
589 return -ENODEV;
590
591 ret = i915_mutex_lock_interruptible(&dev_priv->drm);
592 if (ret)
593 return ret;
594
595 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
596 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
597
598 bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE);
599 if (IS_ERR(bo)) {
600 DRM_ERROR("Failed to allocate OA buffer\n");
601 ret = PTR_ERR(bo);
602 goto unlock;
603 }
604
605 ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
606 if (ret)
607 goto err_unref;
608
609 /* PreHSW required 512K alignment, HSW requires 16M */
610 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
611 if (IS_ERR(vma)) {
612 ret = PTR_ERR(vma);
613 goto err_unref;
614 }
615 dev_priv->perf.oa.oa_buffer.vma = vma;
616
617 dev_priv->perf.oa.oa_buffer.vaddr =
618 i915_gem_object_pin_map(bo, I915_MAP_WB);
619 if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
620 ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
621 goto err_unpin;
622 }
623
624 dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);
625
626 DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
627 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
628 dev_priv->perf.oa.oa_buffer.vaddr);
629
630 goto unlock;
631
632err_unpin:
633 __i915_vma_unpin(vma);
634
635err_unref:
636 i915_gem_object_put(bo);
637
638 dev_priv->perf.oa.oa_buffer.vaddr = NULL;
639 dev_priv->perf.oa.oa_buffer.vma = NULL;
640
641unlock:
642 mutex_unlock(&dev_priv->drm.struct_mutex);
643 return ret;
644}
645
646static void config_oa_regs(struct drm_i915_private *dev_priv,
647 const struct i915_oa_reg *regs,
648 int n_regs)
649{
650 int i;
651
652 for (i = 0; i < n_regs; i++) {
653 const struct i915_oa_reg *reg = regs + i;
654
655 I915_WRITE(reg->addr, reg->value);
656 }
657}
658
659static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
660{
661 int ret = i915_oa_select_metric_set_hsw(dev_priv);
662
663 if (ret)
664 return ret;
665
666 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) |
667 GT_NOA_ENABLE));
668
669 /* PRM:
670 *
671 * OA unit is using “crclk” for its functionality. When trunk
672 * level clock gating takes place, OA clock would be gated,
673 * unable to count the events from non-render clock domain.
674 * Render clock gating must be disabled when OA is enabled to
675 * count the events from non-render domain. Unit level clock
676 * gating for RCS should also be disabled.
677 */
678 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
679 ~GEN7_DOP_CLOCK_GATE_ENABLE));
680 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
681 GEN6_CSUNIT_CLOCK_GATE_DISABLE));
682
683 config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs,
684 dev_priv->perf.oa.mux_regs_len);
685
686 /* It apparently takes a fairly long time for a new MUX
 687 * configuration to be applied after these register writes.
688 * This delay duration was derived empirically based on the
689 * render_basic config but hopefully it covers the maximum
690 * configuration latency.
691 *
692 * As a fallback, the checks in _append_oa_reports() to skip
693 * invalid OA reports do also seem to work to discard reports
694 * generated before this config has completed - albeit not
695 * silently.
696 *
697 * Unfortunately this is essentially a magic number, since we
698 * don't currently know of a reliable mechanism for predicting
699 * how long the MUX config will take to apply and besides
700 * seeing invalid reports we don't know of a reliable way to
701 * explicitly check that the MUX config has landed.
702 *
 703 * It's even possible we've mischaracterized the underlying
704 * problem - it just seems like the simplest explanation why
705 * a delay at this location would mitigate any invalid reports.
706 */
707 usleep_range(15000, 20000);
708
709 config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs,
710 dev_priv->perf.oa.b_counter_regs_len);
711
712 return 0;
713}
714
715static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
716{
717 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
718 ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
719 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
720 GEN7_DOP_CLOCK_GATE_ENABLE));
721
722 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
723 ~GT_NOA_ENABLE));
724}
725
726static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
727{
728 assert_spin_locked(&dev_priv->perf.hook_lock);
729
730 if (dev_priv->perf.oa.exclusive_stream->enabled) {
731 struct i915_gem_context *ctx =
732 dev_priv->perf.oa.exclusive_stream->ctx;
733 u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
734
735 bool periodic = dev_priv->perf.oa.periodic;
736 u32 period_exponent = dev_priv->perf.oa.period_exponent;
737 u32 report_format = dev_priv->perf.oa.oa_buffer.format;
738
739 I915_WRITE(GEN7_OACONTROL,
740 (ctx_id & GEN7_OACONTROL_CTX_MASK) |
741 (period_exponent <<
742 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
743 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
744 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
745 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
746 GEN7_OACONTROL_ENABLE);
747 } else
748 I915_WRITE(GEN7_OACONTROL, 0);
749}
750
751static void gen7_oa_enable(struct drm_i915_private *dev_priv)
752{
753 unsigned long flags;
754
755 /* Reset buf pointers so we don't forward reports from before now.
756 *
757 * Think carefully if considering trying to avoid this, since it
758 * also ensures status flags and the buffer itself are cleared
759 * in error paths, and we have checks for invalid reports based
760 * on the assumption that certain fields are written to zeroed
 761 * memory which this helps maintain.
762 */
763 gen7_init_oa_buffer(dev_priv);
764
765 spin_lock_irqsave(&dev_priv->perf.hook_lock, flags);
766 gen7_update_oacontrol_locked(dev_priv);
767 spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags);
768}
769
770static void i915_oa_stream_enable(struct i915_perf_stream *stream)
771{
772 struct drm_i915_private *dev_priv = stream->dev_priv;
773
774 dev_priv->perf.oa.ops.oa_enable(dev_priv);
775
776 if (dev_priv->perf.oa.periodic)
777 hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
778 ns_to_ktime(POLL_PERIOD),
779 HRTIMER_MODE_REL_PINNED);
780}
781
782static void gen7_oa_disable(struct drm_i915_private *dev_priv)
783{
784 I915_WRITE(GEN7_OACONTROL, 0);
785}
786
787static void i915_oa_stream_disable(struct i915_perf_stream *stream)
788{
789 struct drm_i915_private *dev_priv = stream->dev_priv;
790
791 dev_priv->perf.oa.ops.oa_disable(dev_priv);
792
793 if (dev_priv->perf.oa.periodic)
794 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
795}
796
797static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
798{
799 return 1000000000ULL * (2ULL << exponent) /
800 dev_priv->perf.oa.timestamp_frequency;
801}
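/* Example: with the 12.5MHz (80ns tick) timestamp frequency assigned for
 * Haswell in i915_perf_init(), oa_exponent_to_ns(dev_priv, 6) returns
 * 1000000000 * 2^7 / 12500000 = 10240ns, i.e. a sampling rate just under
 * 100KHz.
 */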
802
803static const struct i915_perf_stream_ops i915_oa_stream_ops = {
804 .destroy = i915_oa_stream_destroy,
805 .enable = i915_oa_stream_enable,
806 .disable = i915_oa_stream_disable,
807 .wait_unlocked = i915_oa_wait_unlocked,
808 .poll_wait = i915_oa_poll_wait,
809 .read = i915_oa_read,
810};
811
812static int i915_oa_stream_init(struct i915_perf_stream *stream,
813 struct drm_i915_perf_open_param *param,
814 struct perf_open_properties *props)
815{
816 struct drm_i915_private *dev_priv = stream->dev_priv;
817 int format_size;
818 int ret;
819
820 /* If the sysfs metrics/ directory wasn't registered for some
821 * reason then don't let userspace try their luck with config
822 * IDs
823 */
824 if (!dev_priv->perf.metrics_kobj) {
825 DRM_ERROR("OA metrics weren't advertised via sysfs\n");
826 return -EINVAL;
827 }
828
829 if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
830 DRM_ERROR("Only OA report sampling supported\n");
831 return -EINVAL;
832 }
833
834 if (!dev_priv->perf.oa.ops.init_oa_buffer) {
835 DRM_ERROR("OA unit not supported\n");
836 return -ENODEV;
837 }
838
839 /* To avoid the complexity of having to accurately filter
840 * counter reports and marshal to the appropriate client
841 * we currently only allow exclusive access
842 */
843 if (dev_priv->perf.oa.exclusive_stream) {
844 DRM_ERROR("OA unit already in use\n");
845 return -EBUSY;
846 }
847
848 if (!props->metrics_set) {
849 DRM_ERROR("OA metric set not specified\n");
850 return -EINVAL;
851 }
852
853 if (!props->oa_format) {
854 DRM_ERROR("OA report format not specified\n");
855 return -EINVAL;
856 }
857
858 stream->sample_size = sizeof(struct drm_i915_perf_record_header);
859
860 format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
861
862 stream->sample_flags |= SAMPLE_OA_REPORT;
863 stream->sample_size += format_size;
864
865 dev_priv->perf.oa.oa_buffer.format_size = format_size;
866 if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
867 return -EINVAL;
868
869 dev_priv->perf.oa.oa_buffer.format =
870 dev_priv->perf.oa.oa_formats[props->oa_format].format;
871
872 dev_priv->perf.oa.metrics_set = props->metrics_set;
873
874 dev_priv->perf.oa.periodic = props->oa_periodic;
875 if (dev_priv->perf.oa.periodic) {
876 u64 period_ns = oa_exponent_to_ns(dev_priv,
877 props->oa_period_exponent);
878
879 dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
880
881 /* See comment for OA_TAIL_MARGIN_NSEC for details
882 * about this tail_margin...
883 */
884 dev_priv->perf.oa.tail_margin =
885 ((OA_TAIL_MARGIN_NSEC / period_ns) + 1) * format_size;
886 }
887
888 if (stream->ctx) {
889 ret = oa_get_render_ctx_id(stream);
890 if (ret)
891 return ret;
892 }
893
894 ret = alloc_oa_buffer(dev_priv);
895 if (ret)
896 goto err_oa_buf_alloc;
897
898 /* PRM - observability performance counters:
899 *
900 * OACONTROL, performance counter enable, note:
901 *
902 * "When this bit is set, in order to have coherent counts,
903 * RC6 power state and trunk clock gating must be disabled.
904 * This can be achieved by programming MMIO registers as
905 * 0xA094=0 and 0xA090[31]=1"
906 *
907 * In our case we are expecting that taking pm + FORCEWAKE
908 * references will effectively disable RC6.
909 */
910 intel_runtime_pm_get(dev_priv);
911 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
912
913 ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
914 if (ret)
915 goto err_enable;
916
917 stream->ops = &i915_oa_stream_ops;
918
919 dev_priv->perf.oa.exclusive_stream = stream;
920
921 return 0;
922
923err_enable:
924 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
925 intel_runtime_pm_put(dev_priv);
926 free_oa_buffer(dev_priv);
927
928err_oa_buf_alloc:
929 if (stream->ctx)
930 oa_put_render_ctx_id(stream);
931
932 return ret;
933}
934
935static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
936 struct file *file,
937 char __user *buf,
938 size_t count,
939 loff_t *ppos)
940{
941 /* Note we keep the offset (aka bytes read) separate from any
942 * error status so that the final check for whether we return
943 * the bytes read with a higher precedence than any error (see
944 * comment below) doesn't need to be handled/duplicated in
945 * stream->ops->read() implementations.
946 */
947 size_t offset = 0;
948 int ret = stream->ops->read(stream, buf, count, &offset);
949
950 /* If we've successfully copied any data then reporting that
951 * takes precedence over any internal error status, so the
952 * data isn't lost.
953 *
954 * For example ret will be -ENOSPC whenever there is more
955 * buffered data than can be copied to userspace, but that's
956 * only interesting if we weren't able to copy some data
957 * because it implies the userspace buffer is too small to
958 * receive a single record (and we never split records).
959 *
960 * Another case with ret == -EFAULT is more of a grey area
961 * since it would seem like bad form for userspace to ask us
962 * to overrun its buffer, but the user knows best:
963 *
964 * http://yarchive.net/comp/linux/partial_reads_writes.html
965 */
966 return offset ?: (ret ?: -EAGAIN);
967}
968
969static ssize_t i915_perf_read(struct file *file,
970 char __user *buf,
971 size_t count,
972 loff_t *ppos)
973{
974 struct i915_perf_stream *stream = file->private_data;
975 struct drm_i915_private *dev_priv = stream->dev_priv;
976 ssize_t ret;
977
978 /* To ensure it's handled consistently we simply treat all reads of a
979 * disabled stream as an error. In particular it might otherwise lead
980 * to a deadlock for blocking file descriptors...
981 */
982 if (!stream->enabled)
983 return -EIO;
984
 985 	if (!(file->f_flags & O_NONBLOCK)) {
986 /* There's the small chance of false positives from
987 * stream->ops->wait_unlocked.
988 *
989 * E.g. with single context filtering since we only wait until
990 * oabuffer has >= 1 report we don't immediately know whether
991 * any reports really belong to the current context
992 */
993 do {
994 ret = stream->ops->wait_unlocked(stream);
995 if (ret)
996 return ret;
997
998 mutex_lock(&dev_priv->perf.lock);
999 ret = i915_perf_read_locked(stream, file,
1000 buf, count, ppos);
1001 mutex_unlock(&dev_priv->perf.lock);
1002 } while (ret == -EAGAIN);
1003 } else {
1004 mutex_lock(&dev_priv->perf.lock);
1005 ret = i915_perf_read_locked(stream, file, buf, count, ppos);
1006 mutex_unlock(&dev_priv->perf.lock);
1007 }
1008
1009 if (ret >= 0) {
1010 /* Maybe make ->pollin per-stream state if we support multiple
1011 * concurrent streams in the future.
1012 */
1013 dev_priv->perf.oa.pollin = false;
1014 }
1015
1016 return ret;
1017}
1018
1019static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
1020{
1021 struct drm_i915_private *dev_priv =
1022 container_of(hrtimer, typeof(*dev_priv),
1023 perf.oa.poll_check_timer);
1024
1025 if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
1026 dev_priv->perf.oa.pollin = true;
1027 wake_up(&dev_priv->perf.oa.poll_wq);
1028 }
1029
1030 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
1031
1032 return HRTIMER_RESTART;
1033}
1034
1035static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
1036 struct i915_perf_stream *stream,
1037 struct file *file,
1038 poll_table *wait)
1039{
 1040	unsigned int events = 0;
1041
1042 stream->ops->poll_wait(stream, file, wait);
1043
1044 /* Note: we don't explicitly check whether there's something to read
1045 * here since this path may be very hot depending on what else
1046 * userspace is polling, or on the timeout in use. We rely solely on
1047 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
1048 * samples to read.
1049 */
1050 if (dev_priv->perf.oa.pollin)
1051 events |= POLLIN;
 1052
 1053	return events;
1054}
1055
1056static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
1057{
1058 struct i915_perf_stream *stream = file->private_data;
1059 struct drm_i915_private *dev_priv = stream->dev_priv;
1060 int ret;
1061
1062 mutex_lock(&dev_priv->perf.lock);
 1063	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
1064 mutex_unlock(&dev_priv->perf.lock);
1065
1066 return ret;
1067}
1068
1069static void i915_perf_enable_locked(struct i915_perf_stream *stream)
1070{
1071 if (stream->enabled)
1072 return;
1073
1074 /* Allow stream->ops->enable() to refer to this */
1075 stream->enabled = true;
1076
1077 if (stream->ops->enable)
1078 stream->ops->enable(stream);
1079}
1080
1081static void i915_perf_disable_locked(struct i915_perf_stream *stream)
1082{
1083 if (!stream->enabled)
1084 return;
1085
1086 /* Allow stream->ops->disable() to refer to this */
1087 stream->enabled = false;
1088
1089 if (stream->ops->disable)
1090 stream->ops->disable(stream);
1091}
1092
1093static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
1094 unsigned int cmd,
1095 unsigned long arg)
1096{
1097 switch (cmd) {
1098 case I915_PERF_IOCTL_ENABLE:
1099 i915_perf_enable_locked(stream);
1100 return 0;
1101 case I915_PERF_IOCTL_DISABLE:
1102 i915_perf_disable_locked(stream);
1103 return 0;
1104 }
1105
1106 return -EINVAL;
1107}
1108
1109static long i915_perf_ioctl(struct file *file,
1110 unsigned int cmd,
1111 unsigned long arg)
1112{
1113 struct i915_perf_stream *stream = file->private_data;
1114 struct drm_i915_private *dev_priv = stream->dev_priv;
1115 long ret;
1116
1117 mutex_lock(&dev_priv->perf.lock);
1118 ret = i915_perf_ioctl_locked(stream, cmd, arg);
1119 mutex_unlock(&dev_priv->perf.lock);
1120
1121 return ret;
1122}
1123
1124static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
1125{
1126 struct drm_i915_private *dev_priv = stream->dev_priv;
1127
1128 if (stream->enabled)
1129 i915_perf_disable_locked(stream);
1130
1131 if (stream->ops->destroy)
1132 stream->ops->destroy(stream);
1133
1134 list_del(&stream->link);
1135
1136 if (stream->ctx) {
1137 mutex_lock(&dev_priv->drm.struct_mutex);
1138 i915_gem_context_put(stream->ctx);
1139 mutex_unlock(&dev_priv->drm.struct_mutex);
1140 }
1141
1142 kfree(stream);
1143}
1144
1145static int i915_perf_release(struct inode *inode, struct file *file)
1146{
1147 struct i915_perf_stream *stream = file->private_data;
1148 struct drm_i915_private *dev_priv = stream->dev_priv;
1149
1150 mutex_lock(&dev_priv->perf.lock);
1151 i915_perf_destroy_locked(stream);
1152 mutex_unlock(&dev_priv->perf.lock);
1153
1154 return 0;
1155}
1156
1157
1158static const struct file_operations fops = {
1159 .owner = THIS_MODULE,
1160 .llseek = no_llseek,
1161 .release = i915_perf_release,
1162 .poll = i915_perf_poll,
1163 .read = i915_perf_read,
1164 .unlocked_ioctl = i915_perf_ioctl,
1165};
1166
1167
1168static struct i915_gem_context *
1169lookup_context(struct drm_i915_private *dev_priv,
1170 struct drm_i915_file_private *file_priv,
1171 u32 ctx_user_handle)
1172{
1173 struct i915_gem_context *ctx;
1174 int ret;
1175
1176 ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1177 if (ret)
1178 return ERR_PTR(ret);
1179
1180 ctx = i915_gem_context_lookup(file_priv, ctx_user_handle);
1181 if (!IS_ERR(ctx))
1182 i915_gem_context_get(ctx);
1183
1184 mutex_unlock(&dev_priv->drm.struct_mutex);
1185
1186 return ctx;
1187}
1188
1189static int
1190i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
1191 struct drm_i915_perf_open_param *param,
1192 struct perf_open_properties *props,
1193 struct drm_file *file)
1194{
1195 struct i915_gem_context *specific_ctx = NULL;
1196 struct i915_perf_stream *stream = NULL;
1197 unsigned long f_flags = 0;
1198 int stream_fd;
1199 int ret;
1200
1201 if (props->single_context) {
1202 u32 ctx_handle = props->ctx_handle;
1203 struct drm_i915_file_private *file_priv = file->driver_priv;
1204
1205 specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle);
1206 if (IS_ERR(specific_ctx)) {
1207 ret = PTR_ERR(specific_ctx);
1208 if (ret != -EINTR)
1209 DRM_ERROR("Failed to look up context with ID %u for opening perf stream\n",
1210 ctx_handle);
1211 goto err;
1212 }
1213 }
1214
1215 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
1216 * we check a dev.i915.perf_stream_paranoid sysctl option
1217 * to determine if it's ok to access system wide OA counters
1218 * without CAP_SYS_ADMIN privileges.
1219 */
1220 if (!specific_ctx &&
1221 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
1222 DRM_ERROR("Insufficient privileges to open system-wide i915 perf stream\n");
1223 ret = -EACCES;
1224 goto err_ctx;
1225 }
1226
1227 stream = kzalloc(sizeof(*stream), GFP_KERNEL);
1228 if (!stream) {
1229 ret = -ENOMEM;
1230 goto err_ctx;
1231 }
1232
1233 stream->dev_priv = dev_priv;
1234 stream->ctx = specific_ctx;
1235
1236 ret = i915_oa_stream_init(stream, param, props);
1237 if (ret)
1238 goto err_alloc;
1239
1240 /* we avoid simply assigning stream->sample_flags = props->sample_flags
1241 * to have _stream_init check the combination of sample flags more
1242 * thoroughly, but still this is the expected result at this point.
 1243	 */
1244 if (WARN_ON(stream->sample_flags != props->sample_flags)) {
1245 ret = -ENODEV;
1246 goto err_alloc;
1247 }
1248
1249 list_add(&stream->link, &dev_priv->perf.streams);
1250
1251 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
1252 f_flags |= O_CLOEXEC;
1253 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
1254 f_flags |= O_NONBLOCK;
1255
1256 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
1257 if (stream_fd < 0) {
1258 ret = stream_fd;
1259 goto err_open;
1260 }
1261
1262 if (!(param->flags & I915_PERF_FLAG_DISABLED))
1263 i915_perf_enable_locked(stream);
1264
1265 return stream_fd;
1266
1267err_open:
1268 list_del(&stream->link);
1269 if (stream->ops->destroy)
1270 stream->ops->destroy(stream);
1271err_alloc:
1272 kfree(stream);
1273err_ctx:
1274 if (specific_ctx) {
1275 mutex_lock(&dev_priv->drm.struct_mutex);
1276 i915_gem_context_put(specific_ctx);
1277 mutex_unlock(&dev_priv->drm.struct_mutex);
1278 }
1279err:
1280 return ret;
1281}
1282
1283/* Note we copy the properties from userspace outside of the i915 perf
1284 * mutex to avoid an awkward lockdep with mmap_sem.
1285 *
 1286 * Note this function only validates properties in isolation; it doesn't
1287 * validate that the combination of properties makes sense or that all
1288 * properties necessary for a particular kind of stream have been set.
1289 */
1290static int read_properties_unlocked(struct drm_i915_private *dev_priv,
1291 u64 __user *uprops,
1292 u32 n_props,
1293 struct perf_open_properties *props)
1294{
1295 u64 __user *uprop = uprops;
1296 int i;
1297
1298 memset(props, 0, sizeof(struct perf_open_properties));
1299
1300 if (!n_props) {
1301 DRM_ERROR("No i915 perf properties given");
1302 return -EINVAL;
1303 }
1304
1305 /* Considering that ID = 0 is reserved and assuming that we don't
1306 * (currently) expect any configurations to ever specify duplicate
1307 * values for a particular property ID then the last _PROP_MAX value is
1308 * one greater than the maximum number of properties we expect to get
1309 * from userspace.
1310 */
1311 if (n_props >= DRM_I915_PERF_PROP_MAX) {
1312 DRM_ERROR("More i915 perf properties specified than exist");
1313 return -EINVAL;
1314 }
1315
1316 for (i = 0; i < n_props; i++) {
1317 u64 id, value;
1318 int ret;
1319
1320 ret = get_user(id, uprop);
1321 if (ret)
1322 return ret;
1323
1324 ret = get_user(value, uprop + 1);
1325 if (ret)
1326 return ret;
1327
1328 switch ((enum drm_i915_perf_property_id)id) {
1329 case DRM_I915_PERF_PROP_CTX_HANDLE:
1330 props->single_context = 1;
1331 props->ctx_handle = value;
1332 break;
1333 case DRM_I915_PERF_PROP_SAMPLE_OA:
1334 props->sample_flags |= SAMPLE_OA_REPORT;
1335 break;
1336 case DRM_I915_PERF_PROP_OA_METRICS_SET:
1337 if (value == 0 ||
1338 value > dev_priv->perf.oa.n_builtin_sets) {
1339 DRM_ERROR("Unknown OA metric set ID");
1340 return -EINVAL;
1341 }
1342 props->metrics_set = value;
1343 break;
1344 case DRM_I915_PERF_PROP_OA_FORMAT:
1345 if (value == 0 || value >= I915_OA_FORMAT_MAX) {
1346 DRM_ERROR("Invalid OA report format\n");
1347 return -EINVAL;
1348 }
1349 if (!dev_priv->perf.oa.oa_formats[value].size) {
1350 DRM_ERROR("Invalid OA report format\n");
1351 return -EINVAL;
1352 }
1353 props->oa_format = value;
1354 break;
1355 case DRM_I915_PERF_PROP_OA_EXPONENT:
1356 if (value > OA_EXPONENT_MAX) {
1357 DRM_ERROR("OA timer exponent too high (> %u)\n",
1358 OA_EXPONENT_MAX);
1359 return -EINVAL;
1360 }
1361
1362 /* NB: The exponent represents a period as follows:
1363 *
1364 * 80ns * 2^(period_exponent + 1)
1365 *
1366 * Theoretically we can program the OA unit to sample
1367 * every 160ns but don't allow that by default unless
1368 * root.
1369 *
1370 * Referring to perf's
1371 * kernel.perf_event_max_sample_rate for a precedent
1372 * (100000 by default); with an OA exponent of 6 we get
 1373			 * a period of 10.240 microseconds - just under 100000Hz
1374 */
1375 if (value < 6 && !capable(CAP_SYS_ADMIN)) {
1376 DRM_ERROR("Minimum OA sampling exponent is 6 without root privileges\n");
1377 return -EACCES;
1378 }
1379
1380 props->oa_periodic = true;
1381 props->oa_period_exponent = value;
1382 break;
1383 default:
1384 MISSING_CASE(id);
1385 DRM_ERROR("Unknown i915 perf property ID");
1386 return -EINVAL;
1387 }
1388
1389 uprop += 2;
1390 }
1391
1392 return 0;
1393}
1394
1395int i915_perf_open_ioctl(struct drm_device *dev, void *data,
1396 struct drm_file *file)
1397{
1398 struct drm_i915_private *dev_priv = dev->dev_private;
1399 struct drm_i915_perf_open_param *param = data;
1400 struct perf_open_properties props;
1401 u32 known_open_flags;
1402 int ret;
1403
1404 if (!dev_priv->perf.initialized) {
1405 DRM_ERROR("i915 perf interface not available for this system");
1406 return -ENOTSUPP;
1407 }
1408
1409 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
1410 I915_PERF_FLAG_FD_NONBLOCK |
1411 I915_PERF_FLAG_DISABLED;
1412 if (param->flags & ~known_open_flags) {
1413 DRM_ERROR("Unknown drm_i915_perf_open_param flag\n");
1414 return -EINVAL;
1415 }
1416
1417 ret = read_properties_unlocked(dev_priv,
1418 u64_to_user_ptr(param->properties_ptr),
1419 param->num_properties,
1420 &props);
1421 if (ret)
1422 return ret;
1423
1424 mutex_lock(&dev_priv->perf.lock);
1425 ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
1426 mutex_unlock(&dev_priv->perf.lock);
1427
1428 return ret;
1429}
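/* A minimal userspace sketch of opening a periodic OA stream via this ioctl
 * (assuming the uAPI declared in i915_drm.h and libdrm's drmIoctl(); the
 * metrics set ID is a placeholder for one advertised under the sysfs metrics/
 * directory):
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * The returned fd can then be poll()ed and read() to receive OA sample
 * records, and enabled/disabled with I915_PERF_IOCTL_ENABLE/_DISABLE.
 */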
1430
1431void i915_perf_register(struct drm_i915_private *dev_priv)
1432{
1433 if (!IS_HASWELL(dev_priv))
1434 return;
1435
1436 if (!dev_priv->perf.initialized)
1437 return;
1438
1439 /* To be sure we're synchronized with an attempted
1440 * i915_perf_open_ioctl(); considering that we register after
1441 * being exposed to userspace.
1442 */
1443 mutex_lock(&dev_priv->perf.lock);
1444
1445 dev_priv->perf.metrics_kobj =
1446 kobject_create_and_add("metrics",
1447 &dev_priv->drm.primary->kdev->kobj);
1448 if (!dev_priv->perf.metrics_kobj)
1449 goto exit;
1450
1451 if (i915_perf_register_sysfs_hsw(dev_priv)) {
1452 kobject_put(dev_priv->perf.metrics_kobj);
1453 dev_priv->perf.metrics_kobj = NULL;
1454 }
1455
1456exit:
1457 mutex_unlock(&dev_priv->perf.lock);
1458}
1459
1460void i915_perf_unregister(struct drm_i915_private *dev_priv)
1461{
1462 if (!IS_HASWELL(dev_priv))
1463 return;
1464
1465 if (!dev_priv->perf.metrics_kobj)
1466 return;
1467
1468 i915_perf_unregister_sysfs_hsw(dev_priv);
1469
1470 kobject_put(dev_priv->perf.metrics_kobj);
1471 dev_priv->perf.metrics_kobj = NULL;
1472}
1473
1474static struct ctl_table oa_table[] = {
1475 {
1476 .procname = "perf_stream_paranoid",
1477 .data = &i915_perf_stream_paranoid,
1478 .maxlen = sizeof(i915_perf_stream_paranoid),
1479 .mode = 0644,
1480 .proc_handler = proc_dointvec_minmax,
1481 .extra1 = &zero,
1482 .extra2 = &one,
1483 },
1484 {}
1485};
1486
1487static struct ctl_table i915_root[] = {
1488 {
1489 .procname = "i915",
1490 .maxlen = 0,
1491 .mode = 0555,
1492 .child = oa_table,
1493 },
1494 {}
1495};
1496
1497static struct ctl_table dev_root[] = {
1498 {
1499 .procname = "dev",
1500 .maxlen = 0,
1501 .mode = 0555,
1502 .child = i915_root,
1503 },
1504 {}
1505};
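/* With the tables above registered the option shows up as
 * /proc/sys/dev/i915/perf_stream_paranoid; e.g. an administrator can allow
 * unprivileged access to system wide OA metrics with:
 *
 *	# sysctl dev.i915.perf_stream_paranoid=0
 *
 * and proc_dointvec_minmax clamps written values to 0 or 1.
 */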
1506
1507void i915_perf_init(struct drm_i915_private *dev_priv)
1508{
1509 if (!IS_HASWELL(dev_priv))
1510 return;
1511
1512 hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
1513 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1514 dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
1515 init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
1516
1517 INIT_LIST_HEAD(&dev_priv->perf.streams);
1518 mutex_init(&dev_priv->perf.lock);
1519 spin_lock_init(&dev_priv->perf.hook_lock);
1520
1521 dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
1522 dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
1523 dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
1524 dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
1525 dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
1526 dev_priv->perf.oa.ops.read = gen7_oa_read;
1527 dev_priv->perf.oa.ops.oa_buffer_is_empty =
1528 gen7_oa_buffer_is_empty_fop_unlocked;
1529
1530 dev_priv->perf.oa.timestamp_frequency = 12500000;
1531
1532 dev_priv->perf.oa.oa_formats = hsw_oa_formats;
1533
1534 dev_priv->perf.oa.n_builtin_sets =
1535 i915_oa_n_builtin_metric_sets_hsw;
 1536
1537 dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
1538
1539 dev_priv->perf.initialized = true;
1540}
1541
1542void i915_perf_fini(struct drm_i915_private *dev_priv)
1543{
1544 if (!dev_priv->perf.initialized)
1545 return;
1546
1547 unregister_sysctl_table(dev_priv->perf.sysctl_header);
1548
 1549	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
1550 dev_priv->perf.initialized = false;
1551}