drm/i915/perf: Add engine class instance parameters to perf
[linux-block.git] / drivers / gpu / drm / i915 / i915_perf.c
CommitLineData
eec688e1
RB
1/*
2 * Copyright © 2015-2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Robert Bragg <robert@sixbynine.org>
25 */
26
7abbd8d6
RB
27
28/**
16d98b31 29 * DOC: i915 Perf Overview
7abbd8d6
RB
30 *
31 * Gen graphics supports a large number of performance counters that can help
32 * driver and application developers understand and optimize their use of the
33 * GPU.
34 *
35 * This i915 perf interface enables userspace to configure and open a file
36 * descriptor representing a stream of GPU metrics which can then be read() as
37 * a stream of sample records.
38 *
39 * The interface is particularly suited to exposing buffered metrics that are
40 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
41 *
42 * Streams representing a single context are accessible to applications with a
43 * corresponding drm file descriptor, such that OpenGL can use the interface
44 * without special privileges. Access to system-wide metrics requires root
45 * privileges by default, unless changed via the dev.i915.perf_event_paranoid
46 * sysctl option.
47 *
16d98b31
RB
48 */
49
50/**
51 * DOC: i915 Perf History and Comparison with Core Perf
7abbd8d6
RB
52 *
53 * The interface was initially inspired by the core Perf infrastructure but
54 * some notable differences are:
55 *
56 * i915 perf file descriptors represent a "stream" instead of an "event"; where
57 * a perf event primarily corresponds to a single 64bit value, while a stream
58 * might sample sets of tightly-coupled counters, depending on the
59 * configuration. For example the Gen OA unit isn't designed to support
60 * orthogonal configurations of individual counters; it's configured for a set
61 * of related counters. Samples for an i915 perf stream capturing OA metrics
62 * will include a set of counter values packed in a compact HW specific format.
63 * The OA unit supports a number of different packing formats which can be
64 * selected by the user opening the stream. Perf has support for grouping
65 * events, but each event in the group is configured, validated and
66 * authenticated individually with separate system calls.
67 *
68 * i915 perf stream configurations are provided as an array of u64 (key,value)
69 * pairs, instead of a fixed struct with multiple miscellaneous config members,
70 * interleaved with event-type specific members.
71 *
72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
73 * The supported metrics are being written to memory by the GPU unsynchronized
74 * with the CPU, using HW specific packing formats for counter sets. Sometimes
75 * the constraints on HW configuration require reports to be filtered before it
76 * would be acceptable to expose them to unprivileged applications - to hide
77 * the metrics of other processes/contexts. For these use cases a read() based
78 * interface is a good fit, and provides an opportunity to filter data as it
79 * gets copied from the GPU mapped buffers to userspace buffers.
80 *
81 *
16d98b31
RB
82 * Issues hit with first prototype based on Core Perf
83 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7abbd8d6
RB
84 *
85 * The first prototype of this driver was based on the core perf
86 * infrastructure, and while we did make that mostly work, with some changes to
87 * perf, we found we were breaking or working around too many assumptions baked
88 * into perf's currently cpu centric design.
89 *
90 * In the end we didn't see a clear benefit to making perf's implementation and
91 * interface more complex by changing design assumptions while we knew we still
92 * wouldn't be able to use any existing perf based userspace tools.
93 *
94 * Also considering the Gen specific nature of the Observability hardware and
95 * how userspace will sometimes need to combine i915 perf OA metrics with
96 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
97 * expecting the interface to be used by a platform specific userspace such as
98 * OpenGL or tools. This is to say; we aren't inherently missing out on having
99 * a standard vendor/architecture agnostic interface by not using perf.
100 *
101 *
102 * For posterity, in case we might re-visit trying to adapt core perf to be
103 * better suited to exposing i915 metrics these were the main pain points we
104 * hit:
105 *
106 * - The perf based OA PMU driver broke some significant design assumptions:
107 *
108 * Existing perf pmus are used for profiling work on a cpu and we were
109 * introducing the idea of _IS_DEVICE pmus with different security
110 * implications, the need to fake cpu-related data (such as user/kernel
111 * registers) to fit with perf's current design, and adding _DEVICE records
112 * as a way to forward device-specific status records.
113 *
114 * The OA unit writes reports of counters into a circular buffer, without
115 * involvement from the CPU, making our PMU driver the first of a kind.
116 *
117 * Given the way we were periodically forward data from the GPU-mapped, OA
118 * buffer to perf's buffer, those bursts of sample writes looked to perf like
119 * we were sampling too fast and so we had to subvert its throttling checks.
120 *
121 * Perf supports groups of counters and allows those to be read via
122 * transactions internally but transactions currently seem designed to be
123 * explicitly initiated from the cpu (say in response to a userspace read())
124 * and while we could pull a report out of the OA buffer we can't
125 * trigger a report from the cpu on demand.
126 *
127 * Related to being report based; the OA counters are configured in HW as a
128 * set while perf generally expects counter configurations to be orthogonal.
129 * Although counters can be associated with a group leader as they are
130 * opened, there's no clear precedent for being able to provide group-wide
131 * configuration attributes (for example we want to let userspace choose the
132 * OA unit report format used to capture all counters in a set, or specify a
133 * GPU context to filter metrics on). We avoided using perf's grouping
134 * feature and forwarded OA reports to userspace via perf's 'raw' sample
135 * field. This suited our userspace well considering how coupled the counters
136 * are when dealing with normalizing. It would be inconvenient to split
137 * counters up into separate events, only to require userspace to recombine
138 * them. For Mesa it's also convenient to be forwarded raw, periodic reports
139 * for combining with the side-band raw reports it captures using
140 * MI_REPORT_PERF_COUNT commands.
141 *
16d98b31 142 * - As a side note on perf's grouping feature; there was also some concern
7abbd8d6
RB
143 * that using PERF_FORMAT_GROUP as a way to pack together counter values
144 * would quite drastically inflate our sample sizes, which would likely
145 * lower the effective sampling resolutions we could use when the available
146 * memory bandwidth is limited.
147 *
148 * With the OA unit's report formats, counters are packed together as 32
149 * or 40bit values, with the largest report size being 256 bytes.
150 *
151 * PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
152 * documented ordering to the values, implying PERF_FORMAT_ID must also be
153 * used to add a 64bit ID before each value; giving 16 bytes per counter.
154 *
155 * Related to counter orthogonality; we can't time share the OA unit, while
156 * event scheduling is a central design idea within perf for allowing
157 * userspace to open + enable more events than can be configured in HW at any
158 * one time. The OA unit is not designed to allow re-configuration while in
159 * use. We can't reconfigure the OA unit without losing internal OA unit
160 * state which we can't access explicitly to save and restore. Reconfiguring
161 * the OA unit is also relatively slow, involving ~100 register writes. From
162 * userspace Mesa also depends on a stable OA configuration when emitting
163 * MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
164 * disabled while there are outstanding MI_RPC commands lest we hang the
165 * command streamer.
166 *
167 * The contents of sample records aren't extensible by device drivers (i.e.
168 * the sample_type bits). As an example; Sourab Gupta had been looking to
169 * attach GPU timestamps to our OA samples. We were shoehorning OA reports
170 * into sample records by using the 'raw' field, but it's tricky to pack more
171 * than one thing into this field because events/core.c currently only lets a
172 * pmu give a single raw data pointer plus len which will be copied into the
173 * ring buffer. To include more than the OA report we'd have to copy the
174 * report into an intermediate larger buffer. I'd been considering allowing a
175 * vector of data+len values to be specified for copying the raw data, but
176 * it felt like a kludge to being using the raw field for this purpose.
177 *
178 * - It felt like our perf based PMU was making some technical compromises
179 * just for the sake of using perf:
180 *
181 * perf_event_open() requires events to either relate to a pid or a specific
182 * cpu core, while our device pmu related to neither. Events opened with a
183 * pid will be automatically enabled/disabled according to the scheduling of
184 * that process - so not appropriate for us. When an event is related to a
185 * cpu id, perf ensures pmu methods will be invoked via an inter process
186 * interrupt on that core. To avoid invasive changes our userspace opened OA
187 * perf events for a specific cpu. This was workable but it meant the
188 * majority of the OA driver ran in atomic context, including all OA report
189 * forwarding, which wasn't really necessary in our case and seems to make
190 * our locking requirements somewhat complex as we handled the interaction
191 * with the rest of the i915 driver.
192 */
193
eec688e1 194#include <linux/anon_inodes.h>
d7965152 195#include <linux/sizes.h>
f89823c2 196#include <linux/uuid.h>
eec688e1 197
10be98a7 198#include "gem/i915_gem_context.h"
b508d01f 199#include "gem/i915_gem_internal.h"
a5efcde6 200#include "gt/intel_engine_pm.h"
202b1f4c 201#include "gt/intel_engine_regs.h"
9a61363a 202#include "gt/intel_engine_user.h"
70a2b431 203#include "gt/intel_execlists_submission.h"
45233ab2 204#include "gt/intel_gpu_commands.h"
daed3e44 205#include "gt/intel_gt.h"
f170523a 206#include "gt/intel_gt_clock_utils.h"
ed6b25aa 207#include "gt/intel_gt_mcr.h"
0d6419e9 208#include "gt/intel_gt_regs.h"
a0d3fdb6 209#include "gt/intel_lrc.h"
dd4821ba 210#include "gt/intel_lrc_reg.h"
2871ea85 211#include "gt/intel_ring.h"
01e74274 212#include "gt/uc/intel_guc_slpc.h"
112ed2d3 213
eec688e1 214#include "i915_drv.h"
5472b3f2 215#include "i915_file_private.h"
db94e9f1 216#include "i915_perf.h"
2ef6d3bf 217#include "i915_perf_oa_regs.h"
801543b2 218#include "i915_reg.h"
d7965152 219
fe841686
JL
220/* HW requires this to be a power of two, between 128k and 16M, though driver
221 * is currently generally designed assuming the largest 16M size is used such
222 * that the overflow cases are unlikely in normal operation.
223 */
224#define OA_BUFFER_SIZE SZ_16M
225
226#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
d7965152 227
0dd860cf
RB
228/**
229 * DOC: OA Tail Pointer Race
230 *
231 * There's a HW race condition between OA unit tail pointer register updates and
d7965152 232 * writes to memory whereby the tail pointer can sometimes get ahead of what's
0dd860cf
RB
233 * been written out to the OA buffer so far (in terms of what's visible to the
234 * CPU).
235 *
236 * Although this can be observed explicitly while copying reports to userspace
237 * by checking for a zeroed report-id field in tail reports, we want to account
d1df41eb
LL
238 * for this earlier, as part of the oa_buffer_check_unlocked to avoid lots of
239 * redundant read() attempts.
240 *
241 * We workaround this issue in oa_buffer_check_unlocked() by reading the reports
242 * in the OA buffer, starting from the tail reported by the HW until we find a
243 * report with its first 2 dwords not 0 meaning its previous report is
244 * completely in memory and ready to be read. Those dwords are also set to 0
245 * once read and the whole buffer is cleared upon OA buffer initialization. The
246 * first dword is the reason for this report while the second is the timestamp,
247 * making the chances of having those 2 fields at 0 fairly unlikely. A more
248 * detailed explanation is available in oa_buffer_check_unlocked().
0dd860cf
RB
249 *
250 * Most of the implementation details for this workaround are in
19f81df2 251 * oa_buffer_check_unlocked() and _append_oa_reports()
0dd860cf
RB
252 *
253 * Note for posterity: previously the driver used to define an effective tail
254 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
255 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
256 * This was flawed considering that the OA unit may also automatically generate
257 * non-periodic reports (such as on context switch) or the OA unit may be
258 * enabled without any periodic sampling.
d7965152
RB
259 */
260#define OA_TAIL_MARGIN_NSEC 100000ULL
0dd860cf 261#define INVALID_TAIL_PTR 0xffffffff
d7965152 262
4ef10fe0
LL
263/* The default frequency for checking whether the OA unit has written new
264 * reports to the circular OA buffer...
d7965152 265 */
4ef10fe0
LL
266#define DEFAULT_POLL_FREQUENCY_HZ 200
267#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
d7965152 268
ccdf6341 269/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
ccdf6341
RB
270static u32 i915_perf_stream_paranoid = true;
271
d7965152
RB
272/* The maximum exponent the hardware accepts is 63 (essentially it selects one
273 * of the 64bit timestamp bits to trigger reports from) but there's currently
274 * no known use case for sampling as infrequently as once per 47 thousand years.
275 *
276 * Since the timestamps included in OA reports are only 32bits it seems
277 * reasonable to limit the OA exponent where it's still possible to account for
278 * overflow in OA report timestamps.
279 */
280#define OA_EXPONENT_MAX 31
281
282#define INVALID_CTX_ID 0xffffffff
283
19f81df2
RB
284/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
285#define OAREPORT_REASON_MASK 0x3f
00a7f0d7 286#define OAREPORT_REASON_MASK_EXTENDED 0x7f
19f81df2
RB
287#define OAREPORT_REASON_SHIFT 19
288#define OAREPORT_REASON_TIMER (1<<0)
289#define OAREPORT_REASON_CTX_SWITCH (1<<3)
290#define OAREPORT_REASON_CLK_RATIO (1<<5)
291
2d9da585 292#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
d7965152 293
00319ba0
RB
294/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
295 *
155e941f
RB
296 * The highest sampling frequency we can theoretically program the OA unit
297 * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
298 *
299 * Initialized just before we register the sysctl parameter.
00319ba0 300 */
155e941f 301static int oa_sample_rate_hard_limit;
00319ba0
RB
302
303/* Theoretically we can program the OA unit to sample every 160ns but don't
304 * allow that by default unless root...
305 *
306 * The default threshold of 100000Hz is based on perf's similar
307 * kernel.perf_event_max_sample_rate sysctl parameter.
308 */
309static u32 i915_oa_max_sample_rate = 100000;
310
d7965152
RB
311/* XXX: beware if future OA HW adds new report formats that the current
312 * code assumes all reports have a power-of-two size and ~(size - 1) can
313 * be used as a mask to align the OA tail pointer.
314 */
0f15c5b0 315static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
d7965152
RB
316 [I915_OA_FORMAT_A13] = { 0, 64 },
317 [I915_OA_FORMAT_A29] = { 1, 128 },
318 [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
319 /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
320 [I915_OA_FORMAT_B4_C8] = { 4, 64 },
321 [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
322 [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
323 [I915_OA_FORMAT_C4_B8] = { 7, 64 },
19f81df2
RB
324 [I915_OA_FORMAT_A12] = { 0, 64 },
325 [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
326 [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
81d5f7d9
UNR
327 [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
328 [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
00a7f0d7
LL
329};
330
d7965152 331#define SAMPLE_OA_REPORT (1<<0)
eec688e1 332
16d98b31
RB
333/**
334 * struct perf_open_properties - for validated properties given to open a stream
335 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
336 * @single_context: Whether a single or all gpu contexts should be monitored
9cd20ef7
LL
337 * @hold_preemption: Whether the preemption is disabled for the filtered
338 * context
16d98b31
RB
339 * @ctx_handle: A gem ctx handle for use with @single_context
340 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
341 * @oa_format: An OA unit HW report format
342 * @oa_periodic: Whether to enable periodic OA unit sampling
343 * @oa_period_exponent: The OA unit sampling period is derived from this
9a61363a 344 * @engine: The engine (typically rcs0) being monitored by the OA unit
11ecbddd
LL
345 * @has_sseu: Whether @sseu was specified by userspace
346 * @sseu: internal SSEU configuration computed either from the userspace
347 * specified configuration in the opening parameters or a default value
348 * (see get_default_sseu_config())
4ef10fe0
LL
349 * @poll_oa_period: The period in nanoseconds at which the CPU will check for OA
350 * data availability
16d98b31
RB
351 *
352 * As read_properties_unlocked() enumerates and validates the properties given
353 * to open a stream of metrics the configuration is built up in the structure
354 * which starts out zero initialized.
355 */
eec688e1
RB
356struct perf_open_properties {
357 u32 sample_flags;
358
359 u64 single_context:1;
9cd20ef7 360 u64 hold_preemption:1;
eec688e1 361 u64 ctx_handle;
d7965152
RB
362
363 /* OA sampling state */
364 int metrics_set;
365 int oa_format;
366 bool oa_periodic;
367 int oa_period_exponent;
9a61363a
LL
368
369 struct intel_engine_cs *engine;
11ecbddd
LL
370
371 bool has_sseu;
372 struct intel_sseu sseu;
4ef10fe0
LL
373
374 u64 poll_oa_period;
d7965152
RB
375};
376
6a45008a
LL
377struct i915_oa_config_bo {
378 struct llist_node node;
379
380 struct i915_oa_config *oa_config;
381 struct i915_vma *vma;
382};
383
3dc716fd
VSD
384static struct ctl_table_header *sysctl_header;
385
a37f08a8
UNR
386static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
387
6a45008a 388void i915_oa_config_release(struct kref *ref)
f89823c2 389{
6a45008a
LL
390 struct i915_oa_config *oa_config =
391 container_of(ref, typeof(*oa_config), ref);
392
c2fba936
CW
393 kfree(oa_config->flex_regs);
394 kfree(oa_config->b_counter_regs);
395 kfree(oa_config->mux_regs);
f89823c2 396
6a45008a 397 kfree_rcu(oa_config, rcu);
f89823c2
LL
398}
399
6a45008a
LL
400struct i915_oa_config *
401i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
f89823c2 402{
6a45008a 403 struct i915_oa_config *oa_config;
f89823c2 404
6a45008a 405 rcu_read_lock();
9aba9c18 406 oa_config = idr_find(&perf->metrics_idr, metrics_set);
6a45008a
LL
407 if (oa_config)
408 oa_config = i915_oa_config_get(oa_config);
409 rcu_read_unlock();
f89823c2 410
6a45008a
LL
411 return oa_config;
412}
f89823c2 413
6a45008a
LL
414static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
415{
416 i915_oa_config_put(oa_bo->oa_config);
417 i915_vma_put(oa_bo->vma);
418 kfree(oa_bo);
f89823c2
LL
419}
420
00a7f0d7
LL
421static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
422{
423 struct intel_uncore *uncore = stream->uncore;
424
425 return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
426 GEN12_OAG_OATAILPTR_MASK;
427}
428
a37f08a8 429static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
19f81df2 430{
52111c46 431 struct intel_uncore *uncore = stream->uncore;
a37f08a8 432
8f8b1171 433 return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
19f81df2
RB
434}
435
a37f08a8 436static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
19f81df2 437{
52111c46 438 struct intel_uncore *uncore = stream->uncore;
8f8b1171 439 u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
19f81df2
RB
440
441 return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
442}
443
dbc9a5fb
UNR
444#define oa_report_header_64bit(__s) \
445 ((__s)->oa_buffer.format->header == HDR_64_BIT)
446
447static u64 oa_report_id(struct i915_perf_stream *stream, void *report)
448{
449 return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
450}
451
452static u64 oa_report_reason(struct i915_perf_stream *stream, void *report)
453{
454 return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) &
455 (GRAPHICS_VER(stream->perf->i915) == 12 ?
456 OAREPORT_REASON_MASK_EXTENDED :
457 OAREPORT_REASON_MASK);
458}
459
460static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report)
461{
462 if (oa_report_header_64bit(stream))
463 *(u64 *)report = 0;
464 else
465 *report = 0;
466}
467
468static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report)
469{
470 return !(oa_report_id(stream, report) &
471 stream->perf->gen8_valid_ctx_bit) &&
472 GRAPHICS_VER(stream->perf->i915) <= 11;
473}
474
475static u64 oa_timestamp(struct i915_perf_stream *stream, void *report)
476{
477 return oa_report_header_64bit(stream) ?
478 *((u64 *)report + 1) :
479 *((u32 *)report + 1);
480}
481
482static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report)
483{
484 if (oa_report_header_64bit(stream))
485 *(u64 *)&report[2] = 0;
486 else
487 report[1] = 0;
488}
489
490static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report)
491{
492 u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2];
493
494 return ctx_id & stream->specific_ctx_id_mask;
495}
496
497static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
498{
499 if (oa_report_header_64bit(stream))
500 report[4] = INVALID_CTX_ID;
501 else
502 report[2] = INVALID_CTX_ID;
503}
504
0dd860cf 505/**
19f81df2 506 * oa_buffer_check_unlocked - check for data and update tail ptr state
a37f08a8 507 * @stream: i915 stream instance
d7965152 508 *
0dd860cf
RB
509 * This is either called via fops (for blocking reads in user ctx) or the poll
510 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
511 * if there is data available for userspace to read.
d7965152 512 *
0dd860cf
RB
513 * This function is central to providing a workaround for the OA unit tail
514 * pointer having a race with respect to what data is visible to the CPU.
515 * It is responsible for reading tail pointers from the hardware and giving
516 * the pointers time to 'age' before they are made available for reading.
517 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
518 *
519 * Besides returning true when there is data available to read() this function
d1df41eb
LL
520 * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
521 * object.
0dd860cf
RB
522 *
523 * Note: It's safe to read OA config state here unlocked, assuming that this is
524 * only called while the stream is enabled, while the global OA configuration
525 * can't be modified.
526 *
527 * Returns: %true if the OA buffer contains data, else %false
d7965152 528 */
a37f08a8 529static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
d7965152 530{
d1df41eb 531 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
90981da6 532 int report_size = stream->oa_buffer.format->size;
0dd860cf 533 unsigned long flags;
d16e137e 534 bool pollin;
d1df41eb 535 u32 hw_tail;
0dd860cf 536 u64 now;
3c67ce06 537 u32 partial_report_size;
0dd860cf
RB
538
539 /* We have to consider the (unlikely) possibility that read() errors
d1df41eb
LL
540 * could result in an OA buffer reset which might reset the head and
541 * tail state.
0dd860cf 542 */
a37f08a8 543 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
0dd860cf 544
8f8b1171 545 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
0dd860cf 546
3c67ce06
UNR
547 /* The tail pointer increases in 64 byte increments, not in report_size
548 * steps. Also the report size may not be a power of 2. Compute
549 * potentially partially landed report in the OA buffer
0dd860cf 550 */
3c67ce06
UNR
551 partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
552 partial_report_size %= report_size;
553
554 /* Subtract partial amount off the tail */
555 hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
0dd860cf
RB
556
557 now = ktime_get_mono_fast_ns();
558
d1df41eb
LL
559 if (hw_tail == stream->oa_buffer.aging_tail &&
560 (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
561 /* If the HW tail hasn't move since the last check and the HW
562 * tail has been aging for long enough, declare it the new
563 * tail.
564 */
565 stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
566 } else {
567 u32 head, tail, aged_tail;
4117ebc7 568
d1df41eb
LL
569 /* NB: The head we observe here might effectively be a little
570 * out of date. If a read() is in progress, the head could be
571 * anywhere between this head and stream->oa_buffer.tail.
572 */
573 head = stream->oa_buffer.head - gtt_offset;
574 aged_tail = stream->oa_buffer.tail - gtt_offset;
575
576 hw_tail -= gtt_offset;
577 tail = hw_tail;
578
dbc9a5fb
UNR
579 /* Walk the stream backward until we find a report with report
580 * id and timestmap not at 0. Since the circular buffer pointers
581 * progress by increments of 64 bytes and that reports can be up
582 * to 256 bytes long, we can't tell whether a report has fully
583 * landed in memory before the report id and timestamp of the
584 * following report have effectively landed.
d1df41eb
LL
585 *
586 * This is assuming that the writes of the OA unit land in
587 * memory in the order they were written to.
588 * If not : (╯°□°)╯︵ ┻━┻
589 */
590 while (OA_TAKEN(tail, aged_tail) >= report_size) {
dbc9a5fb 591 void *report = stream->oa_buffer.vaddr + tail;
4117ebc7 592
dbc9a5fb
UNR
593 if (oa_report_id(stream, report) ||
594 oa_timestamp(stream, report))
d1df41eb 595 break;
4117ebc7 596
d1df41eb 597 tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
0dd860cf 598 }
d1df41eb
LL
599
600 if (OA_TAKEN(hw_tail, tail) > report_size &&
601 __ratelimit(&stream->perf->tail_pointer_race))
a10234fd
TU
602 drm_notice(&stream->uncore->i915->drm,
603 "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
604 head, tail, hw_tail);
d1df41eb
LL
605
606 stream->oa_buffer.tail = gtt_offset + tail;
607 stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
608 stream->oa_buffer.aging_timestamp = now;
0dd860cf
RB
609 }
610
d16e137e
LL
611 pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
612 stream->oa_buffer.head - gtt_offset) >= report_size;
613
a37f08a8 614 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
0dd860cf 615
d16e137e 616 return pollin;
d7965152
RB
617}
618
619/**
16d98b31
RB
620 * append_oa_status - Appends a status record to a userspace read() buffer.
621 * @stream: An i915-perf stream opened for OA metrics
622 * @buf: destination buffer given by userspace
623 * @count: the number of bytes userspace wants to read
624 * @offset: (inout): the current position for writing into @buf
625 * @type: The kind of status to report to userspace
626 *
627 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
628 * into the userspace read() buffer.
629 *
630 * The @buf @offset will only be updated on success.
631 *
632 * Returns: 0 on success, negative error code on failure.
d7965152
RB
633 */
634static int append_oa_status(struct i915_perf_stream *stream,
635 char __user *buf,
636 size_t count,
637 size_t *offset,
638 enum drm_i915_perf_record_type type)
639{
640 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
641
642 if ((count - *offset) < header.size)
643 return -ENOSPC;
644
645 if (copy_to_user(buf + *offset, &header, sizeof(header)))
646 return -EFAULT;
647
648 (*offset) += header.size;
649
650 return 0;
651}
652
653/**
16d98b31
RB
654 * append_oa_sample - Copies single OA report into userspace read() buffer.
655 * @stream: An i915-perf stream opened for OA metrics
656 * @buf: destination buffer given by userspace
657 * @count: the number of bytes userspace wants to read
658 * @offset: (inout): the current position for writing into @buf
659 * @report: A single OA report to (optionally) include as part of the sample
660 *
661 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
662 * properties when opening a stream, tracked as `stream->sample_flags`. This
663 * function copies the requested components of a single sample to the given
664 * read() @buf.
665 *
666 * The @buf @offset will only be updated on success.
667 *
668 * Returns: 0 on success, negative error code on failure.
d7965152
RB
669 */
670static int append_oa_sample(struct i915_perf_stream *stream,
671 char __user *buf,
672 size_t count,
673 size_t *offset,
674 const u8 *report)
675{
90981da6 676 int report_size = stream->oa_buffer.format->size;
d7965152 677 struct drm_i915_perf_record_header header;
3c67ce06
UNR
678 int report_size_partial;
679 u8 *oa_buf_end;
d7965152
RB
680
681 header.type = DRM_I915_PERF_RECORD_SAMPLE;
682 header.pad = 0;
683 header.size = stream->sample_size;
684
685 if ((count - *offset) < header.size)
686 return -ENOSPC;
687
688 buf += *offset;
689 if (copy_to_user(buf, &header, sizeof(header)))
690 return -EFAULT;
691 buf += sizeof(header);
692
3c67ce06
UNR
693 oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
694 report_size_partial = oa_buf_end - report;
695
696 if (report_size_partial < report_size) {
697 if (copy_to_user(buf, report, report_size_partial))
698 return -EFAULT;
699 buf += report_size_partial;
700
701 if (copy_to_user(buf, stream->oa_buffer.vaddr,
702 report_size - report_size_partial))
703 return -EFAULT;
704 } else if (copy_to_user(buf, report, report_size)) {
be0bdd67 705 return -EFAULT;
3c67ce06 706 }
d7965152
RB
707
708 (*offset) += header.size;
709
710 return 0;
711}
712
19f81df2 713/**
e9d2871f
MCC
714 * gen8_append_oa_reports - Copies all buffered OA reports into
715 * userspace read() buffer.
19f81df2
RB
716 * @stream: An i915-perf stream opened for OA metrics
717 * @buf: destination buffer given by userspace
718 * @count: the number of bytes userspace wants to read
719 * @offset: (inout): the current position for writing into @buf
720 *
721 * Notably any error condition resulting in a short read (-%ENOSPC or
722 * -%EFAULT) will be returned even though one or more records may
723 * have been successfully copied. In this case it's up to the caller
724 * to decide if the error should be squashed before returning to
725 * userspace.
726 *
727 * Note: reports are consumed from the head, and appended to the
728 * tail, so the tail chases the head?... If you think that's mad
729 * and back-to-front you're not alone, but this follows the
730 * Gen PRM naming convention.
731 *
732 * Returns: 0 on success, negative error code on failure.
733 */
734static int gen8_append_oa_reports(struct i915_perf_stream *stream,
735 char __user *buf,
736 size_t count,
737 size_t *offset)
738{
52111c46 739 struct intel_uncore *uncore = stream->uncore;
90981da6 740 int report_size = stream->oa_buffer.format->size;
a37f08a8
UNR
741 u8 *oa_buf_base = stream->oa_buffer.vaddr;
742 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
fe841686 743 u32 mask = (OA_BUFFER_SIZE - 1);
19f81df2
RB
744 size_t start_offset = *offset;
745 unsigned long flags;
19f81df2 746 u32 head, tail;
19f81df2
RB
747 int ret = 0;
748
a9f236d1 749 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
19f81df2
RB
750 return -EIO;
751
a37f08a8 752 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
19f81df2 753
a37f08a8 754 head = stream->oa_buffer.head;
d1df41eb 755 tail = stream->oa_buffer.tail;
19f81df2 756
a37f08a8 757 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
19f81df2 758
19f81df2
RB
759 /*
760 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
761 * while indexing relative to oa_buf_base.
762 */
763 head -= gtt_offset;
764 tail -= gtt_offset;
765
766 /*
767 * An out of bounds or misaligned head or tail pointer implies a driver
768 * bug since we validate + align the tail pointers we read from the
769 * hardware and we are in full control of the head pointer which should
3c67ce06 770 * only be incremented by multiples of the report size.
19f81df2 771 */
a9f236d1 772 if (drm_WARN_ONCE(&uncore->i915->drm,
3c67ce06
UNR
773 head > OA_BUFFER_SIZE ||
774 tail > OA_BUFFER_SIZE,
a9f236d1
PB
775 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
776 head, tail))
19f81df2
RB
777 return -EIO;
778
779
780 for (/* none */;
f1d8e2bf 781 OA_TAKEN(tail, head);
19f81df2
RB
782 head = (head + report_size) & mask) {
783 u8 *report = oa_buf_base + head;
784 u32 *report32 = (void *)report;
785 u32 ctx_id;
dbc9a5fb 786 u64 reason;
19f81df2 787
19f81df2
RB
788 /*
789 * The reason field includes flags identifying what
790 * triggered this specific report (mostly timer
791 * triggered or e.g. due to a context switch).
792 *
dbc9a5fb
UNR
793 * In MMIO triggered reports, some platforms do not set the
794 * reason bit in this field and it is valid to have a reason
795 * field of zero.
19f81df2 796 */
dbc9a5fb
UNR
797 reason = oa_report_reason(stream, report);
798 ctx_id = oa_context_id(stream, report32);
19f81df2
RB
799
800 /*
801 * Squash whatever is in the CTX_ID field if it's marked as
802 * invalid to be sure we avoid false-positive, single-context
803 * filtering below...
804 *
805 * Note: that we don't clear the valid_ctx_bit so userspace can
806 * understand that the ID has been squashed by the kernel.
807 */
dbc9a5fb
UNR
808 if (oa_report_ctx_invalid(stream, report)) {
809 ctx_id = INVALID_CTX_ID;
810 oa_context_id_squash(stream, report32);
811 }
19f81df2
RB
812
813 /*
814 * NB: For Gen 8 the OA unit no longer supports clock gating
815 * off for a specific context and the kernel can't securely
816 * stop the counters from updating as system-wide / global
817 * values.
818 *
819 * Automatic reports now include a context ID so reports can be
820 * filtered on the cpu but it's not worth trying to
821 * automatically subtract/hide counter progress for other
822 * contexts while filtering since we can't stop userspace
823 * issuing MI_REPORT_PERF_COUNT commands which would still
824 * provide a side-band view of the real values.
825 *
826 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
827 * to normalize counters for a single filtered context then it
828 * needs be forwarded bookend context-switch reports so that it
829 * can track switches in between MI_REPORT_PERF_COUNT commands
830 * and can itself subtract/ignore the progress of counters
831 * associated with other contexts. Note that the hardware
832 * automatically triggers reports when switching to a new
833 * context which are tagged with the ID of the newly active
834 * context. To avoid the complexity (and likely fragility) of
835 * reading ahead while parsing reports to try and minimize
836 * forwarding redundant context switch reports (i.e. between
837 * other, unrelated contexts) we simply elect to forward them
838 * all.
839 *
840 * We don't rely solely on the reason field to identify context
841 * switches since it's not-uncommon for periodic samples to
842 * identify a switch before any 'context switch' report.
843 */
a5a6d92f 844 if (!stream->ctx ||
a37f08a8
UNR
845 stream->specific_ctx_id == ctx_id ||
846 stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
19f81df2
RB
847 reason & OAREPORT_REASON_CTX_SWITCH) {
848
849 /*
850 * While filtering for a single context we avoid
851 * leaking the IDs of other contexts.
852 */
a5a6d92f 853 if (stream->ctx &&
a37f08a8 854 stream->specific_ctx_id != ctx_id) {
dbc9a5fb 855 oa_context_id_squash(stream, report32);
19f81df2
RB
856 }
857
858 ret = append_oa_sample(stream, buf, count, offset,
859 report);
860 if (ret)
861 break;
862
a37f08a8 863 stream->oa_buffer.last_ctx_id = ctx_id;
19f81df2
RB
864 }
865
866 /*
dbc9a5fb 867 * Clear out the report id and timestamp as a means to detect unlanded
d1df41eb 868 * reports.
19f81df2 869 */
dbc9a5fb
UNR
870 oa_report_id_clear(stream, report32);
871 oa_timestamp_clear(stream, report32);
19f81df2
RB
872 }
873
874 if (start_offset != *offset) {
00a7f0d7
LL
875 i915_reg_t oaheadptr;
876
651e7d48 877 oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
00a7f0d7
LL
878 GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
879
a37f08a8 880 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
19f81df2
RB
881
882 /*
883 * We removed the gtt_offset for the copy loop above, indexing
884 * relative to oa_buf_base so put back here...
885 */
886 head += gtt_offset;
00a7f0d7
LL
887 intel_uncore_write(uncore, oaheadptr,
888 head & GEN12_OAG_OAHEADPTR_MASK);
a37f08a8 889 stream->oa_buffer.head = head;
19f81df2 890
a37f08a8 891 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
19f81df2
RB
892 }
893
894 return ret;
895}
896
897/**
898 * gen8_oa_read - copy status records then buffered OA reports
899 * @stream: An i915-perf stream opened for OA metrics
900 * @buf: destination buffer given by userspace
901 * @count: the number of bytes userspace wants to read
902 * @offset: (inout): the current position for writing into @buf
903 *
904 * Checks OA unit status registers and if necessary appends corresponding
905 * status records for userspace (such as for a buffer full condition) and then
906 * initiate appending any buffered OA reports.
907 *
908 * Updates @offset according to the number of bytes successfully copied into
909 * the userspace buffer.
910 *
911 * NB: some data may be successfully copied to the userspace buffer
912 * even if an error is returned, and this is reflected in the
913 * updated @offset.
914 *
915 * Returns: zero on success or a negative error code
916 */
917static int gen8_oa_read(struct i915_perf_stream *stream,
918 char __user *buf,
919 size_t count,
920 size_t *offset)
921{
52111c46 922 struct intel_uncore *uncore = stream->uncore;
19f81df2 923 u32 oastatus;
00a7f0d7 924 i915_reg_t oastatus_reg;
19f81df2
RB
925 int ret;
926
a9f236d1 927 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
19f81df2
RB
928 return -EIO;
929
651e7d48 930 oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
00a7f0d7
LL
931 GEN12_OAG_OASTATUS : GEN8_OASTATUS;
932
933 oastatus = intel_uncore_read(uncore, oastatus_reg);
19f81df2
RB
934
935 /*
936 * We treat OABUFFER_OVERFLOW as a significant error:
937 *
938 * Although theoretically we could handle this more gracefully
939 * sometimes, some Gens don't correctly suppress certain
940 * automatically triggered reports in this condition and so we
941 * have to assume that old reports are now being trampled
942 * over.
fe841686
JL
943 *
944 * Considering how we don't currently give userspace control
945 * over the OA buffer size and always configure a large 16MB
946 * buffer, then a buffer overflow does anyway likely indicate
947 * that something has gone quite badly wrong.
19f81df2
RB
948 */
949 if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
950 ret = append_oa_status(stream, buf, count, offset,
951 DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
952 if (ret)
953 return ret;
954
2fec5391
UNR
955 drm_dbg(&stream->perf->i915->drm,
956 "OA buffer overflow (exponent = %d): force restart\n",
957 stream->period_exponent);
19f81df2 958
8f8b1171
CW
959 stream->perf->ops.oa_disable(stream);
960 stream->perf->ops.oa_enable(stream);
19f81df2
RB
961
962 /*
963 * Note: .oa_enable() is expected to re-init the oabuffer and
964 * reset GEN8_OASTATUS for us
965 */
00a7f0d7 966 oastatus = intel_uncore_read(uncore, oastatus_reg);
19f81df2
RB
967 }
968
969 if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
970 ret = append_oa_status(stream, buf, count, offset,
971 DRM_I915_PERF_RECORD_OA_REPORT_LOST);
972 if (ret)
973 return ret;
059a0beb
LL
974
975 intel_uncore_rmw(uncore, oastatus_reg,
976 GEN8_OASTATUS_COUNTER_OVERFLOW |
977 GEN8_OASTATUS_REPORT_LOST,
651e7d48 978 IS_GRAPHICS_VER(uncore->i915, 8, 11) ?
059a0beb
LL
979 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
980 GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
19f81df2
RB
981 }
982
983 return gen8_append_oa_reports(stream, buf, count, offset);
984}
985
d7965152 986/**
e9d2871f
MCC
987 * gen7_append_oa_reports - Copies all buffered OA reports into
988 * userspace read() buffer.
d7965152
RB
989 * @stream: An i915-perf stream opened for OA metrics
990 * @buf: destination buffer given by userspace
991 * @count: the number of bytes userspace wants to read
992 * @offset: (inout): the current position for writing into @buf
d7965152 993 *
16d98b31
RB
994 * Notably any error condition resulting in a short read (-%ENOSPC or
995 * -%EFAULT) will be returned even though one or more records may
d7965152
RB
996 * have been successfully copied. In this case it's up to the caller
997 * to decide if the error should be squashed before returning to
998 * userspace.
999 *
1000 * Note: reports are consumed from the head, and appended to the
e81b3a55 1001 * tail, so the tail chases the head?... If you think that's mad
d7965152
RB
1002 * and back-to-front you're not alone, but this follows the
1003 * Gen PRM naming convention.
16d98b31
RB
1004 *
1005 * Returns: 0 on success, negative error code on failure.
d7965152
RB
1006 */
1007static int gen7_append_oa_reports(struct i915_perf_stream *stream,
1008 char __user *buf,
1009 size_t count,
3bb335c1 1010 size_t *offset)
d7965152 1011{
52111c46 1012 struct intel_uncore *uncore = stream->uncore;
90981da6 1013 int report_size = stream->oa_buffer.format->size;
a37f08a8
UNR
1014 u8 *oa_buf_base = stream->oa_buffer.vaddr;
1015 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
fe841686 1016 u32 mask = (OA_BUFFER_SIZE - 1);
3bb335c1 1017 size_t start_offset = *offset;
0dd860cf 1018 unsigned long flags;
0dd860cf 1019 u32 head, tail;
d7965152
RB
1020 int ret = 0;
1021
a9f236d1 1022 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
d7965152
RB
1023 return -EIO;
1024
a37f08a8 1025 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
f279020a 1026
a37f08a8 1027 head = stream->oa_buffer.head;
d1df41eb 1028 tail = stream->oa_buffer.tail;
f279020a 1029
a37f08a8 1030 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
d7965152 1031
0dd860cf
RB
1032 /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
1033 * while indexing relative to oa_buf_base.
d7965152 1034 */
0dd860cf
RB
1035 head -= gtt_offset;
1036 tail -= gtt_offset;
d7965152 1037
0dd860cf
RB
1038 /* An out of bounds or misaligned head or tail pointer implies a driver
1039 * bug since we validate + align the tail pointers we read from the
1040 * hardware and we are in full control of the head pointer which should
1041 * only be incremented by multiples of the report size (notably also
1042 * all a power of two).
d7965152 1043 */
a9f236d1
PB
1044 if (drm_WARN_ONCE(&uncore->i915->drm,
1045 head > OA_BUFFER_SIZE || head % report_size ||
1046 tail > OA_BUFFER_SIZE || tail % report_size,
1047 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
1048 head, tail))
0dd860cf 1049 return -EIO;
d7965152 1050
d7965152
RB
1051
1052 for (/* none */;
f1d8e2bf 1053 OA_TAKEN(tail, head);
d7965152
RB
1054 head = (head + report_size) & mask) {
1055 u8 *report = oa_buf_base + head;
1056 u32 *report32 = (void *)report;
1057
1058 /* All the report sizes factor neatly into the buffer
1059 * size so we never expect to see a report split
1060 * between the beginning and end of the buffer.
1061 *
1062 * Given the initial alignment check a misalignment
1063 * here would imply a driver bug that would result
1064 * in an overrun.
1065 */
a9f236d1
PB
1066 if (drm_WARN_ON(&uncore->i915->drm,
1067 (OA_BUFFER_SIZE - head) < report_size)) {
0bf85735
WK
1068 drm_err(&uncore->i915->drm,
1069 "Spurious OA head ptr: non-integral report offset\n");
d7965152
RB
1070 break;
1071 }
1072
1073 /* The report-ID field for periodic samples includes
1074 * some undocumented flags related to what triggered
1075 * the report and is never expected to be zero so we
1076 * can check that the report isn't invalid before
1077 * copying it to userspace...
1078 */
1079 if (report32[0] == 0) {
8f8b1171 1080 if (__ratelimit(&stream->perf->spurious_report_rs))
a10234fd
TU
1081 drm_notice(&uncore->i915->drm,
1082 "Skipping spurious, invalid OA report\n");
d7965152
RB
1083 continue;
1084 }
1085
1086 ret = append_oa_sample(stream, buf, count, offset, report);
1087 if (ret)
1088 break;
1089
d1df41eb
LL
1090 /* Clear out the first 2 dwords as a mean to detect unlanded
1091 * reports.
d7965152
RB
1092 */
1093 report32[0] = 0;
d1df41eb 1094 report32[1] = 0;
d7965152
RB
1095 }
1096
3bb335c1 1097 if (start_offset != *offset) {
a37f08a8 1098 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
0dd860cf 1099
3bb335c1
RB
1100 /* We removed the gtt_offset for the copy loop above, indexing
1101 * relative to oa_buf_base so put back here...
1102 */
1103 head += gtt_offset;
1104
8f8b1171
CW
1105 intel_uncore_write(uncore, GEN7_OASTATUS2,
1106 (head & GEN7_OASTATUS2_HEAD_MASK) |
1107 GEN7_OASTATUS2_MEM_SELECT_GGTT);
a37f08a8 1108 stream->oa_buffer.head = head;
0dd860cf 1109
a37f08a8 1110 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
3bb335c1 1111 }
d7965152
RB
1112
1113 return ret;
1114}
1115
16d98b31
RB
1116/**
1117 * gen7_oa_read - copy status records then buffered OA reports
1118 * @stream: An i915-perf stream opened for OA metrics
1119 * @buf: destination buffer given by userspace
1120 * @count: the number of bytes userspace wants to read
1121 * @offset: (inout): the current position for writing into @buf
1122 *
1123 * Checks Gen 7 specific OA unit status registers and if necessary appends
1124 * corresponding status records for userspace (such as for a buffer full
1125 * condition) and then initiate appending any buffered OA reports.
1126 *
1127 * Updates @offset according to the number of bytes successfully copied into
1128 * the userspace buffer.
1129 *
1130 * Returns: zero on success or a negative error code
1131 */
d7965152
RB
1132static int gen7_oa_read(struct i915_perf_stream *stream,
1133 char __user *buf,
1134 size_t count,
1135 size_t *offset)
1136{
52111c46 1137 struct intel_uncore *uncore = stream->uncore;
d7965152 1138 u32 oastatus1;
d7965152
RB
1139 int ret;
1140
a9f236d1 1141 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
d7965152
RB
1142 return -EIO;
1143
8f8b1171 1144 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
d7965152 1145
d7965152
RB
1146 /* XXX: On Haswell we don't have a safe way to clear oastatus1
1147 * bits while the OA unit is enabled (while the tail pointer
1148 * may be updated asynchronously) so we ignore status bits
1149 * that have already been reported to userspace.
1150 */
8f8b1171 1151 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
d7965152
RB
1152
1153 /* We treat OABUFFER_OVERFLOW as a significant error:
1154 *
1155 * - The status can be interpreted to mean that the buffer is
1156 * currently full (with a higher precedence than OA_TAKEN()
1157 * which will start to report a near-empty buffer after an
1158 * overflow) but it's awkward that we can't clear the status
1159 * on Haswell, so without a reset we won't be able to catch
1160 * the state again.
1161 *
1162 * - Since it also implies the HW has started overwriting old
1163 * reports it may also affect our sanity checks for invalid
1164 * reports when copying to userspace that assume new reports
1165 * are being written to cleared memory.
1166 *
1167 * - In the future we may want to introduce a flight recorder
1168 * mode where the driver will automatically maintain a safe
1169 * guard band between head/tail, avoiding this overflow
1170 * condition, but we avoid the added driver complexity for
1171 * now.
1172 */
1173 if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
1174 ret = append_oa_status(stream, buf, count, offset,
1175 DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
1176 if (ret)
1177 return ret;
1178
2fec5391
UNR
1179 drm_dbg(&stream->perf->i915->drm,
1180 "OA buffer overflow (exponent = %d): force restart\n",
1181 stream->period_exponent);
d7965152 1182
8f8b1171
CW
1183 stream->perf->ops.oa_disable(stream);
1184 stream->perf->ops.oa_enable(stream);
d7965152 1185
8f8b1171 1186 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
d7965152
RB
1187 }
1188
1189 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
1190 ret = append_oa_status(stream, buf, count, offset,
1191 DRM_I915_PERF_RECORD_OA_REPORT_LOST);
1192 if (ret)
1193 return ret;
8f8b1171 1194 stream->perf->gen7_latched_oastatus1 |=
d7965152
RB
1195 GEN7_OASTATUS1_REPORT_LOST;
1196 }
1197
3bb335c1 1198 return gen7_append_oa_reports(stream, buf, count, offset);
d7965152
RB
1199}
1200
16d98b31
RB
1201/**
1202 * i915_oa_wait_unlocked - handles blocking IO until OA data available
1203 * @stream: An i915-perf stream opened for OA metrics
1204 *
1205 * Called when userspace tries to read() from a blocking stream FD opened
1206 * for OA metrics. It waits until the hrtimer callback finds a non-empty
1207 * OA buffer and wakes us.
1208 *
1209 * Note: it's acceptable to have this return with some false positives
1210 * since any subsequent read handling will return -EAGAIN if there isn't
1211 * really data ready for userspace yet.
1212 *
1213 * Returns: zero on success or a negative error code
1214 */
d7965152
RB
1215static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1216{
d7965152 1217 /* We would wait indefinitely if periodic sampling is not enabled */
a37f08a8 1218 if (!stream->periodic)
d7965152
RB
1219 return -EIO;
1220
a37f08a8
UNR
1221 return wait_event_interruptible(stream->poll_wq,
1222 oa_buffer_check_unlocked(stream));
d7965152
RB
1223}
1224
16d98b31
RB
1225/**
1226 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
1227 * @stream: An i915-perf stream opened for OA metrics
1228 * @file: An i915 perf stream file
1229 * @wait: poll() state table
1230 *
1231 * For handling userspace polling on an i915 perf stream opened for OA metrics,
1232 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
1233 * when it sees data ready to read in the circular OA buffer.
1234 */
d7965152
RB
1235static void i915_oa_poll_wait(struct i915_perf_stream *stream,
1236 struct file *file,
1237 poll_table *wait)
1238{
a37f08a8 1239 poll_wait(file, &stream->poll_wq, wait);
d7965152
RB
1240}
1241
16d98b31
RB
1242/**
1243 * i915_oa_read - just calls through to &i915_oa_ops->read
1244 * @stream: An i915-perf stream opened for OA metrics
1245 * @buf: destination buffer given by userspace
1246 * @count: the number of bytes userspace wants to read
1247 * @offset: (inout): the current position for writing into @buf
1248 *
1249 * Updates @offset according to the number of bytes successfully copied into
1250 * the userspace buffer.
1251 *
1252 * Returns: zero on success or a negative error code
1253 */
d7965152
RB
1254static int i915_oa_read(struct i915_perf_stream *stream,
1255 char __user *buf,
1256 size_t count,
1257 size_t *offset)
1258{
8f8b1171 1259 return stream->perf->ops.read(stream, buf, count, offset);
d7965152
RB
1260}
1261
a37f08a8 1262static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
61d5676b 1263{
5e2a0419 1264 struct i915_gem_engines_iter it;
a37f08a8 1265 struct i915_gem_context *ctx = stream->ctx;
61d5676b 1266 struct intel_context *ce;
f00ecc2e
ML
1267 struct i915_gem_ww_ctx ww;
1268 int err = -ENODEV;
61d5676b 1269
5e2a0419 1270 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
9a61363a 1271 if (ce->engine != stream->engine) /* first match! */
5e2a0419
CW
1272 continue;
1273
f00ecc2e
ML
1274 err = 0;
1275 break;
fa9f6681 1276 }
5e2a0419 1277 i915_gem_context_unlock_engines(ctx);
61d5676b 1278
f00ecc2e
ML
1279 if (err)
1280 return ERR_PTR(err);
1281
1282 i915_gem_ww_ctx_init(&ww, true);
1283retry:
1284 /*
1285 * As the ID is the gtt offset of the context's vma we
1286 * pin the vma to ensure the ID remains fixed.
1287 */
1288 err = intel_context_pin_ww(ce, &ww);
1289 if (err == -EDEADLK) {
1290 err = i915_gem_ww_ctx_backoff(&ww);
1291 if (!err)
1292 goto retry;
1293 }
1294 i915_gem_ww_ctx_fini(&ww);
1295
1296 if (err)
1297 return ERR_PTR(err);
1298
1299 stream->pinned_ctx = ce;
a37f08a8 1300 return stream->pinned_ctx;
61d5676b
LL
1301}
1302
682aa437
UNR
1303static int
1304__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
1305{
1306 u32 *cs, cmd;
1307
1308 cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1309 if (GRAPHICS_VER(rq->engine->i915) >= 8)
1310 cmd++;
1311
1312 cs = intel_ring_begin(rq, 4);
1313 if (IS_ERR(cs))
1314 return PTR_ERR(cs);
1315
1316 *cs++ = cmd;
1317 *cs++ = i915_mmio_reg_offset(reg);
1318 *cs++ = ggtt_offset;
1319 *cs++ = 0;
1320
1321 intel_ring_advance(rq, cs);
1322
1323 return 0;
1324}
1325
1326static int
1327__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
1328{
1329 struct i915_request *rq;
1330 int err;
1331
1332 rq = i915_request_create(ce);
1333 if (IS_ERR(rq))
1334 return PTR_ERR(rq);
1335
1336 i915_request_get(rq);
1337
1338 err = __store_reg_to_mem(rq, reg, ggtt_offset);
1339
1340 i915_request_add(rq);
1341 if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
1342 err = -ETIME;
1343
1344 i915_request_put(rq);
1345
1346 return err;
1347}
1348
1349static int
1350gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
1351{
1352 struct i915_vma *scratch;
1353 u32 *val;
1354 int err;
1355
1356 scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
1357 if (IS_ERR(scratch))
1358 return PTR_ERR(scratch);
1359
1360 err = i915_vma_sync(scratch);
1361 if (err)
1362 goto err_scratch;
1363
1364 err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
1365 i915_ggtt_offset(scratch));
1366 if (err)
1367 goto err_scratch;
1368
1369 val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
1370 if (IS_ERR(val)) {
1371 err = PTR_ERR(val);
1372 goto err_scratch;
1373 }
1374
1375 *ctx_id = *val;
1376 i915_gem_object_unpin_map(scratch->obj);
1377
1378err_scratch:
1379 i915_vma_unpin_and_release(&scratch, 0);
1380 return err;
1381}
1382
1383/*
1384 * For execlist mode of submission, pick an unused context id
1385 * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
1386 * XXX_MAX_CONTEXT_HW_ID is used by idle context
1387 *
1388 * For GuC mode of submission read context id from the upper dword of the
1389 * EXECLIST_STATUS register. Note that we read this value only once and expect
1390 * that the value stays fixed for the entire OA use case. There are cases where
1391 * GuC KMD implementation may deregister a context to reuse it's context id, but
1392 * we prevent that from happening to the OA context by pinning it.
1393 */
1394static int gen12_get_render_context_id(struct i915_perf_stream *stream)
1395{
1396 u32 ctx_id, mask;
1397 int ret;
1398
1399 if (intel_engine_uses_guc(stream->engine)) {
1400 ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
1401 if (ret)
1402 return ret;
1403
1404 mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
1405 (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
1406 } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
1407 ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
1408 (XEHP_SW_CTX_ID_SHIFT - 32);
1409
1410 mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
1411 (XEHP_SW_CTX_ID_SHIFT - 32);
1412 } else {
1413 ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
1414 (GEN11_SW_CTX_ID_SHIFT - 32);
1415
1416 mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
1417 (GEN11_SW_CTX_ID_SHIFT - 32);
1418 }
1419 stream->specific_ctx_id = ctx_id & mask;
1420 stream->specific_ctx_id_mask = mask;
1421
1422 return 0;
1423}
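/*
 * In each branch above the id/mask pair describes the sw context id field
 * within the upper dword of the context descriptor (hence the "- 32" on the
 * shifts); that is what the OA report context-id field is matched against, so
 * specific_ctx_id_mask lets report filtering ignore the remaining descriptor
 * bits.
 */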
1424
a5c3a3cb
UNR
1425static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
1426{
1427 u32 idx = *offset;
1428 u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
1429 bool found = false;
1430
1431 idx++;
1432 for (; idx < len; idx += 2) {
1433 if (state[idx] == reg) {
1434 found = true;
1435 break;
1436 }
1437 }
1438
1439 *offset = idx;
1440 return found;
1441}
1442
1443static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
1444{
1445 u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
1446 u32 *state = ce->lrc_reg_state;
1447
95c713d7
UNR
1448 if (drm_WARN_ON(&ce->engine->i915->drm, !state))
1449 return U32_MAX;
1450
a5c3a3cb
UNR
1451 for (offset = 0; offset < len; ) {
1452 if (IS_MI_LRI_CMD(state[offset])) {
1453 /*
 1454 * We expect reg-value pairs in the MI_LRI command, so
 1455 * MI_LRI_LEN() should be even; if not, issue a warning.
1456 */
1457 drm_WARN_ON(&ce->engine->i915->drm,
1458 MI_LRI_LEN(state[offset]) & 0x1);
1459
1460 if (oa_find_reg_in_lri(state, reg, &offset, len))
1461 break;
1462 } else {
1463 offset++;
1464 }
1465 }
1466
1467 return offset < len ? offset : U32_MAX;
1468}
1469
1470static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
1471{
1472 i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
1473 struct i915_perf *perf = &ce->engine->i915->perf;
1474 u32 offset = perf->ctx_oactxctrl_offset;
1475
 1476 /* Do this only once. Failure is stored as an offset of U32_MAX */
1477 if (offset)
1478 goto exit;
1479
1480 offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
1481 perf->ctx_oactxctrl_offset = offset;
1482
1483 drm_dbg(&ce->engine->i915->drm,
1484 "%s oa ctx control at 0x%08x dword offset\n",
1485 ce->engine->name, offset);
1486
1487exit:
1488 return offset && offset != U32_MAX ? 0 : -ENODEV;
1489}
1490
1491static bool engine_supports_mi_query(struct intel_engine_cs *engine)
1492{
1493 return engine->class == RENDER_CLASS;
1494}
1495
16d98b31
RB
1496/**
1497 * oa_get_render_ctx_id - determine and hold ctx hw id
1498 * @stream: An i915-perf stream opened for OA metrics
1499 *
1500 * Determine the render context hw id, and ensure it remains fixed for the
d7965152
RB
1501 * lifetime of the stream. This ensures that we don't have to worry about
1502 * updating the context ID in OACONTROL on the fly.
16d98b31
RB
1503 *
1504 * Returns: zero on success or a negative error code
d7965152
RB
1505 */
1506static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
1507{
61d5676b 1508 struct intel_context *ce;
682aa437 1509 int ret = 0;
d7965152 1510
a37f08a8 1511 ce = oa_pin_context(stream);
61d5676b
LL
1512 if (IS_ERR(ce))
1513 return PTR_ERR(ce);
19f81df2 1514
95c713d7
UNR
1515 if (engine_supports_mi_query(stream->engine) &&
1516 HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) {
a5c3a3cb
UNR
1517 /*
1518 * We are enabling perf query here. If we don't find the context
1519 * offset here, just return an error.
1520 */
1521 ret = set_oa_ctx_ctrl_offset(ce);
1522 if (ret) {
1523 intel_context_unpin(ce);
1524 drm_err(&stream->perf->i915->drm,
1525 "Enabling perf query failed for %s\n",
1526 stream->engine->name);
1527 return ret;
1528 }
1529 }
1530
651e7d48 1531 switch (GRAPHICS_VER(ce->engine->i915)) {
61d5676b 1532 case 7: {
19f81df2 1533 /*
61d5676b
LL
1534 * On Haswell we don't do any post processing of the reports
1535 * and don't need to use the mask.
19f81df2 1536 */
a37f08a8
UNR
1537 stream->specific_ctx_id = i915_ggtt_offset(ce->state);
1538 stream->specific_ctx_id_mask = 0;
61d5676b
LL
1539 break;
1540 }
d7965152 1541
61d5676b
LL
1542 case 8:
1543 case 9:
c92c36ed 1544 if (intel_engine_uses_guc(ce->engine)) {
61d5676b
LL
1545 /*
1546 * When using GuC, the context descriptor we write in
1547 * i915 is read by GuC and rewritten before it's
1548 * actually written into the hardware. The LRCA is
1549 * what is put into the context id field of the
1550 * context descriptor by GuC. Because it's aligned to
 1551 * a page, the lower 12 bits are always 0 and
1552 * dropped by GuC. They won't be part of the context
1553 * ID in the OA reports, so squash those lower bits.
1554 */
53b2622e 1555 stream->specific_ctx_id = ce->lrc.lrca >> 12;
19f81df2 1556
61d5676b
LL
1557 /*
1558 * GuC uses the top bit to signal proxy submission, so
1559 * ignore that bit.
1560 */
a37f08a8 1561 stream->specific_ctx_id_mask =
61d5676b 1562 (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
c92c36ed
CW
1563 } else {
1564 stream->specific_ctx_id_mask =
1565 (1U << GEN8_CTX_ID_WIDTH) - 1;
1566 stream->specific_ctx_id = stream->specific_ctx_id_mask;
61d5676b
LL
1567 }
1568 break;
1569
45e9c829 1570 case 11:
50a9ea08 1571 case 12:
682aa437 1572 ret = gen12_get_render_context_id(stream);
61d5676b 1573 break;
61d5676b
LL
1574
1575 default:
651e7d48 1576 MISSING_CASE(GRAPHICS_VER(ce->engine->i915));
19f81df2 1577 }
d7965152 1578
6f280b13 1579 ce->tag = stream->specific_ctx_id;
2935ed53 1580
0bf85735
WK
1581 drm_dbg(&stream->perf->i915->drm,
1582 "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
1583 stream->specific_ctx_id,
1584 stream->specific_ctx_id_mask);
61d5676b 1585
682aa437 1586 return ret;
d7965152
RB
1587}
1588
16d98b31
RB
1589/**
1590 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
1591 * @stream: An i915-perf stream opened for OA metrics
1592 *
1593 * In case anything needed doing to ensure the context HW ID would remain valid
1594 * for the lifetime of the stream, then that can be undone here.
1595 */
d7965152
RB
1596static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1597{
1fc44d9b 1598 struct intel_context *ce;
d7965152 1599
a37f08a8 1600 ce = fetch_and_zero(&stream->pinned_ctx);
2935ed53
CW
1601 if (ce) {
1602 ce->tag = 0; /* recomputed on next submission after parking */
1fc44d9b 1603 intel_context_unpin(ce);
2935ed53
CW
1604 }
1605
1606 stream->specific_ctx_id = INVALID_CTX_ID;
1607 stream->specific_ctx_id_mask = 0;
d7965152
RB
1608}
1609
1610static void
a37f08a8 1611free_oa_buffer(struct i915_perf_stream *stream)
d7965152 1612{
a37f08a8 1613 i915_vma_unpin_and_release(&stream->oa_buffer.vma,
6a2f59e4 1614 I915_VMA_RELEASE_MAP);
d7965152 1615
a37f08a8 1616 stream->oa_buffer.vaddr = NULL;
d7965152
RB
1617}
1618
6a45008a
LL
1619static void
1620free_oa_configs(struct i915_perf_stream *stream)
1621{
1622 struct i915_oa_config_bo *oa_bo, *tmp;
1623
1624 i915_oa_config_put(stream->oa_config);
1625 llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
1626 free_oa_config_bo(oa_bo);
1627}
1628
daed3e44
LL
1629static void
1630free_noa_wait(struct i915_perf_stream *stream)
1631{
1632 i915_vma_unpin_and_release(&stream->noa_wait, 0);
1633}
1634
5f284e9c
UNR
1635static bool engine_supports_oa(const struct intel_engine_cs *engine)
1636{
1637 return engine->oa_group;
1638}
1639
d7965152
RB
1640static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
1641{
8f8b1171 1642 struct i915_perf *perf = stream->perf;
9677a9f3 1643 struct intel_gt *gt = stream->engine->gt;
5f284e9c 1644 struct i915_perf_group *g = stream->engine->oa_group;
d7965152 1645
5f284e9c 1646 if (WARN_ON(stream != g->exclusive_stream))
6f10c4d6 1647 return;
d7965152 1648
19f81df2 1649 /*
f89823c2
LL
1650 * Unset exclusive_stream first, it will be checked while disabling
1651 * the metric set on gen8+.
a5af081d
CW
1652 *
1653 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
19f81df2 1654 */
5f284e9c 1655 WRITE_ONCE(g->exclusive_stream, NULL);
8f8b1171 1656 perf->ops.disable_metric_set(stream);
d7965152 1657
a37f08a8 1658 free_oa_buffer(stream);
d7965152 1659
01e74274
VB
1660 /*
1661 * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
1662 */
2810ac6c 1663 if (stream->override_gucrc)
01e74274
VB
1664 drm_WARN_ON(&gt->i915->drm,
1665 intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));
1666
52111c46 1667 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
a5efcde6 1668 intel_engine_pm_put(stream->engine);
d7965152
RB
1669
1670 if (stream->ctx)
1671 oa_put_render_ctx_id(stream);
1672
6a45008a 1673 free_oa_configs(stream);
daed3e44 1674 free_noa_wait(stream);
f89823c2 1675
8f8b1171 1676 if (perf->spurious_report_rs.missed) {
a10234fd
TU
1677 drm_notice(&gt->i915->drm,
1678 "%d spurious OA report notices suppressed due to ratelimiting\n",
1679 perf->spurious_report_rs.missed);
712122ea 1680 }
d7965152
RB
1681}
1682
a37f08a8 1683static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
d7965152 1684{
52111c46 1685 struct intel_uncore *uncore = stream->uncore;
a37f08a8 1686 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
0dd860cf
RB
1687 unsigned long flags;
1688
a37f08a8 1689 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
d7965152
RB
1690
1691 /* Pre-DevBDW: OABUFFER must be set with counters off,
1692 * before OASTATUS1, but after OASTATUS2
1693 */
8f8b1171
CW
1694 intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
1695 gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
a37f08a8 1696 stream->oa_buffer.head = gtt_offset;
f279020a 1697
8f8b1171 1698 intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
f279020a 1699
8f8b1171
CW
1700 intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
1701 gtt_offset | OABUFFER_SIZE_16M);
d7965152 1702
0dd860cf 1703 /* Mark that we need updated tail pointers to read from... */
d1df41eb
LL
1704 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1705 stream->oa_buffer.tail = gtt_offset;
0dd860cf 1706
a37f08a8 1707 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
0dd860cf 1708
d7965152
RB
1709 /* On Haswell we have to track which OASTATUS1 flags we've
1710 * already seen since they can't be cleared while periodic
1711 * sampling is enabled.
1712 */
8f8b1171 1713 stream->perf->gen7_latched_oastatus1 = 0;
d7965152
RB
1714
1715 /* NB: although the OA buffer will initially be allocated
1716 * zeroed via shmfs (and so this memset is redundant when
1717 * first allocating), we may re-init the OA buffer, either
1718 * when re-enabling a stream or in error/reset paths.
1719 *
1720 * The reason we clear the buffer for each re-init is for the
1721 * sanity check in gen7_append_oa_reports() that looks at the
 1722 * report-id field to make sure it's non-zero, which relies on
1723 * the assumption that new reports are being written to zeroed
1724 * memory...
1725 */
a37f08a8 1726 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
d7965152
RB
1727}
1728
a37f08a8 1729static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
19f81df2 1730{
52111c46 1731 struct intel_uncore *uncore = stream->uncore;
a37f08a8 1732 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
19f81df2
RB
1733 unsigned long flags;
1734
a37f08a8 1735 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
19f81df2 1736
8f8b1171
CW
1737 intel_uncore_write(uncore, GEN8_OASTATUS, 0);
1738 intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
a37f08a8 1739 stream->oa_buffer.head = gtt_offset;
19f81df2 1740
8f8b1171 1741 intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
19f81df2
RB
1742
1743 /*
1744 * PRM says:
1745 *
1746 * "This MMIO must be set before the OATAILPTR
1747 * register and after the OAHEADPTR register. This is
1748 * to enable proper functionality of the overflow
1749 * bit."
1750 */
8f8b1171 1751 intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
fe841686 1752 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
8f8b1171 1753 intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
19f81df2
RB
1754
1755 /* Mark that we need updated tail pointers to read from... */
d1df41eb
LL
1756 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1757 stream->oa_buffer.tail = gtt_offset;
19f81df2
RB
1758
1759 /*
1760 * Reset state used to recognise context switches, affecting which
1761 * reports we will forward to userspace while filtering for a single
1762 * context.
1763 */
a37f08a8 1764 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
19f81df2 1765
a37f08a8 1766 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
19f81df2
RB
1767
1768 /*
1769 * NB: although the OA buffer will initially be allocated
1770 * zeroed via shmfs (and so this memset is redundant when
1771 * first allocating), we may re-init the OA buffer, either
1772 * when re-enabling a stream or in error/reset paths.
1773 *
1774 * The reason we clear the buffer for each re-init is for the
1775 * sanity check in gen8_append_oa_reports() that looks at the
 1776 * reason field to make sure it's non-zero, which relies on
1777 * the assumption that new reports are being written to zeroed
1778 * memory...
1779 */
a37f08a8 1780 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
19f81df2
RB
1781}
1782
00a7f0d7
LL
1783static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
1784{
1785 struct intel_uncore *uncore = stream->uncore;
1786 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1787 unsigned long flags;
1788
1789 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1790
1791 intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
1792 intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
1793 gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
1794 stream->oa_buffer.head = gtt_offset;
1795
1796 /*
1797 * PRM says:
1798 *
1799 * "This MMIO must be set before the OATAILPTR
1800 * register and after the OAHEADPTR register. This is
1801 * to enable proper functionality of the overflow
1802 * bit."
1803 */
1804 intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
1805 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
1806 intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
1807 gtt_offset & GEN12_OAG_OATAILPTR_MASK);
1808
1809 /* Mark that we need updated tail pointers to read from... */
d1df41eb
LL
1810 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1811 stream->oa_buffer.tail = gtt_offset;
00a7f0d7
LL
1812
1813 /*
1814 * Reset state used to recognise context switches, affecting which
1815 * reports we will forward to userspace while filtering for a single
1816 * context.
1817 */
1818 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
1819
1820 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1821
1822 /*
1823 * NB: although the OA buffer will initially be allocated
1824 * zeroed via shmfs (and so this memset is redundant when
1825 * first allocating), we may re-init the OA buffer, either
1826 * when re-enabling a stream or in error/reset paths.
1827 *
1828 * The reason we clear the buffer for each re-init is for the
1829 * sanity check in gen8_append_oa_reports() that looks at the
 1830 * reason field to make sure it's non-zero, which relies on
1831 * the assumption that new reports are being written to zeroed
1832 * memory...
1833 */
1834 memset(stream->oa_buffer.vaddr, 0,
1835 stream->oa_buffer.vma->size);
00a7f0d7
LL
1836}
1837
a37f08a8 1838static int alloc_oa_buffer(struct i915_perf_stream *stream)
d7965152 1839{
a9f236d1 1840 struct drm_i915_private *i915 = stream->perf->i915;
cc85345d 1841 struct intel_gt *gt = stream->engine->gt;
d7965152
RB
1842 struct drm_i915_gem_object *bo;
1843 struct i915_vma *vma;
1844 int ret;
1845
a9f236d1 1846 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma))
d7965152
RB
1847 return -ENODEV;
1848
fe841686
JL
1849 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1850 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1851
8f8b1171 1852 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
d7965152 1853 if (IS_ERR(bo)) {
00376ccf 1854 drm_err(&i915->drm, "Failed to allocate OA buffer\n");
2850748e 1855 return PTR_ERR(bo);
d7965152
RB
1856 }
1857
a679f58d 1858 i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
d7965152
RB
1859
1860 /* PreHSW required 512K alignment, HSW requires 16M */
cc85345d 1861 vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
d7965152
RB
1862 if (IS_ERR(vma)) {
1863 ret = PTR_ERR(vma);
1864 goto err_unref;
1865 }
cc85345d
UNR
1866
1867 /*
1868 * PreHSW required 512K alignment.
1869 * HSW and onwards, align to requested size of OA buffer.
1870 */
1871 ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
1872 if (ret) {
1873 drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
1874 goto err_unref;
1875 }
1876
a37f08a8 1877 stream->oa_buffer.vma = vma;
d7965152 1878
a37f08a8 1879 stream->oa_buffer.vaddr =
ef4985ba 1880 i915_gem_object_pin_map_unlocked(bo, I915_MAP_WB);
a37f08a8
UNR
1881 if (IS_ERR(stream->oa_buffer.vaddr)) {
1882 ret = PTR_ERR(stream->oa_buffer.vaddr);
d7965152
RB
1883 goto err_unpin;
1884 }
1885
2850748e 1886 return 0;
d7965152
RB
1887
1888err_unpin:
1889 __i915_vma_unpin(vma);
1890
1891err_unref:
1892 i915_gem_object_put(bo);
1893
a37f08a8
UNR
1894 stream->oa_buffer.vaddr = NULL;
1895 stream->oa_buffer.vma = NULL;
d7965152 1896
d7965152
RB
1897 return ret;
1898}
1899
daed3e44
LL
1900static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
1901 bool save, i915_reg_t reg, u32 offset,
1902 u32 dword_count)
1903{
1904 u32 cmd;
1905 u32 d;
1906
1907 cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
e43ff99c 1908 cmd |= MI_SRM_LRM_GLOBAL_GTT;
651e7d48 1909 if (GRAPHICS_VER(stream->perf->i915) >= 8)
daed3e44
LL
1910 cmd++;
1911
1912 for (d = 0; d < dword_count; d++) {
1913 *cs++ = cmd;
1914 *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
a4b6e74c 1915 *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
daed3e44
LL
1916 *cs++ = 0;
1917 }
1918
1919 return cs;
1920}
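/*
 * Note: each CS_GPR is a 64-bit register, so the callers below pass
 * dword_count == 2 and space the save slots 8 bytes apart, while the
 * predicate result register is saved/restored as a single dword.
 */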
1921
1922static int alloc_noa_wait(struct i915_perf_stream *stream)
1923{
1924 struct drm_i915_private *i915 = stream->perf->i915;
cc85345d 1925 struct intel_gt *gt = stream->engine->gt;
daed3e44
LL
1926 struct drm_i915_gem_object *bo;
1927 struct i915_vma *vma;
1928 const u64 delay_ticks = 0xffffffffffffffff -
204129a2
MW
1929 intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
1930 atomic64_read(&stream->perf->noa_programming_delay));
daed3e44
LL
1931 const u32 base = stream->engine->mmio_base;
1932#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
1933 u32 *batch, *ts0, *cs, *jump;
ef4985ba 1934 struct i915_gem_ww_ctx ww;
daed3e44
LL
1935 int ret, i;
1936 enum {
1937 START_TS,
1938 NOW_TS,
1939 DELTA_TS,
1940 JUMP_PREDICATE,
1941 DELTA_TARGET,
1942 N_CS_GPR
1943 };
2d9da585
UNR
1944 i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ?
1945 MI_PREDICATE_RESULT_2_ENGINE(base) :
1946 MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
daed3e44 1947
a4b6e74c
UNR
1948 /*
1949 * gt->scratch was being used to save/restore the GPR registers, but on
1950 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
1951 * causes an engine hang. Instead allocate an additional page here to
 1952 * save/restore the GPR registers.
1953 */
1954 bo = i915_gem_object_create_internal(i915, 8192);
daed3e44 1955 if (IS_ERR(bo)) {
00376ccf
WK
1956 drm_err(&i915->drm,
1957 "Failed to allocate NOA wait batchbuffer\n");
daed3e44
LL
1958 return PTR_ERR(bo);
1959 }
1960
ef4985ba
ML
1961 i915_gem_ww_ctx_init(&ww, true);
1962retry:
1963 ret = i915_gem_object_lock(bo, &ww);
1964 if (ret)
1965 goto out_ww;
1966
daed3e44
LL
1967 /*
 1968 * We pin in GGTT because we will jump into this buffer, and
 1969 * multiple OA config BOs will have a jump to this address, so it
 1970 * needs to stay fixed during the lifetime of the i915/perf stream.
1971 */
cc85345d 1972 vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
daed3e44
LL
1973 if (IS_ERR(vma)) {
1974 ret = PTR_ERR(vma);
ef4985ba 1975 goto out_ww;
daed3e44
LL
1976 }
1977
cc85345d
UNR
1978 ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
1979 if (ret)
1980 goto out_ww;
1981
daed3e44
LL
1982 batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
1983 if (IS_ERR(batch)) {
1984 ret = PTR_ERR(batch);
1985 goto err_unpin;
1986 }
1987
a4b6e74c
UNR
1988 stream->noa_wait = vma;
1989
1990#define GPR_SAVE_OFFSET 4096
1991#define PREDICATE_SAVE_OFFSET 4160
1992
daed3e44
LL
1993 /* Save registers. */
1994 for (i = 0; i < N_CS_GPR; i++)
1995 cs = save_restore_register(
1996 stream, cs, true /* save */, CS_GPR(i),
a4b6e74c 1997 GPR_SAVE_OFFSET + 8 * i, 2);
daed3e44 1998 cs = save_restore_register(
2d9da585 1999 stream, cs, true /* save */, mi_predicate_result,
a4b6e74c 2000 PREDICATE_SAVE_OFFSET, 1);
daed3e44
LL
2001
2002 /* First timestamp snapshot location. */
2003 ts0 = cs;
2004
2005 /*
2006 * Initial snapshot of the timestamp register to implement the wait.
 2007 * We work with 32-bit values, so clear out the top 32 bits of the
 2008 * register because the ALU operates on 64 bits.
2009 */
2010 *cs++ = MI_LOAD_REGISTER_IMM(1);
2011 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
2012 *cs++ = 0;
2013 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
2014 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
2015 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
2016
2017 /*
2018 * This is the location we're going to jump back into until the
2019 * required amount of time has passed.
2020 */
2021 jump = cs;
2022
2023 /*
 2024 * Take another snapshot of the timestamp register. Take care to clear
 2025 * the top 32 bits of CS_GPR(1) as we're using it for other
2026 * operations below.
2027 */
2028 *cs++ = MI_LOAD_REGISTER_IMM(1);
2029 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
2030 *cs++ = 0;
2031 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
2032 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
2033 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
2034
2035 /*
2036 * Do a diff between the 2 timestamps and store the result back into
2037 * CS_GPR(1).
2038 */
2039 *cs++ = MI_MATH(5);
2040 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
2041 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
2042 *cs++ = MI_MATH_SUB;
2043 *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
2044 *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
2045
2046 /*
 2047 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
 2048 * timestamp has rolled over 32 bits) into the predicate register
2049 * to be used for the predicated jump.
2050 */
2051 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
2052 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
2d9da585
UNR
2053 *cs++ = i915_mmio_reg_offset(mi_predicate_result);
2054
2055 if (HAS_MI_SET_PREDICATE(i915))
2056 *cs++ = MI_SET_PREDICATE | 1;
daed3e44
LL
2057
2058 /* Restart from the beginning if we had timestamps roll over. */
651e7d48 2059 *cs++ = (GRAPHICS_VER(i915) < 8 ?
daed3e44
LL
2060 MI_BATCH_BUFFER_START :
2061 MI_BATCH_BUFFER_START_GEN8) |
2062 MI_BATCH_PREDICATE;
2063 *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
2064 *cs++ = 0;
2065
2d9da585
UNR
2066 if (HAS_MI_SET_PREDICATE(i915))
2067 *cs++ = MI_SET_PREDICATE;
2068
daed3e44
LL
2069 /*
 2070 * Now add the diff between the two previous timestamps to:
 2071 * ((1 << 64) - 1) - delay_ns
2072 *
2073 * When the Carry Flag contains 1 this means the elapsed time is
2074 * longer than the expected delay, and we can exit the wait loop.
2075 */
2076 *cs++ = MI_LOAD_REGISTER_IMM(2);
2077 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
2078 *cs++ = lower_32_bits(delay_ticks);
2079 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
2080 *cs++ = upper_32_bits(delay_ticks);
2081
2082 *cs++ = MI_MATH(4);
2083 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
2084 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
2085 *cs++ = MI_MATH_ADD;
2086 *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
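	/*
	 * Since DELTA_TARGET holds ((1 << 64) - 1) - delay, the ADD above sets
	 * the carry flag exactly when DELTA_TS exceeds the requested delay;
	 * STOREINV then leaves JUMP_PREDICATE at 0 once the wait has elapsed,
	 * so the predicated jump below falls through.
	 */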
2087
dd590f68
LL
2088 *cs++ = MI_ARB_CHECK;
2089
daed3e44
LL
2090 /*
2091 * Transfer the result into the predicate register to be used for the
2092 * predicated jump.
2093 */
2094 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
2095 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
2d9da585
UNR
2096 *cs++ = i915_mmio_reg_offset(mi_predicate_result);
2097
2098 if (HAS_MI_SET_PREDICATE(i915))
2099 *cs++ = MI_SET_PREDICATE | 1;
daed3e44
LL
2100
2101 /* Predicate the jump. */
651e7d48 2102 *cs++ = (GRAPHICS_VER(i915) < 8 ?
daed3e44
LL
2103 MI_BATCH_BUFFER_START :
2104 MI_BATCH_BUFFER_START_GEN8) |
2105 MI_BATCH_PREDICATE;
2106 *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
2107 *cs++ = 0;
2108
2d9da585
UNR
2109 if (HAS_MI_SET_PREDICATE(i915))
2110 *cs++ = MI_SET_PREDICATE;
2111
daed3e44
LL
2112 /* Restore registers. */
2113 for (i = 0; i < N_CS_GPR; i++)
2114 cs = save_restore_register(
2115 stream, cs, false /* restore */, CS_GPR(i),
a4b6e74c 2116 GPR_SAVE_OFFSET + 8 * i, 2);
daed3e44 2117 cs = save_restore_register(
2d9da585 2118 stream, cs, false /* restore */, mi_predicate_result,
a4b6e74c 2119 PREDICATE_SAVE_OFFSET, 1);
daed3e44
LL
2120
2121 /* And return to the ring. */
2122 *cs++ = MI_BATCH_BUFFER_END;
2123
2124 GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
2125
2126 i915_gem_object_flush_map(bo);
89d19b2b 2127 __i915_gem_object_release_map(bo);
daed3e44 2128
ef4985ba 2129 goto out_ww;
daed3e44
LL
2130
2131err_unpin:
15d0ace1 2132 i915_vma_unpin_and_release(&vma, 0);
ef4985ba
ML
2133out_ww:
2134 if (ret == -EDEADLK) {
2135 ret = i915_gem_ww_ctx_backoff(&ww);
2136 if (!ret)
2137 goto retry;
2138 }
2139 i915_gem_ww_ctx_fini(&ww);
2140 if (ret)
2141 i915_gem_object_put(bo);
daed3e44
LL
2142 return ret;
2143}
2144
15d0ace1
LL
2145static u32 *write_cs_mi_lri(u32 *cs,
2146 const struct i915_oa_reg *reg_data,
2147 u32 n_regs)
d7965152 2148{
701f8231 2149 u32 i;
d7965152
RB
2150
2151 for (i = 0; i < n_regs; i++) {
15d0ace1
LL
2152 if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
2153 u32 n_lri = min_t(u32,
2154 n_regs - i,
2155 MI_LOAD_REGISTER_IMM_MAX_REGS);
d7965152 2156
15d0ace1
LL
2157 *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
2158 }
2159 *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
2160 *cs++ = reg_data[i].value;
d7965152 2161 }
15d0ace1
LL
2162
2163 return cs;
d7965152
RB
2164}
2165
15d0ace1 2166static int num_lri_dwords(int num_regs)
d7965152 2167{
15d0ace1
LL
2168 int count = 0;
2169
2170 if (num_regs > 0) {
2171 count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
2172 count += num_regs * 2;
2173 }
2174
2175 return count;
2176}
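/*
 * e.g. assuming MI_LOAD_REGISTER_IMM_MAX_REGS is 126, 300 registers would
 * need DIV_ROUND_UP(300, 126) = 3 MI_LRI headers plus 300 * 2 = 600
 * offset/value dwords, i.e. 603 dwords in total.
 */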
2177
2178static struct i915_oa_config_bo *
2179alloc_oa_config_buffer(struct i915_perf_stream *stream,
2180 struct i915_oa_config *oa_config)
2181{
2182 struct drm_i915_gem_object *obj;
2183 struct i915_oa_config_bo *oa_bo;
ef4985ba 2184 struct i915_gem_ww_ctx ww;
15d0ace1
LL
2185 size_t config_length = 0;
2186 u32 *cs;
2187 int err;
2188
2189 oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
2190 if (!oa_bo)
2191 return ERR_PTR(-ENOMEM);
2192
2193 config_length += num_lri_dwords(oa_config->mux_regs_len);
2194 config_length += num_lri_dwords(oa_config->b_counter_regs_len);
2195 config_length += num_lri_dwords(oa_config->flex_regs_len);
93937659 2196 config_length += 3; /* MI_BATCH_BUFFER_START */
15d0ace1
LL
2197 config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
2198
2199 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
2200 if (IS_ERR(obj)) {
2201 err = PTR_ERR(obj);
2202 goto err_free;
2203 }
2204
ef4985ba
ML
2205 i915_gem_ww_ctx_init(&ww, true);
2206retry:
2207 err = i915_gem_object_lock(obj, &ww);
2208 if (err)
2209 goto out_ww;
2210
15d0ace1
LL
2211 cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
2212 if (IS_ERR(cs)) {
2213 err = PTR_ERR(cs);
ef4985ba 2214 goto out_ww;
15d0ace1
LL
2215 }
2216
2217 cs = write_cs_mi_lri(cs,
2218 oa_config->mux_regs,
2219 oa_config->mux_regs_len);
2220 cs = write_cs_mi_lri(cs,
2221 oa_config->b_counter_regs,
2222 oa_config->b_counter_regs_len);
2223 cs = write_cs_mi_lri(cs,
2224 oa_config->flex_regs,
2225 oa_config->flex_regs_len);
2226
93937659 2227 /* Jump into the active wait. */
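	/*
	 * The NOA mux programming above needs some time to settle, so each
	 * config buffer chains into the shared noa_wait batch, which busy
	 * waits for noa_programming_delay before returning to the ring.
	 */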
651e7d48 2228 *cs++ = (GRAPHICS_VER(stream->perf->i915) < 8 ?
93937659
LL
2229 MI_BATCH_BUFFER_START :
2230 MI_BATCH_BUFFER_START_GEN8);
2231 *cs++ = i915_ggtt_offset(stream->noa_wait);
2232 *cs++ = 0;
15d0ace1
LL
2233
2234 i915_gem_object_flush_map(obj);
89d19b2b 2235 __i915_gem_object_release_map(obj);
15d0ace1
LL
2236
2237 oa_bo->vma = i915_vma_instance(obj,
2238 &stream->engine->gt->ggtt->vm,
2239 NULL);
2240 if (IS_ERR(oa_bo->vma)) {
2241 err = PTR_ERR(oa_bo->vma);
ef4985ba 2242 goto out_ww;
15d0ace1
LL
2243 }
2244
2245 oa_bo->oa_config = i915_oa_config_get(oa_config);
2246 llist_add(&oa_bo->node, &stream->oa_config_bos);
2247
ef4985ba
ML
2248out_ww:
2249 if (err == -EDEADLK) {
2250 err = i915_gem_ww_ctx_backoff(&ww);
2251 if (!err)
2252 goto retry;
2253 }
2254 i915_gem_ww_ctx_fini(&ww);
15d0ace1 2255
ef4985ba
ML
2256 if (err)
2257 i915_gem_object_put(obj);
15d0ace1 2258err_free:
ef4985ba
ML
2259 if (err) {
2260 kfree(oa_bo);
2261 return ERR_PTR(err);
2262 }
2263 return oa_bo;
15d0ace1
LL
2264}
2265
2266static struct i915_vma *
2267get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
2268{
2269 struct i915_oa_config_bo *oa_bo;
2270
14bfcd3e 2271 /*
15d0ace1
LL
2272 * Look for the buffer in the already allocated BOs attached
2273 * to the stream.
d7965152 2274 */
15d0ace1
LL
2275 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
2276 if (oa_bo->oa_config == oa_config &&
2277 memcmp(oa_bo->oa_config->uuid,
2278 oa_config->uuid,
2279 sizeof(oa_config->uuid)) == 0)
2280 goto out;
2281 }
2282
2283 oa_bo = alloc_oa_config_buffer(stream, oa_config);
2284 if (IS_ERR(oa_bo))
2285 return ERR_CAST(oa_bo);
2286
2287out:
2288 return i915_vma_get(oa_bo->vma);
2289}
2290
d7d50f80 2291static int
4b4e973d
CW
2292emit_oa_config(struct i915_perf_stream *stream,
2293 struct i915_oa_config *oa_config,
d7d50f80
CW
2294 struct intel_context *ce,
2295 struct i915_active *active)
15d0ace1
LL
2296{
2297 struct i915_request *rq;
2298 struct i915_vma *vma;
f00ecc2e 2299 struct i915_gem_ww_ctx ww;
15d0ace1
LL
2300 int err;
2301
8814c6d0 2302 vma = get_oa_vma(stream, oa_config);
15d0ace1 2303 if (IS_ERR(vma))
d7d50f80 2304 return PTR_ERR(vma);
15d0ace1 2305
f00ecc2e
ML
2306 i915_gem_ww_ctx_init(&ww, true);
2307retry:
2308 err = i915_gem_object_lock(vma->obj, &ww);
15d0ace1 2309 if (err)
f00ecc2e
ML
2310 goto err;
2311
2312 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
2313 if (err)
2314 goto err;
15d0ace1 2315
de5825be 2316 intel_engine_pm_get(ce->engine);
15d0ace1 2317 rq = i915_request_create(ce);
de5825be 2318 intel_engine_pm_put(ce->engine);
15d0ace1
LL
2319 if (IS_ERR(rq)) {
2320 err = PTR_ERR(rq);
2321 goto err_vma_unpin;
2322 }
2323
d7d50f80
CW
2324 if (!IS_ERR_OR_NULL(active)) {
2325 /* After all individual context modifications */
2326 err = i915_request_await_active(rq, active,
442dbc5c 2327 I915_ACTIVE_AWAIT_ACTIVE);
d7d50f80
CW
2328 if (err)
2329 goto err_add_request;
2330
2331 err = i915_active_add_request(active, rq);
2332 if (err)
2333 goto err_add_request;
2334 }
2335
2a76fc89 2336 err = i915_vma_move_to_active(vma, rq, 0);
15d0ace1
LL
2337 if (err)
2338 goto err_add_request;
2339
2340 err = rq->engine->emit_bb_start(rq,
8e4ee5e8 2341 i915_vma_offset(vma), 0,
15d0ace1 2342 I915_DISPATCH_SECURE);
4b4e973d
CW
2343 if (err)
2344 goto err_add_request;
2345
15d0ace1
LL
2346err_add_request:
2347 i915_request_add(rq);
2348err_vma_unpin:
2349 i915_vma_unpin(vma);
f00ecc2e
ML
2350err:
2351 if (err == -EDEADLK) {
2352 err = i915_gem_ww_ctx_backoff(&ww);
2353 if (!err)
2354 goto retry;
2355 }
2356
2357 i915_gem_ww_ctx_fini(&ww);
15d0ace1 2358 i915_vma_put(vma);
d7d50f80 2359 return err;
14bfcd3e
LL
2360}
2361
5f5c382e
CW
2362static struct intel_context *oa_context(struct i915_perf_stream *stream)
2363{
2364 return stream->pinned_ctx ?: stream->engine->kernel_context;
2365}
2366
d7d50f80
CW
2367static int
2368hsw_enable_metric_set(struct i915_perf_stream *stream,
2369 struct i915_active *active)
14bfcd3e 2370{
52111c46 2371 struct intel_uncore *uncore = stream->uncore;
14bfcd3e
LL
2372
2373 /*
2374 * PRM:
2375 *
2376 * OA unit is using “crclk” for its functionality. When trunk
2377 * level clock gating takes place, OA clock would be gated,
2378 * unable to count the events from non-render clock domain.
2379 * Render clock gating must be disabled when OA is enabled to
2380 * count the events from non-render domain. Unit level clock
2381 * gating for RCS should also be disabled.
2382 */
8f8b1171
CW
2383 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2384 GEN7_DOP_CLOCK_GATE_ENABLE, 0);
2385 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2386 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
14bfcd3e 2387
d7d50f80
CW
2388 return emit_oa_config(stream,
2389 stream->oa_config, oa_context(stream),
2390 active);
d7965152
RB
2391}
2392
a37f08a8 2393static void hsw_disable_metric_set(struct i915_perf_stream *stream)
d7965152 2394{
52111c46 2395 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2396
8f8b1171
CW
2397 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2398 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
2399 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2400 0, GEN7_DOP_CLOCK_GATE_ENABLE);
d7965152 2401
8f8b1171 2402 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
d7965152
RB
2403}
2404
a9877da2
CW
2405static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
2406 i915_reg_t reg)
2407{
2408 u32 mmio = i915_mmio_reg_offset(reg);
2409 int i;
2410
2411 /*
2412 * This arbitrary default will select the 'EU FPU0 Pipeline
2413 * Active' event. In the future it's anticipated that there
2414 * will be an explicit 'No Event' we can select, but not yet...
2415 */
2416 if (!oa_config)
2417 return 0;
2418
2419 for (i = 0; i < oa_config->flex_regs_len; i++) {
2420 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
2421 return oa_config->flex_regs[i].value;
2422 }
2423
2424 return 0;
2425}
19f81df2
RB
2426/*
2427 * NB: It must always remain pointer safe to run this even if the OA unit
2428 * has been disabled.
2429 *
2430 * It's fine to put out-of-date values into these per-context registers
2431 * in the case that the OA unit has been disabled.
2432 */
b146e5ef 2433static void
7dc56af5
CW
2434gen8_update_reg_state_unlocked(const struct intel_context *ce,
2435 const struct i915_perf_stream *stream)
19f81df2 2436{
8f8b1171
CW
2437 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2438 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
19f81df2 2439 /* The MMIO offsets for Flex EU registers aren't contiguous */
3a5d604f 2440 static const i915_reg_t flex_regs[] = {
35ab4fd2
LL
2441 EU_PERF_CNTL0,
2442 EU_PERF_CNTL1,
2443 EU_PERF_CNTL2,
2444 EU_PERF_CNTL3,
2445 EU_PERF_CNTL4,
2446 EU_PERF_CNTL5,
2447 EU_PERF_CNTL6,
19f81df2 2448 };
7dc56af5 2449 u32 *reg_state = ce->lrc_reg_state;
19f81df2
RB
2450 int i;
2451
ccdeed49
UNR
2452 reg_state[ctx_oactxctrl + 1] =
2453 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2454 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2455 GEN8_OA_COUNTER_RESUME;
19f81df2 2456
ccdeed49 2457 for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
7dc56af5
CW
2458 reg_state[ctx_flexeu0 + i * 2 + 1] =
2459 oa_config_flex_reg(stream->oa_config, flex_regs[i]);
19f81df2
RB
2460}
2461
a9877da2
CW
2462struct flex {
2463 i915_reg_t reg;
2464 u32 offset;
2465 u32 value;
2466};
2467
2468static int
2469gen8_store_flex(struct i915_request *rq,
2470 struct intel_context *ce,
2471 const struct flex *flex, unsigned int count)
2472{
2473 u32 offset;
2474 u32 *cs;
2475
2476 cs = intel_ring_begin(rq, 4 * count);
2477 if (IS_ERR(cs))
2478 return PTR_ERR(cs);
2479
b4892e44 2480 offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET;
a9877da2
CW
2481 do {
2482 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
7dc56af5 2483 *cs++ = offset + flex->offset * sizeof(u32);
a9877da2
CW
2484 *cs++ = 0;
2485 *cs++ = flex->value;
2486 } while (flex++, --count);
2487
2488 intel_ring_advance(rq, cs);
2489
2490 return 0;
2491}
2492
2493static int
2494gen8_load_flex(struct i915_request *rq,
2495 struct intel_context *ce,
2496 const struct flex *flex, unsigned int count)
2497{
2498 u32 *cs;
2499
2500 GEM_BUG_ON(!count || count > 63);
2501
2502 cs = intel_ring_begin(rq, 2 * count + 2);
2503 if (IS_ERR(cs))
2504 return PTR_ERR(cs);
2505
2506 *cs++ = MI_LOAD_REGISTER_IMM(count);
2507 do {
2508 *cs++ = i915_mmio_reg_offset(flex->reg);
2509 *cs++ = flex->value;
2510 } while (flex++, --count);
2511 *cs++ = MI_NOOP;
2512
2513 intel_ring_advance(rq, cs);
2514
2515 return 0;
2516}
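/*
 * gen8_store_flex() writes the values into another context's saved image
 * with MI_STORE_DWORD_IMM (see gen8_modify_context()), whereas
 * gen8_load_flex() programs the registers directly with MI_LRI from a
 * request running on the target context itself (see gen8_modify_self()).
 */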
2517
2518static int gen8_modify_context(struct intel_context *ce,
2519 const struct flex *flex, unsigned int count)
2520{
2521 struct i915_request *rq;
2522 int err;
2523
de5825be 2524 rq = intel_engine_create_kernel_request(ce->engine);
a9877da2
CW
2525 if (IS_ERR(rq))
2526 return PTR_ERR(rq);
2527
2528 /* Serialise with the remote context */
2529 err = intel_context_prepare_remote_request(ce, rq);
2530 if (err == 0)
2531 err = gen8_store_flex(rq, ce, flex, count);
2532
2533 i915_request_add(rq);
2534 return err;
2535}
2536
d7d50f80
CW
2537static int
2538gen8_modify_self(struct intel_context *ce,
2539 const struct flex *flex, unsigned int count,
2540 struct i915_active *active)
a9877da2
CW
2541{
2542 struct i915_request *rq;
2543 int err;
2544
d236e2ac 2545 intel_engine_pm_get(ce->engine);
a9877da2 2546 rq = i915_request_create(ce);
d236e2ac 2547 intel_engine_pm_put(ce->engine);
a9877da2
CW
2548 if (IS_ERR(rq))
2549 return PTR_ERR(rq);
2550
d7d50f80
CW
2551 if (!IS_ERR_OR_NULL(active)) {
2552 err = i915_active_add_request(active, rq);
2553 if (err)
2554 goto err_add_request;
2555 }
2556
a9877da2 2557 err = gen8_load_flex(rq, ce, flex, count);
d7d50f80
CW
2558 if (err)
2559 goto err_add_request;
a9877da2 2560
d7d50f80 2561err_add_request:
a9877da2
CW
2562 i915_request_add(rq);
2563 return err;
2564}
2565
5cca5038
CW
2566static int gen8_configure_context(struct i915_gem_context *ctx,
2567 struct flex *flex, unsigned int count)
2568{
2569 struct i915_gem_engines_iter it;
2570 struct intel_context *ce;
2571 int err = 0;
2572
2573 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
2574 GEM_BUG_ON(ce == ce->engine->kernel_context);
2575
2576 if (ce->engine->class != RENDER_CLASS)
2577 continue;
2578
feed5c7b
CW
2579 /* Otherwise OA settings will be set upon first use */
2580 if (!intel_context_pin_if_active(ce))
2581 continue;
5cca5038 2582
0b6613c6 2583 flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu);
feed5c7b 2584 err = gen8_modify_context(ce, flex, count);
5cca5038 2585
feed5c7b 2586 intel_context_unpin(ce);
5cca5038
CW
2587 if (err)
2588 break;
2589 }
2590 i915_gem_context_unlock_engines(ctx);
2591
2592 return err;
2593}
2594
d7d50f80
CW
2595static int gen12_configure_oar_context(struct i915_perf_stream *stream,
2596 struct i915_active *active)
00a7f0d7 2597{
ccdeed49
UNR
2598 int err;
2599 struct intel_context *ce = stream->pinned_ctx;
90981da6 2600 u32 format = stream->oa_buffer.format->format;
a5c3a3cb 2601 u32 offset = stream->perf->ctx_oactxctrl_offset;
ccdeed49
UNR
2602 struct flex regs_context[] = {
2603 {
2604 GEN8_OACTXCONTROL,
a5c3a3cb 2605 offset + 1,
d7d50f80 2606 active ? GEN8_OA_COUNTER_RESUME : 0,
ccdeed49
UNR
2607 },
2608 };
2609 /* Offsets in regs_lri are not used since this configuration is only
2610 * applied using LRI. Initialize the correct offsets for posterity.
2611 */
2612#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
2613 struct flex regs_lri[] = {
2614 {
2615 GEN12_OAR_OACONTROL,
2616 GEN12_OAR_OACONTROL_OFFSET + 1,
2617 (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
d7d50f80 2618 (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
ccdeed49
UNR
2619 },
2620 {
2621 RING_CONTEXT_CONTROL(ce->engine->mmio_base),
2622 CTX_CONTEXT_CONTROL,
2623 _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
d7d50f80 2624 active ?
ccdeed49
UNR
2625 GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
2626 0)
2627 },
2628 };
00a7f0d7 2629
a5c3a3cb 2630 /* Modify the context image of pinned context with regs_context */
ccdeed49
UNR
2631 err = intel_context_lock_pinned(ce);
2632 if (err)
2633 return err;
00a7f0d7 2634
a5c3a3cb
UNR
2635 err = gen8_modify_context(ce, regs_context,
2636 ARRAY_SIZE(regs_context));
ccdeed49
UNR
2637 intel_context_unlock_pinned(ce);
2638 if (err)
2639 return err;
00a7f0d7 2640
ccdeed49 2641 /* Apply regs_lri using LRI with pinned context */
d7d50f80 2642 return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
00a7f0d7
LL
2643}
2644
19f81df2
RB
2645/*
2646 * Manages updating the per-context aspects of the OA stream
2647 * configuration across all contexts.
2648 *
2649 * The awkward consideration here is that OACTXCONTROL controls the
2650 * exponent for periodic sampling which is primarily used for system
2651 * wide profiling where we'd like a consistent sampling period even in
2652 * the face of context switches.
2653 *
2654 * Our approach of updating the register state context (as opposed to
2655 * say using a workaround batch buffer) ensures that the hardware
2656 * won't automatically reload an out-of-date timer exponent even
2657 * transiently before a WA BB could be parsed.
2658 *
2659 * This function needs to:
2660 * - Ensure the currently running context's per-context OA state is
2661 * updated
2662 * - Ensure that all existing contexts will have the correct per-context
2663 * OA state if they are scheduled for use.
2664 * - Ensure any new contexts will be initialized with the correct
2665 * per-context OA state.
2666 *
2667 * Note: it's only the RCS/Render context that has any OA state.
ccdeed49 2668 * Note: the first flex register passed must always be R_PWR_CLK_STATE
19f81df2 2669 */
d7d50f80
CW
2670static int
2671oa_configure_all_contexts(struct i915_perf_stream *stream,
2672 struct flex *regs,
2673 size_t num_regs,
2674 struct i915_active *active)
19f81df2 2675{
8f8b1171 2676 struct drm_i915_private *i915 = stream->perf->i915;
a9877da2 2677 struct intel_engine_cs *engine;
9677a9f3 2678 struct intel_gt *gt = stream->engine->gt;
a4e7ccda 2679 struct i915_gem_context *ctx, *cn;
ccdeed49 2680 int err;
a9877da2 2681
9677a9f3 2682 lockdep_assert_held(&gt->perf.lock);
19f81df2 2683
19f81df2
RB
2684 /*
2685 * The OA register config is setup through the context image. This image
2686 * might be written to by the GPU on context switch (in particular on
2687 * lite-restore). This means we can't safely update a context's image,
2688 * if this context is scheduled/submitted to run on the GPU.
2689 *
2690 * We could emit the OA register config through the batch buffer but
2691 * this might leave small interval of time where the OA unit is
2692 * configured at an invalid sampling period.
2693 *
a9877da2
CW
2694 * Note that since we emit all requests from a single ring, there
2695 * is still an implicit global barrier here that may cause a high
2696 * priority context to wait for an otherwise independent low priority
2697 * context. Contexts idle at the time of reconfiguration are not
2698 * trapped behind the barrier.
19f81df2 2699 */
a4e7ccda
CW
2700 spin_lock(&i915->gem.contexts.lock);
2701 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
a4e7ccda
CW
2702 if (!kref_get_unless_zero(&ctx->ref))
2703 continue;
2704
2705 spin_unlock(&i915->gem.contexts.lock);
2706
ccdeed49 2707 err = gen8_configure_context(ctx, regs, num_regs);
a4e7ccda
CW
2708 if (err) {
2709 i915_gem_context_put(ctx);
a9877da2 2710 return err;
a4e7ccda
CW
2711 }
2712
2713 spin_lock(&i915->gem.contexts.lock);
2714 list_safe_reset_next(ctx, cn, link);
2715 i915_gem_context_put(ctx);
19f81df2 2716 }
a4e7ccda 2717 spin_unlock(&i915->gem.contexts.lock);
19f81df2 2718
722f3de3 2719 /*
a9877da2
CW
2720 * After updating all other contexts, we need to modify ourselves.
2721 * If we don't modify the kernel_context, we do not get events while
2722 * idle.
722f3de3 2723 */
750e76b4 2724 for_each_uabi_engine(engine, i915) {
a9877da2 2725 struct intel_context *ce = engine->kernel_context;
722f3de3 2726
a9877da2
CW
2727 if (engine->class != RENDER_CLASS)
2728 continue;
2729
0b6613c6 2730 regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);
a9877da2 2731
d7d50f80 2732 err = gen8_modify_self(ce, regs, num_regs, active);
a9877da2
CW
2733 if (err)
2734 return err;
2735 }
722f3de3
TU
2736
2737 return 0;
19f81df2
RB
2738}
2739
d7d50f80
CW
2740static int
2741gen12_configure_all_contexts(struct i915_perf_stream *stream,
2742 const struct i915_oa_config *oa_config,
2743 struct i915_active *active)
ccdeed49
UNR
2744{
2745 struct flex regs[] = {
2746 {
7d296f36 2747 GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
ccdeed49
UNR
2748 CTX_R_PWR_CLK_STATE,
2749 },
2750 };
2751
d7d50f80
CW
2752 return oa_configure_all_contexts(stream,
2753 regs, ARRAY_SIZE(regs),
2754 active);
ccdeed49
UNR
2755}
2756
d7d50f80
CW
2757static int
2758lrc_configure_all_contexts(struct i915_perf_stream *stream,
2759 const struct i915_oa_config *oa_config,
2760 struct i915_active *active)
ccdeed49 2761{
a5c3a3cb 2762 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
ccdeed49
UNR
2763 /* The MMIO offsets for Flex EU registers aren't contiguous */
2764 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2765#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2766 struct flex regs[] = {
2767 {
7d296f36 2768 GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
ccdeed49
UNR
2769 CTX_R_PWR_CLK_STATE,
2770 },
2771 {
2772 GEN8_OACTXCONTROL,
a5c3a3cb 2773 ctx_oactxctrl + 1,
ccdeed49
UNR
2774 },
2775 { EU_PERF_CNTL0, ctx_flexeuN(0) },
2776 { EU_PERF_CNTL1, ctx_flexeuN(1) },
2777 { EU_PERF_CNTL2, ctx_flexeuN(2) },
2778 { EU_PERF_CNTL3, ctx_flexeuN(3) },
2779 { EU_PERF_CNTL4, ctx_flexeuN(4) },
2780 { EU_PERF_CNTL5, ctx_flexeuN(5) },
2781 { EU_PERF_CNTL6, ctx_flexeuN(6) },
2782 };
2783#undef ctx_flexeuN
2784 int i;
2785
2786 regs[1].value =
2787 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2788 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2789 GEN8_OA_COUNTER_RESUME;
2790
2791 for (i = 2; i < ARRAY_SIZE(regs); i++)
2792 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2793
d7d50f80
CW
2794 return oa_configure_all_contexts(stream,
2795 regs, ARRAY_SIZE(regs),
2796 active);
ccdeed49
UNR
2797}
2798
d7d50f80
CW
2799static int
2800gen8_enable_metric_set(struct i915_perf_stream *stream,
2801 struct i915_active *active)
19f81df2 2802{
52111c46 2803 struct intel_uncore *uncore = stream->uncore;
8814c6d0 2804 struct i915_oa_config *oa_config = stream->oa_config;
701f8231 2805 int ret;
19f81df2
RB
2806
2807 /*
2808 * We disable slice/unslice clock ratio change reports on SKL since
2809 * they are too noisy. The HW generates a lot of redundant reports
 2810 * where the ratio hasn't really changed, causing a lot of redundant
 2811 * work for processes and increasing the chances we'll hit buffer
2812 * overruns.
2813 *
 2814 * Although we don't currently use the 'disable overrun' OABUFFER
 2815 * feature, it's worth noting that clock ratio reports have to be
 2816 * disabled before considering using that feature, since the HW doesn't
2817 * correctly block these reports.
2818 *
2819 * Currently none of the high-level metrics we have depend on knowing
2820 * this ratio to normalize.
2821 *
2822 * Note: This register is not power context saved and restored, but
2823 * that's OK considering that we disable RC6 while the OA unit is
2824 * enabled.
2825 *
2826 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
2827 * be read back from automatically triggered reports, as part of the
2828 * RPT_ID field.
2829 */
651e7d48 2830 if (IS_GRAPHICS_VER(stream->perf->i915, 9, 11)) {
8f8b1171
CW
2831 intel_uncore_write(uncore, GEN8_OA_DEBUG,
2832 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2833 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
19f81df2
RB
2834 }
2835
2836 /*
2837 * Update all contexts prior writing the mux configurations as we need
2838 * to make sure all slices/subslices are ON before writing to NOA
2839 * registers.
2840 */
d7d50f80 2841 ret = lrc_configure_all_contexts(stream, oa_config, active);
00a7f0d7 2842 if (ret)
d7d50f80 2843 return ret;
00a7f0d7 2844
d7d50f80
CW
2845 return emit_oa_config(stream,
2846 stream->oa_config, oa_context(stream),
2847 active);
00a7f0d7
LL
2848}
2849
9278bbb6
CW
2850static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
2851{
2852 return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
2853 (stream->sample_flags & SAMPLE_OA_REPORT) ?
2854 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
2855}
2856
d7d50f80
CW
2857static int
2858gen12_enable_metric_set(struct i915_perf_stream *stream,
2859 struct i915_active *active)
00a7f0d7 2860{
cceb0849 2861 struct drm_i915_private *i915 = stream->perf->i915;
00a7f0d7
LL
2862 struct intel_uncore *uncore = stream->uncore;
2863 struct i915_oa_config *oa_config = stream->oa_config;
2864 bool periodic = stream->periodic;
2865 u32 period_exponent = stream->period_exponent;
cceb0849 2866 u32 sqcnt1;
00a7f0d7
LL
2867 int ret;
2868
ed6b25aa
UNR
2869 /*
2870 * Wa_1508761755:xehpsdv, dg2
2871 * EU NOA signals behave incorrectly if EU clock gating is enabled.
2872 * Disable thread stall DOP gating and EU DOP gating.
2873 */
2874 if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
2875 intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
2876 _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
2877 intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
2878 _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
2879 }
2880
00a7f0d7
LL
2881 intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
2882 /* Disable clk ratio reports, like previous Gens. */
2883 _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2884 GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
2885 /*
9278bbb6
CW
2886 * If the user didn't require OA reports, instruct
2887 * the hardware not to emit ctx switch reports.
00a7f0d7 2888 */
9278bbb6 2889 oag_report_ctx_switches(stream));
00a7f0d7
LL
2890
2891 intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
2892 (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
2893 GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
2894 (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
2895 : 0);
2896
cceb0849
UNR
2897 /*
 2898 * Initialize the Super Queue Internal Cnt Register.
 2899 * Set PMON Enable in order to collect valid metrics.
 2900 * Enable bytes per clock reporting in OA for XEHPSDV onward.
2901 */
2902 sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
2903 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
2904
2905 intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
2906
00a7f0d7
LL
2907 /*
2908 * Update all contexts prior writing the mux configurations as we need
2909 * to make sure all slices/subslices are ON before writing to NOA
2910 * registers.
2911 */
d7d50f80 2912 ret = gen12_configure_all_contexts(stream, oa_config, active);
19f81df2 2913 if (ret)
d7d50f80 2914 return ret;
19f81df2 2915
00a7f0d7
LL
2916 /*
2917 * For Gen12, performance counters are context
2918 * saved/restored. Only enable it for the context that
2919 * requested this.
2920 */
2921 if (stream->ctx) {
d7d50f80 2922 ret = gen12_configure_oar_context(stream, active);
00a7f0d7 2923 if (ret)
d7d50f80 2924 return ret;
00a7f0d7
LL
2925 }
2926
d7d50f80
CW
2927 return emit_oa_config(stream,
2928 stream->oa_config, oa_context(stream),
2929 active);
19f81df2
RB
2930}
2931
a37f08a8 2932static void gen8_disable_metric_set(struct i915_perf_stream *stream)
19f81df2 2933{
52111c46 2934 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2935
19f81df2 2936 /* Reset all contexts' slices/subslices configurations. */
d7d50f80 2937 lrc_configure_all_contexts(stream, NULL, NULL);
28964cf2 2938
8f8b1171 2939 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
19f81df2
RB
2940}
2941
5dae69a9 2942static void gen11_disable_metric_set(struct i915_perf_stream *stream)
95690a02 2943{
52111c46 2944 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2945
95690a02 2946 /* Reset all contexts' slices/subslices configurations. */
d7d50f80 2947 lrc_configure_all_contexts(stream, NULL, NULL);
00a7f0d7
LL
2948
2949 /* Make sure we disable noa to save power. */
2950 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
2951}
2952
2953static void gen12_disable_metric_set(struct i915_perf_stream *stream)
2954{
2955 struct intel_uncore *uncore = stream->uncore;
cceb0849
UNR
2956 struct drm_i915_private *i915 = stream->perf->i915;
2957 u32 sqcnt1;
00a7f0d7 2958
ed6b25aa
UNR
2959 /*
2960 * Wa_1508761755:xehpsdv, dg2
2961 * Enable thread stall DOP gating and EU DOP gating.
2962 */
2963 if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
2964 intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
2965 _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
2966 intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
2967 _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING));
2968 }
2969
00a7f0d7 2970 /* Reset all contexts' slices/subslices configurations. */
d7d50f80 2971 gen12_configure_all_contexts(stream, NULL, NULL);
00a7f0d7
LL
2972
2973 /* disable the context save/restore or OAR counters */
2974 if (stream->ctx)
d7d50f80 2975 gen12_configure_oar_context(stream, NULL);
95690a02
LL
2976
2977 /* Make sure we disable noa to save power. */
8f8b1171 2978 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
cceb0849
UNR
2979
2980 sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
2981 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
2982
2983 /* Reset PMON Enable to save power. */
2984 intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
95690a02
LL
2985}
2986
5728de2f 2987static void gen7_oa_enable(struct i915_perf_stream *stream)
d7965152 2988{
52111c46 2989 struct intel_uncore *uncore = stream->uncore;
5728de2f 2990 struct i915_gem_context *ctx = stream->ctx;
a37f08a8
UNR
2991 u32 ctx_id = stream->specific_ctx_id;
2992 bool periodic = stream->periodic;
2993 u32 period_exponent = stream->period_exponent;
90981da6 2994 u32 report_format = stream->oa_buffer.format->format;
11051303 2995
1bef3409
RB
2996 /*
2997 * Reset buf pointers so we don't forward reports from before now.
2998 *
2999 * Think carefully if considering trying to avoid this, since it
3000 * also ensures status flags and the buffer itself are cleared
3001 * in error paths, and we have checks for invalid reports based
3002 * on the assumption that certain fields are written to zeroed
 3003 * memory, which this helps maintain.
3004 */
a37f08a8 3005 gen7_init_oa_buffer(stream);
d7965152 3006
8f8b1171
CW
3007 intel_uncore_write(uncore, GEN7_OACONTROL,
3008 (ctx_id & GEN7_OACONTROL_CTX_MASK) |
3009 (period_exponent <<
3010 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
3011 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
3012 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
3013 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
3014 GEN7_OACONTROL_ENABLE);
d7965152
RB
3015}
3016
5728de2f 3017static void gen8_oa_enable(struct i915_perf_stream *stream)
19f81df2 3018{
52111c46 3019 struct intel_uncore *uncore = stream->uncore;
90981da6 3020 u32 report_format = stream->oa_buffer.format->format;
19f81df2
RB
3021
3022 /*
3023 * Reset buf pointers so we don't forward reports from before now.
3024 *
3025 * Think carefully if considering trying to avoid this, since it
3026 * also ensures status flags and the buffer itself are cleared
3027 * in error paths, and we have checks for invalid reports based
3028 * on the assumption that certain fields are written to zeroed
 3029 * memory, which this helps maintain.
3030 */
a37f08a8 3031 gen8_init_oa_buffer(stream);
19f81df2
RB
3032
3033 /*
3034 * Note: we don't rely on the hardware to perform single context
3035 * filtering and instead filter on the cpu based on the context-id
3036 * field of reports
3037 */
8f8b1171
CW
3038 intel_uncore_write(uncore, GEN8_OACONTROL,
3039 (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
3040 GEN8_OA_COUNTER_ENABLE);
19f81df2
RB
3041}
3042
00a7f0d7
LL
3043static void gen12_oa_enable(struct i915_perf_stream *stream)
3044{
3045 struct intel_uncore *uncore = stream->uncore;
90981da6 3046 u32 report_format = stream->oa_buffer.format->format;
00a7f0d7
LL
3047
3048 /*
3049 * If we don't want OA reports from the OA buffer, then we don't even
3050 * need to program the OAG unit.
3051 */
3052 if (!(stream->sample_flags & SAMPLE_OA_REPORT))
3053 return;
3054
3055 gen12_init_oa_buffer(stream);
3056
3057 intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
3058 (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
3059 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
3060}
3061
16d98b31
RB
3062/**
3063 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
3064 * @stream: An i915 perf stream opened for OA metrics
3065 *
3066 * [Re]enables hardware periodic sampling according to the period configured
3067 * when opening the stream. This also starts a hrtimer that will periodically
3068 * check for data in the circular OA buffer for notifying userspace (e.g.
3069 * during a read() or poll()).
3070 */
d7965152
RB
3071static void i915_oa_stream_enable(struct i915_perf_stream *stream)
3072{
c51dbc6e
LL
3073 stream->pollin = false;
3074
8f8b1171 3075 stream->perf->ops.oa_enable(stream);
d7965152 3076
be0bdd67 3077 if (stream->sample_flags & SAMPLE_OA_REPORT)
a37f08a8 3078 hrtimer_start(&stream->poll_check_timer,
4ef10fe0 3079 ns_to_ktime(stream->poll_oa_period),
d7965152
RB
3080 HRTIMER_MODE_REL_PINNED);
3081}
3082
5728de2f 3083static void gen7_oa_disable(struct i915_perf_stream *stream)
d7965152 3084{
52111c46 3085 struct intel_uncore *uncore = stream->uncore;
5728de2f 3086
97a04e0d
DCS
3087 intel_uncore_write(uncore, GEN7_OACONTROL, 0);
3088 if (intel_wait_for_register(uncore,
e896d29a
CW
3089 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
3090 50))
0bf85735
WK
3091 drm_err(&stream->perf->i915->drm,
3092 "wait for OA to be disabled timed out\n");
d7965152
RB
3093}
3094
5728de2f 3095static void gen8_oa_disable(struct i915_perf_stream *stream)
19f81df2 3096{
52111c46 3097 struct intel_uncore *uncore = stream->uncore;
5728de2f 3098
97a04e0d
DCS
3099 intel_uncore_write(uncore, GEN8_OACONTROL, 0);
3100 if (intel_wait_for_register(uncore,
e896d29a
CW
3101 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
3102 50))
0bf85735
WK
3103 drm_err(&stream->perf->i915->drm,
3104 "wait for OA to be disabled timed out\n");
19f81df2
RB
3105}
3106
00a7f0d7
LL
3107static void gen12_oa_disable(struct i915_perf_stream *stream)
3108{
3109 struct intel_uncore *uncore = stream->uncore;
3110
3111 intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
3112 if (intel_wait_for_register(uncore,
3113 GEN12_OAG_OACONTROL,
3114 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
3115 50))
0bf85735
WK
3116 drm_err(&stream->perf->i915->drm,
3117 "wait for OA to be disabled timed out\n");
c06aa1b4
UNR
3118
3119 intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1);
3120 if (intel_wait_for_register(uncore,
3121 GEN12_OA_TLB_INV_CR,
3122 1, 0,
3123 50))
3124 drm_err(&stream->perf->i915->drm,
3125 "wait for OA tlb invalidate timed out\n");
00a7f0d7
LL
3126}
3127
16d98b31
RB
3128/**
3129 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
3130 * @stream: An i915 perf stream opened for OA metrics
3131 *
3132 * Stops the OA unit from periodically writing counter reports into the
3133 * circular OA buffer. This also stops the hrtimer that periodically checks for
3134 * data in the circular OA buffer, for notifying userspace.
3135 */
d7965152
RB
3136static void i915_oa_stream_disable(struct i915_perf_stream *stream)
3137{
8f8b1171 3138 stream->perf->ops.oa_disable(stream);
d7965152 3139
be0bdd67 3140 if (stream->sample_flags & SAMPLE_OA_REPORT)
a37f08a8 3141 hrtimer_cancel(&stream->poll_check_timer);
d7965152
RB
3142}
3143
d7965152
RB
3144static const struct i915_perf_stream_ops i915_oa_stream_ops = {
3145 .destroy = i915_oa_stream_destroy,
3146 .enable = i915_oa_stream_enable,
3147 .disable = i915_oa_stream_disable,
3148 .wait_unlocked = i915_oa_wait_unlocked,
3149 .poll_wait = i915_oa_poll_wait,
3150 .read = i915_oa_read,
eec688e1
RB
3151};
3152
4b4e973d
CW
3153static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
3154{
d7d50f80
CW
3155 struct i915_active *active;
3156 int err;
4b4e973d 3157
d7d50f80
CW
3158 active = i915_active_create();
3159 if (!active)
3160 return -ENOMEM;
4b4e973d 3161
d7d50f80
CW
3162 err = stream->perf->ops.enable_metric_set(stream, active);
3163 if (err == 0)
3164 __i915_active_wait(active, TASK_UNINTERRUPTIBLE);
4b4e973d 3165
d7d50f80
CW
3166 i915_active_put(active);
3167 return err;
4b4e973d
CW
3168}
3169
11ecbddd
LL
3170static void
3171get_default_sseu_config(struct intel_sseu *out_sseu,
3172 struct intel_engine_cs *engine)
3173{
0b6613c6 3174 const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu;
11ecbddd
LL
3175
3176 *out_sseu = intel_sseu_from_device_info(devinfo_sseu);
3177
651e7d48 3178 if (GRAPHICS_VER(engine->i915) == 11) {
11ecbddd
LL
3179 /*
3180 * We only need subslice count so it doesn't matter which ones
3181 * we select - just keep the low half of all available
3182 * subslices per slice.
3183 */
3184 out_sseu->subslice_mask =
3185 ~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
3186 out_sseu->slice_mask = 0x1;
3187 }
3188}
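
/*
 * Worked example of the mask computation above (illustrative only): with
 * hweight8(subslice_mask) == 8 available subslices, 8 / 2 == 4 and
 * ~(~0 << 4) == 0xf, i.e. the default SSEU config keeps the four lowest
 * subslices of slice 0.
 */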
3189
3190static int
3191get_sseu_config(struct intel_sseu *out_sseu,
3192 struct intel_engine_cs *engine,
3193 const struct drm_i915_gem_context_param_sseu *drm_sseu)
3194{
3195 if (drm_sseu->engine.engine_class != engine->uabi_class ||
3196 drm_sseu->engine.engine_instance != engine->uabi_instance)
3197 return -EINVAL;
3198
0b6613c6 3199 return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
4b4e973d
CW
3200}
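
/*
 * Illustrative userspace sketch (not part of the driver): the
 * DRM_I915_PERF_PROP_GLOBAL_SSEU property takes a pointer to a
 * struct drm_i915_gem_context_param_sseu whose engine field must match the
 * engine the stream is opened on, e.g.:
 *
 *	struct drm_i915_gem_context_param_sseu sseu = {
 *		.engine = {
 *			.engine_class = I915_ENGINE_CLASS_RENDER,
 *			.engine_instance = 0,
 *		},
 *		.slice_mask = 0x1,
 *		.subslice_mask = 0x1,
 *	};
 *
 * The mask values here are hypothetical; what is actually accepted depends
 * on i915_gem_user_to_context_sseu() for the target platform.
 */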
3201
bc7ed4d3
UNR
3202/*
3203 * OA timestamp frequency = CS timestamp frequency on most platforms. On some
3204 * platforms the OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
3205 * cases, return the adjusted CS timestamp frequency to the user.
3206 */
3207u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
3208{
a6b44302
UNR
3209 /*
3210 * Wa_18013179988:dg2
3211 * Wa_14015846243:mtl
3212 */
3213 if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
bc7ed4d3
UNR
3214 intel_wakeref_t wakeref;
3215 u32 reg, shift;
3216
3217 with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref)
3218 reg = intel_uncore_read(to_gt(i915)->uncore, RPM_CONFIG0);
3219
3220 shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK,
3221 reg);
3222
3223 return to_gt(i915)->clock_frequency << (3 - shift);
3224 }
3225
3226 return to_gt(i915)->clock_frequency;
3227}
3228
16d98b31
RB
3229/**
3230 * i915_oa_stream_init - validate combined props for OA stream and init
3231 * @stream: An i915 perf stream
3232 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
3233 * @props: The property state that configures stream (individually validated)
3234 *
3235 * While read_properties_unlocked() validates properties in isolation it
3236 * doesn't ensure that the combination necessarily makes sense.
3237 *
3238 * At this point it has been determined that userspace wants a stream of
3239 * OA metrics, but we still need to further validate that the combined
3240 * properties are OK.
3241 *
3242 * If the configuration makes sense then we can allocate memory for
3243 * a circular OA buffer and apply the requested metric set configuration.
3244 *
3245 * Returns: zero on success or a negative error code.
3246 */
d7965152
RB
3247static int i915_oa_stream_init(struct i915_perf_stream *stream,
3248 struct drm_i915_perf_open_param *param,
3249 struct perf_open_properties *props)
3250{
a9f236d1 3251 struct drm_i915_private *i915 = stream->perf->i915;
8f8b1171 3252 struct i915_perf *perf = stream->perf;
5f284e9c 3253 struct i915_perf_group *g;
9677a9f3 3254 struct intel_gt *gt;
d7965152
RB
3255 int ret;
3256
9a61363a 3257 if (!props->engine) {
2fec5391
UNR
3258 drm_dbg(&stream->perf->i915->drm,
3259 "OA engine not specified\n");
9a61363a
LL
3260 return -EINVAL;
3261 }
9677a9f3 3262 gt = props->engine->gt;
5f284e9c 3263 g = props->engine->oa_group;
9a61363a
LL
3264
3265 /*
3266 * If the sysfs metrics/ directory wasn't registered for some
442b8c06
RB
3267 * reason then don't let userspace try their luck with config
3268 * IDs
3269 */
8f8b1171 3270 if (!perf->metrics_kobj) {
2fec5391
UNR
3271 drm_dbg(&stream->perf->i915->drm,
3272 "OA metrics weren't advertised via sysfs\n");
442b8c06
RB
3273 return -EINVAL;
3274 }
3275
322d56aa 3276 if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
651e7d48 3277 (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
2fec5391
UNR
3278 drm_dbg(&stream->perf->i915->drm,
3279 "Only OA report sampling supported\n");
d7965152
RB
3280 return -EINVAL;
3281 }
3282
8f8b1171 3283 if (!perf->ops.enable_metric_set) {
2fec5391
UNR
3284 drm_dbg(&stream->perf->i915->drm,
3285 "OA unit not supported\n");
d7965152
RB
3286 return -ENODEV;
3287 }
3288
9a61363a
LL
3289 /*
3290 * To avoid the complexity of having to accurately filter
d7965152
RB
3291 * counter reports and marshal to the appropriate client
3292 * we currently only allow exclusive access
3293 */
5f284e9c 3294 if (g->exclusive_stream) {
2fec5391
UNR
3295 drm_dbg(&stream->perf->i915->drm,
3296 "OA unit already in use\n");
d7965152
RB
3297 return -EBUSY;
3298 }
3299
d7965152 3300 if (!props->oa_format) {
2fec5391
UNR
3301 drm_dbg(&stream->perf->i915->drm,
3302 "OA report format not specified\n");
d7965152
RB
3303 return -EINVAL;
3304 }
3305
9a61363a 3306 stream->engine = props->engine;
52111c46 3307 stream->uncore = stream->engine->gt->uncore;
9a61363a 3308
d7965152
RB
3309 stream->sample_size = sizeof(struct drm_i915_perf_record_header);
3310
90981da6
UNR
3311 stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
3312 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
3313 return -EINVAL;
d7965152 3314
322d56aa 3315 stream->sample_flags = props->sample_flags;
90981da6 3316 stream->sample_size += stream->oa_buffer.format->size;
d7965152 3317
9cd20ef7
LL
3318 stream->hold_preemption = props->hold_preemption;
3319
a37f08a8
UNR
3320 stream->periodic = props->oa_periodic;
3321 if (stream->periodic)
3322 stream->period_exponent = props->oa_period_exponent;
d7965152 3323
d7965152
RB
3324 if (stream->ctx) {
3325 ret = oa_get_render_ctx_id(stream);
9bd9be66 3326 if (ret) {
2fec5391
UNR
3327 drm_dbg(&stream->perf->i915->drm,
3328 "Invalid context id to filter with\n");
d7965152 3329 return ret;
9bd9be66 3330 }
d7965152
RB
3331 }
3332
daed3e44
LL
3333 ret = alloc_noa_wait(stream);
3334 if (ret) {
2fec5391
UNR
3335 drm_dbg(&stream->perf->i915->drm,
3336 "Unable to allocate NOA wait batch buffer\n");
daed3e44
LL
3337 goto err_noa_wait_alloc;
3338 }
3339
6a45008a
LL
3340 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
3341 if (!stream->oa_config) {
2fec5391
UNR
3342 drm_dbg(&stream->perf->i915->drm,
3343 "Invalid OA config id=%i\n", props->metrics_set);
6a45008a 3344 ret = -EINVAL;
f89823c2 3345 goto err_config;
9bd9be66 3346 }
701f8231 3347
d7965152
RB
3348 /* PRM - observability performance counters:
3349 *
3350 * OACONTROL, performance counter enable, note:
3351 *
3352 * "When this bit is set, in order to have coherent counts,
3353 * RC6 power state and trunk clock gating must be disabled.
3354 * This can be achieved by programming MMIO registers as
3355 * 0xA094=0 and 0xA090[31]=1"
3356 *
3357 * In our case we are expecting that taking pm + FORCEWAKE
3358 * references will effectively disable RC6.
3359 */
a5efcde6 3360 intel_engine_pm_get(stream->engine);
52111c46 3361 intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
d7965152 3362
01e74274
VB
3363 /*
3364 * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
3365 * OA to lose the configuration state. Prevent this by overriding GUCRC
3366 * mode.
3367 */
3368 if (intel_uc_uses_guc_rc(&gt->uc) &&
3369 (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
3370 IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) {
3371 ret = intel_guc_slpc_override_gucrc_mode(&gt->uc.guc.slpc,
3372 SLPC_GUCRC_MODE_GUCRC_NO_RC6);
3373 if (ret) {
3374 drm_dbg(&stream->perf->i915->drm,
3375 "Unable to override gucrc mode\n");
2810ac6c 3376 goto err_gucrc;
01e74274 3377 }
2810ac6c
CW
3378
3379 stream->override_gucrc = true;
01e74274
VB
3380 }
3381
a37f08a8 3382 ret = alloc_oa_buffer(stream);
987f8c44 3383 if (ret)
3384 goto err_oa_buf_alloc;
3385
ec431eae 3386 stream->ops = &i915_oa_stream_ops;
11ecbddd 3387
9677a9f3 3388 stream->engine->gt->perf.sseu = props->sseu;
5f284e9c 3389 WRITE_ONCE(g->exclusive_stream, stream);
ec431eae 3390
4b4e973d 3391 ret = i915_perf_stream_enable_sync(stream);
9bd9be66 3392 if (ret) {
2fec5391
UNR
3393 drm_dbg(&stream->perf->i915->drm,
3394 "Unable to enable metric set\n");
d7965152 3395 goto err_enable;
9bd9be66 3396 }
d7965152 3397
2fec5391
UNR
3398 drm_dbg(&stream->perf->i915->drm,
3399 "opening stream oa config uuid=%s\n",
6a45008a
LL
3400 stream->oa_config->uuid);
3401
a37f08a8
UNR
3402 hrtimer_init(&stream->poll_check_timer,
3403 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3404 stream->poll_check_timer.function = oa_poll_check_timer_cb;
3405 init_waitqueue_head(&stream->poll_wq);
3406 spin_lock_init(&stream->oa_buffer.ptr_lock);
2db609c0 3407 mutex_init(&stream->lock);
a37f08a8 3408
d7965152
RB
3409 return 0;
3410
41d3fdcd 3411err_enable:
5f284e9c 3412 WRITE_ONCE(g->exclusive_stream, NULL);
8f8b1171 3413 perf->ops.disable_metric_set(stream);
701f8231 3414
a37f08a8 3415 free_oa_buffer(stream);
d7965152
RB
3416
3417err_oa_buf_alloc:
2810ac6c
CW
3418 if (stream->override_gucrc)
3419 intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc);
f89823c2 3420
2810ac6c 3421err_gucrc:
52111c46 3422 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
a5efcde6 3423 intel_engine_pm_put(stream->engine);
f89823c2 3424
2810ac6c
CW
3425 free_oa_configs(stream);
3426
f89823c2 3427err_config:
daed3e44
LL
3428 free_noa_wait(stream);
3429
3430err_noa_wait_alloc:
d7965152
RB
3431 if (stream->ctx)
3432 oa_put_render_ctx_id(stream);
3433
3434 return ret;
3435}
3436
7dc56af5
CW
3437void i915_oa_init_reg_state(const struct intel_context *ce,
3438 const struct intel_engine_cs *engine)
19f81df2 3439{
28b6cb08 3440 struct i915_perf_stream *stream;
19f81df2 3441
8a68d464 3442 if (engine->class != RENDER_CLASS)
19f81df2
RB
3443 return;
3444
a5af081d 3445 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
5f284e9c 3446 stream = READ_ONCE(engine->oa_group->exclusive_stream);
651e7d48 3447 if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
7dc56af5 3448 gen8_update_reg_state_unlocked(ce, stream);
19f81df2
RB
3449}
3450
16d98b31
RB
3451/**
3452 * i915_perf_read - handles read() FOP for i915 perf stream FDs
3453 * @file: An i915 perf stream file
3454 * @buf: destination buffer given by userspace
3455 * @count: the number of bytes userspace wants to read
3456 * @ppos: (inout) file seek position (unused)
3457 *
3458 * The entry point for handling a read() on a stream file descriptor from
3459 * userspace. Most of the work is left to &i915_perf_stream_ops->read, but to
3460 * save having stream implementations (of which we might have multiple later)
3461 * deal with blocking reads we handle that here.
3462 *
3463 * We can also consistently treat trying to read from a disabled stream
3464 * as an IO error so implementations can assume the stream is enabled
3465 * while reading.
3466 *
3467 * Returns: The number of bytes copied or a negative error code on failure.
3468 */
eec688e1
RB
3469static ssize_t i915_perf_read(struct file *file,
3470 char __user *buf,
3471 size_t count,
3472 loff_t *ppos)
3473{
3474 struct i915_perf_stream *stream = file->private_data;
bcad588d
AD
3475 size_t offset = 0;
3476 int ret;
eec688e1 3477
d7965152
RB
3478 /* To ensure it's handled consistently we simply treat all reads of a
3479 * disabled stream as an error. In particular it might otherwise lead
3480 * to a deadlock for blocking file descriptors...
3481 */
be0bdd67 3482 if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT))
d7965152
RB
3483 return -EIO;
3484
eec688e1 3485 if (!(file->f_flags & O_NONBLOCK)) {
d7965152
RB
3486 /* There's the small chance of false positives from
3487 * stream->ops->wait_unlocked.
3488 *
3489 * E.g. with single context filtering, since we only wait until
3490 * the OA buffer has >= 1 report, we don't immediately know whether
3491 * any reports really belong to the current context.
eec688e1
RB
3492 */
3493 do {
3494 ret = stream->ops->wait_unlocked(stream);
3495 if (ret)
3496 return ret;
3497
2db609c0 3498 mutex_lock(&stream->lock);
bcad588d 3499 ret = stream->ops->read(stream, buf, count, &offset);
2db609c0 3500 mutex_unlock(&stream->lock);
bcad588d 3501 } while (!offset && !ret);
eec688e1 3502 } else {
2db609c0 3503 mutex_lock(&stream->lock);
bcad588d 3504 ret = stream->ops->read(stream, buf, count, &offset);
2db609c0 3505 mutex_unlock(&stream->lock);
eec688e1
RB
3506 }
3507
a9a08845 3508 /* We allow the poll checking to sometimes report false positive EPOLLIN
26ebd9c7
RB
3509 * events where we might actually report EAGAIN on read() if there's
3510 * not really any data available. In this situation though we don't
a9a08845 3511 * want to enter a busy loop between poll() reporting a EPOLLIN event
26ebd9c7
RB
3512 * and read() returning -EAGAIN. Clearing the stream->pollin state here
3513 * effectively ensures we back off until the next hrtimer callback
a9a08845 3514 * before reporting another EPOLLIN event.
bcad588d
AD
3515 * The exception to this is if ops->read() returned -ENOSPC which means
3516 * that more OA data is available than could fit in the user provided
3517 * buffer. In this case we want the next poll() call to not block.
26ebd9c7 3518 */
bcad588d 3519 if (ret != -ENOSPC)
a37f08a8 3520 stream->pollin = false;
d7965152 3521
bcad588d
AD
3522 /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
3523 return offset ?: (ret ?: -EAGAIN);
eec688e1
RB
3524}
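
/*
 * Illustrative userspace read loop (not part of the driver; assumes the uapi
 * definitions from include/uapi/drm/i915_drm.h). Each read() returns whole
 * records, each beginning with a struct drm_i915_perf_record_header:
 *
 *	uint8_t buf[16 * 1024];
 *	ssize_t len = read(stream_fd, buf, sizeof(buf));
 *
 *	if (len < 0 && errno == EAGAIN)
 *		return;	(no data yet: poll() and retry)
 *
 *	for (size_t offset = 0; offset < (size_t)len; ) {
 *		const struct drm_i915_perf_record_header *header =
 *			(const void *)(buf + offset);
 *
 *		if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
 *			process_oa_report(header + 1);	(hypothetical helper)
 *
 *		offset += header->size;
 *	}
 *
 * This is a minimal sketch: real code should also handle the
 * DRM_I915_PERF_RECORD_OA_REPORT_LOST / _OA_BUFFER_LOST record types and the
 * -ENOSPC case described above (user buffer smaller than a single record).
 */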
3525
d7965152
RB
3526static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
3527{
a37f08a8
UNR
3528 struct i915_perf_stream *stream =
3529 container_of(hrtimer, typeof(*stream), poll_check_timer);
d7965152 3530
a37f08a8
UNR
3531 if (oa_buffer_check_unlocked(stream)) {
3532 stream->pollin = true;
3533 wake_up(&stream->poll_wq);
d7965152
RB
3534 }
3535
4ef10fe0
LL
3536 hrtimer_forward_now(hrtimer,
3537 ns_to_ktime(stream->poll_oa_period));
d7965152
RB
3538
3539 return HRTIMER_RESTART;
3540}
3541
16d98b31
RB
3542/**
3543 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
16d98b31
RB
3544 * @stream: An i915 perf stream
3545 * @file: An i915 perf stream file
3546 * @wait: poll() state table
3547 *
3548 * For handling userspace polling on an i915 perf stream, this calls through to
3549 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
3550 * will be woken for new stream data.
3551 *
16d98b31
RB
3552 * Returns: any poll events that are ready without sleeping
3553 */
8f8b1171
CW
3554static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
3555 struct file *file,
3556 poll_table *wait)
eec688e1 3557{
afc9a42b 3558 __poll_t events = 0;
eec688e1
RB
3559
3560 stream->ops->poll_wait(stream, file, wait);
3561
d7965152
RB
3562 /* Note: we don't explicitly check whether there's something to read
3563 * here since this path may be very hot depending on what else
3564 * userspace is polling, or on the timeout in use. We rely solely on
3565 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
3566 * samples to read.
3567 */
a37f08a8 3568 if (stream->pollin)
a9a08845 3569 events |= EPOLLIN;
eec688e1 3570
d7965152 3571 return events;
eec688e1
RB
3572}
3573
16d98b31
RB
3574/**
3575 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
3576 * @file: An i915 perf stream file
3577 * @wait: poll() state table
3578 *
3579 * For handling userspace polling on an i915 perf stream, this ensures
3580 * poll_wait() gets called with a wait queue that will be woken for new stream
3581 * data.
3582 *
3583 * Note: Implementation deferred to i915_perf_poll_locked()
3584 *
3585 * Returns: any poll events that are ready without sleeping
3586 */
afc9a42b 3587static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
eec688e1
RB
3588{
3589 struct i915_perf_stream *stream = file->private_data;
afc9a42b 3590 __poll_t ret;
eec688e1 3591
2db609c0 3592 mutex_lock(&stream->lock);
8f8b1171 3593 ret = i915_perf_poll_locked(stream, file, wait);
2db609c0 3594 mutex_unlock(&stream->lock);
eec688e1
RB
3595
3596 return ret;
3597}
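
/*
 * Illustrative userspace usage (not part of the driver): waiting for data
 * with poll() before issuing a read() on a non-blocking stream fd:
 *
 *	struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		read_records(stream_fd);	(hypothetical helper, see the
 *						 read() sketch above)
 *
 * Note that, as described above, POLLIN may occasionally be reported even
 * though a subsequent read() still returns -EAGAIN.
 */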
3598
16d98b31
RB
3599/**
3600 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
3601 * @stream: A disabled i915 perf stream
3602 *
3603 * [Re]enables the associated capture of data for this stream.
3604 *
3605 * If a stream was previously enabled then there's currently no intention
3606 * to provide userspace any guarantee about the preservation of previously
3607 * buffered data.
3608 */
eec688e1
RB
3609static void i915_perf_enable_locked(struct i915_perf_stream *stream)
3610{
3611 if (stream->enabled)
3612 return;
3613
3614 /* Allow stream->ops->enable() to refer to this */
3615 stream->enabled = true;
3616
3617 if (stream->ops->enable)
3618 stream->ops->enable(stream);
9cd20ef7
LL
3619
3620 if (stream->hold_preemption)
9f3ccd40 3621 intel_context_set_nopreempt(stream->pinned_ctx);
eec688e1
RB
3622}
3623
16d98b31
RB
3624/**
3625 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
3626 * @stream: An enabled i915 perf stream
3627 *
3628 * Disables the associated capture of data for this stream.
3629 *
3630 * The intention is that disabling and re-enabling a stream will ideally be
3631 * cheaper than destroying and re-opening a stream with the same configuration,
3632 * though there are no formal guarantees about what state or buffered data
3633 * must be retained between disabling and re-enabling a stream.
3634 *
3635 * Note: while a stream is disabled it's considered an error for userspace
3636 * to attempt to read from the stream (-EIO).
3637 */
eec688e1
RB
3638static void i915_perf_disable_locked(struct i915_perf_stream *stream)
3639{
3640 if (!stream->enabled)
3641 return;
3642
3643 /* Allow stream->ops->disable() to refer to this */
3644 stream->enabled = false;
3645
9cd20ef7 3646 if (stream->hold_preemption)
9f3ccd40 3647 intel_context_clear_nopreempt(stream->pinned_ctx);
9cd20ef7 3648
eec688e1
RB
3649 if (stream->ops->disable)
3650 stream->ops->disable(stream);
3651}
3652
7831e9a9
CW
3653static long i915_perf_config_locked(struct i915_perf_stream *stream,
3654 unsigned long metrics_set)
3655{
3656 struct i915_oa_config *config;
3657 long ret = stream->oa_config->id;
3658
3659 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3660 if (!config)
3661 return -EINVAL;
3662
3663 if (config != stream->oa_config) {
d7d50f80 3664 int err;
7831e9a9
CW
3665
3666 /*
3667 * If OA is bound to a specific context, emit the
3668 * reconfiguration inline from that context. The update
3669 * will then be ordered with respect to submission on that
3670 * context.
3671 *
3672 * When set globally, we use a low priority kernel context,
3673 * so it will effectively take effect when idle.
3674 */
d7d50f80
CW
3675 err = emit_oa_config(stream, config, oa_context(stream), NULL);
3676 if (!err)
7831e9a9 3677 config = xchg(&stream->oa_config, config);
d7d50f80
CW
3678 else
3679 ret = err;
7831e9a9
CW
3680 }
3681
3682 i915_oa_config_put(config);
3683
3684 return ret;
3685}
3686
16d98b31 3687/**
e9d2871f 3688 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
16d98b31
RB
3689 * @stream: An i915 perf stream
3690 * @cmd: the ioctl request
3691 * @arg: the ioctl data
3692 *
16d98b31
RB
3693 * Returns: zero on success or a negative error code. Returns -EINVAL for
3694 * an unknown ioctl request.
3695 */
eec688e1
RB
3696static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
3697 unsigned int cmd,
3698 unsigned long arg)
3699{
3700 switch (cmd) {
3701 case I915_PERF_IOCTL_ENABLE:
3702 i915_perf_enable_locked(stream);
3703 return 0;
3704 case I915_PERF_IOCTL_DISABLE:
3705 i915_perf_disable_locked(stream);
3706 return 0;
7831e9a9
CW
3707 case I915_PERF_IOCTL_CONFIG:
3708 return i915_perf_config_locked(stream, arg);
eec688e1
RB
3709 }
3710
3711 return -EINVAL;
3712}
3713
16d98b31
RB
3714/**
3715 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3716 * @file: An i915 perf stream file
3717 * @cmd: the ioctl request
3718 * @arg: the ioctl data
3719 *
3720 * Implementation deferred to i915_perf_ioctl_locked().
3721 *
3722 * Returns: zero on success or a negative error code. Returns -EINVAL for
3723 * an unknown ioctl request.
3724 */
eec688e1
RB
3725static long i915_perf_ioctl(struct file *file,
3726 unsigned int cmd,
3727 unsigned long arg)
3728{
3729 struct i915_perf_stream *stream = file->private_data;
eec688e1
RB
3730 long ret;
3731
2db609c0 3732 mutex_lock(&stream->lock);
eec688e1 3733 ret = i915_perf_ioctl_locked(stream, cmd, arg);
2db609c0 3734 mutex_unlock(&stream->lock);
eec688e1
RB
3735
3736 return ret;
3737}
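
/*
 * Illustrative userspace usage (not part of the driver): the stream fd
 * ioctls take no argument except for I915_PERF_IOCTL_CONFIG, which takes a
 * metric set ID (e.g. one previously returned by
 * DRM_IOCTL_I915_PERF_ADD_CONFIG) and returns the previous config ID:
 *
 *	ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 *	...
 *	long prev_id = ioctl(stream_fd, I915_PERF_IOCTL_CONFIG, new_config_id);
 *	...
 *	ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
 *
 * new_config_id here is a hypothetical, previously added config ID.
 */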
3738
16d98b31
RB
3739/**
3740 * i915_perf_destroy_locked - destroy an i915 perf stream
3741 * @stream: An i915 perf stream
3742 *
3743 * Frees all resources associated with the given i915 perf @stream, disabling
3744 * any associated data capture in the process.
3745 *
9677a9f3 3746 * Note: The &gt->perf.lock mutex has been taken to serialize
16d98b31
RB
3747 * with any non-file-operation driver hooks.
3748 */
eec688e1
RB
3749static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
3750{
eec688e1
RB
3751 if (stream->enabled)
3752 i915_perf_disable_locked(stream);
3753
3754 if (stream->ops->destroy)
3755 stream->ops->destroy(stream);
3756
69df05e1 3757 if (stream->ctx)
5f09a9c8 3758 i915_gem_context_put(stream->ctx);
eec688e1
RB
3759
3760 kfree(stream);
3761}
3762
16d98b31
RB
3763/**
3764 * i915_perf_release - handles userspace close() of a stream file
3765 * @inode: anonymous inode associated with file
3766 * @file: An i915 perf stream file
3767 *
3768 * Cleans up any resources associated with an open i915 perf stream file.
3769 *
3770 * NB: close() can't really fail from the userspace point of view.
3771 *
3772 * Returns: zero on success or a negative error code.
3773 */
eec688e1
RB
3774static int i915_perf_release(struct inode *inode, struct file *file)
3775{
3776 struct i915_perf_stream *stream = file->private_data;
8f8b1171 3777 struct i915_perf *perf = stream->perf;
9677a9f3 3778 struct intel_gt *gt = stream->engine->gt;
eec688e1 3779
2db609c0
UNR
3780 /*
3781 * Within this call, we know that the fd is being closed and we have no
3782 * other user of stream->lock. Use the perf lock to destroy the stream
3783 * here.
3784 */
9677a9f3 3785 mutex_lock(&gt->perf.lock);
eec688e1 3786 i915_perf_destroy_locked(stream);
9677a9f3 3787 mutex_unlock(&gt->perf.lock);
eec688e1 3788
a5af1df7 3789 /* Release the reference the perf stream kept on the driver. */
8f8b1171 3790 drm_dev_put(&perf->i915->drm);
a5af1df7 3791
eec688e1
RB
3792 return 0;
3793}
3794
3795
3796static const struct file_operations fops = {
3797 .owner = THIS_MODULE,
3798 .llseek = no_llseek,
3799 .release = i915_perf_release,
3800 .poll = i915_perf_poll,
3801 .read = i915_perf_read,
3802 .unlocked_ioctl = i915_perf_ioctl,
191f8960
LL
3803 /* Our ioctls don't take pointer arguments, so it's safe to use the same
3804 * function to handle 32-bit compatibility.
3805 */
3806 .compat_ioctl = i915_perf_ioctl,
eec688e1
RB
3807};
3808
3809
16d98b31
RB
3810/**
3811 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
8f8b1171 3812 * @perf: i915 perf instance
16d98b31
RB
3813 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
3814 * @props: individually validated u64 property value pairs
3815 * @file: drm file
3816 *
3817 * See i915_perf_ioctl_open() for interface details.
3818 *
3819 * Implements further stream config validation and stream initialization on
9677a9f3 3820 * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
16d98b31
RB
3821 * taken to serialize with any non-file-operation driver hooks.
3822 *
3823 * Note: at this point the @props have only been validated in isolation and
3824 * it's still necessary to validate that the combination of properties makes
3825 * sense.
3826 *
3827 * In the case where userspace is interested in OA unit metrics then further
3828 * config validation and stream initialization details will be handled by
3829 * i915_oa_stream_init(). The code here should only validate config state that
3830 * will be relevant to all stream types / backends.
3831 *
3832 * Returns: zero on success or a negative error code.
3833 */
eec688e1 3834static int
8f8b1171 3835i915_perf_open_ioctl_locked(struct i915_perf *perf,
eec688e1
RB
3836 struct drm_i915_perf_open_param *param,
3837 struct perf_open_properties *props,
3838 struct drm_file *file)
3839{
3840 struct i915_gem_context *specific_ctx = NULL;
3841 struct i915_perf_stream *stream = NULL;
3842 unsigned long f_flags = 0;
19f81df2 3843 bool privileged_op = true;
eec688e1
RB
3844 int stream_fd;
3845 int ret;
3846
3847 if (props->single_context) {
3848 u32 ctx_handle = props->ctx_handle;
3849 struct drm_i915_file_private *file_priv = file->driver_priv;
3850
635f56c3 3851 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
046d1660 3852 if (IS_ERR(specific_ctx)) {
2fec5391
UNR
3853 drm_dbg(&perf->i915->drm,
3854 "Failed to look up context with ID %u for opening perf stream\n",
635f56c3 3855 ctx_handle);
046d1660 3856 ret = PTR_ERR(specific_ctx);
eec688e1
RB
3857 goto err;
3858 }
3859 }
3860
19f81df2
RB
3861 /*
3862 * On Haswell the OA unit supports clock gating off for a specific
3863 * context and in this mode there's no visibility of metrics for the
3864 * rest of the system, which we consider acceptable for a
3865 * non-privileged client.
3866 *
00a7f0d7 3867 * For Gen8->11 the OA unit no longer supports clock gating off for a
19f81df2
RB
3868 * specific context and the kernel can't securely stop the counters
3869 * from updating as system-wide / global values. Even though we can
3870 * filter reports based on the included context ID we can't block
3871 * clients from seeing the raw / global counter values via
3872 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
3873 * enable the OA unit by default.
00a7f0d7
LL
3874 *
3875 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
3876 * per context basis. So we can relax requirements there if the user
3877 * doesn't request global stream access (i.e. query based sampling
3878 * using MI_REPORT_PERF_COUNT).
19f81df2 3879 */
0b0120d4 3880 if (IS_HASWELL(perf->i915) && specific_ctx)
19f81df2 3881 privileged_op = false;
651e7d48 3882 else if (GRAPHICS_VER(perf->i915) == 12 && specific_ctx &&
00a7f0d7
LL
3883 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
3884 privileged_op = false;
19f81df2 3885
0b0120d4
LL
3886 if (props->hold_preemption) {
3887 if (!props->single_context) {
2fec5391
UNR
3888 drm_dbg(&perf->i915->drm,
3889 "preemption disable with no context\n");
0b0120d4
LL
3890 ret = -EINVAL;
3891 goto err;
3892 }
3893 privileged_op = true;
3894 }
3895
11ecbddd
LL
3896 /*
3897 * Asking for SSEU configuration is a privileged operation.
3898 */
3899 if (props->has_sseu)
3900 privileged_op = true;
3901 else
3902 get_default_sseu_config(&props->sseu, props->engine);
3903
ccdf6341
RB
3904 /* Similar to perf's kernel.perf_event_paranoid sysctl option
3905 * we check a dev.i915.perf_stream_paranoid sysctl option
3906 * to determine if it's ok to access system wide OA counters
4e3d3456 3907 * without CAP_PERFMON or CAP_SYS_ADMIN privileges.
ccdf6341 3908 */
19f81df2 3909 if (privileged_op &&
4e3d3456 3910 i915_perf_stream_paranoid && !perfmon_capable()) {
2fec5391
UNR
3911 drm_dbg(&perf->i915->drm,
3912 "Insufficient privileges to open i915 perf stream\n");
eec688e1
RB
3913 ret = -EACCES;
3914 goto err_ctx;
3915 }
3916
3917 stream = kzalloc(sizeof(*stream), GFP_KERNEL);
3918 if (!stream) {
3919 ret = -ENOMEM;
3920 goto err_ctx;
3921 }
3922
8f8b1171 3923 stream->perf = perf;
eec688e1 3924 stream->ctx = specific_ctx;
4ef10fe0 3925 stream->poll_oa_period = props->poll_oa_period;
eec688e1 3926
d7965152
RB
3927 ret = i915_oa_stream_init(stream, param, props);
3928 if (ret)
3929 goto err_alloc;
3930
3931 /* we avoid simply assigning stream->sample_flags = props->sample_flags
3932 * to have _stream_init check the combination of sample flags more
3933 * thoroughly, but still this is the expected result at this point.
eec688e1 3934 */
d7965152
RB
3935 if (WARN_ON(stream->sample_flags != props->sample_flags)) {
3936 ret = -ENODEV;
22f880ca 3937 goto err_flags;
d7965152 3938 }
eec688e1 3939
eec688e1
RB
3940 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
3941 f_flags |= O_CLOEXEC;
3942 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
3943 f_flags |= O_NONBLOCK;
3944
3945 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
3946 if (stream_fd < 0) {
3947 ret = stream_fd;
23b9e41a 3948 goto err_flags;
eec688e1
RB
3949 }
3950
3951 if (!(param->flags & I915_PERF_FLAG_DISABLED))
3952 i915_perf_enable_locked(stream);
3953
a5af1df7
LL
3954 /* Take a reference on the driver that will be kept with stream_fd
3955 * until its release.
3956 */
8f8b1171 3957 drm_dev_get(&perf->i915->drm);
a5af1df7 3958
eec688e1
RB
3959 return stream_fd;
3960
22f880ca 3961err_flags:
eec688e1
RB
3962 if (stream->ops->destroy)
3963 stream->ops->destroy(stream);
3964err_alloc:
3965 kfree(stream);
3966err_ctx:
69df05e1 3967 if (specific_ctx)
5f09a9c8 3968 i915_gem_context_put(specific_ctx);
eec688e1
RB
3969err:
3970 return ret;
3971}
3972
8f8b1171 3973static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
155e941f 3974{
bc7ed4d3
UNR
3975 u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
3976 u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
3977
3978 return div_u64(nom + den - 1, den);
155e941f
RB
3979}
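
/*
 * Worked example (illustrative only): with a 12500000 Hz OA timestamp
 * frequency (HSW), exponent 0 gives (2 << 0) * NSEC_PER_SEC / 12500000 =
 * 160ns between reports, and each increment of the exponent doubles that
 * period.
 */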
3980
77892f4f
UNR
3981static __always_inline bool
3982oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format)
3983{
3984 return test_bit(format, perf->format_mask);
3985}
3986
3987static __always_inline void
3988oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format)
3989{
3990 __set_bit(format, perf->format_mask);
3991}
3992
16d98b31
RB
3993/**
3994 * read_properties_unlocked - validate + copy userspace stream open properties
8f8b1171 3995 * @perf: i915 perf instance
16d98b31
RB
3996 * @uprops: The array of u64 key value pairs given by userspace
3997 * @n_props: The number of key value pairs expected in @uprops
3998 * @props: The stream configuration built up while validating properties
eec688e1
RB
3999 *
4000 * Note this function only validates properties in isolation it doesn't
4001 * validate that the combination of properties makes sense or that all
4002 * properties necessary for a particular kind of stream have been set.
16d98b31
RB
4003 *
4004 * Note that there currently aren't any ordering requirements for properties so
4005 * we shouldn't validate or assume anything about ordering here. This doesn't
4006 * rule out defining new properties with ordering requirements in the future.
eec688e1 4007 */
8f8b1171 4008static int read_properties_unlocked(struct i915_perf *perf,
eec688e1
RB
4009 u64 __user *uprops,
4010 u32 n_props,
4011 struct perf_open_properties *props)
4012{
9919d119 4013 struct drm_i915_gem_context_param_sseu user_sseu;
eec688e1 4014 u64 __user *uprop = uprops;
c61d04c9
UNR
4015 bool config_instance = false;
4016 bool config_class = false;
9919d119 4017 bool config_sseu = false;
c61d04c9 4018 u8 class, instance;
701f8231 4019 u32 i;
11ecbddd 4020 int ret;
eec688e1
RB
4021
4022 memset(props, 0, sizeof(struct perf_open_properties));
4ef10fe0 4023 props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
eec688e1 4024
eec688e1
RB
4025 /* Considering that ID = 0 is reserved and assuming that we don't
4026 * (currently) expect any configurations to ever specify duplicate
4027 * values for a particular property ID then the last _PROP_MAX value is
4028 * one greater than the maximum number of properties we expect to get
4029 * from userspace.
4030 */
c61d04c9 4031 if (!n_props || n_props >= DRM_I915_PERF_PROP_MAX) {
2fec5391 4032 drm_dbg(&perf->i915->drm,
c61d04c9 4033 "Invalid number of i915 perf properties given\n");
eec688e1
RB
4034 return -EINVAL;
4035 }
4036
c61d04c9
UNR
4037 /* Defaults when class:instance is not passed */
4038 class = I915_ENGINE_CLASS_RENDER;
4039 instance = 0;
4040
eec688e1 4041 for (i = 0; i < n_props; i++) {
00319ba0 4042 u64 oa_period, oa_freq_hz;
eec688e1 4043 u64 id, value;
eec688e1
RB
4044
4045 ret = get_user(id, uprop);
4046 if (ret)
4047 return ret;
4048
4049 ret = get_user(value, uprop + 1);
4050 if (ret)
4051 return ret;
4052
0a309f9e 4053 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
2fec5391
UNR
4054 drm_dbg(&perf->i915->drm,
4055 "Unknown i915 perf property ID\n");
0a309f9e
MA
4056 return -EINVAL;
4057 }
4058
eec688e1
RB
4059 switch ((enum drm_i915_perf_property_id)id) {
4060 case DRM_I915_PERF_PROP_CTX_HANDLE:
4061 props->single_context = 1;
4062 props->ctx_handle = value;
4063 break;
d7965152 4064 case DRM_I915_PERF_PROP_SAMPLE_OA:
b6dd47b9
LL
4065 if (value)
4066 props->sample_flags |= SAMPLE_OA_REPORT;
d7965152
RB
4067 break;
4068 case DRM_I915_PERF_PROP_OA_METRICS_SET:
701f8231 4069 if (value == 0) {
2fec5391
UNR
4070 drm_dbg(&perf->i915->drm,
4071 "Unknown OA metric set ID\n");
d7965152
RB
4072 return -EINVAL;
4073 }
4074 props->metrics_set = value;
4075 break;
4076 case DRM_I915_PERF_PROP_OA_FORMAT:
4077 if (value == 0 || value >= I915_OA_FORMAT_MAX) {
2fec5391
UNR
4078 drm_dbg(&perf->i915->drm,
4079 "Out-of-range OA report format %llu\n",
52c57c26 4080 value);
d7965152
RB
4081 return -EINVAL;
4082 }
77892f4f 4083 if (!oa_format_valid(perf, value)) {
2fec5391
UNR
4084 drm_dbg(&perf->i915->drm,
4085 "Unsupported OA report format %llu\n",
52c57c26 4086 value);
d7965152
RB
4087 return -EINVAL;
4088 }
4089 props->oa_format = value;
4090 break;
4091 case DRM_I915_PERF_PROP_OA_EXPONENT:
4092 if (value > OA_EXPONENT_MAX) {
2fec5391
UNR
4093 drm_dbg(&perf->i915->drm,
4094 "OA timer exponent too high (> %u)\n",
7708550c 4095 OA_EXPONENT_MAX);
d7965152
RB
4096 return -EINVAL;
4097 }
4098
00319ba0 4099 /* Theoretically we can program the OA unit to sample
155e941f
RB
4100 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
4101 * for BXT. We don't allow such high sampling
4102 * frequencies by default unless root.
00319ba0 4103 */
155e941f 4104
00319ba0 4105 BUILD_BUG_ON(sizeof(oa_period) != 8);
8f8b1171 4106 oa_period = oa_exponent_to_ns(perf, value);
00319ba0
RB
4107
4108 /* This check is primarily to ensure that oa_period <=
4109 * UINT32_MAX (before passing to do_div which only
4110 * accepts a u32 denominator), but we can also skip
4111 * checking anything < 1Hz which implicitly can't be
4112 * limited via an integer oa_max_sample_rate.
d7965152 4113 */
00319ba0
RB
4114 if (oa_period <= NSEC_PER_SEC) {
4115 u64 tmp = NSEC_PER_SEC;
4116 do_div(tmp, oa_period);
4117 oa_freq_hz = tmp;
4118 } else
4119 oa_freq_hz = 0;
4120
4e3d3456 4121 if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
2fec5391
UNR
4122 drm_dbg(&perf->i915->drm,
4123 "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
00319ba0 4124 i915_oa_max_sample_rate);
d7965152
RB
4125 return -EACCES;
4126 }
4127
4128 props->oa_periodic = true;
4129 props->oa_period_exponent = value;
4130 break;
9cd20ef7
LL
4131 case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
4132 props->hold_preemption = !!value;
4133 break;
11ecbddd 4134 case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
ca437b45
UNR
4135 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
4136 drm_dbg(&perf->i915->drm,
4137 "SSEU config not supported on gfx %x\n",
4138 GRAPHICS_VER_FULL(perf->i915));
4139 return -ENODEV;
4140 }
4141
11ecbddd
LL
4142 if (copy_from_user(&user_sseu,
4143 u64_to_user_ptr(value),
4144 sizeof(user_sseu))) {
2fec5391
UNR
4145 drm_dbg(&perf->i915->drm,
4146 "Unable to copy global sseu parameter\n");
11ecbddd
LL
4147 return -EFAULT;
4148 }
9919d119 4149 config_sseu = true;
11ecbddd
LL
4150 break;
4151 }
4ef10fe0
LL
4152 case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
4153 if (value < 100000 /* 100us */) {
2fec5391
UNR
4154 drm_dbg(&perf->i915->drm,
4155 "OA availability timer too small (%lluns < 100us)\n",
4ef10fe0
LL
4156 value);
4157 return -EINVAL;
4158 }
4159 props->poll_oa_period = value;
4160 break;
c61d04c9
UNR
4161 case DRM_I915_PERF_PROP_OA_ENGINE_CLASS:
4162 class = (u8)value;
4163 config_class = true;
4164 break;
4165 case DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE:
4166 instance = (u8)value;
4167 config_instance = true;
4168 break;
4169 default:
eec688e1 4170 MISSING_CASE(id);
eec688e1
RB
4171 return -EINVAL;
4172 }
4173
4174 uprop += 2;
4175 }
4176
c61d04c9
UNR
4177 if ((config_class && !config_instance) ||
4178 (config_instance && !config_class)) {
4179 drm_dbg(&perf->i915->drm,
4180 "OA engine-class and engine-instance parameters must be passed together\n");
4181 return -EINVAL;
4182 }
4183
4184 props->engine = intel_engine_lookup_user(perf->i915, class, instance);
4185 if (!props->engine) {
4186 drm_dbg(&perf->i915->drm,
4187 "OA engine class and instance invalid %d:%d\n",
4188 class, instance);
4189 return -EINVAL;
4190 }
4191
4192 if (!engine_supports_oa(props->engine)) {
4193 drm_dbg(&perf->i915->drm,
4194 "Engine not supported by OA %d:%d\n",
4195 class, instance);
4196 return -EINVAL;
4197 }
4198
9919d119
UNR
4199 if (config_sseu) {
4200 ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
4201 if (ret) {
4202 drm_dbg(&perf->i915->drm,
4203 "Invalid SSEU configuration\n");
4204 return ret;
4205 }
4206 props->has_sseu = true;
4207 }
4208
eec688e1
RB
4209 return 0;
4210}
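
/*
 * Illustrative userspace sketch (not part of the driver): properties are
 * passed as a flat array of u64 (key, value) pairs, e.g. to request
 * periodic OA sampling on the render engine:
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, config_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *		DRM_I915_PERF_PROP_OA_ENGINE_CLASS, I915_ENGINE_CLASS_RENDER,
 *		DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, 0,
 *	};
 *
 * config_id is a hypothetical metric set ID; the exponent and format values
 * are platform dependent and only meant as an example.
 */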
4211
16d98b31
RB
4212/**
4213 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
4214 * @dev: drm device
4215 * @data: ioctl data copied from userspace (unvalidated)
4216 * @file: drm file
4217 *
4218 * Validates the stream open parameters given by userspace including flags
4219 * and an array of u64 key, value pair properties.
4220 *
4221 * Very little is assumed up front about the nature of the stream being
4222 * opened (for instance we don't assume it's for periodic OA unit metrics). An
4223 * i915-perf stream is expected to be a suitable interface for other forms of
4224 * buffered data written by the GPU besides periodic OA metrics.
4225 *
4226 * Note we copy the properties from userspace outside of the i915 perf
c1e8d7c6 4227 * mutex to avoid an awkward lockdep with mmap_lock.
16d98b31
RB
4228 *
4229 * Most of the implementation details are handled by
9677a9f3 4230 * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
16d98b31
RB
4231 * mutex for serializing with any non-file-operation driver hooks.
4232 *
4233 * Return: A newly opened i915 Perf stream file descriptor or negative
4234 * error code on failure.
4235 */
eec688e1
RB
4236int i915_perf_open_ioctl(struct drm_device *dev, void *data,
4237 struct drm_file *file)
4238{
8f8b1171 4239 struct i915_perf *perf = &to_i915(dev)->perf;
eec688e1 4240 struct drm_i915_perf_open_param *param = data;
9677a9f3 4241 struct intel_gt *gt;
eec688e1
RB
4242 struct perf_open_properties props;
4243 u32 known_open_flags;
4244 int ret;
4245
8f8b1171 4246 if (!perf->i915) {
2fec5391
UNR
4247 drm_dbg(&perf->i915->drm,
4248 "i915 perf interface not available for this system\n");
eec688e1
RB
4249 return -ENOTSUPP;
4250 }
4251
4252 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
4253 I915_PERF_FLAG_FD_NONBLOCK |
4254 I915_PERF_FLAG_DISABLED;
4255 if (param->flags & ~known_open_flags) {
2fec5391
UNR
4256 drm_dbg(&perf->i915->drm,
4257 "Unknown drm_i915_perf_open_param flag\n");
eec688e1
RB
4258 return -EINVAL;
4259 }
4260
8f8b1171 4261 ret = read_properties_unlocked(perf,
eec688e1
RB
4262 u64_to_user_ptr(param->properties_ptr),
4263 param->num_properties,
4264 &props);
4265 if (ret)
4266 return ret;
4267
9677a9f3
UNR
4268 gt = props.engine->gt;
4269
4270 mutex_lock(&gt->perf.lock);
8f8b1171 4271 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
9677a9f3 4272 mutex_unlock(&gt->perf.lock);
eec688e1
RB
4273
4274 return ret;
4275}
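
/*
 * Illustrative userspace sketch (not part of the driver; assumes the uapi
 * definitions from include/uapi/drm/i915_drm.h): opening a stream with a
 * property array like the one shown above:
 *
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC |
 *			 I915_PERF_FLAG_FD_NONBLOCK,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * drm_fd is a hypothetical, already-open DRM device file descriptor.
 */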
4276
16d98b31
RB
4277/**
4278 * i915_perf_register - exposes i915-perf to userspace
8f8b1171 4279 * @i915: i915 device instance
16d98b31
RB
4280 *
4281 * In particular OA metric sets are advertised under a sysfs metrics/
4282 * directory allowing userspace to enumerate valid IDs that can be
4283 * used to open an i915-perf stream.
4284 */
8f8b1171 4285void i915_perf_register(struct drm_i915_private *i915)
442b8c06 4286{
8f8b1171 4287 struct i915_perf *perf = &i915->perf;
9677a9f3 4288 struct intel_gt *gt = to_gt(i915);
701f8231 4289
8f8b1171 4290 if (!perf->i915)
442b8c06
RB
4291 return;
4292
4293 /* To be sure we're synchronized with an attempted
4294 * i915_perf_open_ioctl(), considering that we register after
4295 * the interface is already exposed to userspace.
4296 */
9677a9f3 4297 mutex_lock(&gt->perf.lock);
442b8c06 4298
8f8b1171 4299 perf->metrics_kobj =
442b8c06 4300 kobject_create_and_add("metrics",
8f8b1171 4301 &i915->drm.primary->kdev->kobj);
19f81df2 4302
9677a9f3 4303 mutex_unlock(&gt->perf.lock);
442b8c06
RB
4304}
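
/*
 * Illustrative userspace sketch (not part of the driver): once registered,
 * each OA config is exposed as metrics/<uuid>/id under the card's sysfs
 * directory, so a metric set ID can be resolved from its UUID with
 * something like (the card0 path is an assumption):
 *
 *	FILE *f = fopen("/sys/class/drm/card0/metrics/"
 *			"01234567-0123-0123-0123-0123456789ab/id", "r");
 *	uint64_t config_id = 0;
 *
 *	if (f) {
 *		fscanf(f, "%" SCNu64, &config_id);
 *		fclose(f);
 *	}
 *
 * The UUID above is a placeholder; real UUIDs come from configs added via
 * DRM_IOCTL_I915_PERF_ADD_CONFIG or shipped with the kernel.
 */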
4305
16d98b31
RB
4306/**
4307 * i915_perf_unregister - hide i915-perf from userspace
8f8b1171 4308 * @i915: i915 device instance
16d98b31
RB
4309 *
4310 * i915-perf state cleanup is split up into an 'unregister' and
4311 * 'deinit' phase where the interface is first hidden from
4312 * userspace by i915_perf_unregister() before cleaning up
4313 * remaining state in i915_perf_fini().
4314 */
8f8b1171 4315void i915_perf_unregister(struct drm_i915_private *i915)
442b8c06 4316{
8f8b1171
CW
4317 struct i915_perf *perf = &i915->perf;
4318
4319 if (!perf->metrics_kobj)
442b8c06
RB
4320 return;
4321
8f8b1171
CW
4322 kobject_put(perf->metrics_kobj);
4323 perf->metrics_kobj = NULL;
442b8c06
RB
4324}
4325
8f8b1171 4326static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
f89823c2
LL
4327{
4328 static const i915_reg_t flex_eu_regs[] = {
4329 EU_PERF_CNTL0,
4330 EU_PERF_CNTL1,
4331 EU_PERF_CNTL2,
4332 EU_PERF_CNTL3,
4333 EU_PERF_CNTL4,
4334 EU_PERF_CNTL5,
4335 EU_PERF_CNTL6,
4336 };
4337 int i;
4338
4339 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
7c52a221 4340 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
f89823c2
LL
4341 return true;
4342 }
4343 return false;
4344}
4345
66a19a3a
MR
4346static bool reg_in_range_table(u32 addr, const struct i915_range *table)
4347{
4348 while (table->start || table->end) {
4349 if (addr >= table->start && addr <= table->end)
4350 return true;
fc215230 4351
66a19a3a
MR
4352 table++;
4353 }
4354
4355 return false;
4356}
fc215230
UNR
4357
4358#define REG_EQUAL(addr, mmio) \
4359 ((addr) == i915_mmio_reg_offset(mmio))
4360
66a19a3a
MR
4361static const struct i915_range gen7_oa_b_counters[] = {
4362 { .start = 0x2710, .end = 0x272c }, /* OASTARTTRIG[1-8] */
4363 { .start = 0x2740, .end = 0x275c }, /* OAREPORTTRIG[1-8] */
4364 { .start = 0x2770, .end = 0x27ac }, /* OACEC[0-7][0-1] */
4365 {}
4366};
f89823c2 4367
66a19a3a
MR
4368static const struct i915_range gen12_oa_b_counters[] = {
4369 { .start = 0x2b2c, .end = 0x2b2c }, /* GEN12_OAG_OA_PESS */
4370 { .start = 0xd900, .end = 0xd91c }, /* GEN12_OAG_OASTARTTRIG[1-8] */
4371 { .start = 0xd920, .end = 0xd93c }, /* GEN12_OAG_OAREPORTTRIG1[1-8] */
4372 { .start = 0xd940, .end = 0xd97c }, /* GEN12_OAG_CEC[0-7][0-1] */
4373 { .start = 0xdc00, .end = 0xdc3c }, /* GEN12_OAG_SCEC[0-7][0-1] */
4374 { .start = 0xdc40, .end = 0xdc40 }, /* GEN12_OAG_SPCTR_CNF */
4375 { .start = 0xdc44, .end = 0xdc44 }, /* GEN12_OAA_DBG_REG */
4376 {}
4377};
4378
0fa9349d
LL
4379static const struct i915_range xehp_oa_b_counters[] = {
4380 { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
4381 { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
	{}
4382};
4383
66a19a3a
MR
4384static const struct i915_range gen7_oa_mux_regs[] = {
4385 { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */
4386 { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */
4387 { .start = 0xe180, .end = 0xe180 }, /* HALF_SLICE_CHICKEN2 */
4388 {}
4389};
4390
4391static const struct i915_range hsw_oa_mux_regs[] = {
4392 { .start = 0x09e80, .end = 0x09ea4 }, /* HSW_MBVID2_NOA[0-9] */
4393 { .start = 0x09ec0, .end = 0x09ec0 }, /* HSW_MBVID2_MISR0 */
4394 { .start = 0x25100, .end = 0x2ff90 },
4395 {}
4396};
4397
4398static const struct i915_range chv_oa_mux_regs[] = {
4399 { .start = 0x182300, .end = 0x1823a4 },
4400 {}
4401};
4402
4403static const struct i915_range gen8_oa_mux_regs[] = {
4404 { .start = 0x0d00, .end = 0x0d2c }, /* RPM_CONFIG[0-1], NOA_CONFIG[0-8] */
4405 { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */
4406 {}
4407};
4408
4409static const struct i915_range gen11_oa_mux_regs[] = {
4410 { .start = 0x91c8, .end = 0x91dc }, /* OA_PERFCNT[3-4] */
4411 {}
4412};
4413
4414static const struct i915_range gen12_oa_mux_regs[] = {
4415 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
4416 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
4417 { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
4418 { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
4419 { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */
4420 {}
4421};
4422
d654ae8b
UNR
4423/*
4424 * Ref: 14010536224:
4425 * 0x20cc is repurposed on MTL, so use a separate array for MTL.
4426 */
4427static const struct i915_range mtl_oa_mux_regs[] = {
4428 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
4429 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
4430 { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
4431 { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
	{}
4432};
4433
66a19a3a 4434static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
f89823c2 4435{
66a19a3a 4436 return reg_in_range_table(addr, gen7_oa_b_counters);
f89823c2
LL
4437}
4438
8f8b1171 4439static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 4440{
66a19a3a
MR
4441 return reg_in_range_table(addr, gen7_oa_mux_regs) ||
4442 reg_in_range_table(addr, gen8_oa_mux_regs);
f89823c2
LL
4443}
4444
5dae69a9 4445static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
95690a02 4446{
66a19a3a
MR
4447 return reg_in_range_table(addr, gen7_oa_mux_regs) ||
4448 reg_in_range_table(addr, gen8_oa_mux_regs) ||
4449 reg_in_range_table(addr, gen11_oa_mux_regs);
95690a02
LL
4450}
4451
8f8b1171 4452static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 4453{
66a19a3a
MR
4454 return reg_in_range_table(addr, gen7_oa_mux_regs) ||
4455 reg_in_range_table(addr, hsw_oa_mux_regs);
f89823c2
LL
4456}
4457
8f8b1171 4458static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 4459{
66a19a3a
MR
4460 return reg_in_range_table(addr, gen7_oa_mux_regs) ||
4461 reg_in_range_table(addr, chv_oa_mux_regs);
f89823c2
LL
4462}
4463
00a7f0d7
LL
4464static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4465{
66a19a3a 4466 return reg_in_range_table(addr, gen12_oa_b_counters);
00a7f0d7
LL
4467}
4468
0fa9349d
LL
4469static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4470{
4471 return reg_in_range_table(addr, xehp_oa_b_counters) ||
4472 reg_in_range_table(addr, gen12_oa_b_counters);
4473}
4474
00a7f0d7
LL
4475static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4476{
d654ae8b
UNR
4477 if (IS_METEORLAKE(perf->i915))
4478 return reg_in_range_table(addr, mtl_oa_mux_regs);
4479 else
4480 return reg_in_range_table(addr, gen12_oa_mux_regs);
00a7f0d7
LL
4481}
4482
739f3abd 4483static u32 mask_reg_value(u32 reg, u32 val)
f89823c2
LL
4484{
4485 /* HALF_SLICE_CHICKEN2 is programmed with the
4486 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
4487 * programmed by userspace doesn't change this.
4488 */
fc215230 4489 if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
f89823c2
LL
4490 val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
4491
4492 /* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
4493 * indicated by its name and a bunch of selection fields used by OA
4494 * configs.
4495 */
fc215230 4496 if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
f89823c2
LL
4497 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
4498
4499 return val;
4500}
4501
8f8b1171
CW
4502static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
4503 bool (*is_valid)(struct i915_perf *perf, u32 addr),
f89823c2
LL
4504 u32 __user *regs,
4505 u32 n_regs)
4506{
4507 struct i915_oa_reg *oa_regs;
4508 int err;
4509 u32 i;
4510
4511 if (!n_regs)
4512 return NULL;
4513
f89823c2
LL
4514 /* No is_valid function means we're not allowing any register to be programmed. */
4515 GEM_BUG_ON(!is_valid);
4516 if (!is_valid)
4517 return ERR_PTR(-EINVAL);
4518
4519 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
4520 if (!oa_regs)
4521 return ERR_PTR(-ENOMEM);
4522
4523 for (i = 0; i < n_regs; i++) {
4524 u32 addr, value;
4525
4526 err = get_user(addr, regs);
4527 if (err)
4528 goto addr_err;
4529
8f8b1171 4530 if (!is_valid(perf, addr)) {
2fec5391
UNR
4531 drm_dbg(&perf->i915->drm,
4532 "Invalid oa_reg address: %X\n", addr);
f89823c2
LL
4533 err = -EINVAL;
4534 goto addr_err;
4535 }
4536
4537 err = get_user(value, regs + 1);
4538 if (err)
4539 goto addr_err;
4540
4541 oa_regs[i].addr = _MMIO(addr);
4542 oa_regs[i].value = mask_reg_value(addr, value);
4543
4544 regs += 2;
4545 }
4546
4547 return oa_regs;
4548
4549addr_err:
4550 kfree(oa_regs);
4551 return ERR_PTR(err);
4552}
4553
18fb42db
NC
4554static ssize_t show_dynamic_id(struct kobject *kobj,
4555 struct kobj_attribute *attr,
f89823c2
LL
4556 char *buf)
4557{
4558 struct i915_oa_config *oa_config =
4559 container_of(attr, typeof(*oa_config), sysfs_metric_id);
4560
4561 return sprintf(buf, "%d\n", oa_config->id);
4562}
4563
8f8b1171 4564static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
f89823c2
LL
4565 struct i915_oa_config *oa_config)
4566{
28152a23 4567 sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
f89823c2
LL
4568 oa_config->sysfs_metric_id.attr.name = "id";
4569 oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
4570 oa_config->sysfs_metric_id.show = show_dynamic_id;
4571 oa_config->sysfs_metric_id.store = NULL;
4572
4573 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
4574 oa_config->attrs[1] = NULL;
4575
4576 oa_config->sysfs_metric.name = oa_config->uuid;
4577 oa_config->sysfs_metric.attrs = oa_config->attrs;
4578
8f8b1171 4579 return sysfs_create_group(perf->metrics_kobj,
f89823c2
LL
4580 &oa_config->sysfs_metric);
4581}
4582
4583/**
4584 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
4585 * @dev: drm device
4586 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
4587 * userspace (unvalidated)
4588 * @file: drm file
4589 *
4590 * Validates the submitted OA register to be saved into a new OA config that
4591 * can then be used for programming the OA unit and its NOA network.
4592 *
4593 * Returns: A newly allocated config number to be used with the perf open ioctl
4594 * or a negative error code on failure.
4595 */
4596int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
4597 struct drm_file *file)
4598{
8f8b1171 4599 struct i915_perf *perf = &to_i915(dev)->perf;
f89823c2
LL
4600 struct drm_i915_perf_oa_config *args = data;
4601 struct i915_oa_config *oa_config, *tmp;
c415ef2a 4602 struct i915_oa_reg *regs;
f89823c2
LL
4603 int err, id;
4604
8f8b1171 4605 if (!perf->i915) {
2fec5391
UNR
4606 drm_dbg(&perf->i915->drm,
4607 "i915 perf interface not available for this system\n");
f89823c2
LL
4608 return -ENOTSUPP;
4609 }
4610
8f8b1171 4611 if (!perf->metrics_kobj) {
2fec5391
UNR
4612 drm_dbg(&perf->i915->drm,
4613 "OA metrics weren't advertised via sysfs\n");
f89823c2
LL
4614 return -EINVAL;
4615 }
4616
4e3d3456 4617 if (i915_perf_stream_paranoid && !perfmon_capable()) {
2fec5391
UNR
4618 drm_dbg(&perf->i915->drm,
4619 "Insufficient privileges to add i915 OA config\n");
f89823c2
LL
4620 return -EACCES;
4621 }
4622
4623 if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
4624 (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
4625 (!args->flex_regs_ptr || !args->n_flex_regs)) {
2fec5391
UNR
4626 drm_dbg(&perf->i915->drm,
4627 "No OA registers given\n");
f89823c2
LL
4628 return -EINVAL;
4629 }
4630
4631 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
4632 if (!oa_config) {
2fec5391
UNR
4633 drm_dbg(&perf->i915->drm,
4634 "Failed to allocate memory for the OA config\n");
f89823c2
LL
4635 return -ENOMEM;
4636 }
4637
6a45008a
LL
4638 oa_config->perf = perf;
4639 kref_init(&oa_config->ref);
f89823c2
LL
4640
4641 if (!uuid_is_valid(args->uuid)) {
2fec5391
UNR
4642 drm_dbg(&perf->i915->drm,
4643 "Invalid uuid format for OA config\n");
f89823c2
LL
4644 err = -EINVAL;
4645 goto reg_err;
4646 }
4647
4648 /* Last character in oa_config->uuid will be 0 because oa_config is
4649 * kzalloc'd.
4650 */
4651 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
4652
4653 oa_config->mux_regs_len = args->n_mux_regs;
c2fba936
CW
4654 regs = alloc_oa_regs(perf,
4655 perf->ops.is_valid_mux_reg,
4656 u64_to_user_ptr(args->mux_regs_ptr),
4657 args->n_mux_regs);
f89823c2 4658
c2fba936 4659 if (IS_ERR(regs)) {
2fec5391
UNR
4660 drm_dbg(&perf->i915->drm,
4661 "Failed to create OA config for mux_regs\n");
c2fba936 4662 err = PTR_ERR(regs);
f89823c2
LL
4663 goto reg_err;
4664 }
c2fba936 4665 oa_config->mux_regs = regs;
f89823c2
LL
4666
4667 oa_config->b_counter_regs_len = args->n_boolean_regs;
c2fba936
CW
4668 regs = alloc_oa_regs(perf,
4669 perf->ops.is_valid_b_counter_reg,
4670 u64_to_user_ptr(args->boolean_regs_ptr),
4671 args->n_boolean_regs);
f89823c2 4672
c2fba936 4673 if (IS_ERR(regs)) {
2fec5391
UNR
4674 drm_dbg(&perf->i915->drm,
4675 "Failed to create OA config for b_counter_regs\n");
c2fba936 4676 err = PTR_ERR(regs);
f89823c2
LL
4677 goto reg_err;
4678 }
c2fba936 4679 oa_config->b_counter_regs = regs;
f89823c2 4680
651e7d48 4681 if (GRAPHICS_VER(perf->i915) < 8) {
f89823c2
LL
4682 if (args->n_flex_regs != 0) {
4683 err = -EINVAL;
4684 goto reg_err;
4685 }
4686 } else {
4687 oa_config->flex_regs_len = args->n_flex_regs;
c2fba936
CW
4688 regs = alloc_oa_regs(perf,
4689 perf->ops.is_valid_flex_reg,
4690 u64_to_user_ptr(args->flex_regs_ptr),
4691 args->n_flex_regs);
f89823c2 4692
c2fba936 4693 if (IS_ERR(regs)) {
2fec5391
UNR
4694 drm_dbg(&perf->i915->drm,
4695 "Failed to create OA config for flex_regs\n");
c2fba936 4696 err = PTR_ERR(regs);
f89823c2
LL
4697 goto reg_err;
4698 }
c2fba936 4699 oa_config->flex_regs = regs;
f89823c2
LL
4700 }
4701
8f8b1171 4702 err = mutex_lock_interruptible(&perf->metrics_lock);
f89823c2
LL
4703 if (err)
4704 goto reg_err;
4705
4706 /* We shouldn't have too many configs, so this iteration shouldn't be
4707 * too costly.
4708 */
8f8b1171 4709 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
f89823c2 4710 if (!strcmp(tmp->uuid, oa_config->uuid)) {
2fec5391
UNR
4711 drm_dbg(&perf->i915->drm,
4712 "OA config already exists with this uuid\n");
f89823c2
LL
4713 err = -EADDRINUSE;
4714 goto sysfs_err;
4715 }
4716 }
4717
8f8b1171 4718 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
f89823c2 4719 if (err) {
2fec5391
UNR
4720 drm_dbg(&perf->i915->drm,
4721 "Failed to create sysfs entry for OA config\n");
f89823c2
LL
4722 goto sysfs_err;
4723 }
4724
4725	/* Config id 0 is invalid, id 1 is reserved for the kernel's stored test config. */
8f8b1171 4726 oa_config->id = idr_alloc(&perf->metrics_idr,
f89823c2
LL
4727 oa_config, 2,
4728 0, GFP_KERNEL);
4729 if (oa_config->id < 0) {
2fec5391
UNR
4730 drm_dbg(&perf->i915->drm,
4731 "Failed to create sysfs entry for OA config\n");
f89823c2
LL
4732 err = oa_config->id;
4733 goto sysfs_err;
4734 }
4735
8f8b1171 4736 mutex_unlock(&perf->metrics_lock);
f89823c2 4737
2fec5391
UNR
4738 drm_dbg(&perf->i915->drm,
4739 "Added config %s id=%i\n", oa_config->uuid, oa_config->id);
9bd9be66 4740
f89823c2
LL
4741 return oa_config->id;
4742
4743sysfs_err:
8f8b1171 4744 mutex_unlock(&perf->metrics_lock);
f89823c2 4745reg_err:
6a45008a 4746 i915_oa_config_put(oa_config);
2fec5391
UNR
4747 drm_dbg(&perf->i915->drm,
4748 "Failed to add new OA config\n");
f89823c2
LL
4749 return err;
4750}
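
/*
 * Example (illustrative sketch, not part of the driver): how a userspace
 * tool might call DRM_IOCTL_I915_PERF_ADD_CONFIG. The single mux
 * (address, value) pair below is a placeholder and still has to pass the
 * is_valid_*_reg checks; real configurations come from published metric
 * sets for the target platform.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int add_example_oa_config(int drm_fd)
{
	/* alloc_oa_regs() consumes (address, value) pairs of u32s */
	uint32_t mux_regs[] = { 0x9888, 0x15050000 };
	struct drm_i915_perf_oa_config cfg;

	memset(&cfg, 0, sizeof(cfg));
	/* uuid is exactly 36 characters; no trailing NUL in the uapi struct */
	memcpy(cfg.uuid, "01234567-0123-0123-0123-0123456789ab",
	       sizeof(cfg.uuid));
	cfg.n_mux_regs = 1;
	cfg.mux_regs_ptr = (uintptr_t)mux_regs;

	/* on success the ioctl returns the newly allocated config id (>= 2) */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &cfg);
}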
4751
4752/**
4753 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
4754 * @dev: drm device
4755 * @data: ioctl data (pointer to u64 integer) copied from userspace
4756 * @file: drm file
4757 *
4758 * Configs can be removed while being used; they will stop appearing in sysfs
4759 * and their content will be freed when the stream using the config is closed.
4760 *
4761 * Returns: 0 on success or a negative error code on failure.
4762 */
4763int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
4764 struct drm_file *file)
4765{
8f8b1171 4766 struct i915_perf *perf = &to_i915(dev)->perf;
f89823c2
LL
4767 u64 *arg = data;
4768 struct i915_oa_config *oa_config;
4769 int ret;
4770
8f8b1171 4771 if (!perf->i915) {
2fec5391
UNR
4772 drm_dbg(&perf->i915->drm,
4773 "i915 perf interface not available for this system\n");
f89823c2
LL
4774 return -ENOTSUPP;
4775 }
4776
4e3d3456 4777 if (i915_perf_stream_paranoid && !perfmon_capable()) {
2fec5391
UNR
4778 drm_dbg(&perf->i915->drm,
4779 "Insufficient privileges to remove i915 OA config\n");
f89823c2
LL
4780 return -EACCES;
4781 }
4782
8f8b1171 4783 ret = mutex_lock_interruptible(&perf->metrics_lock);
f89823c2 4784 if (ret)
6a45008a 4785 return ret;
f89823c2 4786
8f8b1171 4787 oa_config = idr_find(&perf->metrics_idr, *arg);
f89823c2 4788 if (!oa_config) {
2fec5391
UNR
4789 drm_dbg(&perf->i915->drm,
4790 "Failed to remove unknown OA config\n");
f89823c2 4791 ret = -ENOENT;
6a45008a 4792 goto err_unlock;
f89823c2
LL
4793 }
4794
4795 GEM_BUG_ON(*arg != oa_config->id);
4796
4f6ccc74 4797 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
f89823c2 4798
8f8b1171 4799 idr_remove(&perf->metrics_idr, *arg);
9bd9be66 4800
6a45008a
LL
4801 mutex_unlock(&perf->metrics_lock);
4802
2fec5391
UNR
4803 drm_dbg(&perf->i915->drm,
4804 "Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
9bd9be66 4805
6a45008a
LL
4806 i915_oa_config_put(oa_config);
4807
4808 return 0;
f89823c2 4809
6a45008a 4810err_unlock:
8f8b1171 4811 mutex_unlock(&perf->metrics_lock);
f89823c2
LL
4812 return ret;
4813}
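
/*
 * Example (illustrative sketch, reusing the headers from the sketch above):
 * removal takes a pointer to the u64 config id that
 * DRM_IOCTL_I915_PERF_ADD_CONFIG returned.
 */
static int remove_example_oa_config(int drm_fd, uint64_t config_id)
{
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
}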
4814
ccdf6341
RB
4815static struct ctl_table oa_table[] = {
4816 {
4817 .procname = "perf_stream_paranoid",
4818 .data = &i915_perf_stream_paranoid,
4819 .maxlen = sizeof(i915_perf_stream_paranoid),
4820 .mode = 0644,
4821 .proc_handler = proc_dointvec_minmax,
eec4844f
MC
4822 .extra1 = SYSCTL_ZERO,
4823 .extra2 = SYSCTL_ONE,
ccdf6341 4824 },
00319ba0
RB
4825 {
4826 .procname = "oa_max_sample_rate",
4827 .data = &i915_oa_max_sample_rate,
4828 .maxlen = sizeof(i915_oa_max_sample_rate),
4829 .mode = 0644,
4830 .proc_handler = proc_dointvec_minmax,
eec4844f 4831 .extra1 = SYSCTL_ZERO,
00319ba0
RB
4832 .extra2 = &oa_sample_rate_hard_limit,
4833 },
ccdf6341
RB
4834 {}
4835};
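
/*
 * Example (illustrative sketch): the table above is registered under
 * "dev/i915" further down, so the two knobs surface as
 * /proc/sys/dev/i915/perf_stream_paranoid and
 * /proc/sys/dev/i915/oa_max_sample_rate. A privileged tool could relax the
 * paranoid default before handing streams to unprivileged users:
 */
#include <stdio.h>

static int allow_unprivileged_oa(void)
{
	FILE *f = fopen("/proc/sys/dev/i915/perf_stream_paranoid", "w");

	if (!f)
		return -1;

	fputs("0\n", f);
	return fclose(f);
}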
4836
5f284e9c
UNR
4837static u32 num_perf_groups_per_gt(struct intel_gt *gt)
4838{
4839 return 1;
4840}
4841
4842static u32 __oa_engine_group(struct intel_engine_cs *engine)
4843{
4844 if (engine->class == RENDER_CLASS)
4845 return PERF_GROUP_OAG;
4846 else
4847 return PERF_GROUP_INVALID;
4848}
4849
4850static int oa_init_gt(struct intel_gt *gt)
4851{
4852 u32 num_groups = num_perf_groups_per_gt(gt);
4853 struct intel_engine_cs *engine;
4854 struct i915_perf_group *g;
4855 intel_engine_mask_t tmp;
4856
4857 g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
4858 if (!g)
4859 return -ENOMEM;
4860
4861 for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
4862 u32 index = __oa_engine_group(engine);
4863
4864 engine->oa_group = NULL;
4865 if (index < num_groups) {
4866 g[index].num_engines++;
4867 engine->oa_group = &g[index];
4868 }
4869 }
4870
4871 gt->perf.num_perf_groups = num_groups;
4872 gt->perf.group = g;
4873
4874 return 0;
4875}
4876
4877static int oa_init_engine_groups(struct i915_perf *perf)
4878{
4879 struct intel_gt *gt;
4880 int i, ret;
4881
4882 for_each_gt(gt, perf->i915, i) {
4883 ret = oa_init_gt(gt);
4884 if (ret)
4885 return ret;
4886 }
4887
4888 return 0;
4889}
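
/*
 * Note: only the global OAG group exists in this version, so the setup above
 * attaches every render engine on each GT to that single group and leaves
 * engine->oa_group NULL for all other engine classes, marking them as having
 * no OA unit to sample.
 */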
4890
77892f4f
UNR
4891static void oa_init_supported_formats(struct i915_perf *perf)
4892{
4893 struct drm_i915_private *i915 = perf->i915;
4894 enum intel_platform platform = INTEL_INFO(i915)->platform;
4895
4896 switch (platform) {
4897 case INTEL_HASWELL:
4898 oa_format_add(perf, I915_OA_FORMAT_A13);
4899 oa_format_add(perf, I915_OA_FORMAT_A13);
4900 oa_format_add(perf, I915_OA_FORMAT_A29);
4901 oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8);
4902 oa_format_add(perf, I915_OA_FORMAT_B4_C8);
4903 oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8);
4904 oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16);
4905 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
4906 break;
4907
4908 case INTEL_BROADWELL:
4909 case INTEL_CHERRYVIEW:
4910 case INTEL_SKYLAKE:
4911 case INTEL_BROXTON:
4912 case INTEL_KABYLAKE:
4913 case INTEL_GEMINILAKE:
4914 case INTEL_COFFEELAKE:
4915 case INTEL_COMETLAKE:
77892f4f
UNR
4916 case INTEL_ICELAKE:
4917 case INTEL_ELKHARTLAKE:
4918 case INTEL_JASPERLAKE:
77892f4f
UNR
4919 case INTEL_TIGERLAKE:
4920 case INTEL_ROCKETLAKE:
4921 case INTEL_DG1:
4922 case INTEL_ALDERLAKE_S:
73c1bf0f 4923 case INTEL_ALDERLAKE_P:
5e4b7385
UNR
4924 oa_format_add(perf, I915_OA_FORMAT_A12);
4925 oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8);
77892f4f 4926 oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8);
5e4b7385 4927 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
77892f4f
UNR
4928 break;
4929
81d5f7d9 4930 case INTEL_DG2:
d0fa30be 4931 case INTEL_METEORLAKE:
81d5f7d9
UNR
4932 oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
4933 oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
4934 break;
4935
77892f4f
UNR
4936 default:
4937 MISSING_CASE(platform);
4938 }
4939}
4940
a5c3a3cb
UNR
4941static void i915_perf_init_info(struct drm_i915_private *i915)
4942{
4943 struct i915_perf *perf = &i915->perf;
4944
4945 switch (GRAPHICS_VER(i915)) {
4946 case 8:
4947 perf->ctx_oactxctrl_offset = 0x120;
4948 perf->ctx_flexeu0_offset = 0x2ce;
4949 perf->gen8_valid_ctx_bit = BIT(25);
4950 break;
4951 case 9:
4952 perf->ctx_oactxctrl_offset = 0x128;
4953 perf->ctx_flexeu0_offset = 0x3de;
4954 perf->gen8_valid_ctx_bit = BIT(16);
4955 break;
4956 case 11:
4957 perf->ctx_oactxctrl_offset = 0x124;
4958 perf->ctx_flexeu0_offset = 0x78e;
4959 perf->gen8_valid_ctx_bit = BIT(16);
4960 break;
4961 case 12:
4962 /*
4963 * Calculate offset at runtime in oa_pin_context for gen12 and
4964 * cache the value in perf->ctx_oactxctrl_offset.
4965 */
4966 break;
4967 default:
4968 MISSING_CASE(GRAPHICS_VER(i915));
4969 }
4970}
4971
16d98b31 4972/**
3dc716fd 4973 * i915_perf_init - initialize i915-perf state on module bind
8f8b1171 4974 * @i915: i915 device instance
16d98b31
RB
4975 *
4976 * Initializes i915-perf state without exposing anything to userspace.
4977 *
4978 * Note: i915-perf initialization is split into an 'init' and 'register'
4979 * phase, with i915_perf_register() exposing state to userspace.
4980 */
772a5803 4981int i915_perf_init(struct drm_i915_private *i915)
8f8b1171
CW
4982{
4983 struct i915_perf *perf = &i915->perf;
4984
0f15c5b0 4985 perf->oa_formats = oa_formats;
8f8b1171
CW
4986 if (IS_HASWELL(i915)) {
4987 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4988 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4989 perf->ops.is_valid_flex_reg = NULL;
4990 perf->ops.enable_metric_set = hsw_enable_metric_set;
4991 perf->ops.disable_metric_set = hsw_disable_metric_set;
4992 perf->ops.oa_enable = gen7_oa_enable;
4993 perf->ops.oa_disable = gen7_oa_disable;
4994 perf->ops.read = gen7_oa_read;
4995 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
8f8b1171 4996 } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
19f81df2
RB
4997		/* Note that although we could theoretically also support the
4998		 * legacy ringbuffer mode on BDW (and earlier iterations of
4999		 * this driver, before upstreaming, did this) it didn't seem
5000		 * worth the complexity to maintain now that BDW+ enables
5001		 * execlist mode by default.
5002		 */
8f8b1171 5003 perf->ops.read = gen8_oa_read;
a5c3a3cb 5004 i915_perf_init_info(i915);
701f8231 5005
651e7d48 5006 if (IS_GRAPHICS_VER(i915, 8, 9)) {
8f8b1171 5007 perf->ops.is_valid_b_counter_reg =
ba6b7c1a 5008 gen7_is_valid_b_counter_addr;
8f8b1171 5009 perf->ops.is_valid_mux_reg =
ba6b7c1a 5010 gen8_is_valid_mux_addr;
8f8b1171 5011 perf->ops.is_valid_flex_reg =
ba6b7c1a 5012 gen8_is_valid_flex_addr;
155e941f 5013
8f8b1171
CW
5014 if (IS_CHERRYVIEW(i915)) {
5015 perf->ops.is_valid_mux_reg =
f89823c2
LL
5016 chv_is_valid_mux_addr;
5017 }
155e941f 5018
00a7f0d7
LL
5019 perf->ops.oa_enable = gen8_oa_enable;
5020 perf->ops.oa_disable = gen8_oa_disable;
8f8b1171
CW
5021 perf->ops.enable_metric_set = gen8_enable_metric_set;
5022 perf->ops.disable_metric_set = gen8_disable_metric_set;
00a7f0d7 5023 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
5dae69a9 5024 } else if (GRAPHICS_VER(i915) == 11) {
8f8b1171 5025 perf->ops.is_valid_b_counter_reg =
95690a02 5026 gen7_is_valid_b_counter_addr;
8f8b1171 5027 perf->ops.is_valid_mux_reg =
5dae69a9 5028 gen11_is_valid_mux_addr;
8f8b1171 5029 perf->ops.is_valid_flex_reg =
95690a02
LL
5030 gen8_is_valid_flex_addr;
5031
00a7f0d7
LL
5032 perf->ops.oa_enable = gen8_oa_enable;
5033 perf->ops.oa_disable = gen8_oa_disable;
8f8b1171 5034 perf->ops.enable_metric_set = gen8_enable_metric_set;
5dae69a9 5035 perf->ops.disable_metric_set = gen11_disable_metric_set;
00a7f0d7 5036 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
651e7d48 5037 } else if (GRAPHICS_VER(i915) == 12) {
00a7f0d7 5038 perf->ops.is_valid_b_counter_reg =
0fa9349d
LL
5039 HAS_OA_SLICE_CONTRIB_LIMITS(i915) ?
5040 xehp_is_valid_b_counter_addr :
00a7f0d7
LL
5041 gen12_is_valid_b_counter_addr;
5042 perf->ops.is_valid_mux_reg =
5043 gen12_is_valid_mux_addr;
5044 perf->ops.is_valid_flex_reg =
5045 gen8_is_valid_flex_addr;
5046
5047 perf->ops.oa_enable = gen12_oa_enable;
5048 perf->ops.oa_disable = gen12_oa_disable;
5049 perf->ops.enable_metric_set = gen12_enable_metric_set;
5050 perf->ops.disable_metric_set = gen12_disable_metric_set;
5051 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
19f81df2 5052 }
19f81df2 5053 }
d7965152 5054
8f8b1171 5055 if (perf->ops.enable_metric_set) {
9677a9f3 5056 struct intel_gt *gt;
5f284e9c 5057 int i, ret;
9677a9f3
UNR
5058
5059 for_each_gt(gt, i915, i)
5060 mutex_init(&gt->perf.lock);
eec688e1 5061
f170523a 5062 /* Choose a representative limit */
2cbc876d 5063 oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
ccdf6341 5064
8f8b1171 5065 mutex_init(&perf->metrics_lock);
8d989f44 5066 idr_init_base(&perf->metrics_idr, 1);
f89823c2 5067
a37f08a8
UNR
5068 /* We set up some ratelimit state to potentially throttle any
5069 * _NOTES about spurious, invalid OA reports which we don't
5070 * forward to userspace.
5071 *
5072 * We print a _NOTE about any throttling when closing the
5073 * stream instead of waiting until driver _fini which no one
5074 * would ever see.
5075 *
5076 * Using the same limiting factors as printk_ratelimit()
5077 */
8f8b1171 5078 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
a37f08a8
UNR
5079 /* Since we use a DRM_NOTE for spurious reports it would be
5080 * inconsistent to let __ratelimit() automatically print a
5081 * warning for throttling.
5082 */
8f8b1171 5083 ratelimit_set_flags(&perf->spurious_report_rs,
a37f08a8
UNR
5084 RATELIMIT_MSG_ON_RELEASE);
5085
d1df41eb
LL
5086 ratelimit_state_init(&perf->tail_pointer_race,
5087 5 * HZ, 10);
5088 ratelimit_set_flags(&perf->tail_pointer_race,
5089 RATELIMIT_MSG_ON_RELEASE);
5090
daed3e44
LL
5091 atomic64_set(&perf->noa_programming_delay,
5092 500 * 1000 /* 500us */);
5093
8f8b1171 5094 perf->i915 = i915;
77892f4f 5095
5f284e9c 5096 ret = oa_init_engine_groups(perf);
772a5803 5097 if (ret) {
5f284e9c
UNR
5098 drm_err(&i915->drm,
5099 "OA initialization failed %d\n", ret);
772a5803
UNR
5100 return ret;
5101 }
5f284e9c 5102
77892f4f 5103 oa_init_supported_formats(perf);
19f81df2 5104 }
772a5803
UNR
5105
5106 return 0;
eec688e1
RB
5107}
5108
f89823c2
LL
5109static int destroy_config(int id, void *p, void *data)
5110{
6a45008a 5111 i915_oa_config_put(p);
f89823c2
LL
5112 return 0;
5113}
5114
a04ea6ae 5115int i915_perf_sysctl_register(void)
3dc716fd 5116{
e5a1fd99 5117 sysctl_header = register_sysctl("dev/i915", oa_table);
a04ea6ae 5118 return 0;
3dc716fd
VSD
5119}
5120
5121void i915_perf_sysctl_unregister(void)
5122{
5123 unregister_sysctl_table(sysctl_header);
5124}
5125
16d98b31
RB
5126/**
5127 * i915_perf_fini - Counterpart to i915_perf_init()
8f8b1171 5128 * @i915: i915 device instance
16d98b31 5129 */
8f8b1171 5130void i915_perf_fini(struct drm_i915_private *i915)
eec688e1 5131{
8f8b1171 5132 struct i915_perf *perf = &i915->perf;
5f284e9c
UNR
5133 struct intel_gt *gt;
5134 int i;
eec688e1 5135
8f8b1171
CW
5136 if (!perf->i915)
5137 return;
f89823c2 5138
5f284e9c
UNR
5139 for_each_gt(gt, perf->i915, i)
5140 kfree(gt->perf.group);
5141
8f8b1171
CW
5142 idr_for_each(&perf->metrics_idr, destroy_config, perf);
5143 idr_destroy(&perf->metrics_idr);
ccdf6341 5144
8f8b1171
CW
5145 memset(&perf->ops, 0, sizeof(perf->ops));
5146 perf->i915 = NULL;
eec688e1 5147}
daed3e44 5148
b8d49f28
LL
5149/**
5150 * i915_perf_ioctl_version - Version of the i915-perf subsystem
5151 *
5152 * This version number is used by userspace to detect available features.
5153 */
5154int i915_perf_ioctl_version(void)
5155{
7831e9a9
CW
5156 /*
5157 * 1: Initial version
5158 * I915_PERF_IOCTL_ENABLE
5159 * I915_PERF_IOCTL_DISABLE
5160 *
5161 * 2: Added runtime modification of OA config.
5162 * I915_PERF_IOCTL_CONFIG
9cd20ef7
LL
5163 *
5164 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
5165 * preemption on a particular context so that performance data is
5166 * accessible from a delta of MI_RPC reports without looking at the
5167 * OA buffer.
11ecbddd
LL
5168 *
5169 * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can
5170 * be run for the duration of the performance recording based on
5171 * their SSEU configuration.
4ef10fe0
LL
5172 *
5173 * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
5174 * interval for the hrtimer used to check for OA data.
c61d04c9
UNR
5175 *
5176 * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
5177 * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
7831e9a9 5178 */
c61d04c9 5179 return 6;
b8d49f28
LL
5180}
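
/*
 * Example (illustrative sketch, not part of the driver): userspace can read
 * this revision through I915_PARAM_PERF_REVISION and, when it is >= 6, pick
 * the OA unit by engine with the two new properties. The metrics set id
 * (the kernel test config is id 1) and the OA format below are illustrative
 * only and must match what the platform actually supports.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int open_render_oa_stream(int drm_fd, uint64_t metrics_set)
{
	int revision = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_PERF_REVISION,
		.value = &revision,
	};
	uint64_t props[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA,		1,
		DRM_I915_PERF_PROP_OA_METRICS_SET,	metrics_set,
		DRM_I915_PERF_PROP_OA_FORMAT,		I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT,		16,
		DRM_I915_PERF_PROP_OA_ENGINE_CLASS,	I915_ENGINE_CLASS_RENDER,
		DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE,	0,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)props,
	};

	/* on pre-6 kernels drop the two trailing engine class/instance properties */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) || revision < 6)
		param.num_properties -= 2;

	/* returns a new stream fd whose read() yields OA sample records */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}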
5181
daed3e44
LL
5182#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5183#include "selftests/i915_perf.c"
5184#endif