/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */


/**
 * DOC: i915 Perf, streaming API for GPU metrics
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 *
 *
 * The interface was initially inspired by the core perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event": a
 * perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key, value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before it
 * would be acceptable to expose them to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
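 * As a rough sketch of how that looks from userspace (the drm_fd,
 * metrics_set_id and period_exponent variables here are hypothetical, while
 * the ioctl, flag, format and property names come from the i915 uapi
 * headers, and drmIoctl() from libdrm), opening an OA stream might be:
 *
 *   uint64_t properties[] = {
 *           DRM_I915_PERF_PROP_SAMPLE_OA, true,
 *           DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *           DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
 *           DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
 *   };
 *   struct drm_i915_perf_open_param param = {
 *           .flags = I915_PERF_FLAG_FD_CLOEXEC,
 *           .num_properties = 4,
 *           .properties_ptr = (uintptr_t)properties,
 *   };
 *   int stream_fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * where num_properties counts the (key, value) pairs, and after which each
 * read() of stream_fd returns whole records, each starting with a
 * struct drm_i915_perf_record_header.
 *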
 *
 * Some notes regarding Linux Perf:
 * --------------------------------
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's current cpu-centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
 * expecting the interface to be used by a platform specific userspace such as
 * OpenGL or tools. That is to say: we aren't inherently missing out on having
 * a standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might re-visit trying to adapt core perf to be
 * better suited to exposing i915 metrics these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of a kind.
 *
 *   Given the way we were periodically forwarding data from the GPU-mapped, OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf like
 *   we were sampling too fast and so we had to subvert its throttling checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 * - As a side note on perf's grouping feature; there was also some concern
 *   that using PERF_FORMAT_GROUP as a way to pack together counter values
 *   would quite drastically inflate our sample sizes, which would likely
 *   lower the effective sampling resolutions we could use when the available
 *   memory bandwidth is limited.
 *
 *   With the OA unit's report formats, counters are packed together as 32
 *   or 40bit values, with the largest report size being 256 bytes.
 *
 *   PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *   documented ordering to the values, implying PERF_FORMAT_ID must also be
 *   used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 *   Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 * - The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example; Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu related to neither. Events opened with a
 *   pid will be automatically enabled/disabled according to the scheduling of
 *   that process - so not appropriate for us. When an event is related to a
 *   cpu id, perf ensures pmu methods will be invoked via an inter-processor
 *   interrupt on that core. To avoid invasive changes our userspace opened OA
 *   perf events for a specific cpu. This was workable but it meant the
 *   majority of the OA driver ran in atomic context, including all OA report
 *   forwarding, which wasn't really necessary in our case and made our
 *   locking requirements somewhat complex as we handled the interaction
 *   with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>

#include "i915_drv.h"
#include "i915_oa_hsw.h"

/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))

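/* For example OA_TAKEN() still reports the number of bytes available once
 * the pointers have wrapped; with hypothetical (purely illustrative) values:
 *
 *   OA_TAKEN(0x40, OA_BUFFER_SIZE - 0x40) == 0x80
 *
 * i.e. 0x40 bytes at the end of the buffer plus 0x40 bytes from the start.
 */
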
/* There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far.
 *
 * Although this can be observed explicitly by checking for a zeroed report-id
 * field in tail reports, it seems preferable to account for this earlier e.g.
 * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
 * in this situation.
 *
 * To give time for the most recent reports to land before they may be copied to
 * userspace, the driver operates as if the tail pointer effectively lags behind
 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
 * based on this constant in nanoseconds, the current OA sampling exponent
 * and current report size.
 *
 * There is also a fallback check while reading to simply skip over reports with
 * a zeroed report-id.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL

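/* As a rough worked example (a sketch, not a quote from the code below): at
 * the fastest Haswell sampling period of 160ns (exponent 0) with 256 byte
 * reports, i915_oa_stream_init() computes a margin of
 * (100000 / 160 + 1) * 256 = 160256 bytes.
 */
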
/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static int zero;
static int one = 1;
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31
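
/* Concretely (assuming the ~12.5MHz Haswell timestamp frequency implied by
 * the 160ns minimum period noted below): the sampling period for an exponent
 * is (2^(exponent + 1)) / timestamp_frequency, so OA_EXPONENT_MAX = 31 gives
 * 2^32 / 12500000Hz ~= 343 seconds, which is also roughly how long a 32bit
 * OA report timestamp takes to wrap.
 */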

#define INVALID_CTX_ID 0xffffffff


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * 160ns is the smallest sampling period we can theoretically program the OA
 * unit with on Haswell, corresponding to 6.25MHz.
 */
static int oa_sample_rate_hard_limit = 6250000;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;

/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

#define SAMPLE_OA_REPORT	(1<<0)

struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
 *
 * It's safe to read OA config state here unlocked, assuming that this is only
 * called while the stream is enabled, while the global OA configuration can't
 * be modified.
 *
 * Note: we don't lock around the head/tail reads even though there's the slim
 * possibility of read() fop errors forcing a re-init of the OA buffer
 * pointers. A race here could result in a false positive !empty status which
 * is acceptable.
 */
static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
	u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
	u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;

	return OA_TAKEN(tail, head) <
		dev_priv->perf.oa.tail_margin + report_size;
}

/**
 * Appends a status record to a userspace read() buffer.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}
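
/* For reference, the uapi record header userspace sees at the start of every
 * record (a reminder of the i915_drm.h definition, not a new struct):
 *
 *   struct drm_i915_perf_record_header {
 *           __u32 type;
 *           __u16 pad;
 *           __u16 size;
 *   };
 *
 * with any payload, such as a raw OA report, following immediately after.
 */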

/**
 * Copies a single OA report into a userspace read() buffer.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}

/**
 * Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @head_ptr: (inout): the current oa buffer cpu read position
 * @tail: the current oa buffer gpu write position
 *
 * Returns 0 on success, negative error code on failure.
 *
 * Notably any error condition resulting in a short read (-ENOSPC or
 * -EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the head chases the tail?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset,
				  u32 *head_ptr,
				  u32 tail)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	int tail_margin = dev_priv->perf.oa.tail_margin;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	u32 head;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	head = *head_ptr - gtt_offset;
	tail -= gtt_offset;

	/* The OA unit is expected to wrap the tail pointer according to the OA
	 * buffer size and since we should never write a misaligned head
	 * pointer we don't expect to read one back either...
	 */
	if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
	    head % report_size) {
		DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
			  head, tail);
		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);
		*head_ptr = I915_READ(GEN7_OASTATUS2) &
			GEN7_OASTATUS2_HEAD_MASK;
		return -EIO;
	}

	/* The tail pointer increases in 64 byte increments, not in report_size
	 * steps...
	 */
	tail &= ~(report_size - 1);

	/* Move the tail pointer back by the current tail_margin to account for
	 * the possibility that the latest reports may not have really landed
	 * in memory yet...
	 */

	if (OA_TAKEN(tail, head) < report_size + tail_margin)
		return -EAGAIN;

	tail -= tail_margin;
	tail &= mask;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			DRM_ERROR("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	*head_ptr = gtt_offset + head;

	return ret;
}

static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u32 oastatus2;
	u32 oastatus1;
	u32 head;
	u32 tail;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus2 = I915_READ(GEN7_OASTATUS2);
	oastatus1 = I915_READ(GEN7_OASTATUS1);

	head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
	tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_ERROR("OA buffer overflow: force restart\n");

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		oastatus2 = I915_READ(GEN7_OASTATUS2);
		oastatus1 = I915_READ(GEN7_OASTATUS1);

		head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
		tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	ret = gen7_append_oa_reports(stream, buf, count, offset,
				     &head, tail);

	/* All the report sizes are a power of two and the
	 * head should always be incremented by some multiple
	 * of the report size.
	 *
	 * A warning here, but notably if we later read back a
	 * misaligned pointer we will treat that as a bug since
	 * it could lead to a buffer overrun.
	 */
	WARN_ONCE(head & (report_size - 1),
		  "i915: Writing misaligned OA head pointer");

	/* Note: we update the head pointer here even if an error
	 * was returned since the error may represent a short read
	 * where some reports were successfully copied.
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   ((head & GEN7_OASTATUS2_HEAD_MASK) |
		    OA_MEM_SELECT_GGTT));

	return ret;
}

static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	/* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
	 * just performs mmio reads of the OA buffer head + tail pointers and
	 * it's assumed we're handling some operation that implies the stream
	 * can't be destroyed until completion (such as a read()) that ensures
	 * the device + OA buffer can't disappear.
	 */
	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
}

static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}

static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}

/* Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_vma *vma;
	int ret;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	/* As the ID is the gtt offset of the context's vma we pin
	 * the vma to ensure the ID remains fixed.
	 *
	 * NB: implied RCS engine...
	 */
	vma = i915_gem_context_pin_legacy(stream->ctx, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto unlock;
	}

	dev_priv->perf.oa.pinned_rcs_vma = vma;

	/* Explicitly track the ID (instead of calling i915_ggtt_offset()
	 * on the fly) considering the difference with gen8+ and
	 * execlists
	 */
	dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return ret;
}

static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->drm.struct_mutex);

	i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
	dev_priv->perf.oa.pinned_rcs_vma = NULL;

	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;

	mutex_unlock(&dev_priv->drm.struct_mutex);
}

static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);

	i915->perf.oa.oa_buffer.vma = NULL;
	i915->perf.oa.oa_buffer.vaddr = NULL;

	mutex_unlock(&i915->drm.struct_mutex);
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	dev_priv->perf.oa.exclusive_stream = NULL;
}

static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
	I915_WRITE(GEN7_OABUFFER, gtt_offset);
	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   int n_regs)
{
	int i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}

static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
{
	int ret = i915_oa_select_metric_set_hsw(dev_priv);

	if (ret)
		return ret;

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) |
				      GT_NOA_ENABLE));

	/* PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Unit level clock
	 * gating for RCS should also be disabled.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs,
		       dev_priv->perf.oa.mux_regs_len);

	/* It apparently takes a fairly long time for a new MUX
	 * configuration to be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've mischaracterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs,
		       dev_priv->perf.oa.b_counter_regs_len);

	return 0;
}

static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
{
	assert_spin_locked(&dev_priv->perf.hook_lock);

	if (dev_priv->perf.oa.exclusive_stream->enabled) {
		struct i915_gem_context *ctx =
			dev_priv->perf.oa.exclusive_stream->ctx;
		u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;

		bool periodic = dev_priv->perf.oa.periodic;
		u32 period_exponent = dev_priv->perf.oa.period_exponent;
		u32 report_format = dev_priv->perf.oa.oa_buffer.format;

		I915_WRITE(GEN7_OACONTROL,
			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
			   (period_exponent <<
			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
			   GEN7_OACONTROL_ENABLE);
	} else
		I915_WRITE(GEN7_OACONTROL, 0);
}

static void gen7_oa_enable(struct drm_i915_private *dev_priv)
{
	unsigned long flags;

	/* Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen7_init_oa_buffer(dev_priv);

	spin_lock_irqsave(&dev_priv->perf.hook_lock, flags);
	gen7_update_oacontrol_locked(dev_priv);
	spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags);
}

static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN7_OACONTROL, 0);
}

static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}

static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
{
	return div_u64(1000000000ULL * (2ULL << exponent),
		       dev_priv->perf.oa.timestamp_frequency);
}
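
/* For example (assuming the ~12.5MHz Haswell timestamp frequency):
 * oa_exponent_to_ns(dev_priv, 0) = 1000000000 * 2 / 12500000 = 160ns,
 * matching the minimum Haswell sampling period noted above for
 * oa_sample_rate_hard_limit.
 */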

static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};

static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/* If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config
	 * IDs
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_ERROR("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_ERROR("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
		DRM_ERROR("OA unit not supported\n");
		return -ENODEV;
	}

	/* To avoid the complexity of having to accurately filter
	 * counter reports and marshal to the appropriate client
	 * we currently only allow exclusive access
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_ERROR("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->metrics_set) {
		DRM_ERROR("OA metric set not specified\n");
		return -EINVAL;
	}

	if (!props->oa_format) {
		DRM_ERROR("OA report format not specified\n");
		return -EINVAL;
	}

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.metrics_set = props->metrics_set;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic) {
		u32 tail;

		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

		/* See comment for OA_TAIL_MARGIN_NSEC for details
		 * about this tail_margin...
		 */
		tail = div64_u64(OA_TAIL_MARGIN_NSEC,
				 oa_exponent_to_ns(dev_priv,
						   props->oa_period_exponent));
		dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
	}

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret)
			return ret;
	}

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	/* PRM - observability performance counters:
	 *
	 * OACONTROL, performance counter enable, note:
	 *
	 * "When this bit is set, in order to have coherent counts,
	 * RC6 power state and trunk clock gating must be disabled.
	 * This can be achieved by programming MMIO registers as
	 * 0xA094=0 and 0xA090[31]=1"
	 *
	 * In our case we are expecting that taking pm + FORCEWAKE
	 * references will effectively disable RC6.
	 */
	intel_runtime_pm_get(dev_priv);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
	if (ret)
		goto err_enable;

	stream->ops = &i915_oa_stream_ops;

	dev_priv->perf.oa.exclusive_stream = stream;

	return 0;

err_enable:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}

static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
				     struct file *file,
				     char __user *buf,
				     size_t count,
				     loff_t *ppos)
{
	/* Note we keep the offset (aka bytes read) separate from any
	 * error status so that the final check for whether we return
	 * the bytes read with a higher precedence than any error (see
	 * comment below) doesn't need to be handled/duplicated in
	 * stream->ops->read() implementations.
	 */
	size_t offset = 0;
	int ret = stream->ops->read(stream, buf, count, &offset);

	/* If we've successfully copied any data then reporting that
	 * takes precedence over any internal error status, so the
	 * data isn't lost.
	 *
	 * For example ret will be -ENOSPC whenever there is more
	 * buffered data than can be copied to userspace, but that's
	 * only interesting if we weren't able to copy some data
	 * because it implies the userspace buffer is too small to
	 * receive a single record (and we never split records).
	 *
	 * Another case with ret == -EFAULT is more of a grey area
	 * since it would seem like bad form for userspace to ask us
	 * to overrun its buffer, but the user knows best:
	 *
	 * http://yarchive.net/comp/linux/partial_reads_writes.html
	 */
	return offset ?: (ret ?: -EAGAIN);
}

static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/* To ensure it's handled consistently we simply treat all reads of a
	 * disabled stream as an error. In particular it might otherwise lead
	 * to a deadlock for blocking file descriptors...
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/* There's the small chance of false positives from
		 * stream->ops->wait_unlocked.
		 *
		 * E.g. with single context filtering since we only wait until
		 * oabuffer has >= 1 report we don't immediately know whether
		 * any reports really belong to the current context.
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	if (ret >= 0) {
		/* Maybe make ->pollin per-stream state if we support multiple
		 * concurrent streams in the future.
		 */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}

static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
					  struct i915_perf_stream *stream,
					  struct file *file,
					  poll_table *wait)
{
	unsigned int events = 0;

	stream->ops->poll_wait(stream, file, wait);

	/* Note: we don't explicitly check whether there's something to read
	 * here since this path may be very hot depending on what else
	 * userspace is polling, or on the timeout in use. We rely solely on
	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
	 * samples to read.
	 */
	if (dev_priv->perf.oa.pollin)
		events |= POLLIN;

	return events;
}

static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}

static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}
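
/* A minimal userspace sketch (stream_fd being a hypothetical fd returned by
 * DRM_IOCTL_I915_PERF_OPEN) of driving these ioctls on a stream opened with
 * I915_PERF_FLAG_DISABLED:
 *
 *   ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 *   ... read() whole sample records ...
 *   ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
 */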

static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx) {
		mutex_lock(&dev_priv->drm.struct_mutex);
		i915_gem_context_put(stream->ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	kfree(stream);
}

static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	return 0;
}


static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
};


static struct i915_gem_context *
lookup_context(struct drm_i915_private *dev_priv,
	       struct drm_i915_file_private *file_priv,
	       u32 ctx_user_handle)
{
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ERR_PTR(ret);

	ctx = i915_gem_context_lookup(file_priv, ctx_user_handle);
	if (!IS_ERR(ctx))
		i915_gem_context_get(ctx);

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return ctx;
}
1367 | ||
1368 | static int | |
1369 | i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, | |
1370 | struct drm_i915_perf_open_param *param, | |
1371 | struct perf_open_properties *props, | |
1372 | struct drm_file *file) | |
1373 | { | |
1374 | struct i915_gem_context *specific_ctx = NULL; | |
1375 | struct i915_perf_stream *stream = NULL; | |
1376 | unsigned long f_flags = 0; | |
1377 | int stream_fd; | |
1378 | int ret; | |
1379 | ||
1380 | if (props->single_context) { | |
1381 | u32 ctx_handle = props->ctx_handle; | |
1382 | struct drm_i915_file_private *file_priv = file->driver_priv; | |
1383 | ||
1384 | specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle); | |
1385 | if (IS_ERR(specific_ctx)) { | |
1386 | ret = PTR_ERR(specific_ctx); | |
1387 | if (ret != -EINTR) | |
1388 | DRM_ERROR("Failed to look up context with ID %u for opening perf stream\n", | |
1389 | ctx_handle); | |
1390 | goto err; | |
1391 | } | |
1392 | } | |
1393 | ||
ccdf6341 RB |
1394 | /* Similar to perf's kernel.perf_event_paranoid sysctl option,
1395 | * we check a dev.i915.perf_stream_paranoid sysctl option | |
1396 | * to determine if it's OK to access system-wide OA counters | |
1397 | * without CAP_SYS_ADMIN privileges. | |
1398 | */ | |
1399 | if (!specific_ctx && | |
1400 | i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { | |
eec688e1 RB |
1401 | DRM_ERROR("Insufficient privileges to open system-wide i915 perf stream\n"); |
1402 | ret = -EACCES; | |
1403 | goto err_ctx; | |
1404 | } | |
1405 | ||
1406 | stream = kzalloc(sizeof(*stream), GFP_KERNEL); | |
1407 | if (!stream) { | |
1408 | ret = -ENOMEM; | |
1409 | goto err_ctx; | |
1410 | } | |
1411 | ||
eec688e1 RB |
1412 | stream->dev_priv = dev_priv; |
1413 | stream->ctx = specific_ctx; | |
1414 | ||
d7965152 RB |
1415 | ret = i915_oa_stream_init(stream, param, props); |
1416 | if (ret) | |
1417 | goto err_alloc; | |
1418 | ||
1419 | /* We avoid simply assigning stream->sample_flags = props->sample_flags | |
1420 | * so that _stream_init() can check the combination of sample flags more | |
1421 | * thoroughly; still, this is the expected result at this point. | |
eec688e1 | 1422 | */ |
d7965152 RB |
1423 | if (WARN_ON(stream->sample_flags != props->sample_flags)) { |
1424 | ret = -ENODEV; | |
1425 | goto err_alloc; | |
1426 | } | |
eec688e1 RB |
1427 | |
1428 | list_add(&stream->link, &dev_priv->perf.streams); | |
1429 | ||
1430 | if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) | |
1431 | f_flags |= O_CLOEXEC; | |
1432 | if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) | |
1433 | f_flags |= O_NONBLOCK; | |
1434 | ||
1435 | stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); | |
1436 | if (stream_fd < 0) { | |
1437 | ret = stream_fd; | |
1438 | goto err_open; | |
1439 | } | |
1440 | ||
1441 | if (!(param->flags & I915_PERF_FLAG_DISABLED)) | |
1442 | i915_perf_enable_locked(stream); | |
1443 | ||
1444 | return stream_fd; | |
1445 | ||
1446 | err_open: | |
1447 | list_del(&stream->link); | |
1448 | if (stream->ops->destroy) | |
1449 | stream->ops->destroy(stream); | |
1450 | err_alloc: | |
1451 | kfree(stream); | |
1452 | err_ctx: | |
1453 | if (specific_ctx) { | |
1454 | mutex_lock(&dev_priv->drm.struct_mutex); | |
1455 | i915_gem_context_put(specific_ctx); | |
1456 | mutex_unlock(&dev_priv->drm.struct_mutex); | |
1457 | } | |
1458 | err: | |
1459 | return ret; | |
1460 | } | |
1461 | ||
1462 | /* Note we copy the properties from userspace outside of the i915 perf | |
1463 | * mutex to avoid an awkward lockdep dependency with mmap_sem. | |
1464 | * | |
1465 | * Note this function only validates properties in isolation; it doesn't | |
1466 | * validate that the combination of properties makes sense or that all | |
1467 | * properties necessary for a particular kind of stream have been set. | |
1468 | */ | |
1469 | static int read_properties_unlocked(struct drm_i915_private *dev_priv, | |
1470 | u64 __user *uprops, | |
1471 | u32 n_props, | |
1472 | struct perf_open_properties *props) | |
1473 | { | |
1474 | u64 __user *uprop = uprops; | |
1475 | int i; | |
1476 | ||
1477 | memset(props, 0, sizeof(*props)); | |
1478 | ||
1479 | if (!n_props) { | |
1480 | DRM_ERROR("No i915 perf properties given"); | |
1481 | return -EINVAL; | |
1482 | } | |
1483 | ||
1484 | /* Considering that ID = 0 is reserved, and assuming that we don't | |
1485 | * (currently) expect any configurations to ever specify duplicate | |
1486 | * values for a particular property ID, the last _PROP_MAX value is | |
1487 | * one greater than the maximum number of properties we expect to get | |
1488 | * from userspace. | |
1489 | */ | |
1490 | if (n_props >= DRM_I915_PERF_PROP_MAX) { | |
1491 | DRM_ERROR("More i915 perf properties specified than exist"); | |
1492 | return -EINVAL; | |
1493 | } | |
1494 | ||
1495 | for (i = 0; i < n_props; i++) { | |
00319ba0 | 1496 | u64 oa_period, oa_freq_hz; |
eec688e1 RB |
1497 | u64 id, value; |
1498 | int ret; | |
1499 | ||
1500 | ret = get_user(id, uprop); | |
1501 | if (ret) | |
1502 | return ret; | |
1503 | ||
1504 | ret = get_user(value, uprop + 1); | |
1505 | if (ret) | |
1506 | return ret; | |
1507 | ||
1508 | switch ((enum drm_i915_perf_property_id)id) { | |
1509 | case DRM_I915_PERF_PROP_CTX_HANDLE: | |
1510 | props->single_context = 1; | |
1511 | props->ctx_handle = value; | |
1512 | break; | |
d7965152 RB |
1513 | case DRM_I915_PERF_PROP_SAMPLE_OA: |
1514 | props->sample_flags |= SAMPLE_OA_REPORT; | |
1515 | break; | |
1516 | case DRM_I915_PERF_PROP_OA_METRICS_SET: | |
1517 | if (value == 0 || | |
1518 | value > dev_priv->perf.oa.n_builtin_sets) { | |
1519 | DRM_ERROR("Unknown OA metric set ID\n"); | |
1520 | return -EINVAL; | |
1521 | } | |
1522 | props->metrics_set = value; | |
1523 | break; | |
1524 | case DRM_I915_PERF_PROP_OA_FORMAT: | |
1525 | if (value == 0 || value >= I915_OA_FORMAT_MAX) { | |
1526 | DRM_ERROR("Out-of-range OA report format %llu\n", value); | |
1527 | return -EINVAL; | |
1528 | } | |
1529 | if (!dev_priv->perf.oa.oa_formats[value].size) { | |
1530 | DRM_ERROR("Unsupported OA report format %llu\n", value); | |
1531 | return -EINVAL; | |
1532 | } | |
1533 | props->oa_format = value; | |
1534 | break; | |
1535 | case DRM_I915_PERF_PROP_OA_EXPONENT: | |
1536 | if (value > OA_EXPONENT_MAX) { | |
1537 | DRM_ERROR("OA timer exponent too high (> %u)\n", | |
1538 | OA_EXPONENT_MAX); | |
1539 | return -EINVAL; | |
1540 | } | |
1541 | ||
00319ba0 | 1542 | /* Theoretically we can program the OA unit to sample |
d7965152 RB |
1543 | * every 160ns, but we don't allow that by default | |
1544 | * unless root. | |
1545 | * | |
00319ba0 RB |
1546 | * On Haswell the period is derived from the exponent |
1547 | * as: | |
1548 | * | |
1549 | * period = 80ns * 2^(exponent + 1) | |
1550 | */ | |
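/* Worked example (illustrative): an exponent of 5 gives a period of
 * 80ns * 2^(5 + 1) = 5120ns between samples, i.e. a rate of roughly
 * 195312Hz, which would exceed the default 100000Hz
 * dev.i915.oa_max_sample_rate checked below and therefore require
 * CAP_SYS_ADMIN.
 */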
1551 | BUILD_BUG_ON(sizeof(oa_period) != 8); | |
1552 | oa_period = 80ull * (2ull << value); | |
1553 | ||
1554 | /* This check is primarily to ensure that oa_period <= | |
1555 | * UINT32_MAX (before passing to do_div which only | |
1556 | * accepts a u32 denominator), but we can also skip | |
1557 | * checking anything < 1Hz, which implicitly can't be | |
1558 | * limited via an integer oa_max_sample_rate. | |
d7965152 | 1559 | */ |
00319ba0 RB |
1560 | if (oa_period <= NSEC_PER_SEC) { |
1561 | u64 tmp = NSEC_PER_SEC; | |
1562 | do_div(tmp, oa_period); | |
1563 | oa_freq_hz = tmp; | |
1564 | } else | |
1565 | oa_freq_hz = 0; | |
1566 | ||
1567 | if (oa_freq_hz > i915_oa_max_sample_rate && | |
1568 | !capable(CAP_SYS_ADMIN)) { | |
1569 | DRM_ERROR("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", | |
1570 | i915_oa_max_sample_rate); | |
d7965152 RB |
1571 | return -EACCES; |
1572 | } | |
1573 | ||
1574 | props->oa_periodic = true; | |
1575 | props->oa_period_exponent = value; | |
1576 | break; | |
eec688e1 RB |
1577 | default: |
1578 | MISSING_CASE(id); | |
1579 | DRM_ERROR("Unknown i915 perf property ID\n"); | |
1580 | return -EINVAL; | |
1581 | } | |
1582 | ||
1583 | uprop += 2; | |
1584 | } | |
1585 | ||
1586 | return 0; | |
1587 | } | |
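/* Illustrative sketch (not part of the driver): userspace lays the
 * properties out as a flat array of (u64 id, u64 value) pairs, e.g.
 * for a periodic system-wide OA stream (the metrics set ID 1 is a
 * made-up placeholder):
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA,	   1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,
 *		DRM_I915_PERF_PROP_OA_FORMAT,	   I915_OA_FORMAT_A45_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT,	   16,
 *	};
 */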
1588 | ||
1589 | int i915_perf_open_ioctl(struct drm_device *dev, void *data, | |
1590 | struct drm_file *file) | |
1591 | { | |
1592 | struct drm_i915_private *dev_priv = dev->dev_private; | |
1593 | struct drm_i915_perf_open_param *param = data; | |
1594 | struct perf_open_properties props; | |
1595 | u32 known_open_flags; | |
1596 | int ret; | |
1597 | ||
1598 | if (!dev_priv->perf.initialized) { | |
1599 | DRM_ERROR("i915 perf interface not available for this system\n"); | |
1600 | return -ENOTSUPP; | |
1601 | } | |
1602 | ||
1603 | known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | | |
1604 | I915_PERF_FLAG_FD_NONBLOCK | | |
1605 | I915_PERF_FLAG_DISABLED; | |
1606 | if (param->flags & ~known_open_flags) { | |
1607 | DRM_ERROR("Unknown drm_i915_perf_open_param flag\n"); | |
1608 | return -EINVAL; | |
1609 | } | |
1610 | ||
1611 | ret = read_properties_unlocked(dev_priv, | |
1612 | u64_to_user_ptr(param->properties_ptr), | |
1613 | param->num_properties, | |
1614 | &props); | |
1615 | if (ret) | |
1616 | return ret; | |
1617 | ||
1618 | mutex_lock(&dev_priv->perf.lock); | |
1619 | ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); | |
1620 | mutex_unlock(&dev_priv->perf.lock); | |
1621 | ||
1622 | return ret; | |
1623 | } | |
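/* Illustrative sketch (not part of the driver): opening a stream from
 * userspace, reusing the `properties` array sketched above; `drm_fd`
 * is assumed to be an open DRM node:
 *
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC |
 *			 I915_PERF_FLAG_FD_NONBLOCK,
 *		.num_properties = sizeof(properties) /
 *				  (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * Note num_properties counts (id, value) pairs, not u64s.
 */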
1624 | ||
442b8c06 RB |
1625 | void i915_perf_register(struct drm_i915_private *dev_priv) |
1626 | { | |
1627 | if (!IS_HASWELL(dev_priv)) | |
1628 | return; | |
1629 | ||
1630 | if (!dev_priv->perf.initialized) | |
1631 | return; | |
1632 | ||
1633 | /* Take the perf lock to be sure we're synchronized with any | |
1634 | * attempted i915_perf_open_ioctl(), considering that we register | |
1635 | * after being exposed to userspace. | |
1636 | */ | |
1637 | mutex_lock(&dev_priv->perf.lock); | |
1638 | ||
1639 | dev_priv->perf.metrics_kobj = | |
1640 | kobject_create_and_add("metrics", | |
1641 | &dev_priv->drm.primary->kdev->kobj); | |
1642 | if (!dev_priv->perf.metrics_kobj) | |
1643 | goto exit; | |
1644 | ||
1645 | if (i915_perf_register_sysfs_hsw(dev_priv)) { | |
1646 | kobject_put(dev_priv->perf.metrics_kobj); | |
1647 | dev_priv->perf.metrics_kobj = NULL; | |
1648 | } | |
1649 | ||
1650 | exit: | |
1651 | mutex_unlock(&dev_priv->perf.lock); | |
1652 | } | |
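/* Note (illustrative): the kobject created above typically appears as
 * /sys/class/drm/card0/metrics/, under which
 * i915_perf_register_sysfs_hsw() adds one directory per HSW metric
 * set.
 */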
1653 | ||
1654 | void i915_perf_unregister(struct drm_i915_private *dev_priv) | |
1655 | { | |
1656 | if (!IS_HASWELL(dev_priv)) | |
1657 | return; | |
1658 | ||
1659 | if (!dev_priv->perf.metrics_kobj) | |
1660 | return; | |
1661 | ||
1662 | i915_perf_unregister_sysfs_hsw(dev_priv); | |
1663 | ||
1664 | kobject_put(dev_priv->perf.metrics_kobj); | |
1665 | dev_priv->perf.metrics_kobj = NULL; | |
1666 | } | |
1667 | ||
ccdf6341 RB |
1668 | static struct ctl_table oa_table[] = { |
1669 | { | |
1670 | .procname = "perf_stream_paranoid", | |
1671 | .data = &i915_perf_stream_paranoid, | |
1672 | .maxlen = sizeof(i915_perf_stream_paranoid), | |
1673 | .mode = 0644, | |
1674 | .proc_handler = proc_dointvec_minmax, | |
1675 | .extra1 = &zero, | |
1676 | .extra2 = &one, | |
1677 | }, | |
00319ba0 RB |
1678 | { |
1679 | .procname = "oa_max_sample_rate", | |
1680 | .data = &i915_oa_max_sample_rate, | |
1681 | .maxlen = sizeof(i915_oa_max_sample_rate), | |
1682 | .mode = 0644, | |
1683 | .proc_handler = proc_dointvec_minmax, | |
1684 | .extra1 = &zero, | |
1685 | .extra2 = &oa_sample_rate_hard_limit, | |
1686 | }, | |
ccdf6341 RB |
1687 | {} |
1688 | }; | |
1689 | ||
1690 | static struct ctl_table i915_root[] = { | |
1691 | { | |
1692 | .procname = "i915", | |
1693 | .maxlen = 0, | |
1694 | .mode = 0555, | |
1695 | .child = oa_table, | |
1696 | }, | |
1697 | {} | |
1698 | }; | |
1699 | ||
1700 | static struct ctl_table dev_root[] = { | |
1701 | { | |
1702 | .procname = "dev", | |
1703 | .maxlen = 0, | |
1704 | .mode = 0555, | |
1705 | .child = i915_root, | |
1706 | }, | |
1707 | {} | |
1708 | }; | |
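/* Chaining dev_root -> i915_root -> oa_table registers these knobs
 * under procfs as:
 *
 *	/proc/sys/dev/i915/perf_stream_paranoid
 *	/proc/sys/dev/i915/oa_max_sample_rate
 */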
1709 | ||
eec688e1 RB |
1710 | void i915_perf_init(struct drm_i915_private *dev_priv) |
1711 | { | |
d7965152 RB |
1712 | if (!IS_HASWELL(dev_priv)) |
1713 | return; | |
1714 | ||
1715 | hrtimer_init(&dev_priv->perf.oa.poll_check_timer, | |
1716 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
1717 | dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; | |
1718 | init_waitqueue_head(&dev_priv->perf.oa.poll_wq); | |
1719 | ||
eec688e1 RB |
1720 | INIT_LIST_HEAD(&dev_priv->perf.streams); |
1721 | mutex_init(&dev_priv->perf.lock); | |
d7965152 RB |
1722 | spin_lock_init(&dev_priv->perf.hook_lock); |
1723 | ||
1724 | dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; | |
1725 | dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; | |
1726 | dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; | |
1727 | dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; | |
1728 | dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; | |
1729 | dev_priv->perf.oa.ops.read = gen7_oa_read; | |
1730 | dev_priv->perf.oa.ops.oa_buffer_is_empty = | |
1731 | gen7_oa_buffer_is_empty_fop_unlocked; | |
1732 | ||
1733 | dev_priv->perf.oa.timestamp_frequency = 12500000; /* 12.5MHz, i.e. an 80ns tick */ | |
1734 | ||
1735 | dev_priv->perf.oa.oa_formats = hsw_oa_formats; | |
1736 | ||
1737 | dev_priv->perf.oa.n_builtin_sets = | |
1738 | i915_oa_n_builtin_metric_sets_hsw; | |
eec688e1 | 1739 | |
ccdf6341 RB |
1740 | dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); |
1741 | ||
eec688e1 RB |
1742 | dev_priv->perf.initialized = true; |
1743 | } | |
1744 | ||
1745 | void i915_perf_fini(struct drm_i915_private *dev_priv) | |
1746 | { | |
1747 | if (!dev_priv->perf.initialized) | |
1748 | return; | |
1749 | ||
ccdf6341 RB |
1750 | unregister_sysctl_table(dev_priv->perf.sysctl_header); |
1751 | ||
d7965152 | 1752 | memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); |
eec688e1 RB |
1753 | dev_priv->perf.initialized = false; |
1754 | } |