Merge drm/drm-next into drm-intel-next-queued
[linux-2.6-block.git] / drivers / gpu / drm / i915 / intel_pm.c
CommitLineData
85208be0
ED
1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eugeni Dodonov <eugeni.dodonov@intel.com>
25 *
26 */
27
2b4e57bd 28#include <linux/cpufreq.h>
08ea70a4 29#include <linux/pm_runtime.h>
9c2f7a9d 30#include <drm/drm_plane_helper.h>
85208be0
ED
31#include "i915_drv.h"
32#include "intel_drv.h"
eb48eb00
DV
33#include "../../../platform/x86/intel_ips.h"
34#include <linux/module.h>
c8fe32c1 35#include <drm/drm_atomic_helper.h>
85208be0 36
dc39fff7 37/**
18afd443
JN
38 * DOC: RC6
39 *
dc39fff7
BW
40 * RC6 is a special power stage which allows the GPU to enter a very
41 * low-voltage mode when idle, using down to 0V while at this stage. This
42 * stage is entered automatically when the GPU is idle when RC6 support is
43 * enabled, and as soon as new workload arises GPU wakes up automatically as well.
44 *
45 * There are different RC6 modes available in Intel GPU, which differentiate
46 * among each other with the latency required to enter and leave RC6 and
47 * voltage consumed by the GPU in different states.
48 *
49 * The combination of the following flags define which states GPU is allowed
50 * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
51 * RC6pp is deepest RC6. Their support by hardware varies according to the
52 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
53 * which brings the most power savings; deeper states save more power, but
54 * require higher latency to switch to and wake up.
55 */
dc39fff7 56
46f16e63 57static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
a82abe43 58{
93564044
VS
59 if (HAS_LLC(dev_priv)) {
60 /*
61 * WaCompressedResourceDisplayNewHashMode:skl,kbl
e0403cb9 62 * Display WA #0390: skl,kbl
93564044
VS
63 *
64 * Must match Sampler, Pixel Back End, and Media. See
65 * WaCompressedResourceSamplerPbeMediaNewHashMode.
66 */
67 I915_WRITE(CHICKEN_PAR1_1,
68 I915_READ(CHICKEN_PAR1_1) |
69 SKL_DE_COMPRESSED_HASH_MODE);
70 }
71
82525c17 72 /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
dc00b6a0
DV
73 I915_WRITE(CHICKEN_PAR1_1,
74 I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
75
82525c17 76 /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
590e8ff0
MK
77 I915_WRITE(GEN8_CHICKEN_DCPR_1,
78 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
0f78dee6 79
82525c17
RV
80 /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
81 /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
303d4ea5
MK
82 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
83 DISP_FBC_WM_DIS |
84 DISP_FBC_MEMORY_WAKE);
d1b4eefd 85
82525c17 86 /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
d1b4eefd
MK
87 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
88 ILK_DPFC_DISABLE_DUMMY0);
32087d14
PP
89
90 if (IS_SKYLAKE(dev_priv)) {
91 /* WaDisableDopClockGating */
92 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
93 & ~GEN7_DOP_CLOCK_GATE_ENABLE);
94 }
b033bb6d
MK
95}
96
46f16e63 97static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
b033bb6d 98{
46f16e63 99 gen9_init_clock_gating(dev_priv);
b033bb6d 100
a7546159
NH
101 /* WaDisableSDEUnitClockGating:bxt */
102 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
103 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
104
32608ca2
ID
105 /*
106 * FIXME:
868434c5 107 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
32608ca2 108 */
32608ca2 109 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
868434c5 110 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
d965e7ac
ID
111
112 /*
113 * Wa: Backlight PWM may stop in the asserted state, causing backlight
114 * to stay fully on.
115 */
8aeaf64c
JN
116 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
117 PWM1_GATING_DIS | PWM2_GATING_DIS);
a82abe43
ID
118}
119
9fb5026f
ACO
120static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
121{
122 gen9_init_clock_gating(dev_priv);
123
124 /*
125 * WaDisablePWMClockGating:glk
126 * Backlight PWM may stop in the asserted state, causing backlight
127 * to stay fully on.
128 */
129 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
130 PWM1_GATING_DIS | PWM2_GATING_DIS);
f4f4b59b
ACO
131
132 /* WaDDIIOTimeout:glk */
133 if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
134 u32 val = I915_READ(CHICKEN_MISC_2);
135 val &= ~(GLK_CL0_PWR_DOWN |
136 GLK_CL1_PWR_DOWN |
137 GLK_CL2_PWR_DOWN);
138 I915_WRITE(CHICKEN_MISC_2, val);
139 }
140
9fb5026f
ACO
141}
142
148ac1f3 143static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
c921aba8 144{
c921aba8
DV
145 u32 tmp;
146
147 tmp = I915_READ(CLKCFG);
148
149 switch (tmp & CLKCFG_FSB_MASK) {
150 case CLKCFG_FSB_533:
151 dev_priv->fsb_freq = 533; /* 133*4 */
152 break;
153 case CLKCFG_FSB_800:
154 dev_priv->fsb_freq = 800; /* 200*4 */
155 break;
156 case CLKCFG_FSB_667:
157 dev_priv->fsb_freq = 667; /* 167*4 */
158 break;
159 case CLKCFG_FSB_400:
160 dev_priv->fsb_freq = 400; /* 100*4 */
161 break;
162 }
163
164 switch (tmp & CLKCFG_MEM_MASK) {
165 case CLKCFG_MEM_533:
166 dev_priv->mem_freq = 533;
167 break;
168 case CLKCFG_MEM_667:
169 dev_priv->mem_freq = 667;
170 break;
171 case CLKCFG_MEM_800:
172 dev_priv->mem_freq = 800;
173 break;
174 }
175
176 /* detect pineview DDR3 setting */
177 tmp = I915_READ(CSHRDDR3CTL);
178 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
179}
180
148ac1f3 181static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
c921aba8 182{
c921aba8
DV
183 u16 ddrpll, csipll;
184
185 ddrpll = I915_READ16(DDRMPLL1);
186 csipll = I915_READ16(CSIPLL0);
187
188 switch (ddrpll & 0xff) {
189 case 0xc:
190 dev_priv->mem_freq = 800;
191 break;
192 case 0x10:
193 dev_priv->mem_freq = 1066;
194 break;
195 case 0x14:
196 dev_priv->mem_freq = 1333;
197 break;
198 case 0x18:
199 dev_priv->mem_freq = 1600;
200 break;
201 default:
202 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
203 ddrpll & 0xff);
204 dev_priv->mem_freq = 0;
205 break;
206 }
207
20e4d407 208 dev_priv->ips.r_t = dev_priv->mem_freq;
c921aba8
DV
209
210 switch (csipll & 0x3ff) {
211 case 0x00c:
212 dev_priv->fsb_freq = 3200;
213 break;
214 case 0x00e:
215 dev_priv->fsb_freq = 3733;
216 break;
217 case 0x010:
218 dev_priv->fsb_freq = 4266;
219 break;
220 case 0x012:
221 dev_priv->fsb_freq = 4800;
222 break;
223 case 0x014:
224 dev_priv->fsb_freq = 5333;
225 break;
226 case 0x016:
227 dev_priv->fsb_freq = 5866;
228 break;
229 case 0x018:
230 dev_priv->fsb_freq = 6400;
231 break;
232 default:
233 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
234 csipll & 0x3ff);
235 dev_priv->fsb_freq = 0;
236 break;
237 }
238
239 if (dev_priv->fsb_freq == 3200) {
20e4d407 240 dev_priv->ips.c_m = 0;
c921aba8 241 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
20e4d407 242 dev_priv->ips.c_m = 1;
c921aba8 243 } else {
20e4d407 244 dev_priv->ips.c_m = 2;
c921aba8
DV
245 }
246}
247
b445e3b0
ED
248static const struct cxsr_latency cxsr_latency_table[] = {
249 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
250 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
251 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
252 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
253 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
254
255 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
256 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
257 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
258 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
259 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
260
261 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
262 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
263 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
264 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
265 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
266
267 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
268 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
269 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
270 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
271 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
272
273 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
274 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
275 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
276 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
277 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
278
279 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
280 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
281 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
282 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
283 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
284};
285
44a655ca
TU
286static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
287 bool is_ddr3,
b445e3b0
ED
288 int fsb,
289 int mem)
290{
291 const struct cxsr_latency *latency;
292 int i;
293
294 if (fsb == 0 || mem == 0)
295 return NULL;
296
297 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
298 latency = &cxsr_latency_table[i];
299 if (is_desktop == latency->is_desktop &&
300 is_ddr3 == latency->is_ddr3 &&
301 fsb == latency->fsb_freq && mem == latency->mem_freq)
302 return latency;
303 }
304
305 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
306
307 return NULL;
308}
309
fc1ac8de
VS
310static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
311{
312 u32 val;
313
9f817501 314 mutex_lock(&dev_priv->pcu_lock);
fc1ac8de
VS
315
316 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
317 if (enable)
318 val &= ~FORCE_DDR_HIGH_FREQ;
319 else
320 val |= FORCE_DDR_HIGH_FREQ;
321 val &= ~FORCE_DDR_LOW_FREQ;
322 val |= FORCE_DDR_FREQ_REQ_ACK;
323 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
324
325 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
326 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
327 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
328
9f817501 329 mutex_unlock(&dev_priv->pcu_lock);
fc1ac8de
VS
330}
331
cfb41411
VS
332static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
333{
334 u32 val;
335
9f817501 336 mutex_lock(&dev_priv->pcu_lock);
cfb41411
VS
337
338 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
339 if (enable)
340 val |= DSP_MAXFIFO_PM5_ENABLE;
341 else
342 val &= ~DSP_MAXFIFO_PM5_ENABLE;
343 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
344
9f817501 345 mutex_unlock(&dev_priv->pcu_lock);
cfb41411
VS
346}
347
f4998963
VS
/*
 * Place @value in the DSPFW_<plane> register field: shift it by
 * DSPFW_<plane>_SHIFT and mask the result with DSPFW_<plane>_MASK.
 */
#define FW_WM(value, plane) \
	(DSPFW_ ## plane ## _MASK & ((value) << DSPFW_ ## plane ## _SHIFT))
350
11a85d6a 351static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
b445e3b0 352{
11a85d6a 353 bool was_enabled;
5209b1f4 354 u32 val;
b445e3b0 355
920a14b2 356 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
11a85d6a 357 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5209b1f4 358 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
a7a6c498 359 POSTING_READ(FW_BLC_SELF_VLV);
c0f86832 360 } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
11a85d6a 361 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5209b1f4 362 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
a7a6c498 363 POSTING_READ(FW_BLC_SELF);
9b1e14f4 364 } else if (IS_PINEVIEW(dev_priv)) {
11a85d6a
VS
365 val = I915_READ(DSPFW3);
366 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
367 if (enable)
368 val |= PINEVIEW_SELF_REFRESH_EN;
369 else
370 val &= ~PINEVIEW_SELF_REFRESH_EN;
5209b1f4 371 I915_WRITE(DSPFW3, val);
a7a6c498 372 POSTING_READ(DSPFW3);
50a0bc90 373 } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
11a85d6a 374 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5209b1f4
ID
375 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
376 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
377 I915_WRITE(FW_BLC_SELF, val);
a7a6c498 378 POSTING_READ(FW_BLC_SELF);
50a0bc90 379 } else if (IS_I915GM(dev_priv)) {
acb91359
VS
380 /*
381 * FIXME can't find a bit like this for 915G, and
382 * and yet it does have the related watermark in
383 * FW_BLC_SELF. What's going on?
384 */
11a85d6a 385 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
5209b1f4
ID
386 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
387 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
388 I915_WRITE(INSTPM, val);
a7a6c498 389 POSTING_READ(INSTPM);
5209b1f4 390 } else {
11a85d6a 391 return false;
5209b1f4 392 }
b445e3b0 393
1489bba8
VS
394 trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
395
11a85d6a
VS
396 DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
397 enableddisabled(enable),
398 enableddisabled(was_enabled));
399
400 return was_enabled;
b445e3b0
ED
401}
402
62571fc3
VS
403/**
404 * intel_set_memory_cxsr - Configure CxSR state
405 * @dev_priv: i915 device
406 * @enable: Allow vs. disallow CxSR
407 *
408 * Allow or disallow the system to enter a special CxSR
409 * (C-state self refresh) state. What typically happens in CxSR mode
410 * is that several display FIFOs may get combined into a single larger
411 * FIFO for a particular plane (so called max FIFO mode) to allow the
412 * system to defer memory fetches longer, and the memory will enter
413 * self refresh.
414 *
415 * Note that enabling CxSR does not guarantee that the system enter
416 * this special mode, nor does it guarantee that the system stays
417 * in that mode once entered. So this just allows/disallows the system
418 * to autonomously utilize the CxSR mode. Other factors such as core
419 * C-states will affect when/if the system actually enters/exits the
420 * CxSR mode.
421 *
422 * Note that on VLV/CHV this actually only controls the max FIFO mode,
423 * and the system is free to enter/exit memory self refresh at any time
424 * even when the use of CxSR has been disallowed.
425 *
426 * While the system is actually in the CxSR/max FIFO mode, some plane
427 * control registers will not get latched on vblank. Thus in order to
428 * guarantee the system will respond to changes in the plane registers
429 * we must always disallow CxSR prior to making changes to those registers.
430 * Unfortunately the system will re-evaluate the CxSR conditions at
431 * frame start which happens after vblank start (which is when the plane
432 * registers would get latched), so we can't proceed with the plane update
433 * during the same frame where we disallowed CxSR.
434 *
435 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
436 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
437 * the hardware w.r.t. HPLL SR when writing to plane registers.
438 * Disallowing just CxSR is sufficient.
439 */
11a85d6a 440bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
3d90e649 441{
11a85d6a
VS
442 bool ret;
443
3d90e649 444 mutex_lock(&dev_priv->wm.wm_mutex);
11a85d6a 445 ret = _intel_set_memory_cxsr(dev_priv, enable);
04548cba
VS
446 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
447 dev_priv->wm.vlv.cxsr = enable;
448 else if (IS_G4X(dev_priv))
449 dev_priv->wm.g4x.cxsr = enable;
3d90e649 450 mutex_unlock(&dev_priv->wm.wm_mutex);
11a85d6a
VS
451
452 return ret;
3d90e649 453}
fc1ac8de 454
b445e3b0
ED
455/*
456 * Latency for FIFO fetches is dependent on several factors:
457 * - memory configuration (speed, channels)
458 * - chipset
459 * - current MCH state
460 * It can be fairly high in some situations, so here we assume a fairly
461 * pessimal value. It's a tradeoff between extra memory fetches (if we
462 * set this value too high, the FIFO will fetch frequently to stay full)
463 * and power consumption (set it too low to save power and we might see
464 * FIFO underruns and display "flicker").
465 *
466 * A value of 5us seems to be a good balance; safe for very low end
467 * platforms but not overly aggressive on lower latency configs.
468 */
5aef6003 469static const int pessimal_latency_ns = 5000;
b445e3b0 470
b5004720
VS
/*
 * Assemble a 9-bit FIFO start value: the low 8 bits come from @dsparb at
 * @lo_shift, bit 8 comes from @dsparb2 at @hi_shift.
 */
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	(((((dsparb2) >> (hi_shift)) & 0x1) << 8) | (((dsparb) >> (lo_shift)) & 0xff))
473
814e7f0b 474static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
b5004720 475{
814e7f0b 476 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
f07d43d2 477 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
814e7f0b 478 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
f07d43d2
VS
479 enum pipe pipe = crtc->pipe;
480 int sprite0_start, sprite1_start;
49845a23 481
f07d43d2 482 switch (pipe) {
b5004720
VS
483 uint32_t dsparb, dsparb2, dsparb3;
484 case PIPE_A:
485 dsparb = I915_READ(DSPARB);
486 dsparb2 = I915_READ(DSPARB2);
487 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
488 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
489 break;
490 case PIPE_B:
491 dsparb = I915_READ(DSPARB);
492 dsparb2 = I915_READ(DSPARB2);
493 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
494 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
495 break;
496 case PIPE_C:
497 dsparb2 = I915_READ(DSPARB2);
498 dsparb3 = I915_READ(DSPARB3);
499 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
500 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
501 break;
502 default:
f07d43d2
VS
503 MISSING_CASE(pipe);
504 return;
b5004720
VS
505 }
506
f07d43d2
VS
507 fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
508 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
509 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
510 fifo_state->plane[PLANE_CURSOR] = 63;
b5004720
VS
511}
512
bdaf8439
VS
513static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
514 enum i9xx_plane_id i9xx_plane)
b445e3b0 515{
b445e3b0
ED
516 uint32_t dsparb = I915_READ(DSPARB);
517 int size;
518
519 size = dsparb & 0x7f;
bdaf8439 520 if (i9xx_plane == PLANE_B)
b445e3b0
ED
521 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
522
bdaf8439
VS
523 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
524 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
525
526 return size;
527}
528
bdaf8439
VS
529static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
530 enum i9xx_plane_id i9xx_plane)
b445e3b0 531{
b445e3b0
ED
532 uint32_t dsparb = I915_READ(DSPARB);
533 int size;
534
535 size = dsparb & 0x1ff;
bdaf8439 536 if (i9xx_plane == PLANE_B)
b445e3b0
ED
537 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
538 size >>= 1; /* Convert to cachelines */
539
bdaf8439
VS
540 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
541 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
542
543 return size;
544}
545
bdaf8439
VS
546static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
547 enum i9xx_plane_id i9xx_plane)
b445e3b0 548{
b445e3b0
ED
549 uint32_t dsparb = I915_READ(DSPARB);
550 int size;
551
552 size = dsparb & 0x7f;
553 size >>= 2; /* Convert to cachelines */
554
bdaf8439
VS
555 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
556 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
557
558 return size;
559}
560
b445e3b0
ED
561/* Pineview has different values for various configs */
562static const struct intel_watermark_params pineview_display_wm = {
e0f0273e
VS
563 .fifo_size = PINEVIEW_DISPLAY_FIFO,
564 .max_wm = PINEVIEW_MAX_WM,
565 .default_wm = PINEVIEW_DFT_WM,
566 .guard_size = PINEVIEW_GUARD_WM,
567 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
568};
569static const struct intel_watermark_params pineview_display_hplloff_wm = {
e0f0273e
VS
570 .fifo_size = PINEVIEW_DISPLAY_FIFO,
571 .max_wm = PINEVIEW_MAX_WM,
572 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
573 .guard_size = PINEVIEW_GUARD_WM,
574 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
575};
576static const struct intel_watermark_params pineview_cursor_wm = {
e0f0273e
VS
577 .fifo_size = PINEVIEW_CURSOR_FIFO,
578 .max_wm = PINEVIEW_CURSOR_MAX_WM,
579 .default_wm = PINEVIEW_CURSOR_DFT_WM,
580 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
581 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
582};
583static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
e0f0273e
VS
584 .fifo_size = PINEVIEW_CURSOR_FIFO,
585 .max_wm = PINEVIEW_CURSOR_MAX_WM,
586 .default_wm = PINEVIEW_CURSOR_DFT_WM,
587 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
588 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0 589};
b445e3b0 590static const struct intel_watermark_params i965_cursor_wm_info = {
e0f0273e
VS
591 .fifo_size = I965_CURSOR_FIFO,
592 .max_wm = I965_CURSOR_MAX_WM,
593 .default_wm = I965_CURSOR_DFT_WM,
594 .guard_size = 2,
595 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0
ED
596};
597static const struct intel_watermark_params i945_wm_info = {
e0f0273e
VS
598 .fifo_size = I945_FIFO_SIZE,
599 .max_wm = I915_MAX_WM,
600 .default_wm = 1,
601 .guard_size = 2,
602 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0
ED
603};
604static const struct intel_watermark_params i915_wm_info = {
e0f0273e
VS
605 .fifo_size = I915_FIFO_SIZE,
606 .max_wm = I915_MAX_WM,
607 .default_wm = 1,
608 .guard_size = 2,
609 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0 610};
9d539105 611static const struct intel_watermark_params i830_a_wm_info = {
e0f0273e
VS
612 .fifo_size = I855GM_FIFO_SIZE,
613 .max_wm = I915_MAX_WM,
614 .default_wm = 1,
615 .guard_size = 2,
616 .cacheline_size = I830_FIFO_LINE_SIZE,
b445e3b0 617};
9d539105
VS
618static const struct intel_watermark_params i830_bc_wm_info = {
619 .fifo_size = I855GM_FIFO_SIZE,
620 .max_wm = I915_MAX_WM/2,
621 .default_wm = 1,
622 .guard_size = 2,
623 .cacheline_size = I830_FIFO_LINE_SIZE,
624};
feb56b93 625static const struct intel_watermark_params i845_wm_info = {
e0f0273e
VS
626 .fifo_size = I830_FIFO_SIZE,
627 .max_wm = I915_MAX_WM,
628 .default_wm = 1,
629 .guard_size = 2,
630 .cacheline_size = I830_FIFO_LINE_SIZE,
b445e3b0
ED
631};
632
baf69ca8
VS
633/**
634 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
635 * @pixel_rate: Pipe pixel rate in kHz
636 * @cpp: Plane bytes per pixel
637 * @latency: Memory wakeup latency in 0.1us units
638 *
639 * Compute the watermark using the method 1 or "small buffer"
640 * formula. The caller may additionally add extra cachelines
641 * to account for TLB misses and clock crossings.
642 *
643 * This method is concerned with the short term drain rate
644 * of the FIFO, ie. it does not account for blanking periods
645 * which would effectively reduce the average drain rate across
646 * a longer period. The name "small" refers to the fact the
647 * FIFO is relatively small compared to the amount of data
648 * fetched.
649 *
650 * The FIFO level vs. time graph might look something like:
651 *
652 * |\ |\
653 * | \ | \
654 * __---__---__ (- plane active, _ blanking)
655 * -> time
656 *
657 * or perhaps like this:
658 *
659 * |\|\ |\|\
660 * __----__----__ (- plane active, _ blanking)
661 * -> time
662 *
663 * Returns:
664 * The watermark in bytes
665 */
static unsigned int intel_wm_method1(unsigned int pixel_rate,
				     unsigned int cpp,
				     unsigned int latency)
{
	/* Widen before multiplying so the product is computed in 64 bits. */
	uint64_t bytes = (uint64_t) pixel_rate * cpp * latency;

	/* Scale by 10000, rounding up. */
	return DIV_ROUND_UP_ULL(bytes, 10000);
}
677
678/**
679 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
680 * @pixel_rate: Pipe pixel rate in kHz
681 * @htotal: Pipe horizontal total
682 * @width: Plane width in pixels
683 * @cpp: Plane bytes per pixel
684 * @latency: Memory wakeup latency in 0.1us units
685 *
686 * Compute the watermark using the method 2 or "large buffer"
687 * formula. The caller may additionally add extra cachelines
688 * to account for TLB misses and clock crossings.
689 *
690 * This method is concerned with the long term drain rate
691 * of the FIFO, ie. it does account for blanking periods
692 * which effectively reduce the average drain rate across
693 * a longer period. The name "large" refers to the fact the
694 * FIFO is relatively large compared to the amount of data
695 * fetched.
696 *
697 * The FIFO level vs. time graph might look something like:
698 *
699 * |\___ |\___
700 * | \___ | \___
701 * | \ | \
702 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
703 * -> time
704 *
705 * Returns:
706 * The watermark in bytes
707 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	uint64_t drained;
	unsigned int lines;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	/*
	 * Form the latency * pixel_rate product in 64 bits, as
	 * intel_wm_method1() does, so that large inputs cannot wrap
	 * around in 32-bit arithmetic.
	 */
	drained = (uint64_t) latency * pixel_rate;

	/*
	 * The watermark covers floor(drained / (htotal * 10000)) + 1 lines.
	 * For a positive divisor d, floor(x / d) + 1 == ceil((x + 1) / d),
	 * so one rounding-up 64-bit division yields the line count directly.
	 * The divisor itself is still a 32-bit product, as before.
	 */
	lines = DIV_ROUND_UP_ULL(drained + 1, htotal * 10000);

	return lines * width * cpp;
}
728
b445e3b0
ED
729/**
730 * intel_calculate_wm - calculate watermark level
baf69ca8 731 * @pixel_rate: pixel clock
b445e3b0 732 * @wm: chip FIFO params
31383410 733 * @fifo_size: size of the FIFO buffer
ac484963 734 * @cpp: bytes per pixel
b445e3b0
ED
735 * @latency_ns: memory latency for the platform
736 *
737 * Calculate the watermark level (the level at which the display plane will
738 * start fetching from memory again). Each chip has a different display
739 * FIFO size and allocation, so the caller needs to figure that out and pass
740 * in the correct intel_watermark_params structure.
741 *
742 * As the pixel clock runs, the FIFO will be drained at a rate that depends
743 * on the pixel size. When it reaches the watermark level, it'll start
744 * fetching FIFO line sized based chunks from memory until the FIFO fills
745 * past the watermark point. If the FIFO drains completely, a FIFO underrun
746 * will occur, and a display engine hang could result.
747 */
baf69ca8
VS
748static unsigned int intel_calculate_wm(int pixel_rate,
749 const struct intel_watermark_params *wm,
750 int fifo_size, int cpp,
751 unsigned int latency_ns)
b445e3b0 752{
baf69ca8 753 int entries, wm_size;
b445e3b0
ED
754
755 /*
756 * Note: we need to make sure we don't overflow for various clock &
757 * latency values.
758 * clocks go from a few thousand to several hundred thousand.
759 * latency is usually a few thousand
760 */
baf69ca8
VS
761 entries = intel_wm_method1(pixel_rate, cpp,
762 latency_ns / 100);
763 entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
764 wm->guard_size;
765 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
b445e3b0 766
baf69ca8
VS
767 wm_size = fifo_size - entries;
768 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
b445e3b0
ED
769
770 /* Don't promote wm_size to unsigned... */
baf69ca8 771 if (wm_size > wm->max_wm)
b445e3b0
ED
772 wm_size = wm->max_wm;
773 if (wm_size <= 0)
774 wm_size = wm->default_wm;
d6feb196
VS
775
776 /*
777 * Bspec seems to indicate that the value shouldn't be lower than
778 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
779 * Lets go for 8 which is the burst size since certain platforms
780 * already use a hardcoded 8 (which is what the spec says should be
781 * done).
782 */
783 if (wm_size <= 8)
784 wm_size = 8;
785
b445e3b0
ED
786 return wm_size;
787}
788
04548cba
VS
/* True when the value drops from at-or-above @threshold to below it. */
static bool is_disabling(int old, int new, int threshold)
{
	if (old < threshold)
		return false;

	return new < threshold;
}
793
/* True when the value rises from below @threshold to at-or-above it. */
static bool is_enabling(int old, int new, int threshold)
{
	if (old >= threshold)
		return false;

	return new >= threshold;
}
798
6d5019b6
VS
799static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
800{
801 return dev_priv->wm.max_level + 1;
802}
803
24304d81
VS
804static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
805 const struct intel_plane_state *plane_state)
806{
807 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
808
809 /* FIXME check the 'enable' instead */
810 if (!crtc_state->base.active)
811 return false;
812
813 /*
814 * Treat cursor with fb as always visible since cursor updates
815 * can happen faster than the vrefresh rate, and the current
816 * watermark code doesn't handle that correctly. Cursor updates
817 * which set/clear the fb or change the cursor size are going
818 * to get throttled by intel_legacy_cursor_update() to work
819 * around this problem with the watermark code.
820 */
821 if (plane->id == PLANE_CURSOR)
822 return plane_state->base.fb != NULL;
823 else
824 return plane_state->base.visible;
825}
826
ffc7a76b 827static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
b445e3b0 828{
efc2611e 829 struct intel_crtc *crtc, *enabled = NULL;
b445e3b0 830
ffc7a76b 831 for_each_intel_crtc(&dev_priv->drm, crtc) {
efc2611e 832 if (intel_crtc_active(crtc)) {
b445e3b0
ED
833 if (enabled)
834 return NULL;
835 enabled = crtc;
836 }
837 }
838
839 return enabled;
840}
841
432081bc 842static void pineview_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 843{
ffc7a76b 844 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 845 struct intel_crtc *crtc;
b445e3b0
ED
846 const struct cxsr_latency *latency;
847 u32 reg;
baf69ca8 848 unsigned int wm;
b445e3b0 849
50a0bc90
TU
850 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
851 dev_priv->is_ddr3,
852 dev_priv->fsb_freq,
853 dev_priv->mem_freq);
b445e3b0
ED
854 if (!latency) {
855 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
5209b1f4 856 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
857 return;
858 }
859
ffc7a76b 860 crtc = single_enabled_crtc(dev_priv);
b445e3b0 861 if (crtc) {
efc2611e
VS
862 const struct drm_display_mode *adjusted_mode =
863 &crtc->config->base.adjusted_mode;
864 const struct drm_framebuffer *fb =
865 crtc->base.primary->state->fb;
353c8598 866 int cpp = fb->format->cpp[0];
7c5f93b0 867 int clock = adjusted_mode->crtc_clock;
b445e3b0
ED
868
869 /* Display SR */
870 wm = intel_calculate_wm(clock, &pineview_display_wm,
871 pineview_display_wm.fifo_size,
ac484963 872 cpp, latency->display_sr);
b445e3b0
ED
873 reg = I915_READ(DSPFW1);
874 reg &= ~DSPFW_SR_MASK;
f4998963 875 reg |= FW_WM(wm, SR);
b445e3b0
ED
876 I915_WRITE(DSPFW1, reg);
877 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
878
879 /* cursor SR */
880 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
881 pineview_display_wm.fifo_size,
99834b14 882 4, latency->cursor_sr);
b445e3b0
ED
883 reg = I915_READ(DSPFW3);
884 reg &= ~DSPFW_CURSOR_SR_MASK;
f4998963 885 reg |= FW_WM(wm, CURSOR_SR);
b445e3b0
ED
886 I915_WRITE(DSPFW3, reg);
887
888 /* Display HPLL off SR */
889 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
890 pineview_display_hplloff_wm.fifo_size,
ac484963 891 cpp, latency->display_hpll_disable);
b445e3b0
ED
892 reg = I915_READ(DSPFW3);
893 reg &= ~DSPFW_HPLL_SR_MASK;
f4998963 894 reg |= FW_WM(wm, HPLL_SR);
b445e3b0
ED
895 I915_WRITE(DSPFW3, reg);
896
897 /* cursor HPLL off SR */
898 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
899 pineview_display_hplloff_wm.fifo_size,
99834b14 900 4, latency->cursor_hpll_disable);
b445e3b0
ED
901 reg = I915_READ(DSPFW3);
902 reg &= ~DSPFW_HPLL_CURSOR_MASK;
f4998963 903 reg |= FW_WM(wm, HPLL_CURSOR);
b445e3b0
ED
904 I915_WRITE(DSPFW3, reg);
905 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
906
5209b1f4 907 intel_set_memory_cxsr(dev_priv, true);
b445e3b0 908 } else {
5209b1f4 909 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
910 }
911}
912
/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 *
 * Returns fifo_size * 64 minus the size of eight lines (width * cpp * 8),
 * or 0 when eight lines do not fit.
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int fifo_total = fifo_size * 64;
	int eight_lines = width * cpp * 8;
	int tlb_miss = fifo_total - eight_lines;

	return tlb_miss > 0 ? tlb_miss : 0;
}
929
04548cba
VS
930static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
931 const struct g4x_wm_values *wm)
b445e3b0 932{
e93329a5
VS
933 enum pipe pipe;
934
935 for_each_pipe(dev_priv, pipe)
936 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
937
04548cba
VS
938 I915_WRITE(DSPFW1,
939 FW_WM(wm->sr.plane, SR) |
940 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
941 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
942 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
943 I915_WRITE(DSPFW2,
944 (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
945 FW_WM(wm->sr.fbc, FBC_SR) |
946 FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
947 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
948 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
949 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
950 I915_WRITE(DSPFW3,
951 (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
952 FW_WM(wm->sr.cursor, CURSOR_SR) |
953 FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
954 FW_WM(wm->hpll.plane, HPLL_SR));
b445e3b0 955
04548cba 956 POSTING_READ(DSPFW1);
b445e3b0
ED
957}
958
15665979
VS
959#define FW_WM_VLV(value, plane) \
960 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
961
50f4caef 962static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
0018fda1
VS
963 const struct vlv_wm_values *wm)
964{
50f4caef
VS
965 enum pipe pipe;
966
967 for_each_pipe(dev_priv, pipe) {
c137d660
VS
968 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
969
50f4caef
VS
970 I915_WRITE(VLV_DDL(pipe),
971 (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
972 (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
973 (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
974 (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
975 }
0018fda1 976
6fe6a7ff
VS
977 /*
978 * Zero the (unused) WM1 watermarks, and also clear all the
979 * high order bits so that there are no out of bounds values
980 * present in the registers during the reprogramming.
981 */
982 I915_WRITE(DSPHOWM, 0);
983 I915_WRITE(DSPHOWM1, 0);
984 I915_WRITE(DSPFW4, 0);
985 I915_WRITE(DSPFW5, 0);
986 I915_WRITE(DSPFW6, 0);
987
ae80152d 988 I915_WRITE(DSPFW1,
15665979 989 FW_WM(wm->sr.plane, SR) |
1b31389c
VS
990 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
991 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
992 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
ae80152d 993 I915_WRITE(DSPFW2,
1b31389c
VS
994 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
995 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
996 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
ae80152d 997 I915_WRITE(DSPFW3,
15665979 998 FW_WM(wm->sr.cursor, CURSOR_SR));
ae80152d
VS
999
1000 if (IS_CHERRYVIEW(dev_priv)) {
1001 I915_WRITE(DSPFW7_CHV,
1b31389c
VS
1002 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1003 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
ae80152d 1004 I915_WRITE(DSPFW8_CHV,
1b31389c
VS
1005 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1006 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
ae80152d 1007 I915_WRITE(DSPFW9_CHV,
1b31389c
VS
1008 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1009 FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
ae80152d 1010 I915_WRITE(DSPHOWM,
15665979 1011 FW_WM(wm->sr.plane >> 9, SR_HI) |
1b31389c
VS
1012 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1013 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1014 FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1015 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1016 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1017 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1018 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1019 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1020 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
ae80152d
VS
1021 } else {
1022 I915_WRITE(DSPFW7,
1b31389c
VS
1023 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1024 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
ae80152d 1025 I915_WRITE(DSPHOWM,
15665979 1026 FW_WM(wm->sr.plane >> 9, SR_HI) |
1b31389c
VS
1027 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1028 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1029 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1030 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1031 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1032 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
ae80152d
VS
1033 }
1034
1035 POSTING_READ(DSPFW1);
0018fda1
VS
1036}
1037
15665979
VS
1038#undef FW_WM_VLV
1039
04548cba
VS
1040static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1041{
1042 /* all latencies in usec */
1043 dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1044 dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
79d94306 1045 dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
04548cba 1046
79d94306 1047 dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
04548cba
VS
1048}
1049
1050static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1051{
1052 /*
1053 * DSPCNTR[13] supposedly controls whether the
1054 * primary plane can use the FIFO space otherwise
1055 * reserved for the sprite plane. It's not 100% clear
1056 * what the actual FIFO size is, but it looks like we
1057 * can happily set both primary and sprite watermarks
1058 * up to 127 cachelines. So that would seem to mean
1059 * that either DSPCNTR[13] doesn't do anything, or that
1060 * the total FIFO is >= 256 cachelines in size. Either
1061 * way, we don't seem to have to worry about this
1062 * repartitioning as the maximum watermark value the
1063 * register can hold for each plane is lower than the
1064 * minimum FIFO size.
1065 */
1066 switch (plane_id) {
1067 case PLANE_CURSOR:
1068 return 63;
1069 case PLANE_PRIMARY:
1070 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1071 case PLANE_SPRITE0:
1072 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1073 default:
1074 MISSING_CASE(plane_id);
1075 return 0;
1076 }
1077}
1078
1079static int g4x_fbc_fifo_size(int level)
1080{
1081 switch (level) {
1082 case G4X_WM_LEVEL_SR:
1083 return 7;
1084 case G4X_WM_LEVEL_HPLL:
1085 return 15;
1086 default:
1087 MISSING_CASE(level);
1088 return 0;
1089 }
1090}
1091
1092static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1093 const struct intel_plane_state *plane_state,
1094 int level)
1095{
1096 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1097 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1098 const struct drm_display_mode *adjusted_mode =
1099 &crtc_state->base.adjusted_mode;
1a1f1287
CW
1100 unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1101 unsigned int clock, htotal, cpp, width, wm;
04548cba
VS
1102
1103 if (latency == 0)
1104 return USHRT_MAX;
1105
1106 if (!intel_wm_plane_visible(crtc_state, plane_state))
1107 return 0;
1108
1109 /*
1110 * Not 100% sure which way ELK should go here as the
1111 * spec only says CL/CTG should assume 32bpp and BW
1112 * doesn't need to. But as these things followed the
1113 * mobile vs. desktop lines on gen3 as well, let's
1114 * assume ELK doesn't need this.
1115 *
1116 * The spec also fails to list such a restriction for
1117 * the HPLL watermark, which seems a little strange.
1118 * Let's use 32bpp for the HPLL watermark as well.
1119 */
1120 if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1121 level != G4X_WM_LEVEL_NORMAL)
1122 cpp = 4;
1123 else
1124 cpp = plane_state->base.fb->format->cpp[0];
1125
1126 clock = adjusted_mode->crtc_clock;
1127 htotal = adjusted_mode->crtc_htotal;
1128
1129 if (plane->id == PLANE_CURSOR)
1130 width = plane_state->base.crtc_w;
1131 else
1132 width = drm_rect_width(&plane_state->base.dst);
1133
1134 if (plane->id == PLANE_CURSOR) {
1135 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1136 } else if (plane->id == PLANE_PRIMARY &&
1137 level == G4X_WM_LEVEL_NORMAL) {
1138 wm = intel_wm_method1(clock, cpp, latency);
1139 } else {
1a1f1287 1140 unsigned int small, large;
04548cba
VS
1141
1142 small = intel_wm_method1(clock, cpp, latency);
1143 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1144
1145 wm = min(small, large);
1146 }
1147
1148 wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1149 width, cpp);
1150
1151 wm = DIV_ROUND_UP(wm, 64) + 2;
1152
1a1f1287 1153 return min_t(unsigned int, wm, USHRT_MAX);
04548cba
VS
1154}
1155
1156static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1157 int level, enum plane_id plane_id, u16 value)
1158{
1159 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1160 bool dirty = false;
1161
1162 for (; level < intel_wm_num_levels(dev_priv); level++) {
1163 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1164
1165 dirty |= raw->plane[plane_id] != value;
1166 raw->plane[plane_id] = value;
1167 }
1168
1169 return dirty;
1170}
1171
1172static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1173 int level, u16 value)
1174{
1175 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1176 bool dirty = false;
1177
1178 /* NORMAL level doesn't have an FBC watermark */
1179 level = max(level, G4X_WM_LEVEL_SR);
1180
1181 for (; level < intel_wm_num_levels(dev_priv); level++) {
1182 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1183
1184 dirty |= raw->fbc != value;
1185 raw->fbc = value;
1186 }
1187
1188 return dirty;
1189}
1190
1191static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1192 const struct intel_plane_state *pstate,
1193 uint32_t pri_val);
1194
1195static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1196 const struct intel_plane_state *plane_state)
1197{
1198 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1199 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1200 enum plane_id plane_id = plane->id;
1201 bool dirty = false;
1202 int level;
1203
1204 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1205 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1206 if (plane_id == PLANE_PRIMARY)
1207 dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1208 goto out;
1209 }
1210
1211 for (level = 0; level < num_levels; level++) {
1212 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1213 int wm, max_wm;
1214
1215 wm = g4x_compute_wm(crtc_state, plane_state, level);
1216 max_wm = g4x_plane_fifo_size(plane_id, level);
1217
1218 if (wm > max_wm)
1219 break;
1220
1221 dirty |= raw->plane[plane_id] != wm;
1222 raw->plane[plane_id] = wm;
1223
1224 if (plane_id != PLANE_PRIMARY ||
1225 level == G4X_WM_LEVEL_NORMAL)
1226 continue;
1227
1228 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1229 raw->plane[plane_id]);
1230 max_wm = g4x_fbc_fifo_size(level);
1231
1232 /*
1233 * FBC wm is not mandatory as we
1234 * can always just disable its use.
1235 */
1236 if (wm > max_wm)
1237 wm = USHRT_MAX;
1238
1239 dirty |= raw->fbc != wm;
1240 raw->fbc = wm;
1241 }
1242
1243 /* mark watermarks as invalid */
1244 dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1245
1246 if (plane_id == PLANE_PRIMARY)
1247 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1248
1249 out:
1250 if (dirty) {
1251 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1252 plane->base.name,
1253 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1254 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1255 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1256
1257 if (plane_id == PLANE_PRIMARY)
1258 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1259 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1260 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1261 }
1262
1263 return dirty;
1264}
1265
1266static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1267 enum plane_id plane_id, int level)
1268{
1269 const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1270
1271 return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1272}
1273
1274static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1275 int level)
1276{
1277 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1278
1279 if (level > dev_priv->wm.max_level)
1280 return false;
1281
1282 return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1283 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1284 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1285}
1286
1287/* mark all levels starting from 'level' as invalid */
1288static void g4x_invalidate_wms(struct intel_crtc *crtc,
1289 struct g4x_wm_state *wm_state, int level)
1290{
1291 if (level <= G4X_WM_LEVEL_NORMAL) {
1292 enum plane_id plane_id;
1293
1294 for_each_plane_id_on_crtc(crtc, plane_id)
1295 wm_state->wm.plane[plane_id] = USHRT_MAX;
1296 }
1297
1298 if (level <= G4X_WM_LEVEL_SR) {
1299 wm_state->cxsr = false;
1300 wm_state->sr.cursor = USHRT_MAX;
1301 wm_state->sr.plane = USHRT_MAX;
1302 wm_state->sr.fbc = USHRT_MAX;
1303 }
1304
1305 if (level <= G4X_WM_LEVEL_HPLL) {
1306 wm_state->hpll_en = false;
1307 wm_state->hpll.cursor = USHRT_MAX;
1308 wm_state->hpll.plane = USHRT_MAX;
1309 wm_state->hpll.fbc = USHRT_MAX;
1310 }
1311}
1312
1313static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1314{
1315 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1316 struct intel_atomic_state *state =
1317 to_intel_atomic_state(crtc_state->base.state);
1318 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1319 int num_active_planes = hweight32(crtc_state->active_planes &
1320 ~BIT(PLANE_CURSOR));
1321 const struct g4x_pipe_wm *raw;
7b510451
VS
1322 const struct intel_plane_state *old_plane_state;
1323 const struct intel_plane_state *new_plane_state;
04548cba
VS
1324 struct intel_plane *plane;
1325 enum plane_id plane_id;
1326 int i, level;
1327 unsigned int dirty = 0;
1328
7b510451
VS
1329 for_each_oldnew_intel_plane_in_state(state, plane,
1330 old_plane_state,
1331 new_plane_state, i) {
1332 if (new_plane_state->base.crtc != &crtc->base &&
04548cba
VS
1333 old_plane_state->base.crtc != &crtc->base)
1334 continue;
1335
7b510451 1336 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
04548cba
VS
1337 dirty |= BIT(plane->id);
1338 }
1339
1340 if (!dirty)
1341 return 0;
1342
1343 level = G4X_WM_LEVEL_NORMAL;
1344 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1345 goto out;
1346
1347 raw = &crtc_state->wm.g4x.raw[level];
1348 for_each_plane_id_on_crtc(crtc, plane_id)
1349 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1350
1351 level = G4X_WM_LEVEL_SR;
1352
1353 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1354 goto out;
1355
1356 raw = &crtc_state->wm.g4x.raw[level];
1357 wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1358 wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1359 wm_state->sr.fbc = raw->fbc;
1360
1361 wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1362
1363 level = G4X_WM_LEVEL_HPLL;
1364
1365 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1366 goto out;
1367
1368 raw = &crtc_state->wm.g4x.raw[level];
1369 wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1370 wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1371 wm_state->hpll.fbc = raw->fbc;
1372
1373 wm_state->hpll_en = wm_state->cxsr;
1374
1375 level++;
1376
1377 out:
1378 if (level == G4X_WM_LEVEL_NORMAL)
1379 return -EINVAL;
1380
1381 /* invalidate the higher levels */
1382 g4x_invalidate_wms(crtc, wm_state, level);
1383
1384 /*
1385 * Determine if the FBC watermark(s) can be used. IF
1386 * this isn't the case we prefer to disable the FBC
1387 ( watermark(s) rather than disable the SR/HPLL
1388 * level(s) entirely.
1389 */
1390 wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1391
1392 if (level >= G4X_WM_LEVEL_SR &&
1393 wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1394 wm_state->fbc_en = false;
1395 else if (level >= G4X_WM_LEVEL_HPLL &&
1396 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1397 wm_state->fbc_en = false;
1398
1399 return 0;
1400}
1401
cd1d3ee9 1402static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
04548cba 1403{
cd1d3ee9 1404 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
248c2435
ML
1405 struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1406 const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1407 struct intel_atomic_state *intel_state =
1408 to_intel_atomic_state(new_crtc_state->base.state);
1409 const struct intel_crtc_state *old_crtc_state =
1410 intel_atomic_get_old_crtc_state(intel_state, crtc);
1411 const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
04548cba
VS
1412 enum plane_id plane_id;
1413
248c2435
ML
1414 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1415 *intermediate = *optimal;
1416
1417 intermediate->cxsr = false;
1418 intermediate->hpll_en = false;
1419 goto out;
1420 }
1421
04548cba 1422 intermediate->cxsr = optimal->cxsr && active->cxsr &&
248c2435 1423 !new_crtc_state->disable_cxsr;
04548cba 1424 intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
248c2435 1425 !new_crtc_state->disable_cxsr;
04548cba
VS
1426 intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1427
1428 for_each_plane_id_on_crtc(crtc, plane_id) {
1429 intermediate->wm.plane[plane_id] =
1430 max(optimal->wm.plane[plane_id],
1431 active->wm.plane[plane_id]);
1432
1433 WARN_ON(intermediate->wm.plane[plane_id] >
1434 g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1435 }
1436
1437 intermediate->sr.plane = max(optimal->sr.plane,
1438 active->sr.plane);
1439 intermediate->sr.cursor = max(optimal->sr.cursor,
1440 active->sr.cursor);
1441 intermediate->sr.fbc = max(optimal->sr.fbc,
1442 active->sr.fbc);
1443
1444 intermediate->hpll.plane = max(optimal->hpll.plane,
1445 active->hpll.plane);
1446 intermediate->hpll.cursor = max(optimal->hpll.cursor,
1447 active->hpll.cursor);
1448 intermediate->hpll.fbc = max(optimal->hpll.fbc,
1449 active->hpll.fbc);
1450
1451 WARN_ON((intermediate->sr.plane >
1452 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1453 intermediate->sr.cursor >
1454 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1455 intermediate->cxsr);
1456 WARN_ON((intermediate->sr.plane >
1457 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1458 intermediate->sr.cursor >
1459 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1460 intermediate->hpll_en);
1461
1462 WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1463 intermediate->fbc_en && intermediate->cxsr);
1464 WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1465 intermediate->fbc_en && intermediate->hpll_en);
1466
248c2435 1467out:
04548cba
VS
1468 /*
1469 * If our intermediate WM are identical to the final WM, then we can
1470 * omit the post-vblank programming; only update if it's different.
1471 */
1472 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
248c2435 1473 new_crtc_state->wm.need_postvbl_update = true;
04548cba
VS
1474
1475 return 0;
1476}
1477
1478static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1479 struct g4x_wm_values *wm)
1480{
1481 struct intel_crtc *crtc;
1482 int num_active_crtcs = 0;
1483
1484 wm->cxsr = true;
1485 wm->hpll_en = true;
1486 wm->fbc_en = true;
1487
1488 for_each_intel_crtc(&dev_priv->drm, crtc) {
1489 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1490
1491 if (!crtc->active)
1492 continue;
1493
1494 if (!wm_state->cxsr)
1495 wm->cxsr = false;
1496 if (!wm_state->hpll_en)
1497 wm->hpll_en = false;
1498 if (!wm_state->fbc_en)
1499 wm->fbc_en = false;
1500
1501 num_active_crtcs++;
1502 }
1503
1504 if (num_active_crtcs != 1) {
1505 wm->cxsr = false;
1506 wm->hpll_en = false;
1507 wm->fbc_en = false;
1508 }
1509
1510 for_each_intel_crtc(&dev_priv->drm, crtc) {
1511 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1512 enum pipe pipe = crtc->pipe;
1513
1514 wm->pipe[pipe] = wm_state->wm;
1515 if (crtc->active && wm->cxsr)
1516 wm->sr = wm_state->sr;
1517 if (crtc->active && wm->hpll_en)
1518 wm->hpll = wm_state->hpll;
1519 }
1520}
1521
1522static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1523{
1524 struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1525 struct g4x_wm_values new_wm = {};
1526
1527 g4x_merge_wm(dev_priv, &new_wm);
1528
1529 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1530 return;
1531
1532 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1533 _intel_set_memory_cxsr(dev_priv, false);
1534
1535 g4x_write_wm_values(dev_priv, &new_wm);
1536
1537 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1538 _intel_set_memory_cxsr(dev_priv, true);
1539
1540 *old_wm = new_wm;
1541}
1542
1543static void g4x_initial_watermarks(struct intel_atomic_state *state,
1544 struct intel_crtc_state *crtc_state)
1545{
1546 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1547 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1548
1549 mutex_lock(&dev_priv->wm.wm_mutex);
1550 crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1551 g4x_program_watermarks(dev_priv);
1552 mutex_unlock(&dev_priv->wm.wm_mutex);
1553}
1554
1555static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1556 struct intel_crtc_state *crtc_state)
1557{
1558 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1559 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1560
1561 if (!crtc_state->wm.need_postvbl_update)
1562 return;
1563
1564 mutex_lock(&dev_priv->wm.wm_mutex);
1565 intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1566 g4x_program_watermarks(dev_priv);
1567 mutex_unlock(&dev_priv->wm.wm_mutex);
1568}
1569
/*
 * Run intel_wm_method2() and divide the result by 64, rounding up.
 *
 * latency must be in 0.1us units.
 */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int wm = intel_wm_method2(pixel_rate, htotal,
					   width, cpp, latency);

	return DIV_ROUND_UP(wm, 64);
}
1585
bb726519 1586static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
262cd2e1 1587{
262cd2e1
VS
1588 /* all latencies in usec */
1589 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1590
58590c14
VS
1591 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1592
262cd2e1
VS
1593 if (IS_CHERRYVIEW(dev_priv)) {
1594 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1595 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
58590c14
VS
1596
1597 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
262cd2e1
VS
1598 }
1599}
1600
e339d67e
VS
1601static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1602 const struct intel_plane_state *plane_state,
262cd2e1
VS
1603 int level)
1604{
e339d67e 1605 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
262cd2e1 1606 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
e339d67e
VS
1607 const struct drm_display_mode *adjusted_mode =
1608 &crtc_state->base.adjusted_mode;
1a1f1287 1609 unsigned int clock, htotal, cpp, width, wm;
262cd2e1
VS
1610
1611 if (dev_priv->wm.pri_latency[level] == 0)
1612 return USHRT_MAX;
1613
a07102f1 1614 if (!intel_wm_plane_visible(crtc_state, plane_state))
262cd2e1
VS
1615 return 0;
1616
ef426c10 1617 cpp = plane_state->base.fb->format->cpp[0];
e339d67e
VS
1618 clock = adjusted_mode->crtc_clock;
1619 htotal = adjusted_mode->crtc_htotal;
1620 width = crtc_state->pipe_src_w;
262cd2e1 1621
709f3fc9 1622 if (plane->id == PLANE_CURSOR) {
262cd2e1
VS
1623 /*
1624 * FIXME the formula gives values that are
1625 * too big for the cursor FIFO, and hence we
1626 * would never be able to use cursors. For
1627 * now just hardcode the watermark.
1628 */
1629 wm = 63;
1630 } else {
ac484963 1631 wm = vlv_wm_method2(clock, htotal, width, cpp,
262cd2e1
VS
1632 dev_priv->wm.pri_latency[level] * 10);
1633 }
1634
1a1f1287 1635 return min_t(unsigned int, wm, USHRT_MAX);
262cd2e1
VS
1636}
1637
1a10ae6b
VS
1638static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1639{
1640 return (active_planes & (BIT(PLANE_SPRITE0) |
1641 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1642}
1643
5012e604 1644static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
54f1b6e1 1645{
855c79f5 1646 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
114d7dc0 1647 const struct g4x_pipe_wm *raw =
5012e604 1648 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
814e7f0b 1649 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
5012e604
VS
1650 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1651 int num_active_planes = hweight32(active_planes);
1652 const int fifo_size = 511;
54f1b6e1 1653 int fifo_extra, fifo_left = fifo_size;
1a10ae6b 1654 int sprite0_fifo_extra = 0;
5012e604
VS
1655 unsigned int total_rate;
1656 enum plane_id plane_id;
54f1b6e1 1657
1a10ae6b
VS
1658 /*
1659 * When enabling sprite0 after sprite1 has already been enabled
1660 * we tend to get an underrun unless sprite0 already has some
1661 * FIFO space allcoated. Hence we always allocate at least one
1662 * cacheline for sprite0 whenever sprite1 is enabled.
1663 *
1664 * All other plane enable sequences appear immune to this problem.
1665 */
1666 if (vlv_need_sprite0_fifo_workaround(active_planes))
1667 sprite0_fifo_extra = 1;
1668
5012e604
VS
1669 total_rate = raw->plane[PLANE_PRIMARY] +
1670 raw->plane[PLANE_SPRITE0] +
1a10ae6b
VS
1671 raw->plane[PLANE_SPRITE1] +
1672 sprite0_fifo_extra;
54f1b6e1 1673
5012e604
VS
1674 if (total_rate > fifo_size)
1675 return -EINVAL;
54f1b6e1 1676
5012e604
VS
1677 if (total_rate == 0)
1678 total_rate = 1;
54f1b6e1 1679
5012e604 1680 for_each_plane_id_on_crtc(crtc, plane_id) {
54f1b6e1
VS
1681 unsigned int rate;
1682
5012e604
VS
1683 if ((active_planes & BIT(plane_id)) == 0) {
1684 fifo_state->plane[plane_id] = 0;
54f1b6e1
VS
1685 continue;
1686 }
1687
5012e604
VS
1688 rate = raw->plane[plane_id];
1689 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1690 fifo_left -= fifo_state->plane[plane_id];
54f1b6e1
VS
1691 }
1692
1a10ae6b
VS
1693 fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1694 fifo_left -= sprite0_fifo_extra;
1695
5012e604
VS
1696 fifo_state->plane[PLANE_CURSOR] = 63;
1697
1698 fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
54f1b6e1
VS
1699
1700 /* spread the remainder evenly */
5012e604 1701 for_each_plane_id_on_crtc(crtc, plane_id) {
54f1b6e1
VS
1702 int plane_extra;
1703
1704 if (fifo_left == 0)
1705 break;
1706
5012e604 1707 if ((active_planes & BIT(plane_id)) == 0)
54f1b6e1
VS
1708 continue;
1709
1710 plane_extra = min(fifo_extra, fifo_left);
5012e604 1711 fifo_state->plane[plane_id] += plane_extra;
54f1b6e1
VS
1712 fifo_left -= plane_extra;
1713 }
1714
5012e604
VS
1715 WARN_ON(active_planes != 0 && fifo_left != 0);
1716
1717 /* give it all to the first plane if none are active */
1718 if (active_planes == 0) {
1719 WARN_ON(fifo_left != fifo_size);
1720 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1721 }
1722
1723 return 0;
54f1b6e1
VS
1724}
1725
ff32c54e
VS
1726/* mark all levels starting from 'level' as invalid */
1727static void vlv_invalidate_wms(struct intel_crtc *crtc,
1728 struct vlv_wm_state *wm_state, int level)
1729{
1730 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1731
6d5019b6 1732 for (; level < intel_wm_num_levels(dev_priv); level++) {
ff32c54e
VS
1733 enum plane_id plane_id;
1734
1735 for_each_plane_id_on_crtc(crtc, plane_id)
1736 wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1737
1738 wm_state->sr[level].cursor = USHRT_MAX;
1739 wm_state->sr[level].plane = USHRT_MAX;
1740 }
1741}
1742
26cca0e5
VS
1743static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1744{
1745 if (wm > fifo_size)
1746 return USHRT_MAX;
1747 else
1748 return fifo_size - wm;
1749}
1750
ff32c54e
VS
1751/*
1752 * Starting from 'level' set all higher
1753 * levels to 'value' in the "raw" watermarks.
1754 */
236c48e6 1755static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
ff32c54e 1756 int level, enum plane_id plane_id, u16 value)
262cd2e1 1757{
ff32c54e 1758 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
6d5019b6 1759 int num_levels = intel_wm_num_levels(dev_priv);
236c48e6 1760 bool dirty = false;
262cd2e1 1761
ff32c54e 1762 for (; level < num_levels; level++) {
114d7dc0 1763 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
262cd2e1 1764
236c48e6 1765 dirty |= raw->plane[plane_id] != value;
ff32c54e 1766 raw->plane[plane_id] = value;
262cd2e1 1767 }
236c48e6
VS
1768
1769 return dirty;
262cd2e1
VS
1770}
1771
77d14ee4
VS
1772static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1773 const struct intel_plane_state *plane_state)
262cd2e1 1774{
ff32c54e
VS
1775 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1776 enum plane_id plane_id = plane->id;
6d5019b6 1777 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
262cd2e1 1778 int level;
236c48e6 1779 bool dirty = false;
262cd2e1 1780
a07102f1 1781 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
236c48e6
VS
1782 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1783 goto out;
ff32c54e 1784 }
262cd2e1 1785
ff32c54e 1786 for (level = 0; level < num_levels; level++) {
114d7dc0 1787 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
ff32c54e
VS
1788 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1789 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
262cd2e1 1790
ff32c54e
VS
1791 if (wm > max_wm)
1792 break;
262cd2e1 1793
236c48e6 1794 dirty |= raw->plane[plane_id] != wm;
ff32c54e
VS
1795 raw->plane[plane_id] = wm;
1796 }
262cd2e1 1797
ff32c54e 1798 /* mark all higher levels as invalid */
236c48e6 1799 dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
262cd2e1 1800
236c48e6
VS
1801out:
1802 if (dirty)
57a6528a 1803 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
236c48e6
VS
1804 plane->base.name,
1805 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1806 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1807 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1808
1809 return dirty;
ff32c54e 1810}
262cd2e1 1811
77d14ee4
VS
1812static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1813 enum plane_id plane_id, int level)
ff32c54e 1814{
114d7dc0 1815 const struct g4x_pipe_wm *raw =
ff32c54e
VS
1816 &crtc_state->wm.vlv.raw[level];
1817 const struct vlv_fifo_state *fifo_state =
1818 &crtc_state->wm.vlv.fifo_state;
262cd2e1 1819
ff32c54e
VS
1820 return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1821}
262cd2e1 1822
77d14ee4 1823static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
ff32c54e 1824{
77d14ee4
VS
1825 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1826 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1827 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1828 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
ff32c54e
VS
1829}
1830
1831static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1832{
1833 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1834 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1835 struct intel_atomic_state *state =
1836 to_intel_atomic_state(crtc_state->base.state);
1837 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1838 const struct vlv_fifo_state *fifo_state =
1839 &crtc_state->wm.vlv.fifo_state;
1840 int num_active_planes = hweight32(crtc_state->active_planes &
1841 ~BIT(PLANE_CURSOR));
236c48e6 1842 bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
7b510451
VS
1843 const struct intel_plane_state *old_plane_state;
1844 const struct intel_plane_state *new_plane_state;
ff32c54e
VS
1845 struct intel_plane *plane;
1846 enum plane_id plane_id;
1847 int level, ret, i;
236c48e6 1848 unsigned int dirty = 0;
ff32c54e 1849
7b510451
VS
1850 for_each_oldnew_intel_plane_in_state(state, plane,
1851 old_plane_state,
1852 new_plane_state, i) {
1853 if (new_plane_state->base.crtc != &crtc->base &&
ff32c54e
VS
1854 old_plane_state->base.crtc != &crtc->base)
1855 continue;
262cd2e1 1856
7b510451 1857 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
236c48e6
VS
1858 dirty |= BIT(plane->id);
1859 }
1860
1861 /*
1862 * DSPARB registers may have been reset due to the
1863 * power well being turned off. Make sure we restore
1864 * them to a consistent state even if no primary/sprite
1865 * planes are initially active.
1866 */
1867 if (needs_modeset)
1868 crtc_state->fifo_changed = true;
1869
1870 if (!dirty)
1871 return 0;
1872
1873 /* cursor changes don't warrant a FIFO recompute */
1874 if (dirty & ~BIT(PLANE_CURSOR)) {
1875 const struct intel_crtc_state *old_crtc_state =
7b510451 1876 intel_atomic_get_old_crtc_state(state, crtc);
236c48e6
VS
1877 const struct vlv_fifo_state *old_fifo_state =
1878 &old_crtc_state->wm.vlv.fifo_state;
1879
1880 ret = vlv_compute_fifo(crtc_state);
1881 if (ret)
1882 return ret;
1883
1884 if (needs_modeset ||
1885 memcmp(old_fifo_state, fifo_state,
1886 sizeof(*fifo_state)) != 0)
1887 crtc_state->fifo_changed = true;
5012e604 1888 }
262cd2e1 1889
ff32c54e 1890 /* initially allow all levels */
6d5019b6 1891 wm_state->num_levels = intel_wm_num_levels(dev_priv);
ff32c54e
VS
1892 /*
1893 * Note that enabling cxsr with no primary/sprite planes
1894 * enabled can wedge the pipe. Hence we only allow cxsr
1895 * with exactly one enabled primary/sprite plane.
1896 */
5eeb798b 1897 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
ff32c54e 1898
5012e604 1899 for (level = 0; level < wm_state->num_levels; level++) {
114d7dc0 1900 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
ff32c54e 1901 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
5012e604 1902
77d14ee4 1903 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
ff32c54e 1904 break;
5012e604 1905
ff32c54e
VS
1906 for_each_plane_id_on_crtc(crtc, plane_id) {
1907 wm_state->wm[level].plane[plane_id] =
1908 vlv_invert_wm_value(raw->plane[plane_id],
1909 fifo_state->plane[plane_id]);
1910 }
1911
1912 wm_state->sr[level].plane =
1913 vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
5012e604 1914 raw->plane[PLANE_SPRITE0],
ff32c54e
VS
1915 raw->plane[PLANE_SPRITE1]),
1916 sr_fifo_size);
262cd2e1 1917
ff32c54e
VS
1918 wm_state->sr[level].cursor =
1919 vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1920 63);
262cd2e1
VS
1921 }
1922
ff32c54e
VS
1923 if (level == 0)
1924 return -EINVAL;
1925
1926 /* limit to only levels we can actually handle */
1927 wm_state->num_levels = level;
1928
1929 /* invalidate the higher levels */
1930 vlv_invalidate_wms(crtc, wm_state, level);
1931
1932 return 0;
262cd2e1
VS
1933}
1934
54f1b6e1
VS
1935#define VLV_FIFO(plane, value) \
1936 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1937
ff32c54e
VS
1938static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1939 struct intel_crtc_state *crtc_state)
54f1b6e1 1940{
814e7f0b 1941 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
f07d43d2 1942 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
814e7f0b
VS
1943 const struct vlv_fifo_state *fifo_state =
1944 &crtc_state->wm.vlv.fifo_state;
f07d43d2 1945 int sprite0_start, sprite1_start, fifo_size;
54f1b6e1 1946
236c48e6
VS
1947 if (!crtc_state->fifo_changed)
1948 return;
1949
f07d43d2
VS
1950 sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1951 sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1952 fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
54f1b6e1 1953
f07d43d2
VS
1954 WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1955 WARN_ON(fifo_size != 511);
54f1b6e1 1956
c137d660
VS
1957 trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1958
44e921d4
VS
1959 /*
1960 * uncore.lock serves a double purpose here. It allows us to
1961 * use the less expensive I915_{READ,WRITE}_FW() functions, and
1962 * it protects the DSPARB registers from getting clobbered by
1963 * parallel updates from multiple pipes.
1964 *
1965 * intel_pipe_update_start() has already disabled interrupts
1966 * for us, so a plain spin_lock() is sufficient here.
1967 */
1968 spin_lock(&dev_priv->uncore.lock);
467a14d9 1969
54f1b6e1
VS
1970 switch (crtc->pipe) {
1971 uint32_t dsparb, dsparb2, dsparb3;
1972 case PIPE_A:
44e921d4
VS
1973 dsparb = I915_READ_FW(DSPARB);
1974 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
1975
1976 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1977 VLV_FIFO(SPRITEB, 0xff));
1978 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1979 VLV_FIFO(SPRITEB, sprite1_start));
1980
1981 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1982 VLV_FIFO(SPRITEB_HI, 0x1));
1983 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1984 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1985
44e921d4
VS
1986 I915_WRITE_FW(DSPARB, dsparb);
1987 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
1988 break;
1989 case PIPE_B:
44e921d4
VS
1990 dsparb = I915_READ_FW(DSPARB);
1991 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
1992
1993 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1994 VLV_FIFO(SPRITED, 0xff));
1995 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1996 VLV_FIFO(SPRITED, sprite1_start));
1997
1998 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1999 VLV_FIFO(SPRITED_HI, 0xff));
2000 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2001 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2002
44e921d4
VS
2003 I915_WRITE_FW(DSPARB, dsparb);
2004 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
2005 break;
2006 case PIPE_C:
44e921d4
VS
2007 dsparb3 = I915_READ_FW(DSPARB3);
2008 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
2009
2010 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2011 VLV_FIFO(SPRITEF, 0xff));
2012 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2013 VLV_FIFO(SPRITEF, sprite1_start));
2014
2015 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2016 VLV_FIFO(SPRITEF_HI, 0xff));
2017 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2018 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2019
44e921d4
VS
2020 I915_WRITE_FW(DSPARB3, dsparb3);
2021 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
2022 break;
2023 default:
2024 break;
2025 }
467a14d9 2026
44e921d4 2027 POSTING_READ_FW(DSPARB);
467a14d9 2028
44e921d4 2029 spin_unlock(&dev_priv->uncore.lock);
54f1b6e1
VS
2030}
2031
2032#undef VLV_FIFO
2033
cd1d3ee9 2034static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
4841da51 2035{
cd1d3ee9 2036 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5b9489cb
ML
2037 struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2038 const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2039 struct intel_atomic_state *intel_state =
2040 to_intel_atomic_state(new_crtc_state->base.state);
2041 const struct intel_crtc_state *old_crtc_state =
2042 intel_atomic_get_old_crtc_state(intel_state, crtc);
2043 const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
4841da51
VS
2044 int level;
2045
5b9489cb
ML
2046 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2047 *intermediate = *optimal;
2048
2049 intermediate->cxsr = false;
2050 goto out;
2051 }
2052
4841da51 2053 intermediate->num_levels = min(optimal->num_levels, active->num_levels);
5eeb798b 2054 intermediate->cxsr = optimal->cxsr && active->cxsr &&
5b9489cb 2055 !new_crtc_state->disable_cxsr;
4841da51
VS
2056
2057 for (level = 0; level < intermediate->num_levels; level++) {
2058 enum plane_id plane_id;
2059
2060 for_each_plane_id_on_crtc(crtc, plane_id) {
2061 intermediate->wm[level].plane[plane_id] =
2062 min(optimal->wm[level].plane[plane_id],
2063 active->wm[level].plane[plane_id]);
2064 }
2065
2066 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2067 active->sr[level].plane);
2068 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2069 active->sr[level].cursor);
2070 }
2071
2072 vlv_invalidate_wms(crtc, intermediate, level);
2073
5b9489cb 2074out:
4841da51
VS
2075 /*
2076 * If our intermediate WM are identical to the final WM, then we can
2077 * omit the post-vblank programming; only update if it's different.
2078 */
5eeb798b 2079 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
5b9489cb 2080 new_crtc_state->wm.need_postvbl_update = true;
4841da51
VS
2081
2082 return 0;
2083}
2084
7c951c00 2085static void vlv_merge_wm(struct drm_i915_private *dev_priv,
262cd2e1
VS
2086 struct vlv_wm_values *wm)
2087{
2088 struct intel_crtc *crtc;
2089 int num_active_crtcs = 0;
2090
7c951c00 2091 wm->level = dev_priv->wm.max_level;
262cd2e1
VS
2092 wm->cxsr = true;
2093
7c951c00 2094 for_each_intel_crtc(&dev_priv->drm, crtc) {
7eb4941f 2095 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
262cd2e1
VS
2096
2097 if (!crtc->active)
2098 continue;
2099
2100 if (!wm_state->cxsr)
2101 wm->cxsr = false;
2102
2103 num_active_crtcs++;
2104 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2105 }
2106
2107 if (num_active_crtcs != 1)
2108 wm->cxsr = false;
2109
6f9c784b
VS
2110 if (num_active_crtcs > 1)
2111 wm->level = VLV_WM_LEVEL_PM2;
2112
7c951c00 2113 for_each_intel_crtc(&dev_priv->drm, crtc) {
7eb4941f 2114 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
262cd2e1
VS
2115 enum pipe pipe = crtc->pipe;
2116
262cd2e1 2117 wm->pipe[pipe] = wm_state->wm[wm->level];
ff32c54e 2118 if (crtc->active && wm->cxsr)
262cd2e1
VS
2119 wm->sr = wm_state->sr[wm->level];
2120
1b31389c
VS
2121 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2122 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2123 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2124 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
262cd2e1
VS
2125 }
2126}
2127
ff32c54e 2128static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
262cd2e1 2129{
fa292a4b
VS
2130 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2131 struct vlv_wm_values new_wm = {};
262cd2e1 2132
fa292a4b 2133 vlv_merge_wm(dev_priv, &new_wm);
262cd2e1 2134
ff32c54e 2135 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
262cd2e1
VS
2136 return;
2137
fa292a4b 2138 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
262cd2e1
VS
2139 chv_set_memory_dvfs(dev_priv, false);
2140
fa292a4b 2141 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
262cd2e1
VS
2142 chv_set_memory_pm5(dev_priv, false);
2143
fa292a4b 2144 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
3d90e649 2145 _intel_set_memory_cxsr(dev_priv, false);
262cd2e1 2146
fa292a4b 2147 vlv_write_wm_values(dev_priv, &new_wm);
262cd2e1 2148
fa292a4b 2149 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
3d90e649 2150 _intel_set_memory_cxsr(dev_priv, true);
262cd2e1 2151
fa292a4b 2152 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
262cd2e1
VS
2153 chv_set_memory_pm5(dev_priv, true);
2154
fa292a4b 2155 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
262cd2e1
VS
2156 chv_set_memory_dvfs(dev_priv, true);
2157
fa292a4b 2158 *old_wm = new_wm;
3c2777fd
VS
2159}
2160
ff32c54e
VS
2161static void vlv_initial_watermarks(struct intel_atomic_state *state,
2162 struct intel_crtc_state *crtc_state)
2163{
2164 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2165 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2166
2167 mutex_lock(&dev_priv->wm.wm_mutex);
4841da51
VS
2168 crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2169 vlv_program_watermarks(dev_priv);
2170 mutex_unlock(&dev_priv->wm.wm_mutex);
2171}
2172
2173static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2174 struct intel_crtc_state *crtc_state)
2175{
2176 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2177 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2178
2179 if (!crtc_state->wm.need_postvbl_update)
2180 return;
2181
2182 mutex_lock(&dev_priv->wm.wm_mutex);
2183 intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
ff32c54e
VS
2184 vlv_program_watermarks(dev_priv);
2185 mutex_unlock(&dev_priv->wm.wm_mutex);
2186}
2187
432081bc 2188static void i965_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2189{
ffc7a76b 2190 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 2191 struct intel_crtc *crtc;
b445e3b0
ED
2192 int srwm = 1;
2193 int cursor_sr = 16;
9858425c 2194 bool cxsr_enabled;
b445e3b0
ED
2195
2196 /* Calc sr entries for one plane configs */
ffc7a76b 2197 crtc = single_enabled_crtc(dev_priv);
b445e3b0
ED
2198 if (crtc) {
2199 /* self-refresh has much higher latency */
2200 static const int sr_latency_ns = 12000;
efc2611e
VS
2201 const struct drm_display_mode *adjusted_mode =
2202 &crtc->config->base.adjusted_mode;
2203 const struct drm_framebuffer *fb =
2204 crtc->base.primary->state->fb;
241bfc38 2205 int clock = adjusted_mode->crtc_clock;
fec8cba3 2206 int htotal = adjusted_mode->crtc_htotal;
efc2611e 2207 int hdisplay = crtc->config->pipe_src_w;
353c8598 2208 int cpp = fb->format->cpp[0];
b445e3b0
ED
2209 int entries;
2210
baf69ca8
VS
2211 entries = intel_wm_method2(clock, htotal,
2212 hdisplay, cpp, sr_latency_ns / 100);
b445e3b0
ED
2213 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2214 srwm = I965_FIFO_SIZE - entries;
2215 if (srwm < 0)
2216 srwm = 1;
2217 srwm &= 0x1ff;
2218 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2219 entries, srwm);
2220
baf69ca8
VS
2221 entries = intel_wm_method2(clock, htotal,
2222 crtc->base.cursor->state->crtc_w, 4,
2223 sr_latency_ns / 100);
b445e3b0 2224 entries = DIV_ROUND_UP(entries,
baf69ca8
VS
2225 i965_cursor_wm_info.cacheline_size) +
2226 i965_cursor_wm_info.guard_size;
b445e3b0 2227
baf69ca8 2228 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
b445e3b0
ED
2229 if (cursor_sr > i965_cursor_wm_info.max_wm)
2230 cursor_sr = i965_cursor_wm_info.max_wm;
2231
2232 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2233 "cursor %d\n", srwm, cursor_sr);
2234
9858425c 2235 cxsr_enabled = true;
b445e3b0 2236 } else {
9858425c 2237 cxsr_enabled = false;
b445e3b0 2238 /* Turn off self refresh if both pipes are enabled */
5209b1f4 2239 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
2240 }
2241
2242 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2243 srwm);
2244
2245 /* 965 has limitations... */
f4998963
VS
2246 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2247 FW_WM(8, CURSORB) |
2248 FW_WM(8, PLANEB) |
2249 FW_WM(8, PLANEA));
2250 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2251 FW_WM(8, PLANEC_OLD));
b445e3b0 2252 /* update cursor SR watermark */
f4998963 2253 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
9858425c
ID
2254
2255 if (cxsr_enabled)
2256 intel_set_memory_cxsr(dev_priv, true);
b445e3b0
ED
2257}
2258
f4998963
VS
2259#undef FW_WM
2260
432081bc 2261static void i9xx_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2262{
ffc7a76b 2263 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
b445e3b0
ED
2264 const struct intel_watermark_params *wm_info;
2265 uint32_t fwater_lo;
2266 uint32_t fwater_hi;
2267 int cwm, srwm = 1;
2268 int fifo_size;
2269 int planea_wm, planeb_wm;
efc2611e 2270 struct intel_crtc *crtc, *enabled = NULL;
b445e3b0 2271
a9097be4 2272 if (IS_I945GM(dev_priv))
b445e3b0 2273 wm_info = &i945_wm_info;
cf819eff 2274 else if (!IS_GEN(dev_priv, 2))
b445e3b0
ED
2275 wm_info = &i915_wm_info;
2276 else
9d539105 2277 wm_info = &i830_a_wm_info;
b445e3b0 2278
bdaf8439
VS
2279 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2280 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
efc2611e
VS
2281 if (intel_crtc_active(crtc)) {
2282 const struct drm_display_mode *adjusted_mode =
2283 &crtc->config->base.adjusted_mode;
2284 const struct drm_framebuffer *fb =
2285 crtc->base.primary->state->fb;
2286 int cpp;
2287
cf819eff 2288 if (IS_GEN(dev_priv, 2))
b9e0bda3 2289 cpp = 4;
efc2611e 2290 else
353c8598 2291 cpp = fb->format->cpp[0];
b9e0bda3 2292
241bfc38 2293 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
b9e0bda3 2294 wm_info, fifo_size, cpp,
5aef6003 2295 pessimal_latency_ns);
b445e3b0 2296 enabled = crtc;
9d539105 2297 } else {
b445e3b0 2298 planea_wm = fifo_size - wm_info->guard_size;
9d539105
VS
2299 if (planea_wm > (long)wm_info->max_wm)
2300 planea_wm = wm_info->max_wm;
2301 }
2302
cf819eff 2303 if (IS_GEN(dev_priv, 2))
9d539105 2304 wm_info = &i830_bc_wm_info;
b445e3b0 2305
bdaf8439
VS
2306 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2307 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
efc2611e
VS
2308 if (intel_crtc_active(crtc)) {
2309 const struct drm_display_mode *adjusted_mode =
2310 &crtc->config->base.adjusted_mode;
2311 const struct drm_framebuffer *fb =
2312 crtc->base.primary->state->fb;
2313 int cpp;
2314
cf819eff 2315 if (IS_GEN(dev_priv, 2))
b9e0bda3 2316 cpp = 4;
efc2611e 2317 else
353c8598 2318 cpp = fb->format->cpp[0];
b9e0bda3 2319
241bfc38 2320 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
b9e0bda3 2321 wm_info, fifo_size, cpp,
5aef6003 2322 pessimal_latency_ns);
b445e3b0
ED
2323 if (enabled == NULL)
2324 enabled = crtc;
2325 else
2326 enabled = NULL;
9d539105 2327 } else {
b445e3b0 2328 planeb_wm = fifo_size - wm_info->guard_size;
9d539105
VS
2329 if (planeb_wm > (long)wm_info->max_wm)
2330 planeb_wm = wm_info->max_wm;
2331 }
b445e3b0
ED
2332
2333 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2334
50a0bc90 2335 if (IS_I915GM(dev_priv) && enabled) {
2ff8fde1 2336 struct drm_i915_gem_object *obj;
2ab1bc9d 2337
efc2611e 2338 obj = intel_fb_obj(enabled->base.primary->state->fb);
2ab1bc9d
DV
2339
2340 /* self-refresh seems busted with untiled */
3e510a8e 2341 if (!i915_gem_object_is_tiled(obj))
2ab1bc9d
DV
2342 enabled = NULL;
2343 }
2344
b445e3b0
ED
2345 /*
2346 * Overlay gets an aggressive default since video jitter is bad.
2347 */
2348 cwm = 2;
2349
2350 /* Play safe and disable self-refresh before adjusting watermarks. */
5209b1f4 2351 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
2352
2353 /* Calc sr entries for one plane configs */
03427fcb 2354 if (HAS_FW_BLC(dev_priv) && enabled) {
b445e3b0
ED
2355 /* self-refresh has much higher latency */
2356 static const int sr_latency_ns = 6000;
efc2611e
VS
2357 const struct drm_display_mode *adjusted_mode =
2358 &enabled->config->base.adjusted_mode;
2359 const struct drm_framebuffer *fb =
2360 enabled->base.primary->state->fb;
241bfc38 2361 int clock = adjusted_mode->crtc_clock;
fec8cba3 2362 int htotal = adjusted_mode->crtc_htotal;
efc2611e
VS
2363 int hdisplay = enabled->config->pipe_src_w;
2364 int cpp;
b445e3b0
ED
2365 int entries;
2366
50a0bc90 2367 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2d1b5056 2368 cpp = 4;
efc2611e 2369 else
353c8598 2370 cpp = fb->format->cpp[0];
2d1b5056 2371
baf69ca8
VS
2372 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2373 sr_latency_ns / 100);
b445e3b0
ED
2374 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2375 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2376 srwm = wm_info->fifo_size - entries;
2377 if (srwm < 0)
2378 srwm = 1;
2379
50a0bc90 2380 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
b445e3b0
ED
2381 I915_WRITE(FW_BLC_SELF,
2382 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
acb91359 2383 else
b445e3b0
ED
2384 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2385 }
2386
2387 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2388 planea_wm, planeb_wm, cwm, srwm);
2389
2390 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2391 fwater_hi = (cwm & 0x1f);
2392
2393 /* Set request length to 8 cachelines per fetch */
2394 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2395 fwater_hi = fwater_hi | (1 << 8);
2396
2397 I915_WRITE(FW_BLC, fwater_lo);
2398 I915_WRITE(FW_BLC2, fwater_hi);
2399
5209b1f4
ID
2400 if (enabled)
2401 intel_set_memory_cxsr(dev_priv, true);
b445e3b0
ED
2402}
2403
432081bc 2404static void i845_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2405{
ffc7a76b 2406 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 2407 struct intel_crtc *crtc;
241bfc38 2408 const struct drm_display_mode *adjusted_mode;
b445e3b0
ED
2409 uint32_t fwater_lo;
2410 int planea_wm;
2411
ffc7a76b 2412 crtc = single_enabled_crtc(dev_priv);
b445e3b0
ED
2413 if (crtc == NULL)
2414 return;
2415
efc2611e 2416 adjusted_mode = &crtc->config->base.adjusted_mode;
241bfc38 2417 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
feb56b93 2418 &i845_wm_info,
bdaf8439 2419 dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
5aef6003 2420 4, pessimal_latency_ns);
b445e3b0
ED
2421 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2422 fwater_lo |= (3<<8) | planea_wm;
2423
2424 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2425
2426 I915_WRITE(FW_BLC, fwater_lo);
2427}
2428
/*
 * intel_wm_method1() result divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method1(unsigned int pixel_rate,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int val = intel_wm_method1(pixel_rate, cpp, latency);

	return DIV_ROUND_UP(val, 64) + 2;
}
2441
/*
 * intel_wm_method2() result divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int val = intel_wm_method2(pixel_rate, htotal,
					    width, cpp, latency);

	return DIV_ROUND_UP(val, 64) + 2;
}
2457
23297044 2458static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
ac484963 2459 uint8_t cpp)
cca32e9a 2460{
15126882
MR
2461 /*
2462 * Neither of these should be possible since this function shouldn't be
2463 * called if the CRTC is off or the plane is invisible. But let's be
2464 * extra paranoid to avoid a potential divide-by-zero if we screw up
2465 * elsewhere in the driver.
2466 */
ac484963 2467 if (WARN_ON(!cpp))
15126882
MR
2468 return 0;
2469 if (WARN_ON(!horiz_pixels))
2470 return 0;
2471
ac484963 2472 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
cca32e9a
PZ
2473}
2474
/* Upper limits that the watermark values of one level are checked against. */
struct ilk_wm_maximums {
	uint16_t pri;	/* primary plane watermark limit */
	uint16_t spr;	/* sprite plane watermark limit */
	uint16_t cur;	/* cursor watermark limit */
	uint16_t fbc;	/* FBC watermark limit */
};
2481
37126462
VS
2482/*
2483 * For both WM_PIPE and WM_LP.
2484 * mem_value must be in 0.1us units.
2485 */
7221fc33 2486static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
43d59eda 2487 const struct intel_plane_state *pstate,
cca32e9a
PZ
2488 uint32_t mem_value,
2489 bool is_lp)
801bcfff 2490{
cca32e9a 2491 uint32_t method1, method2;
8305494e 2492 int cpp;
cca32e9a 2493
03981c6e
VS
2494 if (mem_value == 0)
2495 return U32_MAX;
2496
24304d81 2497 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2498 return 0;
2499
353c8598 2500 cpp = pstate->base.fb->format->cpp[0];
8305494e 2501
a7d1b3f4 2502 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
cca32e9a
PZ
2503
2504 if (!is_lp)
2505 return method1;
2506
a7d1b3f4 2507 method2 = ilk_wm_method2(cstate->pixel_rate,
7221fc33 2508 cstate->base.adjusted_mode.crtc_htotal,
936e71e3 2509 drm_rect_width(&pstate->base.dst),
ac484963 2510 cpp, mem_value);
cca32e9a
PZ
2511
2512 return min(method1, method2);
801bcfff
PZ
2513}
2514
37126462
VS
2515/*
2516 * For both WM_PIPE and WM_LP.
2517 * mem_value must be in 0.1us units.
2518 */
7221fc33 2519static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
43d59eda 2520 const struct intel_plane_state *pstate,
801bcfff
PZ
2521 uint32_t mem_value)
2522{
2523 uint32_t method1, method2;
8305494e 2524 int cpp;
801bcfff 2525
03981c6e
VS
2526 if (mem_value == 0)
2527 return U32_MAX;
2528
24304d81 2529 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2530 return 0;
2531
353c8598 2532 cpp = pstate->base.fb->format->cpp[0];
8305494e 2533
a7d1b3f4
VS
2534 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2535 method2 = ilk_wm_method2(cstate->pixel_rate,
7221fc33 2536 cstate->base.adjusted_mode.crtc_htotal,
936e71e3 2537 drm_rect_width(&pstate->base.dst),
ac484963 2538 cpp, mem_value);
801bcfff
PZ
2539 return min(method1, method2);
2540}
2541
37126462
VS
2542/*
2543 * For both WM_PIPE and WM_LP.
2544 * mem_value must be in 0.1us units.
2545 */
7221fc33 2546static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
43d59eda 2547 const struct intel_plane_state *pstate,
801bcfff
PZ
2548 uint32_t mem_value)
2549{
a5509abd
VS
2550 int cpp;
2551
03981c6e
VS
2552 if (mem_value == 0)
2553 return U32_MAX;
2554
24304d81 2555 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2556 return 0;
2557
a5509abd
VS
2558 cpp = pstate->base.fb->format->cpp[0];
2559
a7d1b3f4 2560 return ilk_wm_method2(cstate->pixel_rate,
7221fc33 2561 cstate->base.adjusted_mode.crtc_htotal,
a5509abd 2562 pstate->base.crtc_w, cpp, mem_value);
801bcfff
PZ
2563}
2564
cca32e9a 2565/* Only for WM_LP. */
7221fc33 2566static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
43d59eda 2567 const struct intel_plane_state *pstate,
1fda9882 2568 uint32_t pri_val)
cca32e9a 2569{
8305494e 2570 int cpp;
43d59eda 2571
24304d81 2572 if (!intel_wm_plane_visible(cstate, pstate))
cca32e9a
PZ
2573 return 0;
2574
353c8598 2575 cpp = pstate->base.fb->format->cpp[0];
8305494e 2576
936e71e3 2577 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
cca32e9a
PZ
2578}
2579
175fded1
TU
/* Display FIFO size: 3072 on gen8+, 768 on gen7, 512 on anything older. */
static unsigned int
ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 3072;

	if (INTEL_GEN(dev_priv) >= 7)
		return 768;

	return 512;
}
2590
175fded1
TU
2591static unsigned int
2592ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2593 int level, bool is_sprite)
4e975081 2594{
175fded1 2595 if (INTEL_GEN(dev_priv) >= 8)
4e975081
VS
2596 /* BDW primary/sprite plane watermarks */
2597 return level == 0 ? 255 : 2047;
175fded1 2598 else if (INTEL_GEN(dev_priv) >= 7)
4e975081
VS
2599 /* IVB/HSW primary/sprite plane watermarks */
2600 return level == 0 ? 127 : 1023;
2601 else if (!is_sprite)
2602 /* ILK/SNB primary plane watermarks */
2603 return level == 0 ? 127 : 511;
2604 else
2605 /* ILK/SNB sprite plane watermarks */
2606 return level == 0 ? 63 : 255;
2607}
2608
175fded1
TU
2609static unsigned int
2610ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
4e975081 2611{
175fded1 2612 if (INTEL_GEN(dev_priv) >= 7)
4e975081
VS
2613 return level == 0 ? 63 : 255;
2614 else
2615 return level == 0 ? 31 : 63;
2616}
2617
/* Largest FBC watermark value: 31 on gen8+, 15 otherwise. */
static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
{
	return INTEL_GEN(dev_priv) >= 8 ? 31 : 15;
}
2625
158ae64f 2626/* Calculate the maximum primary/sprite plane watermark */
cd1d3ee9 2627static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
158ae64f 2628 int level,
240264f4 2629 const struct intel_wm_config *config,
158ae64f
VS
2630 enum intel_ddb_partitioning ddb_partitioning,
2631 bool is_sprite)
2632{
175fded1 2633 unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
158ae64f
VS
2634
2635 /* if sprites aren't enabled, sprites get nothing */
240264f4 2636 if (is_sprite && !config->sprites_enabled)
158ae64f
VS
2637 return 0;
2638
2639 /* HSW allows LP1+ watermarks even with multiple pipes */
240264f4 2640 if (level == 0 || config->num_pipes_active > 1) {
175fded1 2641 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
158ae64f
VS
2642
2643 /*
2644 * For some reason the non self refresh
2645 * FIFO size is only half of the self
2646 * refresh FIFO size on ILK/SNB.
2647 */
175fded1 2648 if (INTEL_GEN(dev_priv) <= 6)
158ae64f
VS
2649 fifo_size /= 2;
2650 }
2651
240264f4 2652 if (config->sprites_enabled) {
158ae64f
VS
2653 /* level 0 is always calculated with 1:1 split */
2654 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2655 if (is_sprite)
2656 fifo_size *= 5;
2657 fifo_size /= 6;
2658 } else {
2659 fifo_size /= 2;
2660 }
2661 }
2662
2663 /* clamp to max that the registers can hold */
175fded1 2664 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
158ae64f
VS
2665}
2666
2667/* Calculate the maximum cursor plane watermark */
cd1d3ee9 2668static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
240264f4
VS
2669 int level,
2670 const struct intel_wm_config *config)
158ae64f
VS
2671{
2672 /* HSW LP1+ watermarks w/ multiple pipes */
240264f4 2673 if (level > 0 && config->num_pipes_active > 1)
158ae64f
VS
2674 return 64;
2675
2676 /* otherwise just report max that registers can hold */
cd1d3ee9 2677 return ilk_cursor_wm_reg_max(dev_priv, level);
158ae64f
VS
2678}
2679
cd1d3ee9 2680static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
34982fe1
VS
2681 int level,
2682 const struct intel_wm_config *config,
2683 enum intel_ddb_partitioning ddb_partitioning,
820c1980 2684 struct ilk_wm_maximums *max)
158ae64f 2685{
cd1d3ee9
MR
2686 max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2687 max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2688 max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2689 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
158ae64f
VS
2690}
2691
175fded1 2692static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
a3cb4048
VS
2693 int level,
2694 struct ilk_wm_maximums *max)
2695{
175fded1
TU
2696 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2697 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2698 max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2699 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
a3cb4048
VS
2700}
2701
d9395655 2702static bool ilk_validate_wm_level(int level,
820c1980 2703 const struct ilk_wm_maximums *max,
d9395655 2704 struct intel_wm_level *result)
a9786a11
VS
2705{
2706 bool ret;
2707
2708 /* already determined to be invalid? */
2709 if (!result->enable)
2710 return false;
2711
2712 result->enable = result->pri_val <= max->pri &&
2713 result->spr_val <= max->spr &&
2714 result->cur_val <= max->cur;
2715
2716 ret = result->enable;
2717
2718 /*
2719 * HACK until we can pre-compute everything,
2720 * and thus fail gracefully if LP0 watermarks
2721 * are exceeded...
2722 */
2723 if (level == 0 && !result->enable) {
2724 if (result->pri_val > max->pri)
2725 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2726 level, result->pri_val, max->pri);
2727 if (result->spr_val > max->spr)
2728 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2729 level, result->spr_val, max->spr);
2730 if (result->cur_val > max->cur)
2731 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2732 level, result->cur_val, max->cur);
2733
2734 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2735 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2736 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2737 result->enable = true;
2738 }
2739
a9786a11
VS
2740 return ret;
2741}
2742
d34ff9c6 2743static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
43d59eda 2744 const struct intel_crtc *intel_crtc,
6f5ddd17 2745 int level,
7221fc33 2746 struct intel_crtc_state *cstate,
28283f4f
ML
2747 const struct intel_plane_state *pristate,
2748 const struct intel_plane_state *sprstate,
2749 const struct intel_plane_state *curstate,
1fd527cc 2750 struct intel_wm_level *result)
6f5ddd17
VS
2751{
2752 uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2753 uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2754 uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2755
2756 /* WM1+ latency values stored in 0.5us units */
2757 if (level > 0) {
2758 pri_latency *= 5;
2759 spr_latency *= 5;
2760 cur_latency *= 5;
2761 }
2762
e3bddded
ML
2763 if (pristate) {
2764 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2765 pri_latency, level);
2766 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2767 }
2768
2769 if (sprstate)
2770 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2771
2772 if (curstate)
2773 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2774
6f5ddd17
VS
2775 result->enable = true;
2776}
2777
801bcfff 2778static uint32_t
532f7a7f 2779hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
1f8eeabf 2780{
532f7a7f
VS
2781 const struct intel_atomic_state *intel_state =
2782 to_intel_atomic_state(cstate->base.state);
ee91a159
MR
2783 const struct drm_display_mode *adjusted_mode =
2784 &cstate->base.adjusted_mode;
85a02deb 2785 u32 linetime, ips_linetime;
1f8eeabf 2786
ee91a159
MR
2787 if (!cstate->base.active)
2788 return 0;
2789 if (WARN_ON(adjusted_mode->crtc_clock == 0))
2790 return 0;
bb0f4aab 2791 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
801bcfff 2792 return 0;
1011d8c4 2793
1f8eeabf
ED
2794 /* The WM are computed with base on how long it takes to fill a single
2795 * row at the given clock rate, multiplied by 8.
2796 * */
124abe07
VS
2797 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2798 adjusted_mode->crtc_clock);
2799 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
bb0f4aab 2800 intel_state->cdclk.logical.cdclk);
1f8eeabf 2801
801bcfff
PZ
2802 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2803 PIPE_WM_LINETIME_TIME(linetime);
1f8eeabf
ED
2804}
2805
bb726519
VS
2806static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2807 uint16_t wm[8])
12b134df 2808{
50682ee6 2809 if (INTEL_GEN(dev_priv) >= 9) {
2af30a5c 2810 uint32_t val;
4f947386 2811 int ret, i;
5db94019 2812 int level, max_level = ilk_wm_max_level(dev_priv);
2af30a5c
PB
2813
2814 /* read the first set of memory latencies[0:3] */
2815 val = 0; /* data0 to be programmed to 0 for first set */
9f817501 2816 mutex_lock(&dev_priv->pcu_lock);
2af30a5c
PB
2817 ret = sandybridge_pcode_read(dev_priv,
2818 GEN9_PCODE_READ_MEM_LATENCY,
2819 &val);
9f817501 2820 mutex_unlock(&dev_priv->pcu_lock);
2af30a5c
PB
2821
2822 if (ret) {
2823 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2824 return;
2825 }
2826
2827 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2828 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2829 GEN9_MEM_LATENCY_LEVEL_MASK;
2830 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2831 GEN9_MEM_LATENCY_LEVEL_MASK;
2832 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2833 GEN9_MEM_LATENCY_LEVEL_MASK;
2834
2835 /* read the second set of memory latencies[4:7] */
2836 val = 1; /* data0 to be programmed to 1 for second set */
9f817501 2837 mutex_lock(&dev_priv->pcu_lock);
2af30a5c
PB
2838 ret = sandybridge_pcode_read(dev_priv,
2839 GEN9_PCODE_READ_MEM_LATENCY,
2840 &val);
9f817501 2841 mutex_unlock(&dev_priv->pcu_lock);
2af30a5c
PB
2842 if (ret) {
2843 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2844 return;
2845 }
2846
2847 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2848 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2849 GEN9_MEM_LATENCY_LEVEL_MASK;
2850 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2851 GEN9_MEM_LATENCY_LEVEL_MASK;
2852 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2853 GEN9_MEM_LATENCY_LEVEL_MASK;
2854
0727e40a
PZ
2855 /*
2856 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2857 * need to be disabled. We make sure to sanitize the values out
2858 * of the punit to satisfy this requirement.
2859 */
2860 for (level = 1; level <= max_level; level++) {
2861 if (wm[level] == 0) {
2862 for (i = level + 1; i <= max_level; i++)
2863 wm[i] = 0;
2864 break;
2865 }
2866 }
2867
367294be 2868 /*
50682ee6 2869 * WaWmMemoryReadLatency:skl+,glk
6f97235b 2870 *
367294be 2871 * punit doesn't take into account the read latency so we need
0727e40a
PZ
2872 * to add 2us to the various latency levels we retrieve from the
2873 * punit when level 0 response data us 0us.
367294be 2874 */
0727e40a
PZ
2875 if (wm[0] == 0) {
2876 wm[0] += 2;
2877 for (level = 1; level <= max_level; level++) {
2878 if (wm[level] == 0)
2879 break;
367294be 2880 wm[level] += 2;
4f947386 2881 }
0727e40a
PZ
2882 }
2883
86b59287
MK
2884 /*
2885 * WA Level-0 adjustment for 16GB DIMMs: SKL+
2886 * If we could not get dimm info enable this WA to prevent from
2887 * any underrun. If not able to get Dimm info assume 16GB dimm
2888 * to avoid any underrun.
2889 */
5d6f36b2 2890 if (dev_priv->dram_info.is_16gb_dimm)
86b59287
MK
2891 wm[0] += 1;
2892
8652744b 2893 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
12b134df
VS
2894 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2895
2896 wm[0] = (sskpd >> 56) & 0xFF;
2897 if (wm[0] == 0)
2898 wm[0] = sskpd & 0xF;
e5d5019e
VS
2899 wm[1] = (sskpd >> 4) & 0xFF;
2900 wm[2] = (sskpd >> 12) & 0xFF;
2901 wm[3] = (sskpd >> 20) & 0x1FF;
2902 wm[4] = (sskpd >> 32) & 0x1FF;
bb726519 2903 } else if (INTEL_GEN(dev_priv) >= 6) {
63cf9a13
VS
2904 uint32_t sskpd = I915_READ(MCH_SSKPD);
2905
2906 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2907 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2908 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2909 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
bb726519 2910 } else if (INTEL_GEN(dev_priv) >= 5) {
3a88d0ac
VS
2911 uint32_t mltr = I915_READ(MLTR_ILK);
2912
2913 /* ILK primary LP0 latency is 700 ns */
2914 wm[0] = 7;
2915 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2916 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
50682ee6
PZ
2917 } else {
2918 MISSING_CASE(INTEL_DEVID(dev_priv));
12b134df
VS
2919 }
2920}
2921
/* Override the sprite LP0 latency on gen5; other platforms are untouched. */
static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
				       uint16_t wm[5])
{
	if (!IS_GEN(dev_priv, 5))
		return;

	/* ILK sprite LP0 latency is 1300 ns */
	wm[0] = 13;
}
2929
/* Override the cursor LP0 latency on gen5; other platforms are untouched. */
static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
				       uint16_t wm[5])
{
	if (!IS_GEN(dev_priv, 5))
		return;

	/* ILK cursor LP0 latency is 1300 ns */
	wm[0] = 13;
}
2937
/*
 * Return the highest watermark level index expected on this platform:
 * 7 on gen9+, 4 on HSW/BDW, 3 on other gen6+ and 2 otherwise.
 */
int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
{
	/* how many WM levels are we expecting */
	if (INTEL_GEN(dev_priv) >= 9)
		return 7;

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 4;

	return INTEL_GEN(dev_priv) >= 6 ? 3 : 2;
}
7526ed79 2950
/*
 * Log every latency level of @wm, labelled with @name, as both the raw
 * value and a value in usec. Levels whose value is 0 are reported as
 * "not provided".
 */
static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
				   const char *name,
				   const uint16_t wm[8])
{
	int level, max_level = ilk_wm_max_level(dev_priv);

	for (level = 0; level <= max_level; level++) {
		unsigned int raw = wm[level];
		unsigned int scale = 1;
		unsigned int tenths;

		if (raw == 0) {
			DRM_DEBUG_KMS("%s WM%d latency not provided\n",
				      name, level);
			continue;
		}

		/*
		 * - latencies are in us on gen9.
		 * - before then, WM1+ latency values are in 0.5us units
		 */
		if (INTEL_GEN(dev_priv) >= 9)
			scale = 10;
		else if (level > 0)
			scale = 5;

		/* latency expressed in 0.1us steps for printing */
		tenths = raw * scale;

		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
			      name, level, raw,
			      tenths / 10, tenths % 10);
	}
}
2980
/*
 * Raise the latency table @wm so that level 0 is at least @min.
 *
 * When level 0 is below @min it is set to @min and every LP1+ level is
 * raised to at least DIV_ROUND_UP(min, 5).
 *
 * Returns true if the table was adjusted, false if level 0 was already
 * large enough (in which case nothing is modified).
 */
static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
				    uint16_t wm[5], uint16_t min)
{
	const uint16_t lp_min = DIV_ROUND_UP(min, 5);
	int level, max_level = ilk_wm_max_level(dev_priv);

	if (wm[0] >= min)
		return false;

	wm[0] = min;
	for (level = 1; level <= max_level; level++) {
		if (wm[level] < lp_min)
			wm[level] = lp_min;
	}

	return true;
}
2995
bb726519 2996static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
e95a2f75 2997{
e95a2f75
VS
2998 bool changed;
2999
3000 /*
3001 * The BIOS provided WM memory latency values are often
3002 * inadequate for high resolution displays. Adjust them.
3003 */
3004 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3005 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3006 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3007
3008 if (!changed)
3009 return;
3010
3011 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
5db94019
TU
3012 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3013 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3014 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
e95a2f75
VS
3015}
3016
03981c6e
VS
3017static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3018{
3019 /*
3020 * On some SNB machines (Thinkpad X220 Tablet at least)
3021 * LP3 usage can cause vblank interrupts to be lost.
3022 * The DEIIR bit will go high but it looks like the CPU
3023 * never gets interrupted.
3024 *
3025 * It's not clear whether other interrupt source could
3026 * be affected or if this is somehow limited to vblank
3027 * interrupts only. To play it safe we disable LP3
3028 * watermarks entirely.
3029 */
3030 if (dev_priv->wm.pri_latency[3] == 0 &&
3031 dev_priv->wm.spr_latency[3] == 0 &&
3032 dev_priv->wm.cur_latency[3] == 0)
3033 return;
3034
3035 dev_priv->wm.pri_latency[3] = 0;
3036 dev_priv->wm.spr_latency[3] = 0;
3037 dev_priv->wm.cur_latency[3] = 0;
3038
3039 DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3040 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3041 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3042 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3043}
3044
bb726519 3045static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
53615a5e 3046{
bb726519 3047 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
53615a5e
VS
3048
3049 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3050 sizeof(dev_priv->wm.pri_latency));
3051 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3052 sizeof(dev_priv->wm.pri_latency));
3053
5db94019 3054 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
fd6b8f43 3055 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
26ec971e 3056
5db94019
TU
3057 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3058 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3059 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
e95a2f75 3060
cf819eff 3061 if (IS_GEN(dev_priv, 6)) {
bb726519 3062 snb_wm_latency_quirk(dev_priv);
03981c6e
VS
3063 snb_wm_lp3_irq_quirk(dev_priv);
3064 }
53615a5e
VS
3065}
3066
bb726519 3067static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
2af30a5c 3068{
bb726519 3069 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
5db94019 3070 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
2af30a5c
PB
3071}
3072
cd1d3ee9 3073static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
ed4a6a7c
MR
3074 struct intel_pipe_wm *pipe_wm)
3075{
3076 /* LP0 watermark maximums depend on this pipe alone */
3077 const struct intel_wm_config config = {
3078 .num_pipes_active = 1,
3079 .sprites_enabled = pipe_wm->sprites_enabled,
3080 .sprites_scaled = pipe_wm->sprites_scaled,
3081 };
3082 struct ilk_wm_maximums max;
3083
3084 /* LP0 watermarks always use 1/2 DDB partitioning */
cd1d3ee9 3085 ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
ed4a6a7c
MR
3086
3087 /* At least LP0 must be valid */
3088 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3089 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3090 return false;
3091 }
3092
3093 return true;
3094}
3095
0b2ae6d7 3096/* Compute new watermarks for the pipe */
e3bddded 3097static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
0b2ae6d7 3098{
e3bddded
ML
3099 struct drm_atomic_state *state = cstate->base.state;
3100 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
86c8bbbe 3101 struct intel_pipe_wm *pipe_wm;
e3bddded 3102 struct drm_device *dev = state->dev;
fac5e23e 3103 const struct drm_i915_private *dev_priv = to_i915(dev);
28283f4f
ML
3104 struct drm_plane *plane;
3105 const struct drm_plane_state *plane_state;
3106 const struct intel_plane_state *pristate = NULL;
3107 const struct intel_plane_state *sprstate = NULL;
3108 const struct intel_plane_state *curstate = NULL;
5db94019 3109 int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
820c1980 3110 struct ilk_wm_maximums max;
0b2ae6d7 3111
e8f1f02e 3112 pipe_wm = &cstate->wm.ilk.optimal;
86c8bbbe 3113
28283f4f
ML
3114 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3115 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
e3bddded 3116
28283f4f 3117 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
e3bddded 3118 pristate = ps;
28283f4f 3119 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
e3bddded 3120 sprstate = ps;
28283f4f 3121 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
e3bddded 3122 curstate = ps;
43d59eda
MR
3123 }
3124
ed4a6a7c 3125 pipe_wm->pipe_enabled = cstate->base.active;
e3bddded 3126 if (sprstate) {
936e71e3
VS
3127 pipe_wm->sprites_enabled = sprstate->base.visible;
3128 pipe_wm->sprites_scaled = sprstate->base.visible &&
3129 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3130 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
e3bddded
ML
3131 }
3132
d81f04c5
ML
3133 usable_level = max_level;
3134
7b39a0b7 3135 /* ILK/SNB: LP2+ watermarks only w/o sprites */
175fded1 3136 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
d81f04c5 3137 usable_level = 1;
7b39a0b7
VS
3138
3139 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
ed4a6a7c 3140 if (pipe_wm->sprites_scaled)
d81f04c5 3141 usable_level = 0;
7b39a0b7 3142
71f0a626 3143 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
28283f4f
ML
3144 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3145 pristate, sprstate, curstate, &pipe_wm->wm[0]);
0b2ae6d7 3146
8652744b 3147 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
532f7a7f 3148 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
0b2ae6d7 3149
cd1d3ee9 3150 if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
1a426d61 3151 return -EINVAL;
a3cb4048 3152
175fded1 3153 ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
a3cb4048 3154
28283f4f
ML
3155 for (level = 1; level <= usable_level; level++) {
3156 struct intel_wm_level *wm = &pipe_wm->wm[level];
a3cb4048 3157
86c8bbbe 3158 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
d81f04c5 3159 pristate, sprstate, curstate, wm);
a3cb4048
VS
3160
3161 /*
3162 * Disable any watermark level that exceeds the
3163 * register maximums since such watermarks are
3164 * always invalid.
3165 */
28283f4f
ML
3166 if (!ilk_validate_wm_level(level, &max, wm)) {
3167 memset(wm, 0, sizeof(*wm));
3168 break;
3169 }
a3cb4048
VS
3170 }
3171
86c8bbbe 3172 return 0;
0b2ae6d7
VS
3173}
3174
ed4a6a7c
MR
3175/*
3176 * Build a set of 'intermediate' watermark values that satisfy both the old
3177 * state and the new state. These can be programmed to the hardware
3178 * immediately.
3179 */
cd1d3ee9 3180static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
ed4a6a7c 3181{
cd1d3ee9
MR
3182 struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
3183 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
e8f1f02e 3184 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
b6b178a7
ML
3185 struct intel_atomic_state *intel_state =
3186 to_intel_atomic_state(newstate->base.state);
3187 const struct intel_crtc_state *oldstate =
3188 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3189 const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
cd1d3ee9 3190 int level, max_level = ilk_wm_max_level(dev_priv);
ed4a6a7c
MR
3191
3192 /*
3193 * Start with the final, target watermarks, then combine with the
3194 * currently active watermarks to get values that are safe both before
3195 * and after the vblank.
3196 */
e8f1f02e 3197 *a = newstate->wm.ilk.optimal;
f255c624
VS
3198 if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
3199 intel_state->skip_intermediate_wm)
b6b178a7
ML
3200 return 0;
3201
ed4a6a7c
MR
3202 a->pipe_enabled |= b->pipe_enabled;
3203 a->sprites_enabled |= b->sprites_enabled;
3204 a->sprites_scaled |= b->sprites_scaled;
3205
3206 for (level = 0; level <= max_level; level++) {
3207 struct intel_wm_level *a_wm = &a->wm[level];
3208 const struct intel_wm_level *b_wm = &b->wm[level];
3209
3210 a_wm->enable &= b_wm->enable;
3211 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3212 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3213 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3214 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3215 }
3216
3217 /*
3218 * We need to make sure that these merged watermark values are
3219 * actually a valid configuration themselves. If they're not,
3220 * there's no safe way to transition from the old state to
3221 * the new state, so we need to fail the atomic transaction.
3222 */
cd1d3ee9 3223 if (!ilk_validate_pipe_wm(dev_priv, a))
ed4a6a7c
MR
3224 return -EINVAL;
3225
3226 /*
3227 * If our intermediate WM are identical to the final WM, then we can
3228 * omit the post-vblank programming; only update if it's different.
3229 */
5eeb798b
VS
3230 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3231 newstate->wm.need_postvbl_update = true;
ed4a6a7c
MR
3232
3233 return 0;
3234}
3235
0b2ae6d7
VS
3236/*
3237 * Merge the watermarks from all active pipes for a specific level.
3238 */
cd1d3ee9 3239static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
0b2ae6d7
VS
3240 int level,
3241 struct intel_wm_level *ret_wm)
3242{
3243 const struct intel_crtc *intel_crtc;
3244
d52fea5b
VS
3245 ret_wm->enable = true;
3246
cd1d3ee9 3247 for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
ed4a6a7c 3248 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
fe392efd
VS
3249 const struct intel_wm_level *wm = &active->wm[level];
3250
3251 if (!active->pipe_enabled)
3252 continue;
0b2ae6d7 3253
d52fea5b
VS
3254 /*
3255 * The watermark values may have been used in the past,
3256 * so we must maintain them in the registers for some
3257 * time even if the level is now disabled.
3258 */
0b2ae6d7 3259 if (!wm->enable)
d52fea5b 3260 ret_wm->enable = false;
0b2ae6d7
VS
3261
3262 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3263 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3264 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3265 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3266 }
0b2ae6d7
VS
3267}
3268
3269/*
3270 * Merge all low power watermarks for all active pipes.
3271 */
cd1d3ee9 3272static void ilk_wm_merge(struct drm_i915_private *dev_priv,
0ba22e26 3273 const struct intel_wm_config *config,
820c1980 3274 const struct ilk_wm_maximums *max,
0b2ae6d7
VS
3275 struct intel_pipe_wm *merged)
3276{
5db94019 3277 int level, max_level = ilk_wm_max_level(dev_priv);
d52fea5b 3278 int last_enabled_level = max_level;
0b2ae6d7 3279
0ba22e26 3280 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
fd6b8f43 3281 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
0ba22e26 3282 config->num_pipes_active > 1)
1204d5ba 3283 last_enabled_level = 0;
0ba22e26 3284
6c8b6c28 3285 /* ILK: FBC WM must be disabled always */
175fded1 3286 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
0b2ae6d7
VS
3287
3288 /* merge each WM1+ level */
3289 for (level = 1; level <= max_level; level++) {
3290 struct intel_wm_level *wm = &merged->wm[level];
3291
cd1d3ee9 3292 ilk_merge_wm_level(dev_priv, level, wm);
0b2ae6d7 3293
d52fea5b
VS
3294 if (level > last_enabled_level)
3295 wm->enable = false;
3296 else if (!ilk_validate_wm_level(level, max, wm))
3297 /* make sure all following levels get disabled */
3298 last_enabled_level = level - 1;
0b2ae6d7
VS
3299
3300 /*
3301 * The spec says it is preferred to disable
3302 * FBC WMs instead of disabling a WM level.
3303 */
3304 if (wm->fbc_val > max->fbc) {
d52fea5b
VS
3305 if (wm->enable)
3306 merged->fbc_wm_enabled = false;
0b2ae6d7
VS
3307 wm->fbc_val = 0;
3308 }
3309 }
6c8b6c28
VS
3310
3311 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3312 /*
3313 * FIXME this is racy. FBC might get enabled later.
3314 * What we should check here is whether FBC can be
3315 * enabled sometime later.
3316 */
cf819eff 3317 if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
0e631adc 3318 intel_fbc_is_active(dev_priv)) {
6c8b6c28
VS
3319 for (level = 2; level <= max_level; level++) {
3320 struct intel_wm_level *wm = &merged->wm[level];
3321
3322 wm->enable = false;
3323 }
3324 }
0b2ae6d7
VS
3325}
3326
b380ca3c
VS
3327static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3328{
3329 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3330 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3331}
3332
a68d68ee 3333/* The value we need to program into the WM_LPx latency field */
cd1d3ee9
MR
3334static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3335 int level)
a68d68ee 3336{
8652744b 3337 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
a68d68ee
VS
3338 return 2 * level;
3339 else
3340 return dev_priv->wm.pri_latency[level];
3341}
3342
cd1d3ee9 3343static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
0362c781 3344 const struct intel_pipe_wm *merged,
609cedef 3345 enum intel_ddb_partitioning partitioning,
820c1980 3346 struct ilk_wm_values *results)
801bcfff 3347{
0b2ae6d7
VS
3348 struct intel_crtc *intel_crtc;
3349 int level, wm_lp;
cca32e9a 3350
0362c781 3351 results->enable_fbc_wm = merged->fbc_wm_enabled;
609cedef 3352 results->partitioning = partitioning;
cca32e9a 3353
0b2ae6d7 3354 /* LP1+ register values */
cca32e9a 3355 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
1fd527cc 3356 const struct intel_wm_level *r;
801bcfff 3357
b380ca3c 3358 level = ilk_wm_lp_to_level(wm_lp, merged);
0b2ae6d7 3359
0362c781 3360 r = &merged->wm[level];
cca32e9a 3361
d52fea5b
VS
3362 /*
3363 * Maintain the watermark values even if the level is
3364 * disabled. Doing otherwise could cause underruns.
3365 */
3366 results->wm_lp[wm_lp - 1] =
cd1d3ee9 3367 (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
416f4727
VS
3368 (r->pri_val << WM1_LP_SR_SHIFT) |
3369 r->cur_val;
3370
d52fea5b
VS
3371 if (r->enable)
3372 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3373
175fded1 3374 if (INTEL_GEN(dev_priv) >= 8)
416f4727
VS
3375 results->wm_lp[wm_lp - 1] |=
3376 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3377 else
3378 results->wm_lp[wm_lp - 1] |=
3379 r->fbc_val << WM1_LP_FBC_SHIFT;
3380
d52fea5b
VS
3381 /*
3382 * Always set WM1S_LP_EN when spr_val != 0, even if the
3383 * level is disabled. Doing otherwise could cause underruns.
3384 */
175fded1 3385 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
6cef2b8a
VS
3386 WARN_ON(wm_lp != 1);
3387 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3388 } else
3389 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
cca32e9a 3390 }
801bcfff 3391
0b2ae6d7 3392 /* LP0 register values */
cd1d3ee9 3393 for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
0b2ae6d7 3394 enum pipe pipe = intel_crtc->pipe;
ed4a6a7c
MR
3395 const struct intel_wm_level *r =
3396 &intel_crtc->wm.active.ilk.wm[0];
0b2ae6d7
VS
3397
3398 if (WARN_ON(!r->enable))
3399 continue;
3400
ed4a6a7c 3401 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
1011d8c4 3402
0b2ae6d7
VS
3403 results->wm_pipe[pipe] =
3404 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3405 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3406 r->cur_val;
801bcfff
PZ
3407 }
3408}
3409
861f3389
PZ
3410/* Find the result with the highest level enabled. Check for enable_fbc_wm in
3411 * case both are at the same level. Prefer r1 in case they're the same. */
cd1d3ee9
MR
3412static struct intel_pipe_wm *
3413ilk_find_best_result(struct drm_i915_private *dev_priv,
3414 struct intel_pipe_wm *r1,
3415 struct intel_pipe_wm *r2)
861f3389 3416{
cd1d3ee9 3417 int level, max_level = ilk_wm_max_level(dev_priv);
198a1e9b 3418 int level1 = 0, level2 = 0;
861f3389 3419
198a1e9b
VS
3420 for (level = 1; level <= max_level; level++) {
3421 if (r1->wm[level].enable)
3422 level1 = level;
3423 if (r2->wm[level].enable)
3424 level2 = level;
861f3389
PZ
3425 }
3426
198a1e9b
VS
3427 if (level1 == level2) {
3428 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
861f3389
PZ
3429 return r2;
3430 else
3431 return r1;
198a1e9b 3432 } else if (level1 > level2) {
861f3389
PZ
3433 return r1;
3434 } else {
3435 return r2;
3436 }
3437}
3438
/*
 * Dirty bits used to track which watermarks need changes:
 *   bits 0+   one per pipe, LP0 pipe watermark
 *   bits 8+   one per pipe, linetime watermark
 *   bits 16+  one per LP level, indexed from wm_lp == 1
 *   bit 24    FBC watermark enable
 *   bit 25    DDB partitioning
 */
#define WM_DIRTY_PIPE(pipe)	(1 << (pipe))
#define WM_DIRTY_LINETIME(pipe)	(1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp)	(1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL		(WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC		(1 << 24)
#define WM_DIRTY_DDB		(1 << 25)
3446
055e393f 3447static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
820c1980
ID
3448 const struct ilk_wm_values *old,
3449 const struct ilk_wm_values *new)
49a687c4
VS
3450{
3451 unsigned int dirty = 0;
3452 enum pipe pipe;
3453 int wm_lp;
3454
055e393f 3455 for_each_pipe(dev_priv, pipe) {
49a687c4
VS
3456 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3457 dirty |= WM_DIRTY_LINETIME(pipe);
3458 /* Must disable LP1+ watermarks too */
3459 dirty |= WM_DIRTY_LP_ALL;
3460 }
3461
3462 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3463 dirty |= WM_DIRTY_PIPE(pipe);
3464 /* Must disable LP1+ watermarks too */
3465 dirty |= WM_DIRTY_LP_ALL;
3466 }
3467 }
3468
3469 if (old->enable_fbc_wm != new->enable_fbc_wm) {
3470 dirty |= WM_DIRTY_FBC;
3471 /* Must disable LP1+ watermarks too */
3472 dirty |= WM_DIRTY_LP_ALL;
3473 }
3474
3475 if (old->partitioning != new->partitioning) {
3476 dirty |= WM_DIRTY_DDB;
3477 /* Must disable LP1+ watermarks too */
3478 dirty |= WM_DIRTY_LP_ALL;
3479 }
3480
3481 /* LP1+ watermarks already deemed dirty, no need to continue */
3482 if (dirty & WM_DIRTY_LP_ALL)
3483 return dirty;
3484
3485 /* Find the lowest numbered LP1+ watermark in need of an update... */
3486 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3487 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3488 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3489 break;
3490 }
3491
3492 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3493 for (; wm_lp <= 3; wm_lp++)
3494 dirty |= WM_DIRTY_LP(wm_lp);
3495
3496 return dirty;
3497}
3498
8553c18e
VS
3499static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3500 unsigned int dirty)
801bcfff 3501{
820c1980 3502 struct ilk_wm_values *previous = &dev_priv->wm.hw;
8553c18e 3503 bool changed = false;
801bcfff 3504
facd619b
VS
3505 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3506 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3507 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
8553c18e 3508 changed = true;
facd619b
VS
3509 }
3510 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3511 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3512 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
8553c18e 3513 changed = true;
facd619b
VS
3514 }
3515 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3516 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3517 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
8553c18e 3518 changed = true;
facd619b 3519 }
801bcfff 3520
facd619b
VS
3521 /*
3522 * Don't touch WM1S_LP_EN here.
3523 * Doing so could cause underruns.
3524 */
6cef2b8a 3525
8553c18e
VS
3526 return changed;
3527}
3528
3529/*
3530 * The spec says we shouldn't write when we don't need, because every write
3531 * causes WMs to be re-evaluated, expending some power.
3532 */
820c1980
ID
3533static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3534 struct ilk_wm_values *results)
8553c18e 3535{
820c1980 3536 struct ilk_wm_values *previous = &dev_priv->wm.hw;
8553c18e
VS
3537 unsigned int dirty;
3538 uint32_t val;
3539
055e393f 3540 dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
8553c18e
VS
3541 if (!dirty)
3542 return;
3543
3544 _ilk_disable_lp_wm(dev_priv, dirty);
3545
49a687c4 3546 if (dirty & WM_DIRTY_PIPE(PIPE_A))
801bcfff 3547 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
49a687c4 3548 if (dirty & WM_DIRTY_PIPE(PIPE_B))
801bcfff 3549 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
49a687c4 3550 if (dirty & WM_DIRTY_PIPE(PIPE_C))
801bcfff
PZ
3551 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3552
49a687c4 3553 if (dirty & WM_DIRTY_LINETIME(PIPE_A))
801bcfff 3554 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
49a687c4 3555 if (dirty & WM_DIRTY_LINETIME(PIPE_B))
801bcfff 3556 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
49a687c4 3557 if (dirty & WM_DIRTY_LINETIME(PIPE_C))
801bcfff
PZ
3558 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3559
49a687c4 3560 if (dirty & WM_DIRTY_DDB) {
8652744b 3561 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
ac9545fd
VS
3562 val = I915_READ(WM_MISC);
3563 if (results->partitioning == INTEL_DDB_PART_1_2)
3564 val &= ~WM_MISC_DATA_PARTITION_5_6;
3565 else
3566 val |= WM_MISC_DATA_PARTITION_5_6;
3567 I915_WRITE(WM_MISC, val);
3568 } else {
3569 val = I915_READ(DISP_ARB_CTL2);
3570 if (results->partitioning == INTEL_DDB_PART_1_2)
3571 val &= ~DISP_DATA_PARTITION_5_6;
3572 else
3573 val |= DISP_DATA_PARTITION_5_6;
3574 I915_WRITE(DISP_ARB_CTL2, val);
3575 }
1011d8c4
PZ
3576 }
3577
49a687c4 3578 if (dirty & WM_DIRTY_FBC) {
cca32e9a
PZ
3579 val = I915_READ(DISP_ARB_CTL);
3580 if (results->enable_fbc_wm)
3581 val &= ~DISP_FBC_WM_DIS;
3582 else
3583 val |= DISP_FBC_WM_DIS;
3584 I915_WRITE(DISP_ARB_CTL, val);
3585 }
3586
954911eb
ID
3587 if (dirty & WM_DIRTY_LP(1) &&
3588 previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3589 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3590
175fded1 3591 if (INTEL_GEN(dev_priv) >= 7) {
6cef2b8a
VS
3592 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3593 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3594 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3595 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3596 }
801bcfff 3597
facd619b 3598 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
801bcfff 3599 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
facd619b 3600 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
801bcfff 3601 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
facd619b 3602 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
801bcfff 3603 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
609cedef
VS
3604
3605 dev_priv->wm.hw = *results;
801bcfff
PZ
3606}
3607
ed4a6a7c 3608bool ilk_disable_lp_wm(struct drm_device *dev)
8553c18e 3609{
fac5e23e 3610 struct drm_i915_private *dev_priv = to_i915(dev);
8553c18e
VS
3611
3612 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3613}
3614
74bd8004
MK
3615static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3616{
3617 u8 enabled_slices;
3618
3619 /* Slice 1 will always be enabled */
3620 enabled_slices = 1;
3621
3622 /* Gen prior to GEN11 have only one DBuf slice */
3623 if (INTEL_GEN(dev_priv) < 11)
3624 return enabled_slices;
3625
3626 if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3627 enabled_slices++;
3628
3629 return enabled_slices;
3630}
3631
ee3d532f
PZ
3632/*
3633 * FIXME: We still don't have the proper code detect if we need to apply the WA,
3634 * so assume we'll always need it in order to avoid underruns.
3635 */
3636static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3637{
3638 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3639
b976dc53 3640 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
ee3d532f
PZ
3641 return true;
3642
3643 return false;
3644}
3645
56feca91
PZ
3646static bool
3647intel_has_sagv(struct drm_i915_private *dev_priv)
3648{
1ca2b067
RV
3649 return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3650 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
56feca91
PZ
3651}
3652
656d1b89
L
3653/*
3654 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3655 * depending on power and performance requirements. The display engine access
3656 * to system memory is blocked during the adjustment time. Because of the
3657 * blocking time, having this enabled can cause full system hangs and/or pipe
3658 * underruns if we don't meet all of the following requirements:
3659 *
3660 * - <= 1 pipe enabled
3661 * - All planes can enable watermarks for latencies >= SAGV engine block time
3662 * - We're not using an interlaced display configuration
3663 */
3664int
16dcdc4e 3665intel_enable_sagv(struct drm_i915_private *dev_priv)
656d1b89
L
3666{
3667 int ret;
3668
56feca91
PZ
3669 if (!intel_has_sagv(dev_priv))
3670 return 0;
3671
3672 if (dev_priv->sagv_status == I915_SAGV_ENABLED)
656d1b89
L
3673 return 0;
3674
3675 DRM_DEBUG_KMS("Enabling the SAGV\n");
9f817501 3676 mutex_lock(&dev_priv->pcu_lock);
656d1b89
L
3677
3678 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3679 GEN9_SAGV_ENABLE);
3680
3681 /* We don't need to wait for the SAGV when enabling */
9f817501 3682 mutex_unlock(&dev_priv->pcu_lock);
656d1b89
L
3683
3684 /*
3685 * Some skl systems, pre-release machines in particular,
3686 * don't actually have an SAGV.
3687 */
6e3100ec 3688 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
656d1b89 3689 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
16dcdc4e 3690 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
656d1b89
L
3691 return 0;
3692 } else if (ret < 0) {
3693 DRM_ERROR("Failed to enable the SAGV\n");
3694 return ret;
3695 }
3696
16dcdc4e 3697 dev_priv->sagv_status = I915_SAGV_ENABLED;
656d1b89
L
3698 return 0;
3699}
3700
656d1b89 3701int
16dcdc4e 3702intel_disable_sagv(struct drm_i915_private *dev_priv)
656d1b89 3703{
b3b8e999 3704 int ret;
656d1b89 3705
56feca91
PZ
3706 if (!intel_has_sagv(dev_priv))
3707 return 0;
3708
3709 if (dev_priv->sagv_status == I915_SAGV_DISABLED)
656d1b89
L
3710 return 0;
3711
3712 DRM_DEBUG_KMS("Disabling the SAGV\n");
9f817501 3713 mutex_lock(&dev_priv->pcu_lock);
656d1b89
L
3714
3715 /* bspec says to keep retrying for at least 1 ms */
b3b8e999
ID
3716 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3717 GEN9_SAGV_DISABLE,
3718 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3719 1);
9f817501 3720 mutex_unlock(&dev_priv->pcu_lock);
656d1b89 3721
656d1b89
L
3722 /*
3723 * Some skl systems, pre-release machines in particular,
3724 * don't actually have an SAGV.
3725 */
b3b8e999 3726 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
656d1b89 3727 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
16dcdc4e 3728 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
656d1b89 3729 return 0;
b3b8e999
ID
3730 } else if (ret < 0) {
3731 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3732 return ret;
656d1b89
L
3733 }
3734
16dcdc4e 3735 dev_priv->sagv_status = I915_SAGV_DISABLED;
656d1b89
L
3736 return 0;
3737}
3738
16dcdc4e 3739bool intel_can_enable_sagv(struct drm_atomic_state *state)
656d1b89
L
3740{
3741 struct drm_device *dev = state->dev;
3742 struct drm_i915_private *dev_priv = to_i915(dev);
3743 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
ee3d532f
PZ
3744 struct intel_crtc *crtc;
3745 struct intel_plane *plane;
d8c0fafc 3746 struct intel_crtc_state *cstate;
656d1b89 3747 enum pipe pipe;
d8c0fafc 3748 int level, latency;
4357ce07 3749 int sagv_block_time_us;
656d1b89 3750
56feca91
PZ
3751 if (!intel_has_sagv(dev_priv))
3752 return false;
3753
cf819eff 3754 if (IS_GEN(dev_priv, 9))
4357ce07 3755 sagv_block_time_us = 30;
cf819eff 3756 else if (IS_GEN(dev_priv, 10))
4357ce07
PZ
3757 sagv_block_time_us = 20;
3758 else
3759 sagv_block_time_us = 10;
3760
656d1b89 3761 /*
fdd11c2b 3762 * SKL+ workaround: bspec recommends we disable the SAGV when we have
656d1b89
L
3763 * more then one pipe enabled
3764 *
3765 * If there are no active CRTCs, no additional checks need be performed
3766 */
3767 if (hweight32(intel_state->active_crtcs) == 0)
3768 return true;
3769 else if (hweight32(intel_state->active_crtcs) > 1)
3770 return false;
3771
3772 /* Since we're now guaranteed to only have one active CRTC... */
3773 pipe = ffs(intel_state->active_crtcs) - 1;
98187836 3774 crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
d8c0fafc 3775 cstate = to_intel_crtc_state(crtc->base.state);
656d1b89 3776
c89cadd5 3777 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
656d1b89
L
3778 return false;
3779
ee3d532f 3780 for_each_intel_plane_on_crtc(dev, crtc, plane) {
d5cdfdf5
VS
3781 struct skl_plane_wm *wm =
3782 &cstate->wm.skl.optimal.planes[plane->id];
ee3d532f 3783
656d1b89 3784 /* Skip this plane if it's not enabled */
d8c0fafc 3785 if (!wm->wm[0].plane_en)
656d1b89
L
3786 continue;
3787
3788 /* Find the highest enabled wm level for this plane */
5db94019 3789 for (level = ilk_wm_max_level(dev_priv);
d8c0fafc 3790 !wm->wm[level].plane_en; --level)
656d1b89
L
3791 { }
3792
ee3d532f
PZ
3793 latency = dev_priv->wm.skl_latency[level];
3794
3795 if (skl_needs_memory_bw_wa(intel_state) &&
bae781b2 3796 plane->base.state->fb->modifier ==
ee3d532f
PZ
3797 I915_FORMAT_MOD_X_TILED)
3798 latency += 15;
3799
656d1b89 3800 /*
fdd11c2b
PZ
3801 * If any of the planes on this pipe don't enable wm levels that
3802 * incur memory latencies higher than sagv_block_time_us we
3803 * can't enable the SAGV.
656d1b89 3804 */
fdd11c2b 3805 if (latency < sagv_block_time_us)
656d1b89
L
3806 return false;
3807 }
3808
3809 return true;
3810}
3811
aaa02378
MK
3812static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3813 const struct intel_crtc_state *cstate,
24719e94 3814 const u64 total_data_rate,
aaa02378
MK
3815 const int num_active,
3816 struct skl_ddb_allocation *ddb)
aa9664ff
MK
3817{
3818 const struct drm_display_mode *adjusted_mode;
3819 u64 total_data_bw;
3820 u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3821
3822 WARN_ON(ddb_size == 0);
3823
3824 if (INTEL_GEN(dev_priv) < 11)
3825 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3826
3827 adjusted_mode = &cstate->base.adjusted_mode;
24719e94 3828 total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
aa9664ff
MK
3829
3830 /*
3831 * 12GB/s is maximum BW supported by single DBuf slice.
3832 */
24719e94 3833 if (num_active > 1 || total_data_bw >= GBps(12)) {
aa9664ff
MK
3834 ddb->enabled_slices = 2;
3835 } else {
3836 ddb->enabled_slices = 1;
3837 ddb_size /= 2;
3838 }
3839
3840 return ddb_size;
3841}
3842
b9cec075 3843static void
b048a00b 3844skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
024c9045 3845 const struct intel_crtc_state *cstate,
24719e94 3846 const u64 total_data_rate,
aa9664ff 3847 struct skl_ddb_allocation *ddb,
c107acfe
MR
3848 struct skl_ddb_entry *alloc, /* out */
3849 int *num_active /* out */)
b9cec075 3850{
c107acfe
MR
3851 struct drm_atomic_state *state = cstate->base.state;
3852 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
024c9045 3853 struct drm_crtc *for_crtc = cstate->base.crtc;
cf1f697a
MK
3854 const struct drm_crtc_state *crtc_state;
3855 const struct drm_crtc *crtc;
3856 u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3857 enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3858 u16 ddb_size;
3859 u32 i;
c107acfe 3860
a6d3460e 3861 if (WARN_ON(!state) || !cstate->base.active) {
b9cec075
DL
3862 alloc->start = 0;
3863 alloc->end = 0;
a6d3460e 3864 *num_active = hweight32(dev_priv->active_crtcs);
b9cec075
DL
3865 return;
3866 }
3867
a6d3460e
MR
3868 if (intel_state->active_pipe_changes)
3869 *num_active = hweight32(intel_state->active_crtcs);
3870 else
3871 *num_active = hweight32(dev_priv->active_crtcs);
3872
aa9664ff
MK
3873 ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3874 *num_active, ddb);
b9cec075 3875
c107acfe 3876 /*
cf1f697a
MK
3877 * If the state doesn't change the active CRTC's or there is no
3878 * modeset request, then there's no need to recalculate;
3879 * the existing pipe allocation limits should remain unchanged.
3880 * Note that we're safe from racing commits since any racing commit
3881 * that changes the active CRTC list or do modeset would need to
3882 * grab _all_ crtc locks, including the one we currently hold.
c107acfe 3883 */
cf1f697a 3884 if (!intel_state->active_pipe_changes && !intel_state->modeset) {
512b5527
ML
3885 /*
3886 * alloc may be cleared by clear_intel_crtc_state,
3887 * copy from old state to be sure
3888 */
3889 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
a6d3460e 3890 return;
c107acfe 3891 }
a6d3460e 3892
cf1f697a
MK
3893 /*
3894 * Watermark/ddb requirement highly depends upon width of the
3895 * framebuffer, So instead of allocating DDB equally among pipes
3896 * distribute DDB based on resolution/width of the display.
3897 */
3898 for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3899 const struct drm_display_mode *adjusted_mode;
3900 int hdisplay, vdisplay;
3901 enum pipe pipe;
3902
3903 if (!crtc_state->enable)
3904 continue;
3905
3906 pipe = to_intel_crtc(crtc)->pipe;
3907 adjusted_mode = &crtc_state->adjusted_mode;
3908 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3909 total_width += hdisplay;
3910
3911 if (pipe < for_pipe)
3912 width_before_pipe += hdisplay;
3913 else if (pipe == for_pipe)
3914 pipe_width = hdisplay;
3915 }
3916
3917 alloc->start = ddb_size * width_before_pipe / total_width;
3918 alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
b9cec075
DL
3919}
3920
/*
 * Number of DDB blocks set aside for the cursor: 32 when exactly one pipe is
 * active, 8 for any other count.
 */
static unsigned int skl_cursor_allocation(int num_active)
{
	return num_active == 1 ? 32 : 8;
}
3928
37cde11b
MK
3929static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3930 struct skl_ddb_entry *entry, u32 reg)
a269c583 3931{
37cde11b
MK
3932 u16 mask;
3933
3934 if (INTEL_GEN(dev_priv) >= 11)
3935 mask = ICL_DDB_ENTRY_MASK;
3936 else
3937 mask = SKL_DDB_ENTRY_MASK;
3938 entry->start = reg & mask;
3939 entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
3940
16160e3d
DL
3941 if (entry->end)
3942 entry->end += 1;
a269c583
DL
3943}
3944
ddf34319
MK
3945static void
3946skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3947 const enum pipe pipe,
3948 const enum plane_id plane_id,
ff43bc37
VS
3949 struct skl_ddb_entry *ddb_y,
3950 struct skl_ddb_entry *ddb_uv)
ddf34319 3951{
ff43bc37
VS
3952 u32 val, val2;
3953 u32 fourcc = 0;
ddf34319
MK
3954
3955 /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3956 if (plane_id == PLANE_CURSOR) {
3957 val = I915_READ(CUR_BUF_CFG(pipe));
ff43bc37 3958 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
ddf34319
MK
3959 return;
3960 }
3961
3962 val = I915_READ(PLANE_CTL(pipe, plane_id));
3963
3964 /* No DDB allocated for disabled planes */
ff43bc37
VS
3965 if (val & PLANE_CTL_ENABLE)
3966 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
3967 val & PLANE_CTL_ORDER_RGBX,
3968 val & PLANE_CTL_ALPHA_MASK);
ddf34319 3969
ff43bc37
VS
3970 if (INTEL_GEN(dev_priv) >= 11) {
3971 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3972 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3973 } else {
3974 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
12a6c931 3975 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
ddf34319 3976
ff43bc37
VS
3977 if (fourcc == DRM_FORMAT_NV12)
3978 swap(val, val2);
3979
3980 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3981 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
ddf34319
MK
3982 }
3983}
3984
ff43bc37
VS
3985void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
3986 struct skl_ddb_entry *ddb_y,
3987 struct skl_ddb_entry *ddb_uv)
a269c583 3988{
ff43bc37
VS
3989 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
3990 enum intel_display_power_domain power_domain;
3991 enum pipe pipe = crtc->pipe;
3992 enum plane_id plane_id;
74bd8004 3993
ff43bc37
VS
3994 power_domain = POWER_DOMAIN_PIPE(pipe);
3995 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
3996 return;
4d800030 3997
ff43bc37
VS
3998 for_each_plane_id_on_crtc(crtc, plane_id)
3999 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4000 plane_id,
4001 &ddb_y[plane_id],
4002 &ddb_uv[plane_id]);
b10f1b20 4003
ff43bc37
VS
4004 intel_display_power_put(dev_priv, power_domain);
4005}
4d800030 4006
ff43bc37
VS
4007void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4008 struct skl_ddb_allocation *ddb /* out */)
4009{
4010 ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
a269c583
DL
4011}
4012
9c2f7a9d
KM
4013/*
4014 * Determines the downscale amount of a plane for the purposes of watermark calculations.
4015 * The bspec defines downscale amount as:
4016 *
4017 * """
4018 * Horizontal down scale amount = maximum[1, Horizontal source size /
4019 * Horizontal destination size]
4020 * Vertical down scale amount = maximum[1, Vertical source size /
4021 * Vertical destination size]
4022 * Total down scale amount = Horizontal down scale amount *
4023 * Vertical down scale amount
4024 * """
4025 *
4026 * Return value is provided in 16.16 fixed point form to retain fractional part.
4027 * Caller should take care of dividing & rounding off the value.
4028 */
7084b50b 4029static uint_fixed_16_16_t
93aa2a1c
VS
4030skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4031 const struct intel_plane_state *pstate)
9c2f7a9d 4032{
93aa2a1c 4033 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
9c2f7a9d 4034 uint32_t src_w, src_h, dst_w, dst_h;
7084b50b
KM
4035 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4036 uint_fixed_16_16_t downscale_h, downscale_w;
9c2f7a9d 4037
93aa2a1c 4038 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
eac2cb81 4039 return u32_to_fixed16(0);
9c2f7a9d
KM
4040
4041 /* n.b., src is 16.16 fixed point, dst is whole integer */
93aa2a1c 4042 if (plane->id == PLANE_CURSOR) {
fce5adf5
VS
4043 /*
4044 * Cursors only support 0/180 degree rotation,
4045 * hence no need to account for rotation here.
4046 */
7084b50b
KM
4047 src_w = pstate->base.src_w >> 16;
4048 src_h = pstate->base.src_h >> 16;
93aa2a1c
VS
4049 dst_w = pstate->base.crtc_w;
4050 dst_h = pstate->base.crtc_h;
4051 } else {
fce5adf5
VS
4052 /*
4053 * Src coordinates are already rotated by 270 degrees for
4054 * the 90/270 degree plane rotation cases (to match the
4055 * GTT mapping), hence no need to account for rotation here.
4056 */
7084b50b
KM
4057 src_w = drm_rect_width(&pstate->base.src) >> 16;
4058 src_h = drm_rect_height(&pstate->base.src) >> 16;
93aa2a1c
VS
4059 dst_w = drm_rect_width(&pstate->base.dst);
4060 dst_h = drm_rect_height(&pstate->base.dst);
4061 }
4062
eac2cb81
KM
4063 fp_w_ratio = div_fixed16(src_w, dst_w);
4064 fp_h_ratio = div_fixed16(src_h, dst_h);
4065 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4066 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
9c2f7a9d 4067
7084b50b 4068 return mul_fixed16(downscale_w, downscale_h);
9c2f7a9d
KM
4069}
4070
73b0ca8e
MK
4071static uint_fixed_16_16_t
4072skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4073{
eac2cb81 4074 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
73b0ca8e
MK
4075
4076 if (!crtc_state->base.enable)
4077 return pipe_downscale;
4078
4079 if (crtc_state->pch_pfit.enabled) {
4080 uint32_t src_w, src_h, dst_w, dst_h;
4081 uint32_t pfit_size = crtc_state->pch_pfit.size;
4082 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4083 uint_fixed_16_16_t downscale_h, downscale_w;
4084
4085 src_w = crtc_state->pipe_src_w;
4086 src_h = crtc_state->pipe_src_h;
4087 dst_w = pfit_size >> 16;
4088 dst_h = pfit_size & 0xffff;
4089
4090 if (!dst_w || !dst_h)
4091 return pipe_downscale;
4092
eac2cb81
KM
4093 fp_w_ratio = div_fixed16(src_w, dst_w);
4094 fp_h_ratio = div_fixed16(src_h, dst_h);
4095 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4096 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
73b0ca8e
MK
4097
4098 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4099 }
4100
4101 return pipe_downscale;
4102}
4103
4104int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4105 struct intel_crtc_state *cstate)
4106{
43037c86 4107 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
73b0ca8e
MK
4108 struct drm_crtc_state *crtc_state = &cstate->base;
4109 struct drm_atomic_state *state = crtc_state->state;
4110 struct drm_plane *plane;
4111 const struct drm_plane_state *pstate;
4112 struct intel_plane_state *intel_pstate;
789f35d7 4113 int crtc_clock, dotclk;
73b0ca8e
MK
4114 uint32_t pipe_max_pixel_rate;
4115 uint_fixed_16_16_t pipe_downscale;
eac2cb81 4116 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
73b0ca8e
MK
4117
4118 if (!cstate->base.enable)
4119 return 0;
4120
4121 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4122 uint_fixed_16_16_t plane_downscale;
eac2cb81 4123 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
73b0ca8e
MK
4124 int bpp;
4125
4126 if (!intel_wm_plane_visible(cstate,
4127 to_intel_plane_state(pstate)))
4128 continue;
4129
4130 if (WARN_ON(!pstate->fb))
4131 return -EINVAL;
4132
4133 intel_pstate = to_intel_plane_state(pstate);
4134 plane_downscale = skl_plane_downscale_amount(cstate,
4135 intel_pstate);
4136 bpp = pstate->fb->format->cpp[0] * 8;
4137 if (bpp == 64)
4138 plane_downscale = mul_fixed16(plane_downscale,
4139 fp_9_div_8);
4140
eac2cb81 4141 max_downscale = max_fixed16(plane_downscale, max_downscale);
73b0ca8e
MK
4142 }
4143 pipe_downscale = skl_pipe_downscale_amount(cstate);
4144
4145 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4146
4147 crtc_clock = crtc_state->adjusted_mode.crtc_clock;
789f35d7
ML
4148 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4149
43037c86 4150 if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
789f35d7
ML
4151 dotclk *= 2;
4152
4153 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
73b0ca8e
MK
4154
4155 if (pipe_max_pixel_rate < crtc_clock) {
789f35d7 4156 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
73b0ca8e
MK
4157 return -EINVAL;
4158 }
4159
4160 return 0;
4161}
4162
24719e94 4163static u64
024c9045 4164skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
b048a00b 4165 const struct intel_plane_state *intel_pstate,
b879d58f 4166 const int plane)
b9cec075 4167{
b048a00b
ML
4168 struct intel_plane *intel_plane =
4169 to_intel_plane(intel_pstate->base.plane);
7084b50b 4170 uint32_t data_rate;
a280f7dd 4171 uint32_t width = 0, height = 0;
8305494e
VS
4172 struct drm_framebuffer *fb;
4173 u32 format;
7084b50b 4174 uint_fixed_16_16_t down_scale_amount;
24719e94 4175 u64 rate;
a1de91e5 4176
936e71e3 4177 if (!intel_pstate->base.visible)
a1de91e5 4178 return 0;
8305494e 4179
b048a00b 4180 fb = intel_pstate->base.fb;
438b74a5 4181 format = fb->format->format;
8305494e 4182
b879d58f 4183 if (intel_plane->id == PLANE_CURSOR)
a1de91e5 4184 return 0;
b879d58f 4185 if (plane == 1 && format != DRM_FORMAT_NV12)
a1de91e5 4186 return 0;
a280f7dd 4187
fce5adf5
VS
4188 /*
4189 * Src coordinates are already rotated by 270 degrees for
4190 * the 90/270 degree plane rotation cases (to match the
4191 * GTT mapping), hence no need to account for rotation here.
4192 */
936e71e3
VS
4193 width = drm_rect_width(&intel_pstate->base.src) >> 16;
4194 height = drm_rect_height(&intel_pstate->base.src) >> 16;
a280f7dd 4195
b879d58f
MK
4196 /* UV plane does 1/2 pixel sub-sampling */
4197 if (plane == 1 && format == DRM_FORMAT_NV12) {
4198 width /= 2;
4199 height /= 2;
2cd601c6
CK
4200 }
4201
24719e94 4202 data_rate = width * height;
b879d58f 4203
93aa2a1c 4204 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
8d19d7d9 4205
24719e94
ML
4206 rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4207
4208 rate *= fb->format->cpp[plane];
4209 return rate;
b9cec075
DL
4210}
4211
24719e94 4212static u64
1e6ee542 4213skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
24719e94
ML
4214 u64 *plane_data_rate,
4215 u64 *uv_plane_data_rate)
b9cec075 4216{
9c74d826
MR
4217 struct drm_crtc_state *cstate = &intel_cstate->base;
4218 struct drm_atomic_state *state = cstate->state;
c8fe32c1 4219 struct drm_plane *plane;
c8fe32c1 4220 const struct drm_plane_state *pstate;
24719e94 4221 u64 total_data_rate = 0;
a6d3460e
MR
4222
4223 if (WARN_ON(!state))
4224 return 0;
b9cec075 4225
a1de91e5 4226 /* Calculate and cache data rate for each plane */
c8fe32c1 4227 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
d5cdfdf5 4228 enum plane_id plane_id = to_intel_plane(plane)->id;
24719e94 4229 u64 rate;
b048a00b
ML
4230 const struct intel_plane_state *intel_pstate =
4231 to_intel_plane_state(pstate);
a6d3460e 4232
b879d58f 4233 /* packed/y */
a6d3460e 4234 rate = skl_plane_relative_data_rate(intel_cstate,
b048a00b 4235 intel_pstate, 0);
d5cdfdf5 4236 plane_data_rate[plane_id] = rate;
1e6ee542 4237 total_data_rate += rate;
a6d3460e 4238
b879d58f 4239 /* uv-plane */
a6d3460e 4240 rate = skl_plane_relative_data_rate(intel_cstate,
b048a00b 4241 intel_pstate, 1);
b879d58f 4242 uv_plane_data_rate[plane_id] = rate;
1e6ee542 4243 total_data_rate += rate;
b9cec075
DL
4244 }
4245
4246 return total_data_rate;
4247}
4248
b048a00b
ML
4249static u64
4250icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4251 u64 *plane_data_rate)
4252{
4253 struct drm_crtc_state *cstate = &intel_cstate->base;
4254 struct drm_atomic_state *state = cstate->state;
4255 struct drm_plane *plane;
4256 const struct drm_plane_state *pstate;
4257 u64 total_data_rate = 0;
4258
4259 if (WARN_ON(!state))
4260 return 0;
4261
4262 /* Calculate and cache data rate for each plane */
4263 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4264 const struct intel_plane_state *intel_pstate =
4265 to_intel_plane_state(pstate);
4266 enum plane_id plane_id = to_intel_plane(plane)->id;
4267 u64 rate;
4268
4269 if (!intel_pstate->linked_plane) {
4270 rate = skl_plane_relative_data_rate(intel_cstate,
4271 intel_pstate, 0);
4272 plane_data_rate[plane_id] = rate;
4273 total_data_rate += rate;
4274 } else {
4275 enum plane_id y_plane_id;
4276
4277 /*
4278 * The slave plane might not iterate in
4279 * drm_atomic_crtc_state_for_each_plane_state(),
4280 * and needs the master plane state which may be
4281 * NULL if we try get_new_plane_state(), so we
4282 * always calculate from the master.
4283 */
4284 if (intel_pstate->slave)
4285 continue;
4286
4287 /* Y plane rate is calculated on the slave */
4288 rate = skl_plane_relative_data_rate(intel_cstate,
4289 intel_pstate, 0);
4290 y_plane_id = intel_pstate->linked_plane->id;
4291 plane_data_rate[y_plane_id] = rate;
4292 total_data_rate += rate;
4293
4294 rate = skl_plane_relative_data_rate(intel_cstate,
4295 intel_pstate, 1);
4296 plane_data_rate[plane_id] = rate;
4297 total_data_rate += rate;
4298 }
4299 }
4300
4301 return total_data_rate;
4302}
4303
c107acfe 4304static int
024c9045 4305skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
b9cec075
DL
4306 struct skl_ddb_allocation *ddb /* out */)
4307{
c107acfe 4308 struct drm_atomic_state *state = cstate->base.state;
024c9045 4309 struct drm_crtc *crtc = cstate->base.crtc;
b048a00b 4310 struct drm_i915_private *dev_priv = to_i915(crtc->dev);
b9cec075 4311 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
ce0ba283 4312 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
d8e87498
MR
4313 struct skl_plane_wm *wm;
4314 uint16_t alloc_size, start = 0;
4315 uint16_t total[I915_MAX_PLANES] = {};
4316 uint16_t uv_total[I915_MAX_PLANES] = {};
24719e94 4317 u64 total_data_rate;
d5cdfdf5 4318 enum plane_id plane_id;
c107acfe 4319 int num_active;
24719e94
ML
4320 u64 plane_data_rate[I915_MAX_PLANES] = {};
4321 u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
d8e87498
MR
4322 uint16_t blocks = 0;
4323 int level;
b9cec075 4324
5a920b85 4325 /* Clear the partitioning for disabled planes. */
ff43bc37
VS
4326 memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
4327 memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
5a920b85 4328
a6d3460e
MR
4329 if (WARN_ON(!state))
4330 return 0;
4331
c107acfe 4332 if (!cstate->base.active) {
ce0ba283 4333 alloc->start = alloc->end = 0;
c107acfe
MR
4334 return 0;
4335 }
4336
b048a00b
ML
4337 if (INTEL_GEN(dev_priv) < 11)
4338 total_data_rate =
4339 skl_get_total_relative_data_rate(cstate,
4340 plane_data_rate,
4341 uv_plane_data_rate);
4342 else
4343 total_data_rate =
4344 icl_get_total_relative_data_rate(cstate,
4345 plane_data_rate);
4346
4347 skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
4348 ddb, alloc, &num_active);
34bb56af 4349 alloc_size = skl_ddb_entry_size(alloc);
336031ea 4350 if (alloc_size == 0)
c107acfe 4351 return 0;
b9cec075 4352
d8e87498
MR
4353 /* Allocate fixed number of blocks for cursor. */
4354 total[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4355 alloc_size -= total[PLANE_CURSOR];
4356 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4357 alloc->end - total[PLANE_CURSOR];
4358 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4359
4360 if (total_data_rate == 0)
4361 return 0;
a6d3460e 4362
49845a7a 4363 /*
d8e87498
MR
4364 * Find the highest watermark level for which we can satisfy the block
4365 * requirement of active planes.
49845a7a 4366 */
d8e87498 4367 for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
25db2eaf 4368 blocks = 0;
d8e87498
MR
4369 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4370 if (plane_id == PLANE_CURSOR)
4371 continue;
80958155 4372
d8e87498
MR
4373 wm = &cstate->wm.skl.optimal.planes[plane_id];
4374 blocks += wm->wm[level].plane_res_b;
4375 blocks += wm->uv_wm[level].plane_res_b;
4376 }
4377
4378 if (blocks < alloc_size) {
4379 alloc_size -= blocks;
4380 break;
4381 }
80958155
DL
4382 }
4383
d8e87498 4384 if (level < 0) {
5ba6faaf 4385 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
d8e87498
MR
4386 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4387 alloc_size);
5ba6faaf
KM
4388 return -EINVAL;
4389 }
4390
b9cec075 4391 /*
d8e87498
MR
4392 * Grant each plane the blocks it requires at the highest achievable
4393 * watermark level, plus an extra share of the leftover blocks
4394 * proportional to its relative data rate.
b9cec075 4395 */
d5cdfdf5 4396 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
d8e87498
MR
4397 u64 rate;
4398 u16 extra;
b9cec075 4399
d5cdfdf5 4400 if (plane_id == PLANE_CURSOR)
49845a7a
ML
4401 continue;
4402
b9cec075 4403 /*
d8e87498
MR
4404 * We've accounted for all active planes; remaining planes are
4405 * all disabled.
b9cec075 4406 */
d8e87498
MR
4407 if (total_data_rate == 0)
4408 break;
b9cec075 4409
d8e87498 4410 wm = &cstate->wm.skl.optimal.planes[plane_id];
b9cec075 4411
d8e87498
MR
4412 rate = plane_data_rate[plane_id];
4413 extra = min_t(u16, alloc_size,
4414 DIV64_U64_ROUND_UP(alloc_size * rate,
4415 total_data_rate));
4416 total[plane_id] = wm->wm[level].plane_res_b + extra;
4417 alloc_size -= extra;
4418 total_data_rate -= rate;
9a30a261 4419
d8e87498
MR
4420 if (total_data_rate == 0)
4421 break;
a1de91e5 4422
d8e87498
MR
4423 rate = uv_plane_data_rate[plane_id];
4424 extra = min_t(u16, alloc_size,
4425 DIV64_U64_ROUND_UP(alloc_size * rate,
4426 total_data_rate));
4427 uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra;
4428 alloc_size -= extra;
4429 total_data_rate -= rate;
4430 }
4431 WARN_ON(alloc_size != 0 || total_data_rate != 0);
4432
4433 /* Set the actual DDB start/end points for each plane */
4434 start = alloc->start;
4435 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4436 struct skl_ddb_entry *plane_alloc, *uv_plane_alloc;
4437
4438 if (plane_id == PLANE_CURSOR)
4439 continue;
4440
4441 plane_alloc = &cstate->wm.skl.plane_ddb_y[plane_id];
4442 uv_plane_alloc = &cstate->wm.skl.plane_ddb_uv[plane_id];
9a30a261 4443
b048a00b 4444 /* Gen11+ uses a separate plane for UV watermarks */
d8e87498
MR
4445 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4446
4447 /* Leave disabled planes at (0,0) */
4448 if (total[plane_id]) {
4449 plane_alloc->start = start;
4450 start += total[plane_id];
4451 plane_alloc->end = start;
4452 }
b048a00b 4453
d8e87498
MR
4454 if (uv_total[plane_id]) {
4455 uv_plane_alloc->start = start;
4456 start += uv_total[plane_id];
4457 uv_plane_alloc->end = start;
c107acfe 4458 }
d8e87498 4459 }
9a30a261 4460
d8e87498
MR
4461 /*
4462 * When we calculated watermark values we didn't know how high
4463 * of a level we'd actually be able to hit, so we just marked
4464 * all levels as "enabled." Go back now and disable the ones
4465 * that aren't actually possible.
4466 */
4467 for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4468 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4469 wm = &cstate->wm.skl.optimal.planes[plane_id];
4470 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4471 }
4472 }
4473
4474 /*
4475 * Go back and disable the transition watermark if it turns out we
4476 * don't have enough DDB blocks for it.
4477 */
4478 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4479 wm = &cstate->wm.skl.optimal.planes[plane_id];
4480 if (wm->trans_wm.plane_res_b > total[plane_id])
4481 memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
b9cec075
DL
4482 }
4483
c107acfe 4484 return 0;
b9cec075
DL
4485}
4486
2d41c0b5
PB
4487/*
4488 * The max latency should be 257 (max the punit can code is 255 and we add 2us
ac484963 4489 * for the read latency) and cpp should always be <= 8, so that
2d41c0b5
PB
4490 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4491 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4492*/
6c64dd37
PZ
4493static uint_fixed_16_16_t
4494skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
df8ee190 4495 uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
2d41c0b5 4496{
b95320bd
MK
4497 uint32_t wm_intermediate_val;
4498 uint_fixed_16_16_t ret;
2d41c0b5
PB
4499
4500 if (latency == 0)
b95320bd 4501 return FP_16_16_MAX;
2d41c0b5 4502
b95320bd 4503 wm_intermediate_val = latency * pixel_rate * cpp;
df8ee190 4504 ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
6c64dd37
PZ
4505
4506 if (INTEL_GEN(dev_priv) >= 10)
4507 ret = add_fixed16_u32(ret, 1);
4508
2d41c0b5
PB
4509 return ret;
4510}
4511
b95320bd
MK
4512static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4513 uint32_t pipe_htotal,
4514 uint32_t latency,
4515 uint_fixed_16_16_t plane_blocks_per_line)
2d41c0b5 4516{
d4c2aa60 4517 uint32_t wm_intermediate_val;
b95320bd 4518 uint_fixed_16_16_t ret;
2d41c0b5
PB
4519
4520 if (latency == 0)
b95320bd 4521 return FP_16_16_MAX;
2d41c0b5 4522
2d41c0b5 4523 wm_intermediate_val = latency * pixel_rate;
b95320bd
MK
4524 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4525 pipe_htotal * 1000);
eac2cb81 4526 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
2d41c0b5
PB
4527 return ret;
4528}
4529
d555cb58 4530static uint_fixed_16_16_t
b048a00b 4531intel_get_linetime_us(const struct intel_crtc_state *cstate)
d555cb58
KM
4532{
4533 uint32_t pixel_rate;
4534 uint32_t crtc_htotal;
4535 uint_fixed_16_16_t linetime_us;
4536
4537 if (!cstate->base.active)
eac2cb81 4538 return u32_to_fixed16(0);
d555cb58
KM
4539
4540 pixel_rate = cstate->pixel_rate;
4541
4542 if (WARN_ON(pixel_rate == 0))
eac2cb81 4543 return u32_to_fixed16(0);
d555cb58
KM
4544
4545 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
eac2cb81 4546 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
d555cb58
KM
4547
4548 return linetime_us;
4549}
4550
eb2fdcdf
KM
4551static uint32_t
4552skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4553 const struct intel_plane_state *pstate)
9c2f7a9d
KM
4554{
4555 uint64_t adjusted_pixel_rate;
7084b50b 4556 uint_fixed_16_16_t downscale_amount;
9c2f7a9d
KM
4557
4558 /* Shouldn't reach here on disabled planes... */
93aa2a1c 4559 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
9c2f7a9d
KM
4560 return 0;
4561
4562 /*
4563 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4564 * with additional adjustments for plane-specific scaling.
4565 */
a7d1b3f4 4566 adjusted_pixel_rate = cstate->pixel_rate;
93aa2a1c 4567 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
9c2f7a9d 4568
7084b50b
KM
4569 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4570 downscale_amount);
9c2f7a9d
KM
4571}
4572
7e452fdb 4573static int
51de9c6d 4574skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
7e452fdb 4575 const struct intel_plane_state *intel_pstate,
45bee430 4576 struct skl_wm_params *wp, int color_plane)
2d41c0b5 4577{
93aa2a1c 4578 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
51de9c6d 4579 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
eb2fdcdf
KM
4580 const struct drm_plane_state *pstate = &intel_pstate->base;
4581 const struct drm_framebuffer *fb = pstate->fb;
b95320bd 4582 uint32_t interm_pbpl;
ee3d532f
PZ
4583 struct intel_atomic_state *state =
4584 to_intel_atomic_state(cstate->base.state);
4585 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
2d41c0b5 4586
942aa2d0 4587 /* only NV12 format has two planes */
45bee430 4588 if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) {
942aa2d0
MK
4589 DRM_DEBUG_KMS("Non NV12 format have single plane\n");
4590 return -EINVAL;
4591 }
4592
7e452fdb
KM
4593 wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4594 fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4595 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4596 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4597 wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4598 wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4599 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
942aa2d0 4600 wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
ee3d532f 4601
93aa2a1c 4602 if (plane->id == PLANE_CURSOR) {
7e452fdb 4603 wp->width = intel_pstate->base.crtc_w;
93aa2a1c 4604 } else {
fce5adf5
VS
4605 /*
4606 * Src coordinates are already rotated by 270 degrees for
4607 * the 90/270 degree plane rotation cases (to match the
4608 * GTT mapping), hence no need to account for rotation here.
4609 */
7e452fdb 4610 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
93aa2a1c 4611 }
a280f7dd 4612
45bee430 4613 if (color_plane == 1 && wp->is_planar)
942aa2d0
MK
4614 wp->width /= 2;
4615
45bee430 4616 wp->cpp = fb->format->cpp[color_plane];
7e452fdb
KM
4617 wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4618 intel_pstate);
9c2f7a9d 4619
df8ee190
MK
4620 if (INTEL_GEN(dev_priv) >= 11 &&
4621 fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
4622 wp->dbuf_block_size = 256;
4623 else
4624 wp->dbuf_block_size = 512;
4625
61d0a04d 4626 if (drm_rotation_90_or_270(pstate->rotation)) {
1186fa85 4627
7e452fdb 4628 switch (wp->cpp) {
1186fa85 4629 case 1:
7e452fdb 4630 wp->y_min_scanlines = 16;
1186fa85
PZ
4631 break;
4632 case 2:
7e452fdb 4633 wp->y_min_scanlines = 8;
1186fa85 4634 break;
1186fa85 4635 case 4:
7e452fdb 4636 wp->y_min_scanlines = 4;
1186fa85 4637 break;
86a462bc 4638 default:
7e452fdb 4639 MISSING_CASE(wp->cpp);
86a462bc 4640 return -EINVAL;
1186fa85
PZ
4641 }
4642 } else {
7e452fdb 4643 wp->y_min_scanlines = 4;
1186fa85
PZ
4644 }
4645
2ef32dee 4646 if (apply_memory_bw_wa)
7e452fdb 4647 wp->y_min_scanlines *= 2;
2ef32dee 4648
7e452fdb
KM
4649 wp->plane_bytes_per_line = wp->width * wp->cpp;
4650 if (wp->y_tiled) {
4651 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
df8ee190
MK
4652 wp->y_min_scanlines,
4653 wp->dbuf_block_size);
6c64dd37
PZ
4654
4655 if (INTEL_GEN(dev_priv) >= 10)
4656 interm_pbpl++;
4657
7e452fdb
KM
4658 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4659 wp->y_min_scanlines);
cf819eff 4660 } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
df8ee190
MK
4661 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4662 wp->dbuf_block_size);
7e452fdb 4663 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
ef8a4fb4 4664 } else {
df8ee190
MK
4665 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4666 wp->dbuf_block_size) + 1;
7e452fdb 4667 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
7a1a8aed
PZ
4668 }
4669
7e452fdb
KM
4670 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4671 wp->plane_blocks_per_line);
4672 wp->linetime_us = fixed16_to_u32_round_up(
4673 intel_get_linetime_us(cstate));
4674
4675 return 0;
4676}
4677
d8e87498
MR
4678static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
4679 const struct intel_plane_state *intel_pstate,
4680 int level,
4681 const struct skl_wm_params *wp,
4682 const struct skl_wm_level *result_prev,
4683 struct skl_wm_level *result /* out */)
7e452fdb 4684{
51de9c6d
VS
4685 struct drm_i915_private *dev_priv =
4686 to_i915(intel_pstate->base.plane->dev);
7e452fdb
KM
4687 uint32_t latency = dev_priv->wm.skl_latency[level];
4688 uint_fixed_16_16_t method1, method2;
4689 uint_fixed_16_16_t selected_result;
4690 uint32_t res_blocks, res_lines;
4691 struct intel_atomic_state *state =
4692 to_intel_atomic_state(cstate->base.state);
4693 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
ce110ec3 4694
7e452fdb 4695 /* Display WA #1141: kbl,cfl */
d86ba628
KM
4696 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4697 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
7e452fdb
KM
4698 dev_priv->ipc_enabled)
4699 latency += 4;
4700
4701 if (apply_memory_bw_wa && wp->x_tiled)
4702 latency += 15;
4703
4704 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
df8ee190 4705 wp->cpp, latency, wp->dbuf_block_size);
7e452fdb 4706 method2 = skl_wm_method2(wp->plane_pixel_rate,
024c9045 4707 cstate->base.adjusted_mode.crtc_htotal,
1186fa85 4708 latency,
7e452fdb 4709 wp->plane_blocks_per_line);
75676ed4 4710
7e452fdb
KM
4711 if (wp->y_tiled) {
4712 selected_result = max_fixed16(method2, wp->y_tile_minimum);
0fda6568 4713 } else {
7e452fdb 4714 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
df8ee190 4715 wp->dbuf_block_size < 1) &&
077b5820 4716 (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
f1db3eaf 4717 selected_result = method2;
077b5820 4718 } else if (latency >= wp->linetime_us) {
cf819eff 4719 if (IS_GEN(dev_priv, 9) &&
077b5820
PZ
4720 !IS_GEMINILAKE(dev_priv))
4721 selected_result = min_fixed16(method1, method2);
4722 else
4723 selected_result = method2;
4724 } else {
0fda6568 4725 selected_result = method1;
077b5820 4726 }
0fda6568 4727 }
2d41c0b5 4728
eac2cb81 4729 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
d273ecce 4730 res_lines = div_round_up_fixed16(selected_result,
7e452fdb 4731 wp->plane_blocks_per_line);
e6d66171 4732
a5b79d34
PZ
4733 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4734 /* Display WA #1125: skl,bxt,kbl */
4735 if (level == 0 && wp->rc_surface)
4736 res_blocks +=
4737 fixed16_to_u32_round_up(wp->y_tile_minimum);
4738
4739 /* Display WA #1126: skl,bxt,kbl */
4740 if (level >= 1 && level <= 7) {
4741 if (wp->y_tiled) {
4742 res_blocks +=
4743 fixed16_to_u32_round_up(wp->y_tile_minimum);
4744 res_lines += wp->y_min_scanlines;
4745 } else {
4746 res_blocks++;
4747 }
8b2b53ce 4748
a5b79d34
PZ
4749 /*
4750 * Make sure result blocks for higher latency levels are
4751 * atleast as high as level below the current level.
4752 * Assumption in DDB algorithm optimization for special
4753 * cases. Also covers Display WA #1125 for RC.
4754 */
4755 if (result_prev->plane_res_b > res_blocks)
4756 res_blocks = result_prev->plane_res_b;
4757 }
0fda6568 4758 }
e6d66171 4759
31dade7d 4760 /* The number of lines are ignored for the level 0 watermark. */
d8e87498
MR
4761 if (level > 0 && res_lines > 31)
4762 return;
4763
4764 /*
4765 * If res_lines is valid, assume we can use this watermark level
4766 * for now. We'll come back and disable it after we calculate the
4767 * DDB allocation if it turns out we don't actually have enough
4768 * blocks to satisfy it.
4769 */
62027b77
MK
4770 result->plane_res_b = res_blocks;
4771 result->plane_res_l = res_lines;
4772 result->plane_en = true;
2d41c0b5
PB
4773}
4774
d8e87498 4775static void
51de9c6d 4776skl_compute_wm_levels(const struct intel_crtc_state *cstate,
d2f5e36d 4777 const struct intel_plane_state *intel_pstate,
7e452fdb 4778 const struct skl_wm_params *wm_params,
b048a00b 4779 struct skl_wm_level *levels)
2d41c0b5 4780{
51de9c6d
VS
4781 struct drm_i915_private *dev_priv =
4782 to_i915(intel_pstate->base.plane->dev);
d2f5e36d 4783 int level, max_level = ilk_wm_max_level(dev_priv);
b048a00b 4784 struct skl_wm_level *result_prev = &levels[0];
a62163e9 4785
d2f5e36d 4786 for (level = 0; level <= max_level; level++) {
b048a00b 4787 struct skl_wm_level *result = &levels[level];
d2f5e36d 4788
d8e87498
MR
4789 skl_compute_plane_wm(cstate, intel_pstate, level, wm_params,
4790 result_prev, result);
b048a00b
ML
4791
4792 result_prev = result;
d2f5e36d 4793 }
2d41c0b5
PB
4794}
4795
407b50f3 4796static uint32_t
b048a00b 4797skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
407b50f3 4798{
a3a8986c
MK
4799 struct drm_atomic_state *state = cstate->base.state;
4800 struct drm_i915_private *dev_priv = to_i915(state->dev);
d555cb58 4801 uint_fixed_16_16_t linetime_us;
a3a8986c 4802 uint32_t linetime_wm;
30d1b5fe 4803
d555cb58 4804 linetime_us = intel_get_linetime_us(cstate);
407b50f3 4805
d555cb58 4806 if (is_fixed16_zero(linetime_us))
661abfc0 4807 return 0;
407b50f3 4808
eac2cb81 4809 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
a3a8986c 4810
446e850c
KM
4811 /* Display WA #1135: bxt:ALL GLK:ALL */
4812 if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4813 dev_priv->ipc_enabled)
4814 linetime_wm /= 2;
a3a8986c
MK
4815
4816 return linetime_wm;
407b50f3
DL
4817}
4818
b048a00b 4819static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
6a3c910b 4820 const struct skl_wm_params *wp,
d8e87498 4821 struct skl_plane_wm *wm)
407b50f3 4822{
ca47667f
KM
4823 struct drm_device *dev = cstate->base.crtc->dev;
4824 const struct drm_i915_private *dev_priv = to_i915(dev);
4825 uint16_t trans_min, trans_y_tile_min;
4826 const uint16_t trans_amount = 10; /* This is configurable amount */
cbacc79d 4827 uint16_t wm0_sel_res_b, trans_offset_b, res_blocks;
ca47667f 4828
ca47667f
KM
4829 /* Transition WM are not recommended by HW team for GEN9 */
4830 if (INTEL_GEN(dev_priv) <= 9)
14a43062 4831 return;
ca47667f
KM
4832
4833 /* Transition WM don't make any sense if ipc is disabled */
4834 if (!dev_priv->ipc_enabled)
14a43062 4835 return;
ca47667f 4836
91961a85
PZ
4837 trans_min = 14;
4838 if (INTEL_GEN(dev_priv) >= 11)
ca47667f
KM
4839 trans_min = 4;
4840
4841 trans_offset_b = trans_min + trans_amount;
4842
cbacc79d
PZ
4843 /*
4844 * The spec asks for Selected Result Blocks for wm0 (the real value),
4845 * not Result Blocks (the integer value). Pay attention to the capital
4846 * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4847 * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4848 * and since we later will have to get the ceiling of the sum in the
4849 * transition watermarks calculation, we can just pretend Selected
4850 * Result Blocks is Result Blocks minus 1 and it should work for the
4851 * current platforms.
4852 */
6a3c910b 4853 wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
cbacc79d 4854
ca47667f
KM
4855 if (wp->y_tiled) {
4856 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4857 wp->y_tile_minimum);
cbacc79d 4858 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
ca47667f
KM
4859 trans_offset_b;
4860 } else {
cbacc79d 4861 res_blocks = wm0_sel_res_b + trans_offset_b;
ca47667f
KM
4862
4863 /* WA BUG:1938466 add one block for non y-tile planes */
4864 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4865 res_blocks += 1;
4866
4867 }
4868
d8e87498
MR
4869 /*
4870 * Just assume we can enable the transition watermark. After
4871 * computing the DDB we'll come back and disable it if that
4872 * assumption turns out to be false.
4873 */
4874 wm->trans_wm.plane_res_b = res_blocks + 1;
4875 wm->trans_wm.plane_en = true;
407b50f3
DL
4876}
4877
ff43bc37 4878static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
8315847b
VS
4879 const struct intel_plane_state *plane_state,
4880 enum plane_id plane_id, int color_plane)
b048a00b 4881{
8315847b 4882 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
b048a00b 4883 struct skl_wm_params wm_params;
b048a00b
ML
4884 int ret;
4885
51de9c6d 4886 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
b048a00b
ML
4887 &wm_params, color_plane);
4888 if (ret)
4889 return ret;
4890
d8e87498
MR
4891 skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->wm);
4892 skl_compute_transition_wm(crtc_state, &wm_params, wm);
b048a00b
ML
4893
4894 return 0;
4895}
4896
ff43bc37 4897static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
8315847b
VS
4898 const struct intel_plane_state *plane_state,
4899 enum plane_id plane_id)
b048a00b 4900{
8315847b 4901 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
b048a00b 4902 struct skl_wm_params wm_params;
b048a00b
ML
4903 int ret;
4904
8315847b 4905 wm->is_planar = true;
b048a00b
ML
4906
4907 /* uv plane watermarks must also be validated for NV12/Planar */
51de9c6d 4908 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
8315847b
VS
4909 &wm_params, 1);
4910 if (ret)
4911 return ret;
b048a00b 4912
d8e87498 4913 skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->uv_wm);
b048a00b 4914
8315847b 4915 return 0;
b048a00b
ML
4916}
4917
ff43bc37 4918static int skl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
8315847b
VS
4919 struct intel_crtc_state *crtc_state,
4920 const struct intel_plane_state *plane_state)
b048a00b 4921{
8315847b
VS
4922 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4923 const struct drm_framebuffer *fb = plane_state->base.fb;
4924 enum plane_id plane_id = plane->id;
b048a00b 4925 int ret;
b048a00b 4926
8315847b
VS
4927 if (!intel_wm_plane_visible(crtc_state, plane_state))
4928 return 0;
4929
ff43bc37 4930 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b 4931 plane_id, 0);
b048a00b
ML
4932 if (ret)
4933 return ret;
4934
8315847b 4935 if (fb->format->is_yuv && fb->format->num_planes > 1) {
ff43bc37 4936 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
8315847b
VS
4937 plane_id);
4938 if (ret)
4939 return ret;
4940 }
4941
4942 return 0;
4943}
4944
ff43bc37 4945static int icl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
8315847b
VS
4946 struct intel_crtc_state *crtc_state,
4947 const struct intel_plane_state *plane_state)
4948{
4949 enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
4950 int ret;
4951
4952 /* Watermarks calculated in master */
4953 if (plane_state->slave)
4954 return 0;
4955
4956 if (plane_state->linked_plane) {
4957 const struct drm_framebuffer *fb = plane_state->base.fb;
4958 enum plane_id y_plane_id = plane_state->linked_plane->id;
4959
4960 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
4961 WARN_ON(!fb->format->is_yuv ||
4962 fb->format->num_planes == 1);
4963
ff43bc37 4964 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
4965 y_plane_id, 0);
4966 if (ret)
4967 return ret;
4968
ff43bc37 4969 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
4970 plane_id, 1);
4971 if (ret)
4972 return ret;
4973 } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
ff43bc37 4974 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
4975 plane_id, 0);
4976 if (ret)
4977 return ret;
4978 }
4979
4980 return 0;
b048a00b
ML
4981}
4982
55994c2c 4983static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
55994c2c 4984 struct skl_pipe_wm *pipe_wm)
2d41c0b5 4985{
8315847b 4986 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
eb2fdcdf 4987 struct drm_crtc_state *crtc_state = &cstate->base;
eb2fdcdf
KM
4988 struct drm_plane *plane;
4989 const struct drm_plane_state *pstate;
55994c2c 4990 int ret;
2d41c0b5 4991
a62163e9
L
4992 /*
4993 * We'll only calculate watermarks for planes that are actually
4994 * enabled, so make sure all other planes are set as disabled.
4995 */
4996 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4997
eb2fdcdf
KM
4998 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4999 const struct intel_plane_state *intel_pstate =
5000 to_intel_plane_state(pstate);
eb2fdcdf 5001
8315847b 5002 if (INTEL_GEN(dev_priv) >= 11)
ff43bc37 5003 ret = icl_build_plane_wm(pipe_wm,
8315847b 5004 cstate, intel_pstate);
b048a00b 5005 else
ff43bc37 5006 ret = skl_build_plane_wm(pipe_wm,
8315847b 5007 cstate, intel_pstate);
d2f5e36d
KM
5008 if (ret)
5009 return ret;
2d41c0b5 5010 }
942aa2d0 5011
024c9045 5012 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
2d41c0b5 5013
55994c2c 5014 return 0;
2d41c0b5
PB
5015}
5016
f0f59a00
VS
5017static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5018 i915_reg_t reg,
16160e3d
DL
5019 const struct skl_ddb_entry *entry)
5020{
5021 if (entry->end)
ff43bc37 5022 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
16160e3d 5023 else
ff43bc37 5024 I915_WRITE_FW(reg, 0);
16160e3d
DL
5025}
5026
d8c0fafc 5027static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5028 i915_reg_t reg,
5029 const struct skl_wm_level *level)
5030{
5031 uint32_t val = 0;
5032
5033 if (level->plane_en) {
5034 val |= PLANE_WM_EN;
5035 val |= level->plane_res_b;
5036 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5037 }
5038
ff43bc37 5039 I915_WRITE_FW(reg, val);
d8c0fafc 5040}
5041
ff43bc37
VS
5042void skl_write_plane_wm(struct intel_plane *plane,
5043 const struct intel_crtc_state *crtc_state)
62e0fb88 5044{
ff43bc37 5045 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5db94019 5046 int level, max_level = ilk_wm_max_level(dev_priv);
ff43bc37
VS
5047 enum plane_id plane_id = plane->id;
5048 enum pipe pipe = plane->pipe;
5049 const struct skl_plane_wm *wm =
5050 &crtc_state->wm.skl.optimal.planes[plane_id];
5051 const struct skl_ddb_entry *ddb_y =
5052 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5053 const struct skl_ddb_entry *ddb_uv =
5054 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
62e0fb88
L
5055
5056 for (level = 0; level <= max_level; level++) {
d5cdfdf5 5057 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
d8c0fafc 5058 &wm->wm[level]);
62e0fb88 5059 }
d5cdfdf5 5060 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
d8c0fafc 5061 &wm->trans_wm);
27082493 5062
ff43bc37 5063 if (INTEL_GEN(dev_priv) >= 11) {
234059da 5064 skl_ddb_entry_write(dev_priv,
ff43bc37
VS
5065 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5066 return;
b879d58f 5067 }
ff43bc37
VS
5068
5069 if (wm->is_planar)
5070 swap(ddb_y, ddb_uv);
5071
5072 skl_ddb_entry_write(dev_priv,
5073 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5074 skl_ddb_entry_write(dev_priv,
5075 PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
62e0fb88
L
5076}
5077
ff43bc37
VS
5078void skl_write_cursor_wm(struct intel_plane *plane,
5079 const struct intel_crtc_state *crtc_state)
62e0fb88 5080{
ff43bc37 5081 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5db94019 5082 int level, max_level = ilk_wm_max_level(dev_priv);
ff43bc37
VS
5083 enum plane_id plane_id = plane->id;
5084 enum pipe pipe = plane->pipe;
5085 const struct skl_plane_wm *wm =
5086 &crtc_state->wm.skl.optimal.planes[plane_id];
5087 const struct skl_ddb_entry *ddb =
5088 &crtc_state->wm.skl.plane_ddb_y[plane_id];
62e0fb88
L
5089
5090 for (level = 0; level <= max_level; level++) {
d8c0fafc 5091 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5092 &wm->wm[level]);
62e0fb88 5093 }
d8c0fafc 5094 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5d374d96 5095
ff43bc37 5096 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
2d41c0b5
PB
5097}
5098
45ece230 5099bool skl_wm_level_equals(const struct skl_wm_level *l1,
5100 const struct skl_wm_level *l2)
5101{
ff43bc37
VS
5102 return l1->plane_en == l2->plane_en &&
5103 l1->plane_res_l == l2->plane_res_l &&
5104 l1->plane_res_b == l2->plane_res_b;
5105}
45ece230 5106
ff43bc37
VS
5107static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5108 const struct skl_plane_wm *wm1,
5109 const struct skl_plane_wm *wm2)
5110{
5111 int level, max_level = ilk_wm_max_level(dev_priv);
45ece230 5112
ff43bc37
VS
5113 for (level = 0; level <= max_level; level++) {
5114 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5115 !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5116 return false;
5117 }
5118
5119 return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
45ece230 5120}
5121
27082493
L
5122static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5123 const struct skl_ddb_entry *b)
0e8fb7ba 5124{
27082493 5125 return a->start < b->end && b->start < a->end;
0e8fb7ba
DL
5126}
5127
53cc6880
VS
5128bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5129 const struct skl_ddb_entry entries[],
5130 int num_entries, int ignore_idx)
0e8fb7ba 5131{
53cc6880 5132 int i;
0e8fb7ba 5133
53cc6880
VS
5134 for (i = 0; i < num_entries; i++) {
5135 if (i != ignore_idx &&
5136 skl_ddb_entries_overlap(ddb, &entries[i]))
27082493 5137 return true;
2b68504b 5138 }
0e8fb7ba 5139
27082493 5140 return false;
0e8fb7ba
DL
5141}
5142
cd1d3ee9 5143static int skl_update_pipe_wm(struct intel_crtc_state *cstate,
03af79e0 5144 const struct skl_pipe_wm *old_pipe_wm,
55994c2c
MR
5145 struct skl_pipe_wm *pipe_wm, /* out */
5146 bool *changed /* out */)
2d41c0b5 5147{
55994c2c 5148 int ret;
2d41c0b5 5149
cd1d3ee9 5150 ret = skl_build_pipe_wm(cstate, pipe_wm);
55994c2c
MR
5151 if (ret)
5152 return ret;
2d41c0b5 5153
03af79e0 5154 if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
55994c2c
MR
5155 *changed = false;
5156 else
5157 *changed = true;
2d41c0b5 5158
55994c2c 5159 return 0;
2d41c0b5
PB
5160}
5161
9b613022 5162static uint32_t
cd1d3ee9 5163pipes_modified(struct intel_atomic_state *state)
9b613022 5164{
cd1d3ee9
MR
5165 struct intel_crtc *crtc;
5166 struct intel_crtc_state *cstate;
9b613022
MR
5167 uint32_t i, ret = 0;
5168
cd1d3ee9
MR
5169 for_each_new_intel_crtc_in_state(state, crtc, cstate, i)
5170 ret |= drm_crtc_mask(&crtc->base);
9b613022
MR
5171
5172 return ret;
5173}
5174
bb7791bd 5175static int
ff43bc37
VS
5176skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5177 struct intel_crtc_state *new_crtc_state)
9a30a261 5178{
ff43bc37
VS
5179 struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5180 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5181 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5182 struct intel_plane *plane;
9a30a261 5183
ff43bc37
VS
5184 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5185 struct intel_plane_state *plane_state;
5186 enum plane_id plane_id = plane->id;
9a30a261 5187
ff43bc37
VS
5188 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5189 &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5190 skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5191 &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
9a30a261
RV
5192 continue;
5193
ff43bc37 5194 plane_state = intel_atomic_get_plane_state(state, plane);
9a30a261
RV
5195 if (IS_ERR(plane_state))
5196 return PTR_ERR(plane_state);
1ab554b0 5197
ff43bc37 5198 new_crtc_state->update_planes |= BIT(plane_id);
9a30a261
RV
5199 }
5200
5201 return 0;
5202}
5203
5204static int
cd1d3ee9 5205skl_compute_ddb(struct intel_atomic_state *state)
98d39494 5206{
cd1d3ee9
MR
5207 const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5208 struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
ff43bc37
VS
5209 struct intel_crtc_state *old_crtc_state;
5210 struct intel_crtc_state *new_crtc_state;
e1f96a66 5211 struct intel_crtc *crtc;
e1f96a66 5212 int ret, i;
98d39494 5213
5a920b85
PZ
5214 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5215
cd1d3ee9 5216 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
ff43bc37
VS
5217 new_crtc_state, i) {
5218 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
9a30a261
RV
5219 if (ret)
5220 return ret;
5221
ff43bc37
VS
5222 ret = skl_ddb_add_affected_planes(old_crtc_state,
5223 new_crtc_state);
9a30a261
RV
5224 if (ret)
5225 return ret;
98d39494
MR
5226 }
5227
5228 return 0;
5229}
5230
413fc530 5231static void
ff43bc37 5232skl_print_wm_changes(struct intel_atomic_state *state)
413fc530 5233{
ff43bc37
VS
5234 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5235 const struct intel_crtc_state *old_crtc_state;
5236 const struct intel_crtc_state *new_crtc_state;
5237 struct intel_plane *plane;
5238 struct intel_crtc *crtc;
7570498e 5239 int i;
413fc530 5240
ff43bc37
VS
5241 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5242 new_crtc_state, i) {
5243 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5244 enum plane_id plane_id = plane->id;
413fc530 5245 const struct skl_ddb_entry *old, *new;
5246
ff43bc37
VS
5247 old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5248 new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
413fc530 5249
413fc530 5250 if (skl_ddb_entry_equal(old, new))
5251 continue;
5252
b9117149 5253 DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
ff43bc37 5254 plane->base.base.id, plane->base.name,
b9117149
PZ
5255 old->start, old->end,
5256 new->start, new->end);
413fc530 5257 }
5258 }
5259}
5260
98d39494 5261static int
cd1d3ee9 5262skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
98d39494 5263{
cd1d3ee9 5264 struct drm_device *dev = state->base.dev;
e1f96a66 5265 const struct drm_i915_private *dev_priv = to_i915(dev);
cd1d3ee9
MR
5266 struct intel_crtc *crtc;
5267 struct intel_crtc_state *crtc_state;
e1f96a66 5268 uint32_t realloc_pipes = pipes_modified(state);
734fa01f 5269 int ret, i;
98d39494 5270
367d73d2
ML
5271 /*
5272 * When we distrust bios wm we always need to recompute to set the
5273 * expected DDB allocations for each CRTC.
5274 */
e1f96a66
MK
5275 if (dev_priv->wm.distrust_bios_wm)
5276 (*changed) = true;
367d73d2 5277
98d39494
MR
5278 /*
5279 * If this transaction isn't actually touching any CRTC's, don't
5280 * bother with watermark calculation. Note that if we pass this
5281 * test, we're guaranteed to hold at least one CRTC state mutex,
5282 * which means we can safely use values like dev_priv->active_crtcs
5283 * since any racing commits that want to update them would need to
5284 * hold _all_ CRTC state mutexes.
5285 */
cd1d3ee9 5286 for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
e1f96a66 5287 (*changed) = true;
367d73d2 5288
e1f96a66 5289 if (!*changed)
98d39494
MR
5290 return 0;
5291
e1f96a66
MK
5292 /*
5293 * If this is our first atomic update following hardware readout,
5294 * we can't trust the DDB that the BIOS programmed for us. Let's
5295 * pretend that all pipes switched active status so that we'll
5296 * ensure a full DDB recompute.
5297 */
5298 if (dev_priv->wm.distrust_bios_wm) {
5299 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
cd1d3ee9 5300 state->base.acquire_ctx);
e1f96a66
MK
5301 if (ret)
5302 return ret;
5303
cd1d3ee9 5304 state->active_pipe_changes = ~0;
e1f96a66
MK
5305
5306 /*
cd1d3ee9 5307 * We usually only initialize state->active_crtcs if we
e1f96a66
MK
5308 * we're doing a modeset; make sure this field is always
5309 * initialized during the sanitization process that happens
5310 * on the first commit too.
5311 */
cd1d3ee9
MR
5312 if (!state->modeset)
5313 state->active_crtcs = dev_priv->active_crtcs;
e1f96a66
MK
5314 }
5315
5316 /*
5317 * If the modeset changes which CRTC's are active, we need to
5318 * recompute the DDB allocation for *all* active pipes, even
5319 * those that weren't otherwise being modified in any way by this
5320 * atomic commit. Due to the shrinking of the per-pipe allocations
5321 * when new active CRTC's are added, it's possible for a pipe that
5322 * we were already using and aren't changing at all here to suddenly
5323 * become invalid if its DDB needs exceeds its new allocation.
5324 *
5325 * Note that if we wind up doing a full DDB recompute, we can't let
5326 * any other display updates race with this transaction, so we need
5327 * to grab the lock on *all* CRTC's.
5328 */
cd1d3ee9 5329 if (state->active_pipe_changes || state->modeset) {
e1f96a66 5330 realloc_pipes = ~0;
cd1d3ee9 5331 state->wm_results.dirty_pipes = ~0;
e1f96a66
MK
5332 }
5333
5334 /*
5335 * We're not recomputing for the pipes not included in the commit, so
5336 * make sure we start with the current state.
5337 */
cd1d3ee9
MR
5338 for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5339 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5340 if (IS_ERR(crtc_state))
5341 return PTR_ERR(crtc_state);
e1f96a66
MK
5342 }
5343
5344 return 0;
5345}
5346
ff43bc37
VS
5347/*
5348 * To make sure the cursor watermark registers are always consistent
5349 * with our computed state the following scenario needs special
5350 * treatment:
5351 *
5352 * 1. enable cursor
5353 * 2. move cursor entirely offscreen
5354 * 3. disable cursor
5355 *
5356 * Step 2. does call .disable_plane() but does not zero the watermarks
5357 * (since we consider an offscreen cursor still active for the purposes
5358 * of watermarks). Step 3. would not normally call .disable_plane()
5359 * because the actual plane visibility isn't changing, and we don't
5360 * deallocate the cursor ddb until the pipe gets disabled. So we must
5361 * force step 3. to call .disable_plane() to update the watermark
5362 * registers properly.
5363 *
5364 * Other planes do not suffer from this issues as their watermarks are
5365 * calculated based on the actual plane visibility. The only time this
5366 * can trigger for the other planes is during the initial readout as the
5367 * default value of the watermarks registers is not zero.
5368 */
5369static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5370 struct intel_crtc *crtc)
5371{
5372 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5373 const struct intel_crtc_state *old_crtc_state =
5374 intel_atomic_get_old_crtc_state(state, crtc);
5375 struct intel_crtc_state *new_crtc_state =
5376 intel_atomic_get_new_crtc_state(state, crtc);
5377 struct intel_plane *plane;
5378
5379 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5380 struct intel_plane_state *plane_state;
5381 enum plane_id plane_id = plane->id;
5382
5383 /*
5384 * Force a full wm update for every plane on modeset.
5385 * Required because the reset value of the wm registers
5386 * is non-zero, whereas we want all disabled planes to
5387 * have zero watermarks. So if we turn off the relevant
5388 * power well the hardware state will go out of sync
5389 * with the software state.
5390 */
5391 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5392 skl_plane_wm_equals(dev_priv,
5393 &old_crtc_state->wm.skl.optimal.planes[plane_id],
5394 &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5395 continue;
5396
5397 plane_state = intel_atomic_get_plane_state(state, plane);
5398 if (IS_ERR(plane_state))
5399 return PTR_ERR(plane_state);
5400
5401 new_crtc_state->update_planes |= BIT(plane_id);
5402 }
5403
5404 return 0;
5405}
5406
e1f96a66 5407static int
cd1d3ee9 5408skl_compute_wm(struct intel_atomic_state *state)
e1f96a66 5409{
cd1d3ee9
MR
5410 struct intel_crtc *crtc;
5411 struct intel_crtc_state *cstate;
5412 struct intel_crtc_state *old_crtc_state;
5413 struct skl_ddb_values *results = &state->wm_results;
e1f96a66
MK
5414 struct skl_pipe_wm *pipe_wm;
5415 bool changed = false;
5416 int ret, i;
5417
734fa01f
MR
5418 /* Clear all dirty flags */
5419 results->dirty_pipes = 0;
5420
e1f96a66
MK
5421 ret = skl_ddb_add_affected_pipes(state, &changed);
5422 if (ret || !changed)
5423 return ret;
5424
734fa01f
MR
5425 /*
5426 * Calculate WM's for all pipes that are part of this transaction.
d8e87498 5427 * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
734fa01f
MR
5428 * weren't otherwise being modified (and set bits in dirty_pipes) if
5429 * pipe allocations had to change.
734fa01f 5430 */
cd1d3ee9
MR
5431 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5432 cstate, i) {
03af79e0 5433 const struct skl_pipe_wm *old_pipe_wm =
cd1d3ee9 5434 &old_crtc_state->wm.skl.optimal;
734fa01f 5435
cd1d3ee9 5436 pipe_wm = &cstate->wm.skl.optimal;
ff43bc37
VS
5437 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, &changed);
5438 if (ret)
5439 return ret;
5440
cd1d3ee9 5441 ret = skl_wm_add_affected_planes(state, crtc);
734fa01f
MR
5442 if (ret)
5443 return ret;
5444
5445 if (changed)
cd1d3ee9 5446 results->dirty_pipes |= drm_crtc_mask(&crtc->base);
734fa01f
MR
5447 }
5448
d8e87498
MR
5449 ret = skl_compute_ddb(state);
5450 if (ret)
5451 return ret;
5452
cd1d3ee9 5453 skl_print_wm_changes(state);
413fc530 5454
98d39494
MR
5455 return 0;
5456}
5457
ccf010fb
ML
5458static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5459 struct intel_crtc_state *cstate)
5460{
5461 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5462 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5463 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5464 enum pipe pipe = crtc->pipe;
e62929b3
ML
5465
5466 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5467 return;
ccf010fb
ML
5468
5469 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5470}
5471
e62929b3
ML
5472static void skl_initial_wm(struct intel_atomic_state *state,
5473 struct intel_crtc_state *cstate)
2d41c0b5 5474{
e62929b3 5475 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
432081bc 5476 struct drm_device *dev = intel_crtc->base.dev;
fac5e23e 5477 struct drm_i915_private *dev_priv = to_i915(dev);
60f8e873 5478 struct skl_ddb_values *results = &state->wm_results;
adda50b8 5479
432081bc 5480 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
2d41c0b5
PB
5481 return;
5482
734fa01f 5483 mutex_lock(&dev_priv->wm.wm_mutex);
2d41c0b5 5484
e62929b3
ML
5485 if (cstate->base.active_changed)
5486 skl_atomic_update_crtc_wm(state, cstate);
27082493 5487
734fa01f 5488 mutex_unlock(&dev_priv->wm.wm_mutex);
2d41c0b5
PB
5489}
5490
cd1d3ee9 5491static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
d890565c
VS
5492 struct intel_wm_config *config)
5493{
5494 struct intel_crtc *crtc;
5495
5496 /* Compute the currently _active_ config */
cd1d3ee9 5497 for_each_intel_crtc(&dev_priv->drm, crtc) {
d890565c
VS
5498 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5499
5500 if (!wm->pipe_enabled)
5501 continue;
5502
5503 config->sprites_enabled |= wm->sprites_enabled;
5504 config->sprites_scaled |= wm->sprites_scaled;
5505 config->num_pipes_active++;
5506 }
5507}
5508
ed4a6a7c 5509static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
801bcfff 5510{
b9d5c839 5511 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
820c1980 5512 struct ilk_wm_maximums max;
d890565c 5513 struct intel_wm_config config = {};
820c1980 5514 struct ilk_wm_values results = {};
77c122bc 5515 enum intel_ddb_partitioning partitioning;
261a27d1 5516
cd1d3ee9 5517 ilk_compute_wm_config(dev_priv, &config);
d890565c 5518
cd1d3ee9
MR
5519 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5520 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
a485bfb8
VS
5521
5522 /* 5/6 split only in single pipe config on IVB+ */
175fded1 5523 if (INTEL_GEN(dev_priv) >= 7 &&
d890565c 5524 config.num_pipes_active == 1 && config.sprites_enabled) {
cd1d3ee9
MR
5525 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5526 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
0362c781 5527
cd1d3ee9 5528 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
861f3389 5529 } else {
198a1e9b 5530 best_lp_wm = &lp_wm_1_2;
861f3389
PZ
5531 }
5532
198a1e9b 5533 partitioning = (best_lp_wm == &lp_wm_1_2) ?
77c122bc 5534 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
801bcfff 5535
cd1d3ee9 5536 ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
609cedef 5537
820c1980 5538 ilk_write_wm_values(dev_priv, &results);
1011d8c4
PZ
5539}
5540
ccf010fb
ML
5541static void ilk_initial_watermarks(struct intel_atomic_state *state,
5542 struct intel_crtc_state *cstate)
b9d5c839 5543{
ed4a6a7c
MR
5544 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5545 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
b9d5c839 5546
ed4a6a7c 5547 mutex_lock(&dev_priv->wm.wm_mutex);
e8f1f02e 5548 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
ed4a6a7c
MR
5549 ilk_program_watermarks(dev_priv);
5550 mutex_unlock(&dev_priv->wm.wm_mutex);
5551}
bf220452 5552
ccf010fb
ML
5553static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5554 struct intel_crtc_state *cstate)
ed4a6a7c
MR
5555{
5556 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5557 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
bf220452 5558
ed4a6a7c
MR
5559 mutex_lock(&dev_priv->wm.wm_mutex);
5560 if (cstate->wm.need_postvbl_update) {
e8f1f02e 5561 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
ed4a6a7c
MR
5562 ilk_program_watermarks(dev_priv);
5563 }
5564 mutex_unlock(&dev_priv->wm.wm_mutex);
b9d5c839
VS
5565}
5566
d8c0fafc 5567static inline void skl_wm_level_from_reg_val(uint32_t val,
5568 struct skl_wm_level *level)
3078999f 5569{
d8c0fafc 5570 level->plane_en = val & PLANE_WM_EN;
5571 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5572 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5573 PLANE_WM_LINES_MASK;
3078999f
PB
5574}
5575
cd1d3ee9 5576void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
bf9d99ad 5577 struct skl_pipe_wm *out)
3078999f 5578{
cd1d3ee9
MR
5579 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5580 enum pipe pipe = crtc->pipe;
d5cdfdf5
VS
5581 int level, max_level;
5582 enum plane_id plane_id;
d8c0fafc 5583 uint32_t val;
3078999f 5584
5db94019 5585 max_level = ilk_wm_max_level(dev_priv);
3078999f 5586
cd1d3ee9 5587 for_each_plane_id_on_crtc(crtc, plane_id) {
d5cdfdf5 5588 struct skl_plane_wm *wm = &out->planes[plane_id];
3078999f 5589
d8c0fafc 5590 for (level = 0; level <= max_level; level++) {
d5cdfdf5
VS
5591 if (plane_id != PLANE_CURSOR)
5592 val = I915_READ(PLANE_WM(pipe, plane_id, level));
d8c0fafc 5593 else
5594 val = I915_READ(CUR_WM(pipe, level));
3078999f 5595
d8c0fafc 5596 skl_wm_level_from_reg_val(val, &wm->wm[level]);
3078999f 5597 }
3078999f 5598
d5cdfdf5
VS
5599 if (plane_id != PLANE_CURSOR)
5600 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
d8c0fafc 5601 else
5602 val = I915_READ(CUR_WM_TRANS(pipe));
5603
5604 skl_wm_level_from_reg_val(val, &wm->trans_wm);
3078999f
PB
5605 }
5606
cd1d3ee9 5607 if (!crtc->active)
d8c0fafc 5608 return;
4e0963c7 5609
bf9d99ad 5610 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
3078999f
PB
5611}
5612
cd1d3ee9 5613void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
3078999f 5614{
60f8e873 5615 struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
a269c583 5616 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
cd1d3ee9 5617 struct intel_crtc *crtc;
bf9d99ad 5618 struct intel_crtc_state *cstate;
3078999f 5619
a269c583 5620 skl_ddb_get_hw_state(dev_priv, ddb);
cd1d3ee9
MR
5621 for_each_intel_crtc(&dev_priv->drm, crtc) {
5622 cstate = to_intel_crtc_state(crtc->base.state);
bf9d99ad 5623
5624 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5625
cd1d3ee9
MR
5626 if (crtc->active)
5627 hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
bf9d99ad 5628 }
a1de91e5 5629
279e99d7
MR
5630 if (dev_priv->active_crtcs) {
5631 /* Fully recompute DDB on first atomic commit */
5632 dev_priv->wm.distrust_bios_wm = true;
279e99d7 5633 }
3078999f
PB
5634}
5635
cd1d3ee9 5636static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
243e6a44 5637{
cd1d3ee9 5638 struct drm_device *dev = crtc->base.dev;
fac5e23e 5639 struct drm_i915_private *dev_priv = to_i915(dev);
820c1980 5640 struct ilk_wm_values *hw = &dev_priv->wm.hw;
cd1d3ee9 5641 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state);
e8f1f02e 5642 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
cd1d3ee9 5643 enum pipe pipe = crtc->pipe;
f0f59a00 5644 static const i915_reg_t wm0_pipe_reg[] = {
243e6a44
VS
5645 [PIPE_A] = WM0_PIPEA_ILK,
5646 [PIPE_B] = WM0_PIPEB_ILK,
5647 [PIPE_C] = WM0_PIPEC_IVB,
5648 };
5649
5650 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
8652744b 5651 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
ce0e0713 5652 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
243e6a44 5653
15606534
VS
5654 memset(active, 0, sizeof(*active));
5655
cd1d3ee9 5656 active->pipe_enabled = crtc->active;
2a44b76b
VS
5657
5658 if (active->pipe_enabled) {
243e6a44
VS
5659 u32 tmp = hw->wm_pipe[pipe];
5660
5661 /*
5662 * For active pipes LP0 watermark is marked as
5663 * enabled, and LP1+ watermaks as disabled since
5664 * we can't really reverse compute them in case
5665 * multiple pipes are active.
5666 */
5667 active->wm[0].enable = true;
5668 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5669 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5670 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5671 active->linetime = hw->wm_linetime[pipe];
5672 } else {
5db94019 5673 int level, max_level = ilk_wm_max_level(dev_priv);
243e6a44
VS
5674
5675 /*
5676 * For inactive pipes, all watermark levels
5677 * should be marked as enabled but zeroed,
5678 * which is what we'd compute them to.
5679 */
5680 for (level = 0; level <= max_level; level++)
5681 active->wm[level].enable = true;
5682 }
4e0963c7 5683
cd1d3ee9 5684 crtc->wm.active.ilk = *active;
243e6a44
VS
5685}
5686
6eb1a681
VS
/* Extract the DSPFW_<field> bits from a DSPFW register value. */
#define _FW_WM(reg_val, field) \
	(((reg_val) & DSPFW_ ## field ## _MASK) >> DSPFW_ ## field ## _SHIFT)
/* Same as _FW_WM(), but masking with the DSPFW_<field>_MASK_VLV variant. */
#define _FW_WM_VLV(reg_val, field) \
	(((reg_val) & DSPFW_ ## field ## _MASK_VLV) >> DSPFW_ ## field ## _SHIFT)
5691
04548cba
VS
5692static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5693 struct g4x_wm_values *wm)
5694{
5695 uint32_t tmp;
5696
5697 tmp = I915_READ(DSPFW1);
5698 wm->sr.plane = _FW_WM(tmp, SR);
5699 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5700 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5701 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5702
5703 tmp = I915_READ(DSPFW2);
5704 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5705 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5706 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5707 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5708 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5709 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5710
5711 tmp = I915_READ(DSPFW3);
5712 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5713 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5714 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5715 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5716}
5717
6eb1a681
VS
5718static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5719 struct vlv_wm_values *wm)
5720{
5721 enum pipe pipe;
5722 uint32_t tmp;
5723
5724 for_each_pipe(dev_priv, pipe) {
5725 tmp = I915_READ(VLV_DDL(pipe));
5726
1b31389c 5727 wm->ddl[pipe].plane[PLANE_PRIMARY] =
6eb1a681 5728 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5729 wm->ddl[pipe].plane[PLANE_CURSOR] =
6eb1a681 5730 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5731 wm->ddl[pipe].plane[PLANE_SPRITE0] =
6eb1a681 5732 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5733 wm->ddl[pipe].plane[PLANE_SPRITE1] =
6eb1a681
VS
5734 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5735 }
5736
5737 tmp = I915_READ(DSPFW1);
5738 wm->sr.plane = _FW_WM(tmp, SR);
1b31389c
VS
5739 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5740 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5741 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
6eb1a681
VS
5742
5743 tmp = I915_READ(DSPFW2);
1b31389c
VS
5744 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5745 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5746 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
6eb1a681
VS
5747
5748 tmp = I915_READ(DSPFW3);
5749 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5750
5751 if (IS_CHERRYVIEW(dev_priv)) {
5752 tmp = I915_READ(DSPFW7_CHV);
1b31389c
VS
5753 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5754 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6eb1a681
VS
5755
5756 tmp = I915_READ(DSPFW8_CHV);
1b31389c
VS
5757 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5758 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
6eb1a681
VS
5759
5760 tmp = I915_READ(DSPFW9_CHV);
1b31389c
VS
5761 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5762 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
6eb1a681
VS
5763
5764 tmp = I915_READ(DSPHOWM);
5765 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
1b31389c
VS
5766 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5767 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5768 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5769 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5770 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5771 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5772 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5773 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5774 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6eb1a681
VS
5775 } else {
5776 tmp = I915_READ(DSPFW7);
1b31389c
VS
5777 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5778 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6eb1a681
VS
5779
5780 tmp = I915_READ(DSPHOWM);
5781 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
1b31389c
VS
5782 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5783 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5784 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5785 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5786 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5787 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6eb1a681
VS
5788 }
5789}
5790
5791#undef _FW_WM
5792#undef _FW_WM_VLV
5793
cd1d3ee9 5794void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
04548cba 5795{
04548cba
VS
5796 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5797 struct intel_crtc *crtc;
5798
5799 g4x_read_wm_values(dev_priv, wm);
5800
5801 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5802
cd1d3ee9 5803 for_each_intel_crtc(&dev_priv->drm, crtc) {
04548cba
VS
5804 struct intel_crtc_state *crtc_state =
5805 to_intel_crtc_state(crtc->base.state);
5806 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5807 struct g4x_pipe_wm *raw;
5808 enum pipe pipe = crtc->pipe;
5809 enum plane_id plane_id;
5810 int level, max_level;
5811
5812 active->cxsr = wm->cxsr;
5813 active->hpll_en = wm->hpll_en;
5814 active->fbc_en = wm->fbc_en;
5815
5816 active->sr = wm->sr;
5817 active->hpll = wm->hpll;
5818
5819 for_each_plane_id_on_crtc(crtc, plane_id) {
5820 active->wm.plane[plane_id] =
5821 wm->pipe[pipe].plane[plane_id];
5822 }
5823
5824 if (wm->cxsr && wm->hpll_en)
5825 max_level = G4X_WM_LEVEL_HPLL;
5826 else if (wm->cxsr)
5827 max_level = G4X_WM_LEVEL_SR;
5828 else
5829 max_level = G4X_WM_LEVEL_NORMAL;
5830
5831 level = G4X_WM_LEVEL_NORMAL;
5832 raw = &crtc_state->wm.g4x.raw[level];
5833 for_each_plane_id_on_crtc(crtc, plane_id)
5834 raw->plane[plane_id] = active->wm.plane[plane_id];
5835
5836 if (++level > max_level)
5837 goto out;
5838
5839 raw = &crtc_state->wm.g4x.raw[level];
5840 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5841 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5842 raw->plane[PLANE_SPRITE0] = 0;
5843 raw->fbc = active->sr.fbc;
5844
5845 if (++level > max_level)
5846 goto out;
5847
5848 raw = &crtc_state->wm.g4x.raw[level];
5849 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5850 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5851 raw->plane[PLANE_SPRITE0] = 0;
5852 raw->fbc = active->hpll.fbc;
5853
5854 out:
5855 for_each_plane_id_on_crtc(crtc, plane_id)
5856 g4x_raw_plane_wm_set(crtc_state, level,
5857 plane_id, USHRT_MAX);
5858 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5859
5860 crtc_state->wm.g4x.optimal = *active;
5861 crtc_state->wm.g4x.intermediate = *active;
5862
5863 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5864 pipe_name(pipe),
5865 wm->pipe[pipe].plane[PLANE_PRIMARY],
5866 wm->pipe[pipe].plane[PLANE_CURSOR],
5867 wm->pipe[pipe].plane[PLANE_SPRITE0]);
5868 }
5869
5870 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5871 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5872 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5873 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5874 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5875 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5876}
5877
5878void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5879{
5880 struct intel_plane *plane;
5881 struct intel_crtc *crtc;
5882
5883 mutex_lock(&dev_priv->wm.wm_mutex);
5884
5885 for_each_intel_plane(&dev_priv->drm, plane) {
5886 struct intel_crtc *crtc =
5887 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5888 struct intel_crtc_state *crtc_state =
5889 to_intel_crtc_state(crtc->base.state);
5890 struct intel_plane_state *plane_state =
5891 to_intel_plane_state(plane->base.state);
5892 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5893 enum plane_id plane_id = plane->id;
5894 int level;
5895
5896 if (plane_state->base.visible)
5897 continue;
5898
5899 for (level = 0; level < 3; level++) {
5900 struct g4x_pipe_wm *raw =
5901 &crtc_state->wm.g4x.raw[level];
5902
5903 raw->plane[plane_id] = 0;
5904 wm_state->wm.plane[plane_id] = 0;
5905 }
5906
5907 if (plane_id == PLANE_PRIMARY) {
5908 for (level = 0; level < 3; level++) {
5909 struct g4x_pipe_wm *raw =
5910 &crtc_state->wm.g4x.raw[level];
5911 raw->fbc = 0;
5912 }
5913
5914 wm_state->sr.fbc = 0;
5915 wm_state->hpll.fbc = 0;
5916 wm_state->fbc_en = false;
5917 }
5918 }
5919
5920 for_each_intel_crtc(&dev_priv->drm, crtc) {
5921 struct intel_crtc_state *crtc_state =
5922 to_intel_crtc_state(crtc->base.state);
5923
5924 crtc_state->wm.g4x.intermediate =
5925 crtc_state->wm.g4x.optimal;
5926 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5927 }
5928
5929 g4x_program_watermarks(dev_priv);
5930
5931 mutex_unlock(&dev_priv->wm.wm_mutex);
5932}
5933
cd1d3ee9 5934void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6eb1a681 5935{
6eb1a681 5936 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
f07d43d2 5937 struct intel_crtc *crtc;
6eb1a681
VS
5938 u32 val;
5939
5940 vlv_read_wm_values(dev_priv, wm);
5941
6eb1a681
VS
5942 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5943 wm->level = VLV_WM_LEVEL_PM2;
5944
5945 if (IS_CHERRYVIEW(dev_priv)) {
9f817501 5946 mutex_lock(&dev_priv->pcu_lock);
6eb1a681
VS
5947
5948 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5949 if (val & DSP_MAXFIFO_PM5_ENABLE)
5950 wm->level = VLV_WM_LEVEL_PM5;
5951
58590c14
VS
5952 /*
5953 * If DDR DVFS is disabled in the BIOS, Punit
5954 * will never ack the request. So if that happens
5955 * assume we don't have to enable/disable DDR DVFS
5956 * dynamically. To test that just set the REQ_ACK
5957 * bit to poke the Punit, but don't change the
5958 * HIGH/LOW bits so that we don't actually change
5959 * the current state.
5960 */
6eb1a681 5961 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
58590c14
VS
5962 val |= FORCE_DDR_FREQ_REQ_ACK;
5963 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5964
5965 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5966 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5967 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5968 "assuming DDR DVFS is disabled\n");
5969 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5970 } else {
5971 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5972 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5973 wm->level = VLV_WM_LEVEL_DDR_DVFS;
5974 }
6eb1a681 5975
9f817501 5976 mutex_unlock(&dev_priv->pcu_lock);
6eb1a681
VS
5977 }
5978
cd1d3ee9 5979 for_each_intel_crtc(&dev_priv->drm, crtc) {
ff32c54e
VS
5980 struct intel_crtc_state *crtc_state =
5981 to_intel_crtc_state(crtc->base.state);
5982 struct vlv_wm_state *active = &crtc->wm.active.vlv;
5983 const struct vlv_fifo_state *fifo_state =
5984 &crtc_state->wm.vlv.fifo_state;
5985 enum pipe pipe = crtc->pipe;
5986 enum plane_id plane_id;
5987 int level;
5988
5989 vlv_get_fifo_size(crtc_state);
5990
5991 active->num_levels = wm->level + 1;
5992 active->cxsr = wm->cxsr;
5993
ff32c54e 5994 for (level = 0; level < active->num_levels; level++) {
114d7dc0 5995 struct g4x_pipe_wm *raw =
ff32c54e
VS
5996 &crtc_state->wm.vlv.raw[level];
5997
5998 active->sr[level].plane = wm->sr.plane;
5999 active->sr[level].cursor = wm->sr.cursor;
6000
6001 for_each_plane_id_on_crtc(crtc, plane_id) {
6002 active->wm[level].plane[plane_id] =
6003 wm->pipe[pipe].plane[plane_id];
6004
6005 raw->plane[plane_id] =
6006 vlv_invert_wm_value(active->wm[level].plane[plane_id],
6007 fifo_state->plane[plane_id]);
6008 }
6009 }
6010
6011 for_each_plane_id_on_crtc(crtc, plane_id)
6012 vlv_raw_plane_wm_set(crtc_state, level,
6013 plane_id, USHRT_MAX);
6014 vlv_invalidate_wms(crtc, active, level);
6015
6016 crtc_state->wm.vlv.optimal = *active;
4841da51 6017 crtc_state->wm.vlv.intermediate = *active;
ff32c54e 6018
6eb1a681 6019 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
1b31389c
VS
6020 pipe_name(pipe),
6021 wm->pipe[pipe].plane[PLANE_PRIMARY],
6022 wm->pipe[pipe].plane[PLANE_CURSOR],
6023 wm->pipe[pipe].plane[PLANE_SPRITE0],
6024 wm->pipe[pipe].plane[PLANE_SPRITE1]);
ff32c54e 6025 }
6eb1a681
VS
6026
6027 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6028 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6029}
6030
602ae835
VS
6031void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6032{
6033 struct intel_plane *plane;
6034 struct intel_crtc *crtc;
6035
6036 mutex_lock(&dev_priv->wm.wm_mutex);
6037
6038 for_each_intel_plane(&dev_priv->drm, plane) {
6039 struct intel_crtc *crtc =
6040 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6041 struct intel_crtc_state *crtc_state =
6042 to_intel_crtc_state(crtc->base.state);
6043 struct intel_plane_state *plane_state =
6044 to_intel_plane_state(plane->base.state);
6045 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6046 const struct vlv_fifo_state *fifo_state =
6047 &crtc_state->wm.vlv.fifo_state;
6048 enum plane_id plane_id = plane->id;
6049 int level;
6050
6051 if (plane_state->base.visible)
6052 continue;
6053
6054 for (level = 0; level < wm_state->num_levels; level++) {
114d7dc0 6055 struct g4x_pipe_wm *raw =
602ae835
VS
6056 &crtc_state->wm.vlv.raw[level];
6057
6058 raw->plane[plane_id] = 0;
6059
6060 wm_state->wm[level].plane[plane_id] =
6061 vlv_invert_wm_value(raw->plane[plane_id],
6062 fifo_state->plane[plane_id]);
6063 }
6064 }
6065
6066 for_each_intel_crtc(&dev_priv->drm, crtc) {
6067 struct intel_crtc_state *crtc_state =
6068 to_intel_crtc_state(crtc->base.state);
6069
6070 crtc_state->wm.vlv.intermediate =
6071 crtc_state->wm.vlv.optimal;
6072 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6073 }
6074
6075 vlv_program_watermarks(dev_priv);
6076
6077 mutex_unlock(&dev_priv->wm.wm_mutex);
6078}
6079
f72b84c6
VS
6080/*
6081 * FIXME should probably kill this and improve
6082 * the real watermark readout/sanitation instead
6083 */
6084static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6085{
6086 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6087 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6088 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6089
6090 /*
6091 * Don't touch WM1S_LP_EN here.
6092 * Doing so could cause underruns.
6093 */
6094}
6095
cd1d3ee9 6096void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
243e6a44 6097{
820c1980 6098 struct ilk_wm_values *hw = &dev_priv->wm.hw;
cd1d3ee9 6099 struct intel_crtc *crtc;
243e6a44 6100
f72b84c6
VS
6101 ilk_init_lp_watermarks(dev_priv);
6102
cd1d3ee9 6103 for_each_intel_crtc(&dev_priv->drm, crtc)
243e6a44
VS
6104 ilk_pipe_wm_get_hw_state(crtc);
6105
6106 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6107 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6108 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6109
6110 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
175fded1 6111 if (INTEL_GEN(dev_priv) >= 7) {
cfa7698b
VS
6112 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6113 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6114 }
243e6a44 6115
8652744b 6116 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
ac9545fd
VS
6117 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6118 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
fd6b8f43 6119 else if (IS_IVYBRIDGE(dev_priv))
ac9545fd
VS
6120 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6121 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
243e6a44
VS
6122
6123 hw->enable_fbc_wm =
6124 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6125}
6126
b445e3b0
ED
6127/**
6128 * intel_update_watermarks - update FIFO watermark values based on current modes
31383410 6129 * @crtc: the #intel_crtc on which to compute the WM
b445e3b0
ED
6130 *
6131 * Calculate watermark values for the various WM regs based on current mode
6132 * and plane configuration.
6133 *
6134 * There are several cases to deal with here:
6135 * - normal (i.e. non-self-refresh)
6136 * - self-refresh (SR) mode
6137 * - lines are large relative to FIFO size (buffer can hold up to 2)
6138 * - lines are small relative to FIFO size (buffer can hold more than 2
6139 * lines), so need to account for TLB latency
6140 *
6141 * The normal calculation is:
6142 * watermark = dotclock * bytes per pixel * latency
6143 * where latency is platform & configuration dependent (we assume pessimal
6144 * values here).
6145 *
6146 * The SR calculation is:
6147 * watermark = (trunc(latency/line time)+1) * surface width *
6148 * bytes per pixel
6149 * where
6150 * line time = htotal / dotclock
6151 * surface width = hdisplay for normal plane and 64 for cursor
6152 * and latency is assumed to be high, as above.
6153 *
6154 * The final value programmed to the register should always be rounded up,
6155 * and include an extra 2 entries to account for clock crossings.
6156 *
6157 * We don't use the sprite, so we can ignore that. And on Crestline we have
6158 * to set the non-SR watermarks to 8.
6159 */
432081bc 6160void intel_update_watermarks(struct intel_crtc *crtc)
b445e3b0 6161{
432081bc 6162 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
b445e3b0
ED
6163
6164 if (dev_priv->display.update_wm)
46ba614c 6165 dev_priv->display.update_wm(crtc);
b445e3b0
ED
6166}
6167
2503a0fe
KM
6168void intel_enable_ipc(struct drm_i915_private *dev_priv)
6169{
6170 u32 val;
6171
fd847b8e
JRS
6172 if (!HAS_IPC(dev_priv))
6173 return;
6174
2503a0fe
KM
6175 val = I915_READ(DISP_ARB_CTL2);
6176
6177 if (dev_priv->ipc_enabled)
6178 val |= DISP_IPC_ENABLE;
6179 else
6180 val &= ~DISP_IPC_ENABLE;
6181
6182 I915_WRITE(DISP_ARB_CTL2, val);
6183}
6184
6185void intel_init_ipc(struct drm_i915_private *dev_priv)
6186{
2503a0fe
KM
6187 if (!HAS_IPC(dev_priv))
6188 return;
6189
c9b818d3
JRS
6190 /* Display WA #1141: SKL:all KBL:all CFL */
6191 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6192 dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
6193 else
6194 dev_priv->ipc_enabled = true;
6195
2503a0fe
KM
6196 intel_enable_ipc(dev_priv);
6197}
6198
/*
 * Spinlock protecting the IPS related data structures.
 */
DEFINE_SPINLOCK(mchdev_lock);

/*
 * Global through which the IPS driver gets at the current i915 device.
 * Protected by mchdev_lock.
 */
static struct drm_i915_private *i915_mch_dev;
6207
91d14251 6208bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
2b4e57bd 6209{
2b4e57bd
ED
6210 u16 rgvswctl;
6211
67520415 6212 lockdep_assert_held(&mchdev_lock);
9270388e 6213
2b4e57bd
ED
6214 rgvswctl = I915_READ16(MEMSWCTL);
6215 if (rgvswctl & MEMCTL_CMD_STS) {
6216 DRM_DEBUG("gpu busy, RCS change rejected\n");
6217 return false; /* still busy with another command */
6218 }
6219
6220 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6221 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6222 I915_WRITE16(MEMSWCTL, rgvswctl);
6223 POSTING_READ16(MEMSWCTL);
6224
6225 rgvswctl |= MEMCTL_CMD_STS;
6226 I915_WRITE16(MEMSWCTL, rgvswctl);
6227
6228 return true;
6229}
6230
91d14251 6231static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
2b4e57bd 6232{
84f1b20f 6233 u32 rgvmodectl;
2b4e57bd
ED
6234 u8 fmax, fmin, fstart, vstart;
6235
9270388e
DV
6236 spin_lock_irq(&mchdev_lock);
6237
84f1b20f
TU
6238 rgvmodectl = I915_READ(MEMMODECTL);
6239
2b4e57bd
ED
6240 /* Enable temp reporting */
6241 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6242 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6243
6244 /* 100ms RC evaluation intervals */
6245 I915_WRITE(RCUPEI, 100000);
6246 I915_WRITE(RCDNEI, 100000);
6247
6248 /* Set max/min thresholds to 90ms and 80ms respectively */
6249 I915_WRITE(RCBMAXAVG, 90000);
6250 I915_WRITE(RCBMINAVG, 80000);
6251
6252 I915_WRITE(MEMIHYST, 1);
6253
6254 /* Set up min, max, and cur for interrupt handling */
6255 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6256 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6257 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6258 MEMMODE_FSTART_SHIFT;
6259
616847e7 6260 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
2b4e57bd
ED
6261 PXVFREQ_PX_SHIFT;
6262
20e4d407
DV
6263 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6264 dev_priv->ips.fstart = fstart;
2b4e57bd 6265
20e4d407
DV
6266 dev_priv->ips.max_delay = fstart;
6267 dev_priv->ips.min_delay = fmin;
6268 dev_priv->ips.cur_delay = fstart;
2b4e57bd
ED
6269
6270 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6271 fmax, fmin, fstart);
6272
6273 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6274
6275 /*
6276 * Interrupts will be enabled in ironlake_irq_postinstall
6277 */
6278
6279 I915_WRITE(VIDSTART, vstart);
6280 POSTING_READ(VIDSTART);
6281
6282 rgvmodectl |= MEMMODE_SWMODE_EN;
6283 I915_WRITE(MEMMODECTL, rgvmodectl);
6284
9270388e 6285 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
2b4e57bd 6286 DRM_ERROR("stuck trying to change perf mode\n");
dd92d8de 6287 mdelay(1);
2b4e57bd 6288
91d14251 6289 ironlake_set_drps(dev_priv, fstart);
2b4e57bd 6290
7d81c3e0
VS
6291 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6292 I915_READ(DDREC) + I915_READ(CSIEC);
20e4d407 6293 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
7d81c3e0 6294 dev_priv->ips.last_count2 = I915_READ(GFXEC);
5ed0bdf2 6295 dev_priv->ips.last_time2 = ktime_get_raw_ns();
9270388e
DV
6296
6297 spin_unlock_irq(&mchdev_lock);
2b4e57bd
ED
6298}
6299
91d14251 6300static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
2b4e57bd 6301{
9270388e
DV
6302 u16 rgvswctl;
6303
6304 spin_lock_irq(&mchdev_lock);
6305
6306 rgvswctl = I915_READ16(MEMSWCTL);
2b4e57bd
ED
6307
6308 /* Ack interrupts, disable EFC interrupt */
6309 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6310 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6311 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6312 I915_WRITE(DEIIR, DE_PCU_EVENT);
6313 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6314
6315 /* Go back to the starting frequency */
91d14251 6316 ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
dd92d8de 6317 mdelay(1);
2b4e57bd
ED
6318 rgvswctl |= MEMCTL_CMD_STS;
6319 I915_WRITE(MEMSWCTL, rgvswctl);
dd92d8de 6320 mdelay(1);
2b4e57bd 6321
9270388e 6322 spin_unlock_irq(&mchdev_lock);
2b4e57bd
ED
6323}
6324
acbe9475
DV
6325/* There's a funny hw issue where the hw returns all 0 when reading from
6326 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6327 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6328 * all limits and the gpu stuck at whatever frequency it is at atm).
6329 */
74ef1173 6330static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
2b4e57bd 6331{
562d9bae 6332 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7b9e0ae6 6333 u32 limits;
2b4e57bd 6334
20b46e59
DV
6335 /* Only set the down limit when we've reached the lowest level to avoid
6336 * getting more interrupts, otherwise leave this clear. This prevents a
6337 * race in the hw when coming out of rc6: There's a tiny window where
6338 * the hw runs at the minimal clock before selecting the desired
6339 * frequency, if the down threshold expires in that window we will not
6340 * receive a down interrupt. */
35ceabf3 6341 if (INTEL_GEN(dev_priv) >= 9) {
562d9bae
SAK
6342 limits = (rps->max_freq_softlimit) << 23;
6343 if (val <= rps->min_freq_softlimit)
6344 limits |= (rps->min_freq_softlimit) << 14;
74ef1173 6345 } else {
562d9bae
SAK
6346 limits = rps->max_freq_softlimit << 24;
6347 if (val <= rps->min_freq_softlimit)
6348 limits |= rps->min_freq_softlimit << 16;
74ef1173 6349 }
20b46e59
DV
6350
6351 return limits;
6352}
6353
60548c55 6354static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
dd75fdc8 6355{
562d9bae 6356 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8a586437
AG
6357 u32 threshold_up = 0, threshold_down = 0; /* in % */
6358 u32 ei_up = 0, ei_down = 0;
dd75fdc8 6359
60548c55 6360 lockdep_assert_held(&rps->power.mutex);
dd75fdc8 6361
60548c55 6362 if (new_power == rps->power.mode)
dd75fdc8
CW
6363 return;
6364
6365 /* Note the units here are not exactly 1us, but 1280ns. */
6366 switch (new_power) {
6367 case LOW_POWER:
6368 /* Upclock if more than 95% busy over 16ms */
8a586437
AG
6369 ei_up = 16000;
6370 threshold_up = 95;
dd75fdc8
CW
6371
6372 /* Downclock if less than 85% busy over 32ms */
8a586437
AG
6373 ei_down = 32000;
6374 threshold_down = 85;
dd75fdc8
CW
6375 break;
6376
6377 case BETWEEN:
6378 /* Upclock if more than 90% busy over 13ms */
8a586437
AG
6379 ei_up = 13000;
6380 threshold_up = 90;
dd75fdc8
CW
6381
6382 /* Downclock if less than 75% busy over 32ms */
8a586437
AG
6383 ei_down = 32000;
6384 threshold_down = 75;
dd75fdc8
CW
6385 break;
6386
6387 case HIGH_POWER:
6388 /* Upclock if more than 85% busy over 10ms */
8a586437
AG
6389 ei_up = 10000;
6390 threshold_up = 85;
dd75fdc8
CW
6391
6392 /* Downclock if less than 60% busy over 32ms */
8a586437
AG
6393 ei_down = 32000;
6394 threshold_down = 60;
dd75fdc8
CW
6395 break;
6396 }
6397
6067a27d
MK
6398 /* When byt can survive without system hang with dynamic
6399 * sw freq adjustments, this restriction can be lifted.
6400 */
6401 if (IS_VALLEYVIEW(dev_priv))
6402 goto skip_hw_write;
6403
8a586437 6404 I915_WRITE(GEN6_RP_UP_EI,
a72b5623 6405 GT_INTERVAL_FROM_US(dev_priv, ei_up));
8a586437 6406 I915_WRITE(GEN6_RP_UP_THRESHOLD,
a72b5623
CW
6407 GT_INTERVAL_FROM_US(dev_priv,
6408 ei_up * threshold_up / 100));
8a586437
AG
6409
6410 I915_WRITE(GEN6_RP_DOWN_EI,
a72b5623 6411 GT_INTERVAL_FROM_US(dev_priv, ei_down));
8a586437 6412 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
a72b5623
CW
6413 GT_INTERVAL_FROM_US(dev_priv,
6414 ei_down * threshold_down / 100));
6415
6416 I915_WRITE(GEN6_RP_CONTROL,
6417 GEN6_RP_MEDIA_TURBO |
6418 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6419 GEN6_RP_MEDIA_IS_GFX |
6420 GEN6_RP_ENABLE |
6421 GEN6_RP_UP_BUSY_AVG |
6422 GEN6_RP_DOWN_IDLE_AVG);
8a586437 6423
6067a27d 6424skip_hw_write:
60548c55
CW
6425 rps->power.mode = new_power;
6426 rps->power.up_threshold = threshold_up;
6427 rps->power.down_threshold = threshold_down;
6428}
6429
6430static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6431{
6432 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6433 int new_power;
6434
6435 new_power = rps->power.mode;
6436 switch (rps->power.mode) {
6437 case LOW_POWER:
6438 if (val > rps->efficient_freq + 1 &&
6439 val > rps->cur_freq)
6440 new_power = BETWEEN;
6441 break;
6442
6443 case BETWEEN:
6444 if (val <= rps->efficient_freq &&
6445 val < rps->cur_freq)
6446 new_power = LOW_POWER;
6447 else if (val >= rps->rp0_freq &&
6448 val > rps->cur_freq)
6449 new_power = HIGH_POWER;
6450 break;
6451
6452 case HIGH_POWER:
6453 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6454 val < rps->cur_freq)
6455 new_power = BETWEEN;
6456 break;
6457 }
6458 /* Max/min bins are special */
6459 if (val <= rps->min_freq_softlimit)
6460 new_power = LOW_POWER;
6461 if (val >= rps->max_freq_softlimit)
6462 new_power = HIGH_POWER;
6463
6464 mutex_lock(&rps->power.mutex);
6465 if (rps->power.interactive)
6466 new_power = HIGH_POWER;
6467 rps_set_power(dev_priv, new_power);
6468 mutex_unlock(&rps->power.mutex);
dd75fdc8
CW
6469}
6470
60548c55
CW
6471void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6472{
6473 struct intel_rps *rps = &i915->gt_pm.rps;
6474
6475 if (INTEL_GEN(i915) < 6)
6476 return;
6477
6478 mutex_lock(&rps->power.mutex);
6479 if (interactive) {
6480 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6481 rps_set_power(i915, HIGH_POWER);
6482 } else {
6483 GEM_BUG_ON(!rps->power.interactive);
6484 rps->power.interactive--;
6485 }
6486 mutex_unlock(&rps->power.mutex);
6487}
6488
2876ce73
CW
6489static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6490{
562d9bae 6491 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2876ce73
CW
6492 u32 mask = 0;
6493
e0e8c7cb 6494 /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
562d9bae 6495 if (val > rps->min_freq_softlimit)
e0e8c7cb 6496 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
562d9bae 6497 if (val < rps->max_freq_softlimit)
6f4b12f8 6498 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
2876ce73 6499
7b3c29f6
CW
6500 mask &= dev_priv->pm_rps_events;
6501
59d02a1f 6502 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
2876ce73
CW
6503}
6504
b8a5ff8d
JM
6505/* gen6_set_rps is called to update the frequency request, but should also be
6506 * called when the range (min_delay and max_delay) is modified so that we can
6507 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
9fcee2f7 6508static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
20b46e59 6509{
562d9bae
SAK
6510 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6511
eb64cad1
CW
6512 /* min/max delay may still have been modified so be sure to
6513 * write the limits value.
6514 */
562d9bae 6515 if (val != rps->cur_freq) {
eb64cad1 6516 gen6_set_rps_thresholds(dev_priv, val);
b8a5ff8d 6517
35ceabf3 6518 if (INTEL_GEN(dev_priv) >= 9)
5704195c
AG
6519 I915_WRITE(GEN6_RPNSWREQ,
6520 GEN9_FREQUENCY(val));
dc97997a 6521 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
eb64cad1
CW
6522 I915_WRITE(GEN6_RPNSWREQ,
6523 HSW_FREQUENCY(val));
6524 else
6525 I915_WRITE(GEN6_RPNSWREQ,
6526 GEN6_FREQUENCY(val) |
6527 GEN6_OFFSET(0) |
6528 GEN6_AGGRESSIVE_TURBO);
b8a5ff8d 6529 }
7b9e0ae6 6530
7b9e0ae6
CW
6531 /* Make sure we continue to get interrupts
6532 * until we hit the minimum or maximum frequencies.
6533 */
74ef1173 6534 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
2876ce73 6535 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
7b9e0ae6 6536
562d9bae 6537 rps->cur_freq = val;
0f94592e 6538 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
9fcee2f7
CW
6539
6540 return 0;
2b4e57bd
ED
6541}
6542
9fcee2f7 6543static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
ffe02b40 6544{
9fcee2f7
CW
6545 int err;
6546
dc97997a 6547 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
ffe02b40
VS
6548 "Odd GPU freq value\n"))
6549 val &= ~1;
6550
cd25dd5b
D
6551 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6552
562d9bae 6553 if (val != dev_priv->gt_pm.rps.cur_freq) {
9fcee2f7
CW
6554 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6555 if (err)
6556 return err;
6557
db4c5e0b 6558 gen6_set_rps_thresholds(dev_priv, val);
8fb55197 6559 }
ffe02b40 6560
562d9bae 6561 dev_priv->gt_pm.rps.cur_freq = val;
ffe02b40 6562 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
9fcee2f7
CW
6563
6564 return 0;
ffe02b40
VS
6565}
6566
a7f6e231 6567/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
76c3552f
D
6568 *
6569 * * If Gfx is Idle, then
a7f6e231
D
6570 * 1. Forcewake Media well.
6571 * 2. Request idle freq.
6572 * 3. Release Forcewake of Media well.
76c3552f
D
6573*/
6574static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6575{
562d9bae
SAK
6576 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6577 u32 val = rps->idle_freq;
9fcee2f7 6578 int err;
5549d25f 6579
562d9bae 6580 if (rps->cur_freq <= val)
76c3552f
D
6581 return;
6582
c9efef7b
CW
6583 /* The punit delays the write of the frequency and voltage until it
6584 * determines the GPU is awake. During normal usage we don't want to
6585 * waste power changing the frequency if the GPU is sleeping (rc6).
6586 * However, the GPU and driver is now idle and we do not want to delay
6587 * switching to minimum voltage (reducing power whilst idle) as we do
6588 * not expect to be woken in the near future and so must flush the
6589 * change by waking the device.
6590 *
6591 * We choose to take the media powerwell (either would do to trick the
6592 * punit into committing the voltage change) as that takes a lot less
6593 * power than the render powerwell.
6594 */
a7f6e231 6595 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
9fcee2f7 6596 err = valleyview_set_rps(dev_priv, val);
a7f6e231 6597 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
9fcee2f7
CW
6598
6599 if (err)
6600 DRM_ERROR("Failed to set RPS for idle\n");
76c3552f
D
6601}
6602
43cf3bf0
CW
6603void gen6_rps_busy(struct drm_i915_private *dev_priv)
6604{
562d9bae
SAK
6605 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6606
9f817501 6607 mutex_lock(&dev_priv->pcu_lock);
562d9bae 6608 if (rps->enabled) {
bd64818d
CW
6609 u8 freq;
6610
e0e8c7cb 6611 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
43cf3bf0
CW
6612 gen6_rps_reset_ei(dev_priv);
6613 I915_WRITE(GEN6_PMINTRMSK,
562d9bae 6614 gen6_rps_pm_mask(dev_priv, rps->cur_freq));
2b83c4c4 6615
c33d247d
CW
6616 gen6_enable_rps_interrupts(dev_priv);
6617
bd64818d
CW
6618 /* Use the user's desired frequency as a guide, but for better
6619 * performance, jump directly to RPe as our starting frequency.
6620 */
562d9bae
SAK
6621 freq = max(rps->cur_freq,
6622 rps->efficient_freq);
bd64818d 6623
9fcee2f7 6624 if (intel_set_rps(dev_priv,
bd64818d 6625 clamp(freq,
562d9bae
SAK
6626 rps->min_freq_softlimit,
6627 rps->max_freq_softlimit)))
9fcee2f7 6628 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
43cf3bf0 6629 }
9f817501 6630 mutex_unlock(&dev_priv->pcu_lock);
43cf3bf0
CW
6631}
6632
b29c19b6
CW
6633void gen6_rps_idle(struct drm_i915_private *dev_priv)
6634{
562d9bae
SAK
6635 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6636
c33d247d
CW
6637 /* Flush our bottom-half so that it does not race with us
6638 * setting the idle frequency and so that it is bounded by
6639 * our rpm wakeref. And then disable the interrupts to stop any
6640 * futher RPS reclocking whilst we are asleep.
6641 */
6642 gen6_disable_rps_interrupts(dev_priv);
6643
9f817501 6644 mutex_lock(&dev_priv->pcu_lock);
562d9bae 6645 if (rps->enabled) {
dc97997a 6646 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
76c3552f 6647 vlv_set_rps_idle(dev_priv);
7526ed79 6648 else
562d9bae
SAK
6649 gen6_set_rps(dev_priv, rps->idle_freq);
6650 rps->last_adj = 0;
12c100bf
VS
6651 I915_WRITE(GEN6_PMINTRMSK,
6652 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
c0951f0c 6653 }
9f817501 6654 mutex_unlock(&dev_priv->pcu_lock);
b29c19b6
CW
6655}
6656
e61e0f51 6657void gen6_rps_boost(struct i915_request *rq,
562d9bae 6658 struct intel_rps_client *rps_client)
b29c19b6 6659{
562d9bae 6660 struct intel_rps *rps = &rq->i915->gt_pm.rps;
74d290f8 6661 unsigned long flags;
7b92c1bd
CW
6662 bool boost;
6663
8d3afd7d
CW
6664 /* This is intentionally racy! We peek at the state here, then
6665 * validate inside the RPS worker.
6666 */
562d9bae 6667 if (!rps->enabled)
8d3afd7d 6668 return;
43cf3bf0 6669
253a2817
CW
6670 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
6671 return;
6672
e61e0f51 6673 /* Serializes with i915_request_retire() */
7b92c1bd 6674 boost = false;
74d290f8 6675 spin_lock_irqsave(&rq->lock, flags);
253a2817
CW
6676 if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6677 boost = !atomic_fetch_inc(&rps->num_waiters);
7b92c1bd 6678 rq->waitboost = true;
c0951f0c 6679 }
74d290f8 6680 spin_unlock_irqrestore(&rq->lock, flags);
7b92c1bd
CW
6681 if (!boost)
6682 return;
6683
562d9bae
SAK
6684 if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6685 schedule_work(&rps->work);
7b92c1bd 6686
562d9bae 6687 atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
b29c19b6
CW
6688}
6689
9fcee2f7 6690int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
0a073b84 6691{
562d9bae 6692 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9fcee2f7
CW
6693 int err;
6694
9f817501 6695 lockdep_assert_held(&dev_priv->pcu_lock);
562d9bae
SAK
6696 GEM_BUG_ON(val > rps->max_freq);
6697 GEM_BUG_ON(val < rps->min_freq);
cfd1c488 6698
562d9bae
SAK
6699 if (!rps->enabled) {
6700 rps->cur_freq = val;
76e4e4b5
CW
6701 return 0;
6702 }
6703
dc97997a 6704 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
9fcee2f7 6705 err = valleyview_set_rps(dev_priv, val);
ffe02b40 6706 else
9fcee2f7
CW
6707 err = gen6_set_rps(dev_priv, val);
6708
6709 return err;
0a073b84
JB
6710}
6711
dc97997a 6712static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
20e49366 6713{
20e49366 6714 I915_WRITE(GEN6_RC_CONTROL, 0);
38c23527 6715 I915_WRITE(GEN9_PG_ENABLE, 0);
20e49366
ZW
6716}
6717
dc97997a 6718static void gen9_disable_rps(struct drm_i915_private *dev_priv)
2030d684 6719{
2030d684
AG
6720 I915_WRITE(GEN6_RP_CONTROL, 0);
6721}
6722
960e5465 6723static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
d20d4f0c 6724{
d20d4f0c 6725 I915_WRITE(GEN6_RC_CONTROL, 0);
960e5465
SAK
6726}
6727
6728static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6729{
44fc7d5c 6730 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
2030d684 6731 I915_WRITE(GEN6_RP_CONTROL, 0);
44fc7d5c
DV
6732}
6733
d46b00dc 6734static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
38807746 6735{
38807746
D
6736 I915_WRITE(GEN6_RC_CONTROL, 0);
6737}
6738
d46b00dc
SAK
6739static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6740{
6741 I915_WRITE(GEN6_RP_CONTROL, 0);
6742}
6743
0d6fc92a 6744static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
44fc7d5c 6745{
0d6fc92a 6746 /* We're doing forcewake before Disabling RC6,
98a2e5f9 6747 * This what the BIOS expects when going into suspend */
59bad947 6748 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
98a2e5f9 6749
44fc7d5c 6750 I915_WRITE(GEN6_RC_CONTROL, 0);
d20d4f0c 6751
59bad947 6752 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
d20d4f0c
JB
6753}
6754
0d6fc92a
SAK
6755static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6756{
6757 I915_WRITE(GEN6_RP_CONTROL, 0);
6758}
6759
dc97997a 6760static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
274008e8 6761{
274008e8
SAK
6762 bool enable_rc6 = true;
6763 unsigned long rc6_ctx_base;
fc619841
ID
6764 u32 rc_ctl;
6765 int rc_sw_target;
6766
6767 rc_ctl = I915_READ(GEN6_RC_CONTROL);
6768 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6769 RC_SW_TARGET_STATE_SHIFT;
6770 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6771 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6772 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6773 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6774 rc_sw_target);
274008e8
SAK
6775
6776 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
b99d49cc 6777 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
274008e8
SAK
6778 enable_rc6 = false;
6779 }
6780
6781 /*
6782 * The exact context size is not known for BXT, so assume a page size
6783 * for this check.
6784 */
6785 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
17a05345
MA
6786 if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6787 (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
b99d49cc 6788 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
274008e8
SAK
6789 enable_rc6 = false;
6790 }
6791
6792 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6793 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6794 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6795 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
b99d49cc 6796 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
274008e8
SAK
6797 enable_rc6 = false;
6798 }
6799
fc619841
ID
6800 if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6801 !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6802 !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6803 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6804 enable_rc6 = false;
6805 }
6806
6807 if (!I915_READ(GEN6_GFXPAUSE)) {
6808 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6809 enable_rc6 = false;
6810 }
6811
6812 if (!I915_READ(GEN8_MISC_CTRL0)) {
6813 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
274008e8
SAK
6814 enable_rc6 = false;
6815 }
6816
6817 return enable_rc6;
6818}
6819
fb6db0f5 6820static bool sanitize_rc6(struct drm_i915_private *i915)
2b4e57bd 6821{
fb6db0f5 6822 struct intel_device_info *info = mkwrite_device_info(i915);
e6069ca8 6823
fb6db0f5
CW
6824 /* Powersaving is controlled by the host when inside a VM */
6825 if (intel_vgpu_active(i915))
6826 info->has_rc6 = 0;
274008e8 6827
fb6db0f5
CW
6828 if (info->has_rc6 &&
6829 IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
274008e8 6830 DRM_INFO("RC6 disabled by BIOS\n");
fb6db0f5 6831 info->has_rc6 = 0;
274008e8
SAK
6832 }
6833
fb6db0f5
CW
6834 /*
6835 * We assume that we do not have any deep rc6 levels if we don't have
6836 * have the previous rc6 level supported, i.e. we use HAS_RC6()
6837 * as the initial coarse check for rc6 in general, moving on to
6838 * progressively finer/deeper levels.
6839 */
6840 if (!info->has_rc6 && info->has_rc6p)
6841 info->has_rc6p = 0;
8bade1ad 6842
fb6db0f5 6843 return info->has_rc6;
2b4e57bd
ED
6844}
6845
dc97997a 6846static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
3280e8b0 6847{
562d9bae
SAK
6848 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6849
3280e8b0 6850 /* All of these values are in units of 50MHz */
773ea9a8 6851
93ee2920 6852 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
cc3f90f0 6853 if (IS_GEN9_LP(dev_priv)) {
773ea9a8 6854 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
562d9bae
SAK
6855 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6856 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6857 rps->min_freq = (rp_state_cap >> 0) & 0xff;
35040562 6858 } else {
773ea9a8 6859 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
562d9bae
SAK
6860 rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
6861 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6862 rps->min_freq = (rp_state_cap >> 16) & 0xff;
35040562 6863 }
3280e8b0 6864 /* hw_max = RP0 until we check for overclocking */
562d9bae 6865 rps->max_freq = rps->rp0_freq;
3280e8b0 6866
562d9bae 6867 rps->efficient_freq = rps->rp1_freq;
dc97997a 6868 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
2b2874ef 6869 IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
773ea9a8
CW
6870 u32 ddcc_status = 0;
6871
6872 if (sandybridge_pcode_read(dev_priv,
6873 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6874 &ddcc_status) == 0)
562d9bae 6875 rps->efficient_freq =
46efa4ab
TR
6876 clamp_t(u8,
6877 ((ddcc_status >> 8) & 0xff),
562d9bae
SAK
6878 rps->min_freq,
6879 rps->max_freq);
93ee2920
TR
6880 }
6881
2b2874ef 6882 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
c5e0688c 6883 /* Store the frequency values in 16.66 MHZ units, which is
773ea9a8
CW
6884 * the natural hardware unit for SKL
6885 */
562d9bae
SAK
6886 rps->rp0_freq *= GEN9_FREQ_SCALER;
6887 rps->rp1_freq *= GEN9_FREQ_SCALER;
6888 rps->min_freq *= GEN9_FREQ_SCALER;
6889 rps->max_freq *= GEN9_FREQ_SCALER;
6890 rps->efficient_freq *= GEN9_FREQ_SCALER;
c5e0688c 6891 }
3280e8b0
BW
6892}
6893
3a45b05c 6894static void reset_rps(struct drm_i915_private *dev_priv,
9fcee2f7 6895 int (*set)(struct drm_i915_private *, u8))
3a45b05c 6896{
562d9bae
SAK
6897 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6898 u8 freq = rps->cur_freq;
3a45b05c
CW
6899
6900 /* force a reset */
60548c55 6901 rps->power.mode = -1;
562d9bae 6902 rps->cur_freq = -1;
3a45b05c 6903
9fcee2f7
CW
6904 if (set(dev_priv, freq))
6905 DRM_ERROR("Failed to reset RPS to initial values\n");
3a45b05c
CW
6906}
6907
b6fef0ef 6908/* See the Gen9_GT_PM_Programming_Guide doc for the below */
dc97997a 6909static void gen9_enable_rps(struct drm_i915_private *dev_priv)
b6fef0ef 6910{
b6fef0ef
JB
6911 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6912
36fe778a 6913 /* Program defaults and thresholds for RPS */
cf819eff 6914 if (IS_GEN(dev_priv, 9))
36fe778a
DW
6915 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6916 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
0beb059a
AG
6917
6918 /* 1 second timeout*/
6919 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6920 GT_INTERVAL_FROM_US(dev_priv, 1000000));
6921
b6fef0ef 6922 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
b6fef0ef 6923
0beb059a
AG
6924 /* Leaning on the below call to gen6_set_rps to program/setup the
6925 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6926 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
3a45b05c 6927 reset_rps(dev_priv, gen6_set_rps);
b6fef0ef
JB
6928
6929 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6930}
6931
dc97997a 6932static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
20e49366 6933{
e2f80391 6934 struct intel_engine_cs *engine;
3b3f1650 6935 enum intel_engine_id id;
fb6db0f5 6936 u32 rc6_mode;
20e49366
ZW
6937
6938 /* 1a: Software RC state - RC0 */
6939 I915_WRITE(GEN6_RC_STATE, 0);
6940
6941 /* 1b: Get forcewake during program sequence. Although the driver
6942 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
59bad947 6943 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
20e49366
ZW
6944
6945 /* 2a: Disable RC states. */
6946 I915_WRITE(GEN6_RC_CONTROL, 0);
6947
6948 /* 2b: Program RC6 thresholds.*/
0aab201b
RV
6949 if (INTEL_GEN(dev_priv) >= 10) {
6950 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
6951 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
6952 } else if (IS_SKYLAKE(dev_priv)) {
6953 /*
6954 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6955 * when CPG is enabled
6956 */
63a4dec2 6957 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
0aab201b 6958 } else {
63a4dec2 6959 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
0aab201b
RV
6960 }
6961
20e49366
ZW
6962 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6963 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3b3f1650 6964 for_each_engine(engine, dev_priv, id)
e2f80391 6965 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
97c322e7 6966
1a3d1898 6967 if (HAS_GUC(dev_priv))
97c322e7
SAK
6968 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6969
20e49366 6970 I915_WRITE(GEN6_RC_SLEEP, 0);
20e49366 6971
c1beabcf
CW
6972 /*
6973 * 2c: Program Coarse Power Gating Policies.
6974 *
6975 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
6976 * use instead is a more conservative estimate for the maximum time
6977 * it takes us to service a CS interrupt and submit a new ELSP - that
6978 * is the time which the GPU is idle waiting for the CPU to select the
6979 * next request to execute. If the idle hysteresis is less than that
6980 * interrupt service latency, the hardware will automatically gate
6981 * the power well and we will then incur the wake up cost on top of
6982 * the service latency. A similar guide from intel_pstate is that we
6983 * do not want the enable hysteresis to less than the wakeup latency.
6984 *
6985 * igt/gem_exec_nop/sequential provides a rough estimate for the
6986 * service latency, and puts it around 10us for Broadwell (and other
6987 * big core) and around 40us for Broxton (and other low power cores).
6988 * [Note that for legacy ringbuffer submission, this is less than 1us!]
6989 * However, the wakeup latency on Broxton is closer to 100us. To be
6990 * conservative, we have to factor in a context switch on top (due
6991 * to ksoftirqd).
6992 */
6993 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
6994 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
38c23527 6995
20e49366 6996 /* 3a: Enable RC6 */
1c044f9b 6997 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
e4ffc83d
RV
6998
6999 /* WaRsUseTimeoutMode:cnl (pre-prod) */
7000 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7001 rc6_mode = GEN7_RC_CTL_TO_MODE;
7002 else
7003 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7004
1c044f9b 7005 I915_WRITE(GEN6_RC_CONTROL,
fb6db0f5
CW
7006 GEN6_RC_CTL_HW_ENABLE |
7007 GEN6_RC_CTL_RC6_ENABLE |
7008 rc6_mode);
20e49366 7009
cb07bae0
SK
7010 /*
7011 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
d66047e4 7012 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
cb07bae0 7013 */
dc97997a 7014 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
f2d2fe95
SAK
7015 I915_WRITE(GEN9_PG_ENABLE, 0);
7016 else
fb6db0f5
CW
7017 I915_WRITE(GEN9_PG_ENABLE,
7018 GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
38c23527 7019
59bad947 7020 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
20e49366
ZW
7021}
7022
3a85392c 7023static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
6edee7f3 7024{
e2f80391 7025 struct intel_engine_cs *engine;
3b3f1650 7026 enum intel_engine_id id;
6edee7f3
BW
7027
7028 /* 1a: Software RC state - RC0 */
7029 I915_WRITE(GEN6_RC_STATE, 0);
7030
3a85392c 7031 /* 1b: Get forcewake during program sequence. Although the driver
6edee7f3 7032 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
59bad947 7033 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6edee7f3
BW
7034
7035 /* 2a: Disable RC states. */
7036 I915_WRITE(GEN6_RC_CONTROL, 0);
7037
6edee7f3
BW
7038 /* 2b: Program RC6 thresholds.*/
7039 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7040 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7041 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3b3f1650 7042 for_each_engine(engine, dev_priv, id)
e2f80391 7043 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6edee7f3 7044 I915_WRITE(GEN6_RC_SLEEP, 0);
415544d5 7045 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
6edee7f3
BW
7046
7047 /* 3: Enable RC6 */
415544d5 7048
fb6db0f5
CW
7049 I915_WRITE(GEN6_RC_CONTROL,
7050 GEN6_RC_CTL_HW_ENABLE |
7051 GEN7_RC_CTL_TO_MODE |
7052 GEN6_RC_CTL_RC6_ENABLE);
6edee7f3 7053
3a85392c
SAK
7054 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7055}
7056
7057static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7058{
562d9bae
SAK
7059 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7060
3a85392c
SAK
7061 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7062
7063 /* 1 Program defaults and thresholds for RPS*/
f9bdc585 7064 I915_WRITE(GEN6_RPNSWREQ,
562d9bae 7065 HSW_FREQUENCY(rps->rp1_freq));
f9bdc585 7066 I915_WRITE(GEN6_RC_VIDEO_FREQ,
562d9bae 7067 HSW_FREQUENCY(rps->rp1_freq));
7526ed79
DV
7068 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7069 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7070
7071 /* Docs recommend 900MHz, and 300 MHz respectively */
7072 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
562d9bae
SAK
7073 rps->max_freq_softlimit << 24 |
7074 rps->min_freq_softlimit << 16);
7526ed79
DV
7075
7076 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7077 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7078 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7079 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7080
7081 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6edee7f3 7082
3a85392c 7083 /* 2: Enable RPS */
7526ed79
DV
7084 I915_WRITE(GEN6_RP_CONTROL,
7085 GEN6_RP_MEDIA_TURBO |
7086 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7087 GEN6_RP_MEDIA_IS_GFX |
7088 GEN6_RP_ENABLE |
7089 GEN6_RP_UP_BUSY_AVG |
7090 GEN6_RP_DOWN_IDLE_AVG);
7091
3a45b05c 7092 reset_rps(dev_priv, gen6_set_rps);
7526ed79 7093
59bad947 7094 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6edee7f3
BW
7095}
7096
960e5465 7097static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
2b4e57bd 7098{
e2f80391 7099 struct intel_engine_cs *engine;
3b3f1650 7100 enum intel_engine_id id;
fb6db0f5 7101 u32 rc6vids, rc6_mask;
2b4e57bd 7102 u32 gtfifodbg;
b4ac5afc 7103 int ret;
2b4e57bd 7104
2b4e57bd 7105 I915_WRITE(GEN6_RC_STATE, 0);
2b4e57bd
ED
7106
7107 /* Clear the DBG now so we don't confuse earlier errors */
297b32ec
VS
7108 gtfifodbg = I915_READ(GTFIFODBG);
7109 if (gtfifodbg) {
2b4e57bd
ED
7110 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7111 I915_WRITE(GTFIFODBG, gtfifodbg);
7112 }
7113
59bad947 7114 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
2b4e57bd
ED
7115
7116 /* disable the counters and set deterministic thresholds */
7117 I915_WRITE(GEN6_RC_CONTROL, 0);
7118
7119 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7120 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7121 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7122 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7123 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7124
3b3f1650 7125 for_each_engine(engine, dev_priv, id)
e2f80391 7126 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
2b4e57bd
ED
7127
7128 I915_WRITE(GEN6_RC_SLEEP, 0);
7129 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
dc97997a 7130 if (IS_IVYBRIDGE(dev_priv))
351aa566
SM
7131 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7132 else
7133 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
0920a487 7134 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
2b4e57bd
ED
7135 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7136
5a7dc92a 7137 /* We don't use those on Haswell */
fb6db0f5
CW
7138 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7139 if (HAS_RC6p(dev_priv))
7140 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7141 if (HAS_RC6pp(dev_priv))
7142 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
2b4e57bd
ED
7143 I915_WRITE(GEN6_RC_CONTROL,
7144 rc6_mask |
7145 GEN6_RC_CTL_EI_MODE(1) |
7146 GEN6_RC_CTL_HW_ENABLE);
7147
31643d54
BW
7148 rc6vids = 0;
7149 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
cf819eff 7150 if (IS_GEN(dev_priv, 6) && ret) {
31643d54 7151 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
cf819eff 7152 } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
31643d54
BW
7153 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7154 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7155 rc6vids &= 0xffff00;
7156 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7157 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7158 if (ret)
7159 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7160 }
7161
59bad947 7162 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2b4e57bd
ED
7163}
7164
960e5465
SAK
7165static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7166{
960e5465
SAK
7167 /* Here begins a magic sequence of register writes to enable
7168 * auto-downclocking.
7169 *
7170 * Perhaps there might be some value in exposing these to
7171 * userspace...
7172 */
7173 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7174
7175 /* Power down if completely idle for over 50ms */
7176 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7177 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7178
7179 reset_rps(dev_priv, gen6_set_rps);
7180
7181 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7182}
7183
fb7404e8 7184static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
2b4e57bd 7185{
562d9bae 7186 struct intel_rps *rps = &dev_priv->gt_pm.rps;
66c1f77a
MK
7187 const int min_freq = 15;
7188 const int scaling_factor = 180;
3ebecd07
CW
7189 unsigned int gpu_freq;
7190 unsigned int max_ia_freq, min_ring_freq;
4c8c7743 7191 unsigned int max_gpu_freq, min_gpu_freq;
eda79642 7192 struct cpufreq_policy *policy;
2b4e57bd 7193
9f817501 7194 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
79f5b2c7 7195
66c1f77a
MK
7196 if (rps->max_freq <= rps->min_freq)
7197 return;
7198
eda79642
BW
7199 policy = cpufreq_cpu_get(0);
7200 if (policy) {
7201 max_ia_freq = policy->cpuinfo.max_freq;
7202 cpufreq_cpu_put(policy);
7203 } else {
7204 /*
7205 * Default to measured freq if none found, PCU will ensure we
7206 * don't go over
7207 */
2b4e57bd 7208 max_ia_freq = tsc_khz;
eda79642 7209 }
2b4e57bd
ED
7210
7211 /* Convert from kHz to MHz */
7212 max_ia_freq /= 1000;
7213
153b4b95 7214 min_ring_freq = I915_READ(DCLK) & 0xf;
f6aca45c
BW
7215 /* convert DDR frequency from units of 266.6MHz to bandwidth */
7216 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
3ebecd07 7217
d586b5f4
CW
7218 min_gpu_freq = rps->min_freq;
7219 max_gpu_freq = rps->max_freq;
2b2874ef 7220 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
4c8c7743 7221 /* Convert GT frequency to 50 HZ units */
d586b5f4
CW
7222 min_gpu_freq /= GEN9_FREQ_SCALER;
7223 max_gpu_freq /= GEN9_FREQ_SCALER;
4c8c7743
AG
7224 }
7225
2b4e57bd
ED
7226 /*
7227 * For each potential GPU frequency, load a ring frequency we'd like
7228 * to use for memory access. We do this by specifying the IA frequency
7229 * the PCU should use as a reference to determine the ring frequency.
7230 */
4c8c7743 7231 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
66c1f77a 7232 const int diff = max_gpu_freq - gpu_freq;
3ebecd07
CW
7233 unsigned int ia_freq = 0, ring_freq = 0;
7234
2b2874ef 7235 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
4c8c7743
AG
7236 /*
7237 * ring_freq = 2 * GT. ring_freq is in 100MHz units
7238 * No floor required for ring frequency on SKL.
7239 */
7240 ring_freq = gpu_freq;
c56b89f1 7241 } else if (INTEL_GEN(dev_priv) >= 8) {
46c764d4
BW
7242 /* max(2 * GT, DDR). NB: GT is 50MHz units */
7243 ring_freq = max(min_ring_freq, gpu_freq);
dc97997a 7244 } else if (IS_HASWELL(dev_priv)) {
f6aca45c 7245 ring_freq = mult_frac(gpu_freq, 5, 4);
3ebecd07
CW
7246 ring_freq = max(min_ring_freq, ring_freq);
7247 /* leave ia_freq as the default, chosen by cpufreq */
7248 } else {
7249 /* On older processors, there is no separate ring
7250 * clock domain, so in order to boost the bandwidth
7251 * of the ring, we need to upclock the CPU (ia_freq).
7252 *
7253 * For GPU frequencies less than 750MHz,
7254 * just use the lowest ring freq.
7255 */
7256 if (gpu_freq < min_freq)
7257 ia_freq = 800;
7258 else
7259 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7260 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7261 }
2b4e57bd 7262
42c0526c
BW
7263 sandybridge_pcode_write(dev_priv,
7264 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
3ebecd07
CW
7265 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7266 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7267 gpu_freq);
2b4e57bd 7268 }
2b4e57bd
ED
7269}
7270
03af2045 7271static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
2b6b3a09
D
7272{
7273 u32 val, rp0;
7274
5b5929cb 7275 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
2b6b3a09 7276
0258404f 7277 switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
5b5929cb
JN
7278 case 8:
7279 /* (2 * 4) config */
7280 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7281 break;
7282 case 12:
7283 /* (2 * 6) config */
7284 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7285 break;
7286 case 16:
7287 /* (2 * 8) config */
7288 default:
7289 /* Setting (2 * 8) Min RP0 for any other combination */
7290 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7291 break;
095acd5f 7292 }
5b5929cb
JN
7293
7294 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7295
2b6b3a09
D
7296 return rp0;
7297}
7298
7299static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7300{
7301 u32 val, rpe;
7302
7303 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7304 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7305
7306 return rpe;
7307}
7308
7707df4a
D
7309static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7310{
7311 u32 val, rp1;
7312
5b5929cb
JN
7313 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7314 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7315
7707df4a
D
7316 return rp1;
7317}
7318
96676fe3
D
7319static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7320{
7321 u32 val, rpn;
7322
7323 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7324 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7325 FB_GFX_FREQ_FUSE_MASK);
7326
7327 return rpn;
7328}
7329
f8f2b001
D
7330static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7331{
7332 u32 val, rp1;
7333
7334 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7335
7336 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7337
7338 return rp1;
7339}
7340
03af2045 7341static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
0a073b84
JB
7342{
7343 u32 val, rp0;
7344
64936258 7345 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
0a073b84
JB
7346
7347 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7348 /* Clamp to max */
7349 rp0 = min_t(u32, rp0, 0xea);
7350
7351 return rp0;
7352}
7353
7354static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7355{
7356 u32 val, rpe;
7357
64936258 7358 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
0a073b84 7359 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
64936258 7360 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
0a073b84
JB
7361 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7362
7363 return rpe;
7364}
7365
03af2045 7366static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
0a073b84 7367{
36146035
ID
7368 u32 val;
7369
7370 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7371 /*
7372 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7373 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7374 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7375 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
7376 * to make sure it matches what Punit accepts.
7377 */
7378 return max_t(u32, val, 0xc0);
0a073b84
JB
7379}
7380
ae48434c
ID
7381/* Check that the pctx buffer wasn't move under us. */
7382static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7383{
7384 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7385
77894226 7386 WARN_ON(pctx_addr != dev_priv->dsm.start +
ae48434c
ID
7387 dev_priv->vlv_pctx->stolen->start);
7388}
7389
38807746
D
7390
7391/* Check that the pcbr address is not empty. */
7392static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7393{
7394 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7395
7396 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7397}
7398
dc97997a 7399static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
38807746 7400{
b7128ef1
MA
7401 resource_size_t pctx_paddr, paddr;
7402 resource_size_t pctx_size = 32*1024;
38807746 7403 u32 pcbr;
38807746 7404
38807746
D
7405 pcbr = I915_READ(VLV_PCBR);
7406 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
ce611ef8 7407 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
77894226
MA
7408 paddr = dev_priv->dsm.end + 1 - pctx_size;
7409 GEM_BUG_ON(paddr > U32_MAX);
38807746
D
7410
7411 pctx_paddr = (paddr & (~4095));
7412 I915_WRITE(VLV_PCBR, pctx_paddr);
7413 }
ce611ef8
VS
7414
7415 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
38807746
D
7416}
7417
dc97997a 7418static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
c9cddffc 7419{
c9cddffc 7420 struct drm_i915_gem_object *pctx;
b7128ef1
MA
7421 resource_size_t pctx_paddr;
7422 resource_size_t pctx_size = 24*1024;
c9cddffc 7423 u32 pcbr;
c9cddffc
JB
7424
7425 pcbr = I915_READ(VLV_PCBR);
7426 if (pcbr) {
7427 /* BIOS set it up already, grab the pre-alloc'd space */
b7128ef1 7428 resource_size_t pcbr_offset;
c9cddffc 7429
77894226 7430 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
187685cb 7431 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
c9cddffc 7432 pcbr_offset,
190d6cd5 7433 I915_GTT_OFFSET_NONE,
c9cddffc
JB
7434 pctx_size);
7435 goto out;
7436 }
7437
ce611ef8
VS
7438 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7439
c9cddffc
JB
7440 /*
7441 * From the Gunit register HAS:
7442 * The Gfx driver is expected to program this register and ensure
7443 * proper allocation within Gfx stolen memory. For example, this
7444 * register should be programmed such than the PCBR range does not
7445 * overlap with other ranges, such as the frame buffer, protected
7446 * memory, or any other relevant ranges.
7447 */
187685cb 7448 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
c9cddffc
JB
7449 if (!pctx) {
7450 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
ee504898 7451 goto out;
c9cddffc
JB
7452 }
7453
77894226
MA
7454 GEM_BUG_ON(range_overflows_t(u64,
7455 dev_priv->dsm.start,
7456 pctx->stolen->start,
7457 U32_MAX));
7458 pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
c9cddffc
JB
7459 I915_WRITE(VLV_PCBR, pctx_paddr);
7460
7461out:
ce611ef8 7462 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
c9cddffc
JB
7463 dev_priv->vlv_pctx = pctx;
7464}
7465
dc97997a 7466static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
ae48434c 7467{
818fed4f 7468 struct drm_i915_gem_object *pctx;
ae48434c 7469
818fed4f
CW
7470 pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7471 if (pctx)
7472 i915_gem_object_put(pctx);
ae48434c
ID
7473}
7474
c30fec65
VS
7475static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7476{
562d9bae 7477 dev_priv->gt_pm.rps.gpll_ref_freq =
c30fec65
VS
7478 vlv_get_cck_clock(dev_priv, "GPLL ref",
7479 CCK_GPLL_CLOCK_CONTROL,
7480 dev_priv->czclk_freq);
7481
7482 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
562d9bae 7483 dev_priv->gt_pm.rps.gpll_ref_freq);
c30fec65
VS
7484}
7485
dc97997a 7486static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
4e80519e 7487{
562d9bae 7488 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2bb25c17 7489 u32 val;
4e80519e 7490
dc97997a 7491 valleyview_setup_pctx(dev_priv);
4e80519e 7492
c30fec65
VS
7493 vlv_init_gpll_ref_freq(dev_priv);
7494
2bb25c17
VS
7495 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7496 switch ((val >> 6) & 3) {
7497 case 0:
7498 case 1:
7499 dev_priv->mem_freq = 800;
7500 break;
7501 case 2:
7502 dev_priv->mem_freq = 1066;
7503 break;
7504 case 3:
7505 dev_priv->mem_freq = 1333;
7506 break;
7507 }
80b83b62 7508 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
2bb25c17 7509
562d9bae
SAK
7510 rps->max_freq = valleyview_rps_max_freq(dev_priv);
7511 rps->rp0_freq = rps->max_freq;
4e80519e 7512 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7513 intel_gpu_freq(dev_priv, rps->max_freq),
7514 rps->max_freq);
4e80519e 7515
562d9bae 7516 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
4e80519e 7517 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7518 intel_gpu_freq(dev_priv, rps->efficient_freq),
7519 rps->efficient_freq);
4e80519e 7520
562d9bae 7521 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
f8f2b001 7522 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7523 intel_gpu_freq(dev_priv, rps->rp1_freq),
7524 rps->rp1_freq);
f8f2b001 7525
562d9bae 7526 rps->min_freq = valleyview_rps_min_freq(dev_priv);
4e80519e 7527 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7528 intel_gpu_freq(dev_priv, rps->min_freq),
7529 rps->min_freq);
4e80519e
ID
7530}
7531
dc97997a 7532static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
38807746 7533{
562d9bae 7534 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2bb25c17 7535 u32 val;
2b6b3a09 7536
dc97997a 7537 cherryview_setup_pctx(dev_priv);
2b6b3a09 7538
c30fec65
VS
7539 vlv_init_gpll_ref_freq(dev_priv);
7540
a580516d 7541 mutex_lock(&dev_priv->sb_lock);
c6e8f39d 7542 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
a580516d 7543 mutex_unlock(&dev_priv->sb_lock);
c6e8f39d 7544
2bb25c17 7545 switch ((val >> 2) & 0x7) {
2bb25c17 7546 case 3:
2bb25c17
VS
7547 dev_priv->mem_freq = 2000;
7548 break;
bfa7df01 7549 default:
2bb25c17
VS
7550 dev_priv->mem_freq = 1600;
7551 break;
7552 }
80b83b62 7553 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
2bb25c17 7554
562d9bae
SAK
7555 rps->max_freq = cherryview_rps_max_freq(dev_priv);
7556 rps->rp0_freq = rps->max_freq;
2b6b3a09 7557 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7558 intel_gpu_freq(dev_priv, rps->max_freq),
7559 rps->max_freq);
2b6b3a09 7560
562d9bae 7561 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
2b6b3a09 7562 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7563 intel_gpu_freq(dev_priv, rps->efficient_freq),
7564 rps->efficient_freq);
2b6b3a09 7565
562d9bae 7566 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7707df4a 7567 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7568 intel_gpu_freq(dev_priv, rps->rp1_freq),
7569 rps->rp1_freq);
7707df4a 7570
562d9bae 7571 rps->min_freq = cherryview_rps_min_freq(dev_priv);
2b6b3a09 7572 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7573 intel_gpu_freq(dev_priv, rps->min_freq),
7574 rps->min_freq);
2b6b3a09 7575
562d9bae
SAK
7576 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7577 rps->min_freq) & 1,
1c14762d 7578 "Odd GPU freq values\n");
38807746
D
7579}
7580
/* Powersave teardown for Valleyview: only the power context needs releasing. */
static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
{
	valleyview_cleanup_pctx(dev_priv);
}
7585
d46b00dc 7586static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
38807746 7587{
e2f80391 7588 struct intel_engine_cs *engine;
3b3f1650 7589 enum intel_engine_id id;
fb6db0f5 7590 u32 gtfifodbg, rc6_mode, pcbr;
38807746 7591
297b32ec
VS
7592 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7593 GT_FIFO_FREE_ENTRIES_CHV);
38807746
D
7594 if (gtfifodbg) {
7595 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7596 gtfifodbg);
7597 I915_WRITE(GTFIFODBG, gtfifodbg);
7598 }
7599
7600 cherryview_check_pctx(dev_priv);
7601
7602 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7603 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
59bad947 7604 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
38807746 7605
160614a2
VS
7606 /* Disable RC states. */
7607 I915_WRITE(GEN6_RC_CONTROL, 0);
7608
38807746
D
7609 /* 2a: Program RC6 thresholds.*/
7610 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7611 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7612 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7613
3b3f1650 7614 for_each_engine(engine, dev_priv, id)
e2f80391 7615 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
38807746
D
7616 I915_WRITE(GEN6_RC_SLEEP, 0);
7617
f4f71c7d
D
7618 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
7619 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
38807746 7620
d46b00dc 7621 /* Allows RC6 residency counter to work */
38807746
D
7622 I915_WRITE(VLV_COUNTER_CONTROL,
7623 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7624 VLV_MEDIA_RC6_COUNT_EN |
7625 VLV_RENDER_RC6_COUNT_EN));
7626
7627 /* For now we assume BIOS is allocating and populating the PCBR */
7628 pcbr = I915_READ(VLV_PCBR);
7629
38807746 7630 /* 3: Enable RC6 */
fb6db0f5
CW
7631 rc6_mode = 0;
7632 if (pcbr >> VLV_PCBR_ADDR_SHIFT)
af5a75a3 7633 rc6_mode = GEN7_RC_CTL_TO_MODE;
38807746
D
7634 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7635
d46b00dc
SAK
7636 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7637}
7638
7639static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7640{
7641 u32 val;
7642
d46b00dc
SAK
7643 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7644
7645 /* 1: Program defaults and thresholds for RPS*/
3cbdb48f 7646 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
2b6b3a09
D
7647 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7648 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7649 I915_WRITE(GEN6_RP_UP_EI, 66000);
7650 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7651
7652 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7653
d46b00dc 7654 /* 2: Enable RPS */
2b6b3a09
D
7655 I915_WRITE(GEN6_RP_CONTROL,
7656 GEN6_RP_MEDIA_HW_NORMAL_MODE |
eb973a5e 7657 GEN6_RP_MEDIA_IS_GFX |
2b6b3a09
D
7658 GEN6_RP_ENABLE |
7659 GEN6_RP_UP_BUSY_AVG |
7660 GEN6_RP_DOWN_IDLE_AVG);
7661
3ef62342
D
7662 /* Setting Fixed Bias */
7663 val = VLV_OVERRIDE_EN |
7664 VLV_SOC_TDP_EN |
7665 CHV_BIAS_CPU_50_SOC_50;
7666 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7667
2b6b3a09
D
7668 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7669
8d40c3ae
VS
7670 /* RPS code assumes GPLL is used */
7671 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7672
742f491d 7673 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
2b6b3a09
D
7674 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7675
3a45b05c 7676 reset_rps(dev_priv, valleyview_set_rps);
2b6b3a09 7677
59bad947 7678 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
38807746
D
7679}
7680
0d6fc92a 7681static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
0a073b84 7682{
e2f80391 7683 struct intel_engine_cs *engine;
3b3f1650 7684 enum intel_engine_id id;
fb6db0f5 7685 u32 gtfifodbg;
0a073b84 7686
ae48434c
ID
7687 valleyview_check_pctx(dev_priv);
7688
297b32ec
VS
7689 gtfifodbg = I915_READ(GTFIFODBG);
7690 if (gtfifodbg) {
f7d85c1e
JB
7691 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7692 gtfifodbg);
0a073b84
JB
7693 I915_WRITE(GTFIFODBG, gtfifodbg);
7694 }
7695
59bad947 7696 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
0a073b84 7697
160614a2
VS
7698 /* Disable RC states. */
7699 I915_WRITE(GEN6_RC_CONTROL, 0);
7700
0a073b84
JB
7701 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7702 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7703 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7704
3b3f1650 7705 for_each_engine(engine, dev_priv, id)
e2f80391 7706 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
0a073b84 7707
2f0aa304 7708 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
0a073b84 7709
0d6fc92a 7710 /* Allows RC6 residency counter to work */
49798eb2 7711 I915_WRITE(VLV_COUNTER_CONTROL,
6b7f6aa7
MK
7712 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7713 VLV_MEDIA_RC0_COUNT_EN |
31685c25 7714 VLV_RENDER_RC0_COUNT_EN |
49798eb2
JB
7715 VLV_MEDIA_RC6_COUNT_EN |
7716 VLV_RENDER_RC6_COUNT_EN));
31685c25 7717
fb6db0f5
CW
7718 I915_WRITE(GEN6_RC_CONTROL,
7719 GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
0a073b84 7720
0d6fc92a
SAK
7721 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7722}
7723
7724static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7725{
7726 u32 val;
7727
0d6fc92a
SAK
7728 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7729
7730 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7731 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7732 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7733 I915_WRITE(GEN6_RP_UP_EI, 66000);
7734 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7735
7736 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7737
7738 I915_WRITE(GEN6_RP_CONTROL,
7739 GEN6_RP_MEDIA_TURBO |
7740 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7741 GEN6_RP_MEDIA_IS_GFX |
7742 GEN6_RP_ENABLE |
7743 GEN6_RP_UP_BUSY_AVG |
7744 GEN6_RP_DOWN_IDLE_CONT);
7745
3ef62342
D
7746 /* Setting Fixed Bias */
7747 val = VLV_OVERRIDE_EN |
7748 VLV_SOC_TDP_EN |
7749 VLV_BIAS_CPU_125_SOC_875;
7750 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7751
64936258 7752 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
0a073b84 7753
8d40c3ae
VS
7754 /* RPS code assumes GPLL is used */
7755 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7756
742f491d 7757 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
0a073b84
JB
7758 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7759
3a45b05c 7760 reset_rps(dev_priv, valleyview_set_rps);
0a073b84 7761
59bad947 7762 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
0a073b84
JB
7763}
7764
dde18883
ED
7765static unsigned long intel_pxfreq(u32 vidfreq)
7766{
7767 unsigned long freq;
7768 int div = (vidfreq & 0x3f0000) >> 16;
7769 int post = (vidfreq & 0x3000) >> 12;
7770 int pre = (vidfreq & 0x7);
7771
7772 if (!pre)
7773 return 0;
7774
7775 freq = ((div * 133333) / ((1<<post) * pre));
7776
7777 return freq;
7778}
7779
eb48eb00
DV
7780static const struct cparams {
7781 u16 i;
7782 u16 t;
7783 u16 m;
7784 u16 c;
7785} cparams[] = {
7786 { 1, 1333, 301, 28664 },
7787 { 1, 1066, 294, 24460 },
7788 { 1, 800, 294, 25192 },
7789 { 0, 1333, 276, 27605 },
7790 { 0, 1066, 276, 27605 },
7791 { 0, 800, 231, 23784 },
7792};
7793
f531dcb2 7794static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
eb48eb00
DV
7795{
7796 u64 total_count, diff, ret;
7797 u32 count1, count2, count3, m = 0, c = 0;
7798 unsigned long now = jiffies_to_msecs(jiffies), diff1;
7799 int i;
7800
67520415 7801 lockdep_assert_held(&mchdev_lock);
02d71956 7802
20e4d407 7803 diff1 = now - dev_priv->ips.last_time1;
eb48eb00
DV
7804
7805 /* Prevent division-by-zero if we are asking too fast.
7806 * Also, we don't get interesting results if we are polling
7807 * faster than once in 10ms, so just return the saved value
7808 * in such cases.
7809 */
7810 if (diff1 <= 10)
20e4d407 7811 return dev_priv->ips.chipset_power;
eb48eb00
DV
7812
7813 count1 = I915_READ(DMIEC);
7814 count2 = I915_READ(DDREC);
7815 count3 = I915_READ(CSIEC);
7816
7817 total_count = count1 + count2 + count3;
7818
7819 /* FIXME: handle per-counter overflow */
20e4d407
DV
7820 if (total_count < dev_priv->ips.last_count1) {
7821 diff = ~0UL - dev_priv->ips.last_count1;
eb48eb00
DV
7822 diff += total_count;
7823 } else {
20e4d407 7824 diff = total_count - dev_priv->ips.last_count1;
eb48eb00
DV
7825 }
7826
7827 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
20e4d407
DV
7828 if (cparams[i].i == dev_priv->ips.c_m &&
7829 cparams[i].t == dev_priv->ips.r_t) {
eb48eb00
DV
7830 m = cparams[i].m;
7831 c = cparams[i].c;
7832 break;
7833 }
7834 }
7835
7836 diff = div_u64(diff, diff1);
7837 ret = ((m * diff) + c);
7838 ret = div_u64(ret, 10);
7839
20e4d407
DV
7840 dev_priv->ips.last_count1 = total_count;
7841 dev_priv->ips.last_time1 = now;
eb48eb00 7842
20e4d407 7843 dev_priv->ips.chipset_power = ret;
eb48eb00
DV
7844
7845 return ret;
7846}
7847
f531dcb2
CW
7848unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7849{
7850 unsigned long val;
7851
cf819eff 7852 if (!IS_GEN(dev_priv, 5))
f531dcb2
CW
7853 return 0;
7854
7855 spin_lock_irq(&mchdev_lock);
7856
7857 val = __i915_chipset_val(dev_priv);
7858
7859 spin_unlock_irq(&mchdev_lock);
7860
7861 return val;
7862}
7863
eb48eb00
DV
7864unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7865{
7866 unsigned long m, x, b;
7867 u32 tsfs;
7868
7869 tsfs = I915_READ(TSFS);
7870
7871 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7872 x = I915_READ8(TR1);
7873
7874 b = tsfs & TSFS_INTR_MASK;
7875
7876 return ((m * x) / 127) - b;
7877}
7878
d972d6ee
MK
7879static int _pxvid_to_vd(u8 pxvid)
7880{
7881 if (pxvid == 0)
7882 return 0;
7883
7884 if (pxvid >= 8 && pxvid < 31)
7885 pxvid = 31;
7886
7887 return (pxvid + 2) * 125;
7888}
7889
7890static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
eb48eb00 7891{
d972d6ee
MK
7892 const int vd = _pxvid_to_vd(pxvid);
7893 const int vm = vd - 1125;
7894
dc97997a 7895 if (INTEL_INFO(dev_priv)->is_mobile)
d972d6ee
MK
7896 return vm > 0 ? vm : 0;
7897
7898 return vd;
eb48eb00
DV
7899}
7900
02d71956 7901static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
eb48eb00 7902{
5ed0bdf2 7903 u64 now, diff, diffms;
eb48eb00
DV
7904 u32 count;
7905
67520415 7906 lockdep_assert_held(&mchdev_lock);
eb48eb00 7907
5ed0bdf2
TG
7908 now = ktime_get_raw_ns();
7909 diffms = now - dev_priv->ips.last_time2;
7910 do_div(diffms, NSEC_PER_MSEC);
eb48eb00
DV
7911
7912 /* Don't divide by 0 */
eb48eb00
DV
7913 if (!diffms)
7914 return;
7915
7916 count = I915_READ(GFXEC);
7917
20e4d407
DV
7918 if (count < dev_priv->ips.last_count2) {
7919 diff = ~0UL - dev_priv->ips.last_count2;
eb48eb00
DV
7920 diff += count;
7921 } else {
20e4d407 7922 diff = count - dev_priv->ips.last_count2;
eb48eb00
DV
7923 }
7924
20e4d407
DV
7925 dev_priv->ips.last_count2 = count;
7926 dev_priv->ips.last_time2 = now;
eb48eb00
DV
7927
7928 /* More magic constants... */
7929 diff = diff * 1181;
7930 diff = div_u64(diff, diffms * 10);
20e4d407 7931 dev_priv->ips.gfx_power = diff;
eb48eb00
DV
7932}
7933
02d71956
DV
7934void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7935{
cf819eff 7936 if (!IS_GEN(dev_priv, 5))
02d71956
DV
7937 return;
7938
9270388e 7939 spin_lock_irq(&mchdev_lock);
02d71956
DV
7940
7941 __i915_update_gfx_val(dev_priv);
7942
9270388e 7943 spin_unlock_irq(&mchdev_lock);
02d71956
DV
7944}
7945
f531dcb2 7946static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
eb48eb00
DV
7947{
7948 unsigned long t, corr, state1, corr2, state2;
7949 u32 pxvid, ext_v;
7950
67520415 7951 lockdep_assert_held(&mchdev_lock);
02d71956 7952
562d9bae 7953 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
eb48eb00
DV
7954 pxvid = (pxvid >> 24) & 0x7f;
7955 ext_v = pvid_to_extvid(dev_priv, pxvid);
7956
7957 state1 = ext_v;
7958
7959 t = i915_mch_val(dev_priv);
7960
7961 /* Revel in the empirically derived constants */
7962
7963 /* Correction factor in 1/100000 units */
7964 if (t > 80)
7965 corr = ((t * 2349) + 135940);
7966 else if (t >= 50)
7967 corr = ((t * 964) + 29317);
7968 else /* < 50 */
7969 corr = ((t * 301) + 1004);
7970
7971 corr = corr * ((150142 * state1) / 10000 - 78642);
7972 corr /= 100000;
20e4d407 7973 corr2 = (corr * dev_priv->ips.corr);
eb48eb00
DV
7974
7975 state2 = (corr2 * state1) / 10000;
7976 state2 /= 100; /* convert to mW */
7977
02d71956 7978 __i915_update_gfx_val(dev_priv);
eb48eb00 7979
20e4d407 7980 return dev_priv->ips.gfx_power + state2;
eb48eb00
DV
7981}
7982
f531dcb2
CW
7983unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7984{
7985 unsigned long val;
7986
cf819eff 7987 if (!IS_GEN(dev_priv, 5))
f531dcb2
CW
7988 return 0;
7989
7990 spin_lock_irq(&mchdev_lock);
7991
7992 val = __i915_gfx_val(dev_priv);
7993
7994 spin_unlock_irq(&mchdev_lock);
7995
7996 return val;
7997}
7998
eb48eb00
DV
7999/**
8000 * i915_read_mch_val - return value for IPS use
8001 *
8002 * Calculate and return a value for the IPS driver to use when deciding whether
8003 * we have thermal and power headroom to increase CPU or GPU power budget.
8004 */
8005unsigned long i915_read_mch_val(void)
8006{
8007 struct drm_i915_private *dev_priv;
8008 unsigned long chipset_val, graphics_val, ret = 0;
8009
9270388e 8010 spin_lock_irq(&mchdev_lock);
eb48eb00
DV
8011 if (!i915_mch_dev)
8012 goto out_unlock;
8013 dev_priv = i915_mch_dev;
8014
f531dcb2
CW
8015 chipset_val = __i915_chipset_val(dev_priv);
8016 graphics_val = __i915_gfx_val(dev_priv);
eb48eb00
DV
8017
8018 ret = chipset_val + graphics_val;
8019
8020out_unlock:
9270388e 8021 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8022
8023 return ret;
8024}
8025EXPORT_SYMBOL_GPL(i915_read_mch_val);
8026
8027/**
8028 * i915_gpu_raise - raise GPU frequency limit
8029 *
8030 * Raise the limit; IPS indicates we have thermal headroom.
8031 */
8032bool i915_gpu_raise(void)
8033{
8034 struct drm_i915_private *dev_priv;
8035 bool ret = true;
8036
9270388e 8037 spin_lock_irq(&mchdev_lock);
eb48eb00
DV
8038 if (!i915_mch_dev) {
8039 ret = false;
8040 goto out_unlock;
8041 }
8042 dev_priv = i915_mch_dev;
8043
20e4d407
DV
8044 if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
8045 dev_priv->ips.max_delay--;
eb48eb00
DV
8046
8047out_unlock:
9270388e 8048 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8049
8050 return ret;
8051}
8052EXPORT_SYMBOL_GPL(i915_gpu_raise);
8053
8054/**
8055 * i915_gpu_lower - lower GPU frequency limit
8056 *
8057 * IPS indicates we're close to a thermal limit, so throttle back the GPU
8058 * frequency maximum.
8059 */
8060bool i915_gpu_lower(void)
8061{
8062 struct drm_i915_private *dev_priv;
8063 bool ret = true;
8064
9270388e 8065 spin_lock_irq(&mchdev_lock);
eb48eb00
DV
8066 if (!i915_mch_dev) {
8067 ret = false;
8068 goto out_unlock;
8069 }
8070 dev_priv = i915_mch_dev;
8071
20e4d407
DV
8072 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
8073 dev_priv->ips.max_delay++;
eb48eb00
DV
8074
8075out_unlock:
9270388e 8076 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8077
8078 return ret;
8079}
8080EXPORT_SYMBOL_GPL(i915_gpu_lower);
8081
8082/**
8083 * i915_gpu_busy - indicate GPU business to IPS
8084 *
8085 * Tell the IPS driver whether or not the GPU is busy.
8086 */
8087bool i915_gpu_busy(void)
8088{
eb48eb00
DV
8089 bool ret = false;
8090
9270388e 8091 spin_lock_irq(&mchdev_lock);
dcff85c8
CW
8092 if (i915_mch_dev)
8093 ret = i915_mch_dev->gt.awake;
9270388e 8094 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8095
8096 return ret;
8097}
8098EXPORT_SYMBOL_GPL(i915_gpu_busy);
8099
8100/**
8101 * i915_gpu_turbo_disable - disable graphics turbo
8102 *
8103 * Disable graphics turbo by resetting the max frequency and setting the
8104 * current frequency to the default.
8105 */
8106bool i915_gpu_turbo_disable(void)
8107{
8108 struct drm_i915_private *dev_priv;
8109 bool ret = true;
8110
9270388e 8111 spin_lock_irq(&mchdev_lock);
eb48eb00
DV
8112 if (!i915_mch_dev) {
8113 ret = false;
8114 goto out_unlock;
8115 }
8116 dev_priv = i915_mch_dev;
8117
20e4d407 8118 dev_priv->ips.max_delay = dev_priv->ips.fstart;
eb48eb00 8119
91d14251 8120 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
eb48eb00
DV
8121 ret = false;
8122
8123out_unlock:
9270388e 8124 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8125
8126 return ret;
8127}
8128EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8129
8130/**
8131 * Tells the intel_ips driver that the i915 driver is now loaded, if
8132 * IPS got loaded first.
8133 *
8134 * This awkward dance is so that neither module has to depend on the
8135 * other in order for IPS to do the appropriate communication of
8136 * GPU turbo limits to i915.
8137 */
8138static void
8139ips_ping_for_i915_load(void)
8140{
8141 void (*link)(void);
8142
8143 link = symbol_get(ips_link_to_i915_driver);
8144 if (link) {
8145 link();
8146 symbol_put(ips_link_to_i915_driver);
8147 }
8148}
8149
8150void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8151{
02d71956
DV
8152 /* We only register the i915 ips part with intel-ips once everything is
8153 * set up, to avoid intel-ips sneaking in and reading bogus values. */
9270388e 8154 spin_lock_irq(&mchdev_lock);
eb48eb00 8155 i915_mch_dev = dev_priv;
9270388e 8156 spin_unlock_irq(&mchdev_lock);
eb48eb00
DV
8157
8158 ips_ping_for_i915_load();
8159}
8160
8161void intel_gpu_ips_teardown(void)
8162{
9270388e 8163 spin_lock_irq(&mchdev_lock);
eb48eb00 8164 i915_mch_dev = NULL;
9270388e 8165 spin_unlock_irq(&mchdev_lock);
eb48eb00 8166}
76c3552f 8167
dc97997a 8168static void intel_init_emon(struct drm_i915_private *dev_priv)
dde18883 8169{
dde18883
ED
8170 u32 lcfuse;
8171 u8 pxw[16];
8172 int i;
8173
8174 /* Disable to program */
8175 I915_WRITE(ECR, 0);
8176 POSTING_READ(ECR);
8177
8178 /* Program energy weights for various events */
8179 I915_WRITE(SDEW, 0x15040d00);
8180 I915_WRITE(CSIEW0, 0x007f0000);
8181 I915_WRITE(CSIEW1, 0x1e220004);
8182 I915_WRITE(CSIEW2, 0x04000004);
8183
8184 for (i = 0; i < 5; i++)
616847e7 8185 I915_WRITE(PEW(i), 0);
dde18883 8186 for (i = 0; i < 3; i++)
616847e7 8187 I915_WRITE(DEW(i), 0);
dde18883
ED
8188
8189 /* Program P-state weights to account for frequency power adjustment */
8190 for (i = 0; i < 16; i++) {
616847e7 8191 u32 pxvidfreq = I915_READ(PXVFREQ(i));
dde18883
ED
8192 unsigned long freq = intel_pxfreq(pxvidfreq);
8193 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8194 PXVFREQ_PX_SHIFT;
8195 unsigned long val;
8196
8197 val = vid * vid;
8198 val *= (freq / 1000);
8199 val *= 255;
8200 val /= (127*127*900);
8201 if (val > 0xff)
8202 DRM_ERROR("bad pxval: %ld\n", val);
8203 pxw[i] = val;
8204 }
8205 /* Render standby states get 0 weight */
8206 pxw[14] = 0;
8207 pxw[15] = 0;
8208
8209 for (i = 0; i < 4; i++) {
8210 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8211 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
616847e7 8212 I915_WRITE(PXW(i), val);
dde18883
ED
8213 }
8214
8215 /* Adjust magic regs to magic values (more experimental results) */
8216 I915_WRITE(OGW0, 0);
8217 I915_WRITE(OGW1, 0);
8218 I915_WRITE(EG0, 0x00007f00);
8219 I915_WRITE(EG1, 0x0000000e);
8220 I915_WRITE(EG2, 0x000e0000);
8221 I915_WRITE(EG3, 0x68000300);
8222 I915_WRITE(EG4, 0x42000000);
8223 I915_WRITE(EG5, 0x00140031);
8224 I915_WRITE(EG6, 0);
8225 I915_WRITE(EG7, 0);
8226
8227 for (i = 0; i < 8; i++)
616847e7 8228 I915_WRITE(PXWL(i), 0);
dde18883
ED
8229
8230 /* Enable PMON + select events */
8231 I915_WRITE(ECR, 0x80000019);
8232
8233 lcfuse = I915_READ(LCFUSE02);
8234
20e4d407 8235 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
dde18883
ED
8236}
8237
dc97997a 8238void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
ae48434c 8239{
562d9bae
SAK
8240 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8241
b268c699
ID
8242 /*
8243 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
8244 * requirement.
8245 */
fb6db0f5 8246 if (!sanitize_rc6(dev_priv)) {
b268c699 8247 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
08ea70a4 8248 pm_runtime_get(&dev_priv->drm.pdev->dev);
b268c699 8249 }
e6069ca8 8250
9f817501 8251 mutex_lock(&dev_priv->pcu_lock);
773ea9a8
CW
8252
8253 /* Initialize RPS limits (for userspace) */
dc97997a
CW
8254 if (IS_CHERRYVIEW(dev_priv))
8255 cherryview_init_gt_powersave(dev_priv);
8256 else if (IS_VALLEYVIEW(dev_priv))
8257 valleyview_init_gt_powersave(dev_priv);
2a13ae79 8258 else if (INTEL_GEN(dev_priv) >= 6)
773ea9a8
CW
8259 gen6_init_rps_frequencies(dev_priv);
8260
8261 /* Derive initial user preferences/limits from the hardware limits */
562d9bae
SAK
8262 rps->idle_freq = rps->min_freq;
8263 rps->cur_freq = rps->idle_freq;
773ea9a8 8264
562d9bae
SAK
8265 rps->max_freq_softlimit = rps->max_freq;
8266 rps->min_freq_softlimit = rps->min_freq;
773ea9a8
CW
8267
8268 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
562d9bae 8269 rps->min_freq_softlimit =
773ea9a8 8270 max_t(int,
562d9bae 8271 rps->efficient_freq,
773ea9a8
CW
8272 intel_freq_opcode(dev_priv, 450));
8273
99ac9612 8274 /* After setting max-softlimit, find the overclock max freq */
cf819eff 8275 if (IS_GEN(dev_priv, 6) ||
99ac9612
CW
8276 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8277 u32 params = 0;
8278
8279 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8280 if (params & BIT(31)) { /* OC supported */
8281 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
562d9bae 8282 (rps->max_freq & 0xff) * 50,
99ac9612 8283 (params & 0xff) * 50);
562d9bae 8284 rps->max_freq = params & 0xff;
99ac9612
CW
8285 }
8286 }
8287
29ecd78d 8288 /* Finally allow us to boost to max by default */
562d9bae 8289 rps->boost_freq = rps->max_freq;
29ecd78d 8290
9f817501 8291 mutex_unlock(&dev_priv->pcu_lock);
ae48434c
ID
8292}
8293
dc97997a 8294void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
ae48434c 8295{
8dac1e1f 8296 if (IS_VALLEYVIEW(dev_priv))
dc97997a 8297 valleyview_cleanup_gt_powersave(dev_priv);
b268c699 8298
fb6db0f5 8299 if (!HAS_RC6(dev_priv))
08ea70a4 8300 pm_runtime_put(&dev_priv->drm.pdev->dev);
ae48434c
ID
8301}
8302
/**
 * intel_suspend_gt_powersave - suspend PM work and helper threads
 * @dev_priv: i915 device
 *
 * We don't want to disable RC6 or other features here, we just want
 * to make sure any work we've queued has finished and won't bother
 * us while we're suspended.
 *
 * At present the function performs no work on any generation.
 */
void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 6) {
		/*
		 * gen6_rps_idle() will be called later to disable
		 * interrupts.
		 */
	}
}
8318
b7137e0c
CW
8319void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8320{
37d933fc
SAK
8321 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8322 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
b7137e0c 8323 intel_disable_gt_powersave(dev_priv);
54b4f68f 8324
d02b98b8
OM
8325 if (INTEL_GEN(dev_priv) >= 11)
8326 gen11_reset_rps_interrupts(dev_priv);
61e1e376 8327 else if (INTEL_GEN(dev_priv) >= 6)
d02b98b8 8328 gen6_reset_rps_interrupts(dev_priv);
156c7ca0
JB
8329}
8330
0870a2a4
SAK
8331static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8332{
8333 lockdep_assert_held(&i915->pcu_lock);
8334
37d933fc
SAK
8335 if (!i915->gt_pm.llc_pstate.enabled)
8336 return;
8337
0870a2a4 8338 /* Currently there is no HW configuration to be done to disable. */
37d933fc
SAK
8339
8340 i915->gt_pm.llc_pstate.enabled = false;
0870a2a4
SAK
8341}
8342
fc77426a 8343static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8090c6b9 8344{
fc77426a 8345 lockdep_assert_held(&dev_priv->pcu_lock);
562d9bae 8346
37d933fc
SAK
8347 if (!dev_priv->gt_pm.rc6.enabled)
8348 return;
8349
fc77426a
SAK
8350 if (INTEL_GEN(dev_priv) >= 9)
8351 gen9_disable_rc6(dev_priv);
8352 else if (IS_CHERRYVIEW(dev_priv))
8353 cherryview_disable_rc6(dev_priv);
8354 else if (IS_VALLEYVIEW(dev_priv))
8355 valleyview_disable_rc6(dev_priv);
8356 else if (INTEL_GEN(dev_priv) >= 6)
8357 gen6_disable_rc6(dev_priv);
37d933fc
SAK
8358
8359 dev_priv->gt_pm.rc6.enabled = false;
fc77426a 8360}
e494837a 8361
fc77426a
SAK
8362static void intel_disable_rps(struct drm_i915_private *dev_priv)
8363{
8364 lockdep_assert_held(&dev_priv->pcu_lock);
e534770a 8365
37d933fc
SAK
8366 if (!dev_priv->gt_pm.rps.enabled)
8367 return;
8368
fc77426a 8369 if (INTEL_GEN(dev_priv) >= 9)
b7137e0c 8370 gen9_disable_rps(dev_priv);
fc77426a 8371 else if (IS_CHERRYVIEW(dev_priv))
b7137e0c 8372 cherryview_disable_rps(dev_priv);
fc77426a 8373 else if (IS_VALLEYVIEW(dev_priv))
b7137e0c 8374 valleyview_disable_rps(dev_priv);
fc77426a 8375 else if (INTEL_GEN(dev_priv) >= 6)
b7137e0c 8376 gen6_disable_rps(dev_priv);
fc77426a 8377 else if (IS_IRONLAKE_M(dev_priv))
b7137e0c 8378 ironlake_disable_drps(dev_priv);
37d933fc
SAK
8379
8380 dev_priv->gt_pm.rps.enabled = false;
fc77426a
SAK
8381}
8382
8383void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8384{
fc77426a 8385 mutex_lock(&dev_priv->pcu_lock);
b7137e0c 8386
fc77426a
SAK
8387 intel_disable_rc6(dev_priv);
8388 intel_disable_rps(dev_priv);
0870a2a4
SAK
8389 if (HAS_LLC(dev_priv))
8390 intel_disable_llc_pstate(dev_priv);
8391
9f817501 8392 mutex_unlock(&dev_priv->pcu_lock);
8090c6b9
DV
8393}
8394
0870a2a4
SAK
8395static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8396{
8397 lockdep_assert_held(&i915->pcu_lock);
8398
37d933fc
SAK
8399 if (i915->gt_pm.llc_pstate.enabled)
8400 return;
8401
0870a2a4 8402 gen6_update_ring_freq(i915);
37d933fc
SAK
8403
8404 i915->gt_pm.llc_pstate.enabled = true;
0870a2a4
SAK
8405}
8406
fc77426a 8407static void intel_enable_rc6(struct drm_i915_private *dev_priv)
1a01ab3b 8408{
fc77426a 8409 lockdep_assert_held(&dev_priv->pcu_lock);
562d9bae 8410
37d933fc
SAK
8411 if (dev_priv->gt_pm.rc6.enabled)
8412 return;
8413
fc77426a
SAK
8414 if (IS_CHERRYVIEW(dev_priv))
8415 cherryview_enable_rc6(dev_priv);
8416 else if (IS_VALLEYVIEW(dev_priv))
8417 valleyview_enable_rc6(dev_priv);
8418 else if (INTEL_GEN(dev_priv) >= 9)
8419 gen9_enable_rc6(dev_priv);
8420 else if (IS_BROADWELL(dev_priv))
8421 gen8_enable_rc6(dev_priv);
8422 else if (INTEL_GEN(dev_priv) >= 6)
8423 gen6_enable_rc6(dev_priv);
37d933fc
SAK
8424
8425 dev_priv->gt_pm.rc6.enabled = true;
fc77426a 8426}
1a01ab3b 8427
fc77426a
SAK
8428static void intel_enable_rps(struct drm_i915_private *dev_priv)
8429{
8430 struct intel_rps *rps = &dev_priv->gt_pm.rps;
0a073b84 8431
fc77426a 8432 lockdep_assert_held(&dev_priv->pcu_lock);
dc97997a 8433
37d933fc
SAK
8434 if (rps->enabled)
8435 return;
8436
dc97997a
CW
8437 if (IS_CHERRYVIEW(dev_priv)) {
8438 cherryview_enable_rps(dev_priv);
8439 } else if (IS_VALLEYVIEW(dev_priv)) {
8440 valleyview_enable_rps(dev_priv);
b7137e0c 8441 } else if (INTEL_GEN(dev_priv) >= 9) {
dc97997a 8442 gen9_enable_rps(dev_priv);
dc97997a
CW
8443 } else if (IS_BROADWELL(dev_priv)) {
8444 gen8_enable_rps(dev_priv);
b7137e0c 8445 } else if (INTEL_GEN(dev_priv) >= 6) {
dc97997a 8446 gen6_enable_rps(dev_priv);
b7137e0c
CW
8447 } else if (IS_IRONLAKE_M(dev_priv)) {
8448 ironlake_enable_drps(dev_priv);
8449 intel_init_emon(dev_priv);
0a073b84 8450 }
aed242ff 8451
562d9bae
SAK
8452 WARN_ON(rps->max_freq < rps->min_freq);
8453 WARN_ON(rps->idle_freq > rps->max_freq);
aed242ff 8454
562d9bae
SAK
8455 WARN_ON(rps->efficient_freq < rps->min_freq);
8456 WARN_ON(rps->efficient_freq > rps->max_freq);
37d933fc
SAK
8457
8458 rps->enabled = true;
fc77426a
SAK
8459}
8460
8461void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8462{
fc77426a
SAK
8463 /* Powersaving is controlled by the host when inside a VM */
8464 if (intel_vgpu_active(dev_priv))
8465 return;
8466
8467 mutex_lock(&dev_priv->pcu_lock);
8468
fb6db0f5
CW
8469 if (HAS_RC6(dev_priv))
8470 intel_enable_rc6(dev_priv);
fc77426a
SAK
8471 intel_enable_rps(dev_priv);
8472 if (HAS_LLC(dev_priv))
8473 intel_enable_llc_pstate(dev_priv);
aed242ff 8474
9f817501 8475 mutex_unlock(&dev_priv->pcu_lock);
b7137e0c 8476}
3cc134e3 8477
46f16e63 8478static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
3107bd48 8479{
3107bd48
DV
8480 /*
8481 * On Ibex Peak and Cougar Point, we need to disable clock
8482 * gating for the panel power sequencer or it will fail to
8483 * start up when no ports are active.
8484 */
8485 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8486}
8487
46f16e63 8488static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
0e088b8f 8489{
b12ce1d8 8490 enum pipe pipe;
0e088b8f 8491
055e393f 8492 for_each_pipe(dev_priv, pipe) {
0e088b8f
VS
8493 I915_WRITE(DSPCNTR(pipe),
8494 I915_READ(DSPCNTR(pipe)) |
8495 DISPPLANE_TRICKLE_FEED_DISABLE);
b12ce1d8
VS
8496
8497 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8498 POSTING_READ(DSPSURF(pipe));
0e088b8f
VS
8499 }
8500}
8501
91200c09 8502static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 8503{
231e54f6 8504 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6f1d69b0 8505
f1e8fa56
DL
8506 /*
8507 * Required for FBC
8508 * WaFbcDisableDpfcClockGating:ilk
8509 */
4d47e4f5
DL
8510 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8511 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8512 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6f1d69b0
ED
8513
8514 I915_WRITE(PCH_3DCGDIS0,
8515 MARIUNIT_CLOCK_GATE_DISABLE |
8516 SVSMUNIT_CLOCK_GATE_DISABLE);
8517 I915_WRITE(PCH_3DCGDIS1,
8518 VFMUNIT_CLOCK_GATE_DISABLE);
8519
6f1d69b0
ED
8520 /*
8521 * According to the spec the following bits should be set in
8522 * order to enable memory self-refresh
8523 * The bit 22/21 of 0x42004
8524 * The bit 5 of 0x42020
8525 * The bit 15 of 0x45000
8526 */
8527 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8528 (I915_READ(ILK_DISPLAY_CHICKEN2) |
8529 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
4d47e4f5 8530 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6f1d69b0
ED
8531 I915_WRITE(DISP_ARB_CTL,
8532 (I915_READ(DISP_ARB_CTL) |
8533 DISP_FBC_WM_DIS));
017636cc 8534
6f1d69b0
ED
8535 /*
8536 * Based on the document from hardware guys the following bits
8537 * should be set unconditionally in order to enable FBC.
8538 * The bit 22 of 0x42000
8539 * The bit 22 of 0x42004
8540 * The bit 7,8,9 of 0x42020.
8541 */
50a0bc90 8542 if (IS_IRONLAKE_M(dev_priv)) {
4bb35334 8543 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6f1d69b0
ED
8544 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8545 I915_READ(ILK_DISPLAY_CHICKEN1) |
8546 ILK_FBCQ_DIS);
8547 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8548 I915_READ(ILK_DISPLAY_CHICKEN2) |
8549 ILK_DPARB_GATE);
6f1d69b0
ED
8550 }
8551
4d47e4f5
DL
8552 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8553
6f1d69b0
ED
8554 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8555 I915_READ(ILK_DISPLAY_CHICKEN2) |
8556 ILK_ELPIN_409_SELECT);
8557 I915_WRITE(_3D_CHICKEN2,
8558 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8559 _3D_CHICKEN2_WM_READ_PIPELINED);
4358a374 8560
ecdb4eb7 8561 /* WaDisableRenderCachePipelinedFlush:ilk */
4358a374
DV
8562 I915_WRITE(CACHE_MODE_0,
8563 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
3107bd48 8564
4e04632e
AG
8565 /* WaDisable_RenderCache_OperationalFlush:ilk */
8566 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8567
46f16e63 8568 g4x_disable_trickle_feed(dev_priv);
bdad2b2f 8569
46f16e63 8570 ibx_init_clock_gating(dev_priv);
3107bd48
DV
8571}
8572
46f16e63 8573static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
3107bd48 8574{
3107bd48 8575 int pipe;
3f704fa2 8576 uint32_t val;
3107bd48
DV
8577
8578 /*
8579 * On Ibex Peak and Cougar Point, we need to disable clock
8580 * gating for the panel power sequencer or it will fail to
8581 * start up when no ports are active.
8582 */
cd664078
JB
8583 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8584 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8585 PCH_CPUNIT_CLOCK_GATE_DISABLE);
3107bd48
DV
8586 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8587 DPLS_EDP_PPS_FIX_DIS);
335c07b7
TI
8588 /* The below fixes the weird display corruption, a few pixels shifted
8589 * downward, on (only) LVDS of some HP laptops with IVY.
8590 */
055e393f 8591 for_each_pipe(dev_priv, pipe) {
dc4bd2d1
PZ
8592 val = I915_READ(TRANS_CHICKEN2(pipe));
8593 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8594 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
41aa3448 8595 if (dev_priv->vbt.fdi_rx_polarity_inverted)
3f704fa2 8596 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
dc4bd2d1
PZ
8597 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8598 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8599 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
3f704fa2
PZ
8600 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8601 }
3107bd48 8602 /* WADP0ClockGatingDisable */
055e393f 8603 for_each_pipe(dev_priv, pipe) {
3107bd48
DV
8604 I915_WRITE(TRANS_CHICKEN1(pipe),
8605 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8606 }
6f1d69b0
ED
8607}
8608
46f16e63 8609static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
1d7aaa0c 8610{
1d7aaa0c
DV
8611 uint32_t tmp;
8612
8613 tmp = I915_READ(MCH_SSKPD);
df662a28
DV
8614 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8615 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
8616 tmp);
1d7aaa0c
DV
8617}
8618
46f16e63 8619static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 8620{
231e54f6 8621 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6f1d69b0 8622
231e54f6 8623 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6f1d69b0
ED
8624
8625 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8626 I915_READ(ILK_DISPLAY_CHICKEN2) |
8627 ILK_ELPIN_409_SELECT);
8628
ecdb4eb7 8629 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
4283908e
DV
8630 I915_WRITE(_3D_CHICKEN,
8631 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8632
4e04632e
AG
8633 /* WaDisable_RenderCache_OperationalFlush:snb */
8634 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8635
8d85d272
VS
8636 /*
8637 * BSpec recoomends 8x4 when MSAA is used,
8638 * however in practice 16x4 seems fastest.
c5c98a58
VS
8639 *
8640 * Note that PS/WM thread counts depend on the WIZ hashing
8641 * disable bit, which we don't touch here, but it's good
8642 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8d85d272
VS
8643 */
8644 I915_WRITE(GEN6_GT_MODE,
98533251 8645 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8d85d272 8646
6f1d69b0 8647 I915_WRITE(CACHE_MODE_0,
50743298 8648 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6f1d69b0
ED
8649
8650 I915_WRITE(GEN6_UCGCTL1,
8651 I915_READ(GEN6_UCGCTL1) |
8652 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8653 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8654
8655 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8656 * gating disable must be set. Failure to set it results in
8657 * flickering pixels due to Z write ordering failures after
8658 * some amount of runtime in the Mesa "fire" demo, and Unigine
8659 * Sanctuary and Tropics, and apparently anything else with
8660 * alpha test or pixel discard.
8661 *
8662 * According to the spec, bit 11 (RCCUNIT) must also be set,
8663 * but we didn't debug actual testcases to find it out.
0f846f81 8664 *
ef59318c
VS
8665 * WaDisableRCCUnitClockGating:snb
8666 * WaDisableRCPBUnitClockGating:snb
6f1d69b0
ED
8667 */
8668 I915_WRITE(GEN6_UCGCTL2,
8669 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8670 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8671
5eb146dd 8672 /* WaStripsFansDisableFastClipPerformanceFix:snb */
743b57d8
VS
8673 I915_WRITE(_3D_CHICKEN3,
8674 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6f1d69b0 8675
e927ecde
VS
8676 /*
8677 * Bspec says:
8678 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8679 * 3DSTATE_SF number of SF output attributes is more than 16."
8680 */
8681 I915_WRITE(_3D_CHICKEN3,
8682 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8683
6f1d69b0
ED
8684 /*
8685 * According to the spec the following bits should be
8686 * set in order to enable memory self-refresh and fbc:
8687 * The bit21 and bit22 of 0x42000
8688 * The bit21 and bit22 of 0x42004
8689 * The bit5 and bit7 of 0x42020
8690 * The bit14 of 0x70180
8691 * The bit14 of 0x71180
4bb35334
DL
8692 *
8693 * WaFbcAsynchFlipDisableFbcQueue:snb
6f1d69b0
ED
8694 */
8695 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8696 I915_READ(ILK_DISPLAY_CHICKEN1) |
8697 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8698 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8699 I915_READ(ILK_DISPLAY_CHICKEN2) |
8700 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
231e54f6
DL
8701 I915_WRITE(ILK_DSPCLK_GATE_D,
8702 I915_READ(ILK_DSPCLK_GATE_D) |
8703 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
8704 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6f1d69b0 8705
46f16e63 8706 g4x_disable_trickle_feed(dev_priv);
f8f2ac9a 8707
46f16e63 8708 cpt_init_clock_gating(dev_priv);
1d7aaa0c 8709
46f16e63 8710 gen6_check_mch_setup(dev_priv);
6f1d69b0
ED
8711}
8712
8713static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8714{
8715 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8716
3aad9059 8717 /*
46680e0a 8718 * WaVSThreadDispatchOverride:ivb,vlv
3aad9059
VS
8719 *
8720 * This actually overrides the dispatch
8721 * mode for all thread types.
8722 */
6f1d69b0
ED
8723 reg &= ~GEN7_FF_SCHED_MASK;
8724 reg |= GEN7_FF_TS_SCHED_HW;
8725 reg |= GEN7_FF_VS_SCHED_HW;
8726 reg |= GEN7_FF_DS_SCHED_HW;
8727
8728 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8729}
8730
46f16e63 8731static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
17a303ec 8732{
17a303ec
PZ
8733 /*
8734 * TODO: this bit should only be enabled when really needed, then
8735 * disabled when not needed anymore in order to save power.
8736 */
4f8036a2 8737 if (HAS_PCH_LPT_LP(dev_priv))
17a303ec
PZ
8738 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8739 I915_READ(SOUTH_DSPCLK_GATE_D) |
8740 PCH_LP_PARTITION_LEVEL_DISABLE);
0a790cdb
PZ
8741
8742 /* WADPOClockGatingDisable:hsw */
36c0d0cf
VS
8743 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8744 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
0a790cdb 8745 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
17a303ec
PZ
8746}
8747
712bf364 8748static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
7d708ee4 8749{
4f8036a2 8750 if (HAS_PCH_LPT_LP(dev_priv)) {
7d708ee4
ID
8751 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8752
8753 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8754 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8755 }
8756}
8757
450174fe
ID
8758static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8759 int general_prio_credits,
8760 int high_prio_credits)
8761{
8762 u32 misccpctl;
930a784d 8763 u32 val;
450174fe
ID
8764
8765 /* WaTempDisableDOPClkGating:bdw */
8766 misccpctl = I915_READ(GEN7_MISCCPCTL);
8767 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8768
930a784d
OM
8769 val = I915_READ(GEN8_L3SQCREG1);
8770 val &= ~L3_PRIO_CREDITS_MASK;
8771 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8772 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8773 I915_WRITE(GEN8_L3SQCREG1, val);
450174fe
ID
8774
8775 /*
8776 * Wait at least 100 clocks before re-enabling clock gating.
8777 * See the definition of L3SQCREG1 in BSpec.
8778 */
8779 POSTING_READ(GEN8_L3SQCREG1);
8780 udelay(1);
8781 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8782}
8783
d65dc3e4
OM
8784static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8785{
8786 /* This is not an Wa. Enable to reduce Sampler power */
8787 I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8788 I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
622b3f68
RS
8789
8790 /* WaEnable32PlaneMode:icl */
8791 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
8792 _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
d65dc3e4
OM
8793}
8794
0a46ddd5
RV
8795static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8796{
8797 if (!HAS_PCH_CNP(dev_priv))
8798 return;
8799
470e7c61 8800 /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
4cc6feb7
RV
8801 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8802 CNP_PWM_CGE_GATING_DISABLE);
0a46ddd5
RV
8803}
8804
91200c09 8805static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
90007bca 8806{
8f067837 8807 u32 val;
0a46ddd5
RV
8808 cnp_init_clock_gating(dev_priv);
8809
1a25db65
RV
8810 /* This is not an Wa. Enable for better image quality */
8811 I915_WRITE(_3D_CHICKEN3,
8812 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8813
90007bca
RV
8814 /* WaEnableChickenDCPR:cnl */
8815 I915_WRITE(GEN8_CHICKEN_DCPR_1,
8816 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8817
8818 /* WaFbcWakeMemOn:cnl */
8819 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8820 DISP_FBC_MEMORY_WAKE);
8821
34991bd4
CW
8822 val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8823 /* ReadHitWriteOnlyDisable:cnl */
8824 val |= RCCUNIT_CLKGATE_DIS;
90007bca
RV
8825 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8826 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
34991bd4
CW
8827 val |= SARBUNIT_CLKGATE_DIS;
8828 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
01ab0f92 8829
a4713c5a
RV
8830 /* Wa_2201832410:cnl */
8831 val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8832 val |= GWUNIT_CLKGATE_DIS;
8833 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8834
01ab0f92 8835 /* WaDisableVFclkgate:cnl */
14941b6e 8836 /* WaVFUnitClockGatingDisable:cnl */
01ab0f92
RA
8837 val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8838 val |= VFUNIT_CLKGATE_DIS;
8839 I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
90007bca
RV
8840}
8841
0a46ddd5
RV
8842static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8843{
8844 cnp_init_clock_gating(dev_priv);
8845 gen9_init_clock_gating(dev_priv);
8846
8847 /* WaFbcNukeOnHostModify:cfl */
8848 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8849 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8850}
8851
91200c09 8852static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
9498dba7 8853{
46f16e63 8854 gen9_init_clock_gating(dev_priv);
9498dba7
MK
8855
8856 /* WaDisableSDEUnitClockGating:kbl */
8857 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8858 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8859 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8aeb7f62
MK
8860
8861 /* WaDisableGamClockGating:kbl */
8862 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8863 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8864 GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
031cd8c8 8865
0a46ddd5 8866 /* WaFbcNukeOnHostModify:kbl */
031cd8c8
MK
8867 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8868 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9498dba7
MK
8869}
8870
91200c09 8871static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
dc00b6a0 8872{
46f16e63 8873 gen9_init_clock_gating(dev_priv);
44fff99f
MK
8874
8875 /* WAC6entrylatency:skl */
8876 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8877 FBC_LLC_FULLY_OPEN);
031cd8c8
MK
8878
8879 /* WaFbcNukeOnHostModify:skl */
8880 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8881 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
dc00b6a0
DV
8882}
8883
91200c09 8884static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
1020a5c2 8885{
8cb09836
MA
8886 /* The GTT cache must be disabled if the system is using 2M pages. */
8887 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8888 I915_GTT_PAGE_SIZE_2M);
07d27e20 8889 enum pipe pipe;
1020a5c2 8890
ab57fff1 8891 /* WaSwitchSolVfFArbitrationPriority:bdw */
50ed5fbd 8892 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
fe4ab3ce 8893
ab57fff1 8894 /* WaPsrDPAMaskVBlankInSRD:bdw */
fe4ab3ce
BW
8895 I915_WRITE(CHICKEN_PAR1_1,
8896 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8897
ab57fff1 8898 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
055e393f 8899 for_each_pipe(dev_priv, pipe) {
07d27e20 8900 I915_WRITE(CHICKEN_PIPESL_1(pipe),
c7c65622 8901 I915_READ(CHICKEN_PIPESL_1(pipe)) |
8f670bb1 8902 BDW_DPRS_MASK_VBLANK_SRD);
fe4ab3ce 8903 }
63801f21 8904
ab57fff1
BW
8905 /* WaVSRefCountFullforceMissDisable:bdw */
8906 /* WaDSRefCountFullforceMissDisable:bdw */
8907 I915_WRITE(GEN7_FF_THREAD_MODE,
8908 I915_READ(GEN7_FF_THREAD_MODE) &
8909 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
36075a4c 8910
295e8bb7
VS
8911 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8912 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
4f1ca9e9
VS
8913
8914 /* WaDisableSDEUnitClockGating:bdw */
8915 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8916 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
5d708680 8917
450174fe
ID
8918 /* WaProgramL3SqcReg1Default:bdw */
8919 gen8_set_l3sqc_credits(dev_priv, 30, 2);
4d487cff 8920
8cb09836
MA
8921 /* WaGttCachingOffByDefault:bdw */
8922 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
6d50b065 8923
17e0adf0
MK
8924 /* WaKVMNotificationOnConfigChange:bdw */
8925 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8926 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8927
46f16e63 8928 lpt_init_clock_gating(dev_priv);
9cc19733
RB
8929
8930 /* WaDisableDopClockGating:bdw
8931 *
8932 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8933 * clock gating.
8934 */
8935 I915_WRITE(GEN6_UCGCTL1,
8936 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
1020a5c2
BW
8937}
8938
91200c09 8939static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
cad2a2d7 8940{
f3fc4884
FJ
8941 /* L3 caching of data atomics doesn't work -- disable it. */
8942 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
8943 I915_WRITE(HSW_ROW_CHICKEN3,
8944 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
8945
ecdb4eb7 8946 /* This is required by WaCatErrorRejectionIssue:hsw */
cad2a2d7
ED
8947 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8948 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8949 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8950
e36ea7ff
VS
8951 /* WaVSRefCountFullforceMissDisable:hsw */
8952 I915_WRITE(GEN7_FF_THREAD_MODE,
8953 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
cad2a2d7 8954
4e04632e
AG
8955 /* WaDisable_RenderCache_OperationalFlush:hsw */
8956 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8957
fe27c606
CW
8958 /* enable HiZ Raw Stall Optimization */
8959 I915_WRITE(CACHE_MODE_0_GEN7,
8960 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8961
ecdb4eb7 8962 /* WaDisable4x2SubspanOptimization:hsw */
cad2a2d7
ED
8963 I915_WRITE(CACHE_MODE_1,
8964 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
1544d9d5 8965
a12c4967
VS
8966 /*
8967 * BSpec recommends 8x4 when MSAA is used,
8968 * however in practice 16x4 seems fastest.
c5c98a58
VS
8969 *
8970 * Note that PS/WM thread counts depend on the WIZ hashing
8971 * disable bit, which we don't touch here, but it's good
8972 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
a12c4967
VS
8973 */
8974 I915_WRITE(GEN7_GT_MODE,
98533251 8975 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
a12c4967 8976
94411593
KG
8977 /* WaSampleCChickenBitEnable:hsw */
8978 I915_WRITE(HALF_SLICE_CHICKEN3,
8979 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
8980
ecdb4eb7 8981 /* WaSwitchSolVfFArbitrationPriority:hsw */
e3dff585
BW
8982 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8983
46f16e63 8984 lpt_init_clock_gating(dev_priv);
cad2a2d7
ED
8985}
8986
91200c09 8987static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 8988{
20848223 8989 uint32_t snpcr;
6f1d69b0 8990
231e54f6 8991 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6f1d69b0 8992
ecdb4eb7 8993 /* WaDisableEarlyCull:ivb */
87f8020e
JB
8994 I915_WRITE(_3D_CHICKEN3,
8995 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8996
ecdb4eb7 8997 /* WaDisableBackToBackFlipFix:ivb */
6f1d69b0
ED
8998 I915_WRITE(IVB_CHICKEN3,
8999 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9000 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9001
ecdb4eb7 9002 /* WaDisablePSDDualDispatchEnable:ivb */
50a0bc90 9003 if (IS_IVB_GT1(dev_priv))
12f3382b
JB
9004 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9005 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
12f3382b 9006
4e04632e
AG
9007 /* WaDisable_RenderCache_OperationalFlush:ivb */
9008 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9009
ecdb4eb7 9010 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6f1d69b0
ED
9011 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9012 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9013
ecdb4eb7 9014 /* WaApplyL3ControlAndL3ChickenMode:ivb */
6f1d69b0
ED
9015 I915_WRITE(GEN7_L3CNTLREG1,
9016 GEN7_WA_FOR_GEN7_L3_CONTROL);
9017 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
8ab43976 9018 GEN7_WA_L3_CHICKEN_MODE);
50a0bc90 9019 if (IS_IVB_GT1(dev_priv))
8ab43976
JB
9020 I915_WRITE(GEN7_ROW_CHICKEN2,
9021 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
412236c2
VS
9022 else {
9023 /* must write both registers */
9024 I915_WRITE(GEN7_ROW_CHICKEN2,
9025 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8ab43976
JB
9026 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9027 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
412236c2 9028 }
6f1d69b0 9029
ecdb4eb7 9030 /* WaForceL3Serialization:ivb */
61939d97
JB
9031 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9032 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9033
1b80a19a 9034 /*
0f846f81 9035 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
ecdb4eb7 9036 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
0f846f81
JB
9037 */
9038 I915_WRITE(GEN6_UCGCTL2,
28acf3b2 9039 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
0f846f81 9040
ecdb4eb7 9041 /* This is required by WaCatErrorRejectionIssue:ivb */
6f1d69b0
ED
9042 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9043 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9044 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9045
46f16e63 9046 g4x_disable_trickle_feed(dev_priv);
6f1d69b0
ED
9047
9048 gen7_setup_fixed_func_scheduler(dev_priv);
97e1930f 9049
22721343
CW
9050 if (0) { /* causes HiZ corruption on ivb:gt1 */
9051 /* enable HiZ Raw Stall Optimization */
9052 I915_WRITE(CACHE_MODE_0_GEN7,
9053 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9054 }
116f2b6d 9055
ecdb4eb7 9056 /* WaDisable4x2SubspanOptimization:ivb */
97e1930f
DV
9057 I915_WRITE(CACHE_MODE_1,
9058 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
20848223 9059
a607c1a4
VS
9060 /*
9061 * BSpec recommends 8x4 when MSAA is used,
9062 * however in practice 16x4 seems fastest.
c5c98a58
VS
9063 *
9064 * Note that PS/WM thread counts depend on the WIZ hashing
9065 * disable bit, which we don't touch here, but it's good
9066 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
a607c1a4
VS
9067 */
9068 I915_WRITE(GEN7_GT_MODE,
98533251 9069 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
a607c1a4 9070
20848223
BW
9071 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9072 snpcr &= ~GEN6_MBC_SNPCR_MASK;
9073 snpcr |= GEN6_MBC_SNPCR_MED;
9074 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
3107bd48 9075
6e266956 9076 if (!HAS_PCH_NOP(dev_priv))
46f16e63 9077 cpt_init_clock_gating(dev_priv);
1d7aaa0c 9078
46f16e63 9079 gen6_check_mch_setup(dev_priv);
6f1d69b0
ED
9080}
9081
91200c09 9082static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9083{
ecdb4eb7 9084 /* WaDisableEarlyCull:vlv */
87f8020e
JB
9085 I915_WRITE(_3D_CHICKEN3,
9086 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9087
ecdb4eb7 9088 /* WaDisableBackToBackFlipFix:vlv */
6f1d69b0
ED
9089 I915_WRITE(IVB_CHICKEN3,
9090 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9091 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9092
fad7d36e 9093 /* WaPsdDispatchEnable:vlv */
ecdb4eb7 9094 /* WaDisablePSDDualDispatchEnable:vlv */
12f3382b 9095 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
d3bc0303
JB
9096 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9097 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
12f3382b 9098
4e04632e
AG
9099 /* WaDisable_RenderCache_OperationalFlush:vlv */
9100 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9101
ecdb4eb7 9102 /* WaForceL3Serialization:vlv */
61939d97
JB
9103 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9104 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9105
ecdb4eb7 9106 /* WaDisableDopClockGating:vlv */
8ab43976
JB
9107 I915_WRITE(GEN7_ROW_CHICKEN2,
9108 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9109
ecdb4eb7 9110 /* This is required by WaCatErrorRejectionIssue:vlv */
6f1d69b0
ED
9111 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9112 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9113 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9114
46680e0a
VS
9115 gen7_setup_fixed_func_scheduler(dev_priv);
9116
3c0edaeb 9117 /*
0f846f81 9118 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
ecdb4eb7 9119 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
0f846f81
JB
9120 */
9121 I915_WRITE(GEN6_UCGCTL2,
3c0edaeb 9122 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
0f846f81 9123
c98f5062
AG
9124 /* WaDisableL3Bank2xClockGate:vlv
9125 * Disabling L3 clock gating- MMIO 940c[25] = 1
9126 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
9127 I915_WRITE(GEN7_UCGCTL4,
9128 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
e3f33d46 9129
afd58e79
VS
9130 /*
9131 * BSpec says this must be set, even though
9132 * WaDisable4x2SubspanOptimization isn't listed for VLV.
9133 */
6b26c86d
DV
9134 I915_WRITE(CACHE_MODE_1,
9135 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7983117f 9136
da2518f9
VS
9137 /*
9138 * BSpec recommends 8x4 when MSAA is used,
9139 * however in practice 16x4 seems fastest.
9140 *
9141 * Note that PS/WM thread counts depend on the WIZ hashing
9142 * disable bit, which we don't touch here, but it's good
9143 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9144 */
9145 I915_WRITE(GEN7_GT_MODE,
9146 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9147
031994ee
VS
9148 /*
9149 * WaIncreaseL3CreditsForVLVB0:vlv
9150 * This is the hardware default actually.
9151 */
9152 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9153
2d809570 9154 /*
ecdb4eb7 9155 * WaDisableVLVClockGating_VBIIssue:vlv
2d809570
JB
9156 * Disable clock gating on th GCFG unit to prevent a delay
9157 * in the reporting of vblank events.
9158 */
7a0d1eed 9159 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6f1d69b0
ED
9160}
9161
91200c09 9162static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
a4565da8 9163{
232ce337
VS
9164 /* WaVSRefCountFullforceMissDisable:chv */
9165 /* WaDSRefCountFullforceMissDisable:chv */
9166 I915_WRITE(GEN7_FF_THREAD_MODE,
9167 I915_READ(GEN7_FF_THREAD_MODE) &
9168 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
acea6f95
VS
9169
9170 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9171 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9172 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
0846697c
VS
9173
9174 /* WaDisableCSUnitClockGating:chv */
9175 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9176 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
c631780f
VS
9177
9178 /* WaDisableSDEUnitClockGating:chv */
9179 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9180 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6d50b065 9181
450174fe
ID
9182 /*
9183 * WaProgramL3SqcReg1Default:chv
9184 * See gfxspecs/Related Documents/Performance Guide/
9185 * LSQC Setting Recommendations.
9186 */
9187 gen8_set_l3sqc_credits(dev_priv, 38, 2);
9188
6d50b065
VS
9189 /*
9190 * GTT cache may not work with big pages, so if those
9191 * are ever enabled GTT cache may need to be disabled.
9192 */
9193 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
a4565da8
VS
9194}
9195
46f16e63 9196static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9197{
6f1d69b0
ED
9198 uint32_t dspclk_gate;
9199
9200 I915_WRITE(RENCLK_GATE_D1, 0);
9201 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9202 GS_UNIT_CLOCK_GATE_DISABLE |
9203 CL_UNIT_CLOCK_GATE_DISABLE);
9204 I915_WRITE(RAMCLK_GATE_D, 0);
9205 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9206 OVRUNIT_CLOCK_GATE_DISABLE |
9207 OVCUNIT_CLOCK_GATE_DISABLE;
50a0bc90 9208 if (IS_GM45(dev_priv))
6f1d69b0
ED
9209 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9210 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
4358a374
DV
9211
9212 /* WaDisableRenderCachePipelinedFlush */
9213 I915_WRITE(CACHE_MODE_0,
9214 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
de1aa629 9215
4e04632e
AG
9216 /* WaDisable_RenderCache_OperationalFlush:g4x */
9217 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9218
46f16e63 9219 g4x_disable_trickle_feed(dev_priv);
6f1d69b0
ED
9220}
9221
91200c09 9222static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9223{
6f1d69b0
ED
9224 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9225 I915_WRITE(RENCLK_GATE_D2, 0);
9226 I915_WRITE(DSPCLK_GATE_D, 0);
9227 I915_WRITE(RAMCLK_GATE_D, 0);
9228 I915_WRITE16(DEUC, 0);
20f94967
VS
9229 I915_WRITE(MI_ARB_STATE,
9230 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4e04632e
AG
9231
9232 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9233 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6f1d69b0
ED
9234}
9235
91200c09 9236static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9237{
6f1d69b0
ED
9238 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9239 I965_RCC_CLOCK_GATE_DISABLE |
9240 I965_RCPB_CLOCK_GATE_DISABLE |
9241 I965_ISC_CLOCK_GATE_DISABLE |
9242 I965_FBC_CLOCK_GATE_DISABLE);
9243 I915_WRITE(RENCLK_GATE_D2, 0);
20f94967
VS
9244 I915_WRITE(MI_ARB_STATE,
9245 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4e04632e
AG
9246
9247 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9248 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6f1d69b0
ED
9249}
9250
46f16e63 9251static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9252{
6f1d69b0
ED
9253 u32 dstate = I915_READ(D_STATE);
9254
9255 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9256 DSTATE_DOT_CLOCK_GATING;
9257 I915_WRITE(D_STATE, dstate);
13a86b85 9258
9b1e14f4 9259 if (IS_PINEVIEW(dev_priv))
13a86b85 9260 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
974a3b0f
DV
9261
9262 /* IIR "flip pending" means done if this bit is set */
9263 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
12fabbcb
VS
9264
9265 /* interrupts should cause a wake up from C3 */
3299254f 9266 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
dbb42748
VS
9267
9268 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9269 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
1038392b
VS
9270
9271 I915_WRITE(MI_ARB_STATE,
9272 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9273}
9274
46f16e63 9275static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9276{
6f1d69b0 9277 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
54e472ae
VS
9278
9279 /* interrupts should cause a wake up from C3 */
9280 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9281 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
1038392b
VS
9282
9283 I915_WRITE(MEM_MODE,
9284 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9285}
9286
46f16e63 9287static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9288{
1038392b
VS
9289 I915_WRITE(MEM_MODE,
9290 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9291 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9292}
9293
46f16e63 9294void intel_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9295{
46f16e63 9296 dev_priv->display.init_clock_gating(dev_priv);
6f1d69b0
ED
9297}
9298
/* Suspend-time hardware hook; only acts on devices with an LPT PCH. */
void intel_suspend_hw(struct drm_i915_private *dev_priv)
{
	if (!HAS_PCH_LPT(dev_priv))
		return;

	lpt_suspend_hw(dev_priv);
}
9304
/*
 * Fallback clock gating hook: touches no registers and only emits a
 * debug message.
 */
static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
{
	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
}
9309
9310/**
9311 * intel_init_clock_gating_hooks - setup the clock gating hooks
9312 * @dev_priv: device private
9313 *
9314 * Setup the hooks that configure which clocks of a given platform can be
9315 * gated and also apply various GT and display specific workarounds for these
9316 * platforms. Note that some GT specific workarounds are applied separately
9317 * when GPU contexts or batchbuffers start their execution.
9318 */
9319void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9320{
cc38cae7 9321 if (IS_ICELAKE(dev_priv))
d65dc3e4 9322 dev_priv->display.init_clock_gating = icl_init_clock_gating;
cc38cae7 9323 else if (IS_CANNONLAKE(dev_priv))
91200c09 9324 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
0a46ddd5
RV
9325 else if (IS_COFFEELAKE(dev_priv))
9326 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
90007bca 9327 else if (IS_SKYLAKE(dev_priv))
91200c09 9328 dev_priv->display.init_clock_gating = skl_init_clock_gating;
0a46ddd5 9329 else if (IS_KABYLAKE(dev_priv))
91200c09 9330 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9fb5026f 9331 else if (IS_BROXTON(dev_priv))
bb400da9 9332 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9fb5026f
ACO
9333 else if (IS_GEMINILAKE(dev_priv))
9334 dev_priv->display.init_clock_gating = glk_init_clock_gating;
bb400da9 9335 else if (IS_BROADWELL(dev_priv))
91200c09 9336 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
bb400da9 9337 else if (IS_CHERRYVIEW(dev_priv))
91200c09 9338 dev_priv->display.init_clock_gating = chv_init_clock_gating;
bb400da9 9339 else if (IS_HASWELL(dev_priv))
91200c09 9340 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
bb400da9 9341 else if (IS_IVYBRIDGE(dev_priv))
91200c09 9342 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
bb400da9 9343 else if (IS_VALLEYVIEW(dev_priv))
91200c09 9344 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
cf819eff 9345 else if (IS_GEN(dev_priv, 6))
bb400da9 9346 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
cf819eff 9347 else if (IS_GEN(dev_priv, 5))
91200c09 9348 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
bb400da9
ID
9349 else if (IS_G4X(dev_priv))
9350 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
c0f86832 9351 else if (IS_I965GM(dev_priv))
91200c09 9352 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
c0f86832 9353 else if (IS_I965G(dev_priv))
91200c09 9354 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
cf819eff 9355 else if (IS_GEN(dev_priv, 3))
bb400da9
ID
9356 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9357 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9358 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
cf819eff 9359 else if (IS_GEN(dev_priv, 2))
bb400da9
ID
9360 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9361 else {
9362 MISSING_CASE(INTEL_DEVID(dev_priv));
9363 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9364 }
9365}
9366
1fa61106 9367/* Set up chip specific power management-related functions */
62d75df7 9368void intel_init_pm(struct drm_i915_private *dev_priv)
1fa61106 9369{
c921aba8 9370 /* For cxsr */
9b1e14f4 9371 if (IS_PINEVIEW(dev_priv))
148ac1f3 9372 i915_pineview_get_mem_freq(dev_priv);
cf819eff 9373 else if (IS_GEN(dev_priv, 5))
148ac1f3 9374 i915_ironlake_get_mem_freq(dev_priv);
c921aba8 9375
1fa61106 9376 /* For FIFO watermark updates */
62d75df7 9377 if (INTEL_GEN(dev_priv) >= 9) {
bb726519 9378 skl_setup_wm_latency(dev_priv);
e62929b3 9379 dev_priv->display.initial_watermarks = skl_initial_wm;
ccf010fb 9380 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
98d39494 9381 dev_priv->display.compute_global_watermarks = skl_compute_wm;
6e266956 9382 } else if (HAS_PCH_SPLIT(dev_priv)) {
bb726519 9383 ilk_setup_wm_latency(dev_priv);
53615a5e 9384
cf819eff 9385 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
bd602544 9386 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
cf819eff 9387 (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
bd602544 9388 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
86c8bbbe 9389 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
ed4a6a7c
MR
9390 dev_priv->display.compute_intermediate_wm =
9391 ilk_compute_intermediate_wm;
9392 dev_priv->display.initial_watermarks =
9393 ilk_initial_watermarks;
9394 dev_priv->display.optimize_watermarks =
9395 ilk_optimize_watermarks;
bd602544
VS
9396 } else {
9397 DRM_DEBUG_KMS("Failed to read display plane latency. "
9398 "Disable CxSR\n");
9399 }
6b6b3eef 9400 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
bb726519 9401 vlv_setup_wm_latency(dev_priv);
ff32c54e 9402 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
4841da51 9403 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
ff32c54e 9404 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
4841da51 9405 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
ff32c54e 9406 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
04548cba
VS
9407 } else if (IS_G4X(dev_priv)) {
9408 g4x_setup_wm_latency(dev_priv);
9409 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9410 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9411 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9412 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9b1e14f4 9413 } else if (IS_PINEVIEW(dev_priv)) {
50a0bc90 9414 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
1fa61106
ED
9415 dev_priv->is_ddr3,
9416 dev_priv->fsb_freq,
9417 dev_priv->mem_freq)) {
9418 DRM_INFO("failed to find known CxSR latency "
9419 "(found ddr%s fsb freq %d, mem freq %d), "
9420 "disabling CxSR\n",
9421 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9422 dev_priv->fsb_freq, dev_priv->mem_freq);
9423 /* Disable CxSR and never update its watermark again */
5209b1f4 9424 intel_set_memory_cxsr(dev_priv, false);
1fa61106
ED
9425 dev_priv->display.update_wm = NULL;
9426 } else
9427 dev_priv->display.update_wm = pineview_update_wm;
cf819eff 9428 } else if (IS_GEN(dev_priv, 4)) {
1fa61106 9429 dev_priv->display.update_wm = i965_update_wm;
cf819eff 9430 } else if (IS_GEN(dev_priv, 3)) {
1fa61106
ED
9431 dev_priv->display.update_wm = i9xx_update_wm;
9432 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
cf819eff 9433 } else if (IS_GEN(dev_priv, 2)) {
62d75df7 9434 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
feb56b93 9435 dev_priv->display.update_wm = i845_update_wm;
1fa61106 9436 dev_priv->display.get_fifo_size = i845_get_fifo_size;
feb56b93
DV
9437 } else {
9438 dev_priv->display.update_wm = i9xx_update_wm;
1fa61106 9439 dev_priv->display.get_fifo_size = i830_get_fifo_size;
feb56b93 9440 }
feb56b93
DV
9441 } else {
9442 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
1fa61106
ED
9443 }
9444}
9445
87660502
L
9446static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9447{
9448 uint32_t flags =
9449 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9450
9451 switch (flags) {
9452 case GEN6_PCODE_SUCCESS:
9453 return 0;
9454 case GEN6_PCODE_UNIMPLEMENTED_CMD:
5a9cfff4 9455 return -ENODEV;
87660502
L
9456 case GEN6_PCODE_ILLEGAL_CMD:
9457 return -ENXIO;
9458 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
7850d1c3 9459 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
87660502
L
9460 return -EOVERFLOW;
9461 case GEN6_PCODE_TIMEOUT:
9462 return -ETIMEDOUT;
9463 default:
f0d66153 9464 MISSING_CASE(flags);
87660502
L
9465 return 0;
9466 }
9467}
9468
9469static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9470{
9471 uint32_t flags =
9472 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9473
9474 switch (flags) {
9475 case GEN6_PCODE_SUCCESS:
9476 return 0;
9477 case GEN6_PCODE_ILLEGAL_CMD:
9478 return -ENXIO;
9479 case GEN7_PCODE_TIMEOUT:
9480 return -ETIMEDOUT;
9481 case GEN7_PCODE_ILLEGAL_DATA:
9482 return -EINVAL;
9483 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9484 return -EOVERFLOW;
9485 default:
9486 MISSING_CASE(flags);
9487 return 0;
9488 }
9489}
9490
151a49d0 9491int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
42c0526c 9492{
87660502
L
9493 int status;
9494
9f817501 9495 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
42c0526c 9496
3f5582dd
CW
9497 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9498 * use te fw I915_READ variants to reduce the amount of work
9499 * required when reading/writing.
9500 */
9501
9502 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5a9cfff4
CW
9503 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9504 mbox, __builtin_return_address(0));
42c0526c
BW
9505 return -EAGAIN;
9506 }
9507
3f5582dd
CW
9508 I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9509 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9510 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
42c0526c 9511
e09a3036
CW
9512 if (__intel_wait_for_register_fw(dev_priv,
9513 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9514 500, 0, NULL)) {
5a9cfff4
CW
9515 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9516 mbox, __builtin_return_address(0));
42c0526c
BW
9517 return -ETIMEDOUT;
9518 }
9519
3f5582dd
CW
9520 *val = I915_READ_FW(GEN6_PCODE_DATA);
9521 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
42c0526c 9522
87660502
L
9523 if (INTEL_GEN(dev_priv) > 6)
9524 status = gen7_check_mailbox_status(dev_priv);
9525 else
9526 status = gen6_check_mailbox_status(dev_priv);
9527
9528 if (status) {
5a9cfff4
CW
9529 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9530 mbox, __builtin_return_address(0), status);
87660502
L
9531 return status;
9532 }
9533
42c0526c
BW
9534 return 0;
9535}
9536
e76019a8 9537int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
006bb4cc
ID
9538 u32 mbox, u32 val,
9539 int fast_timeout_us, int slow_timeout_ms)
42c0526c 9540{
87660502
L
9541 int status;
9542
9f817501 9543 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
42c0526c 9544
3f5582dd
CW
9545 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9546 * use te fw I915_READ variants to reduce the amount of work
9547 * required when reading/writing.
9548 */
9549
9550 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5a9cfff4
CW
9551 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9552 val, mbox, __builtin_return_address(0));
42c0526c
BW
9553 return -EAGAIN;
9554 }
9555
3f5582dd 9556 I915_WRITE_FW(GEN6_PCODE_DATA, val);
8bf41b72 9557 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
3f5582dd 9558 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
42c0526c 9559
e09a3036
CW
9560 if (__intel_wait_for_register_fw(dev_priv,
9561 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
006bb4cc
ID
9562 fast_timeout_us, slow_timeout_ms,
9563 NULL)) {
5a9cfff4
CW
9564 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9565 val, mbox, __builtin_return_address(0));
42c0526c
BW
9566 return -ETIMEDOUT;
9567 }
9568
3f5582dd 9569 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
42c0526c 9570
87660502
L
9571 if (INTEL_GEN(dev_priv) > 6)
9572 status = gen7_check_mailbox_status(dev_priv);
9573 else
9574 status = gen6_check_mailbox_status(dev_priv);
9575
9576 if (status) {
5a9cfff4
CW
9577 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9578 val, mbox, __builtin_return_address(0), status);
87660502
L
9579 return status;
9580 }
9581
42c0526c
BW
9582 return 0;
9583}
a0e4e199 9584
a0b8a1fe
ID
9585static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9586 u32 request, u32 reply_mask, u32 reply,
9587 u32 *status)
9588{
9589 u32 val = request;
9590
9591 *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9592
9593 return *status || ((val & reply_mask) == reply);
9594}
9595
9596/**
9597 * skl_pcode_request - send PCODE request until acknowledgment
9598 * @dev_priv: device private
9599 * @mbox: PCODE mailbox ID the request is targeted for
9600 * @request: request ID
9601 * @reply_mask: mask used to check for request acknowledgment
9602 * @reply: value used to check for request acknowledgment
9603 * @timeout_base_ms: timeout for polling with preemption enabled
9604 *
9605 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
0129936d 9606 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
a0b8a1fe
ID
9607 * The request is acknowledged once the PCODE reply dword equals @reply after
9608 * applying @reply_mask. Polling is first attempted with preemption enabled
0129936d 9609 * for @timeout_base_ms and if this times out for another 50 ms with
a0b8a1fe
ID
9610 * preemption disabled.
9611 *
9612 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9613 * other error as reported by PCODE.
9614 */
9615int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9616 u32 reply_mask, u32 reply, int timeout_base_ms)
9617{
9618 u32 status;
9619 int ret;
9620
9f817501 9621 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
a0b8a1fe
ID
9622
9623#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9624 &status)
9625
9626 /*
9627 * Prime the PCODE by doing a request first. Normally it guarantees
9628 * that a subsequent request, at most @timeout_base_ms later, succeeds.
9629 * _wait_for() doesn't guarantee when its passed condition is evaluated
9630 * first, so send the first request explicitly.
9631 */
9632 if (COND) {
9633 ret = 0;
9634 goto out;
9635 }
a54b1873 9636 ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
a0b8a1fe
ID
9637 if (!ret)
9638 goto out;
9639
9640 /*
9641 * The above can time out if the number of requests was low (2 in the
9642 * worst case) _and_ PCODE was busy for some reason even after a
9643 * (queued) request and @timeout_base_ms delay. As a workaround retry
9644 * the poll with preemption disabled to maximize the number of
0129936d 9645 * requests. Increase the timeout from @timeout_base_ms to 50ms to
a0b8a1fe 9646 * account for interrupts that could reduce the number of these
0129936d
ID
9647 * requests, and for any quirks of the PCODE firmware that delays
9648 * the request completion.
a0b8a1fe
ID
9649 */
9650 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9651 WARN_ON_ONCE(timeout_base_ms > 3);
9652 preempt_disable();
0129936d 9653 ret = wait_for_atomic(COND, 50);
a0b8a1fe
ID
9654 preempt_enable();
9655
9656out:
9657 return ret ? ret : status;
9658#undef COND
9659}
9660
dd06f88c
VS
9661static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9662{
562d9bae
SAK
9663 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9664
c30fec65
VS
9665 /*
9666 * N = val - 0xb7
9667 * Slow = Fast = GPLL ref * N
9668 */
562d9bae 9669 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
855ba3be
JB
9670}
9671
b55dd647 9672static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
855ba3be 9673{
562d9bae
SAK
9674 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9675
9676 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
855ba3be
JB
9677}
9678
b55dd647 9679static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9680{
562d9bae
SAK
9681 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9682
c30fec65
VS
9683 /*
9684 * N = val / 2
9685 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9686 */
562d9bae 9687 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
22b1b2f8
D
9688}
9689
b55dd647 9690static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9691{
562d9bae
SAK
9692 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9693
1c14762d 9694 /* CHV needs even values */
562d9bae 9695 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
22b1b2f8
D
9696}
9697
616bc820 9698int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9699{
35ceabf3 9700 if (INTEL_GEN(dev_priv) >= 9)
500a3d2e
MK
9701 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9702 GEN9_FREQ_SCALER);
2d1fe073 9703 else if (IS_CHERRYVIEW(dev_priv))
616bc820 9704 return chv_gpu_freq(dev_priv, val);
2d1fe073 9705 else if (IS_VALLEYVIEW(dev_priv))
616bc820
VS
9706 return byt_gpu_freq(dev_priv, val);
9707 else
9708 return val * GT_FREQUENCY_MULTIPLIER;
22b1b2f8
D
9709}
9710
616bc820
VS
9711int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9712{
35ceabf3 9713 if (INTEL_GEN(dev_priv) >= 9)
500a3d2e
MK
9714 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9715 GT_FREQUENCY_MULTIPLIER);
2d1fe073 9716 else if (IS_CHERRYVIEW(dev_priv))
616bc820 9717 return chv_freq_opcode(dev_priv, val);
2d1fe073 9718 else if (IS_VALLEYVIEW(dev_priv))
616bc820
VS
9719 return byt_freq_opcode(dev_priv, val);
9720 else
500a3d2e 9721 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
616bc820 9722}
22b1b2f8 9723
192aa181 9724void intel_pm_setup(struct drm_i915_private *dev_priv)
907b28c5 9725{
9f817501 9726 mutex_init(&dev_priv->pcu_lock);
60548c55 9727 mutex_init(&dev_priv->gt_pm.rps.power.mutex);
f742a552 9728
562d9bae 9729 atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
5d584b2e 9730
ad1443f0
SAK
9731 dev_priv->runtime_pm.suspended = false;
9732 atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
907b28c5 9733}
135bafa5 9734
47c21d9a
MK
9735static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9736 const i915_reg_t reg)
9737{
facbecad 9738 u32 lower, upper, tmp;
71cc2b18 9739 int loop = 2;
47c21d9a 9740
817cc079
TU
9741 /*
9742 * The register accessed do not need forcewake. We borrow
47c21d9a
MK
9743 * uncore lock to prevent concurrent access to range reg.
9744 */
817cc079 9745 lockdep_assert_held(&dev_priv->uncore.lock);
47c21d9a 9746
817cc079
TU
9747 /*
9748 * vlv and chv residency counters are 40 bits in width.
47c21d9a
MK
9749 * With a control bit, we can choose between upper or lower
9750 * 32bit window into this counter.
facbecad
CW
9751 *
9752 * Although we always use the counter in high-range mode elsewhere,
9753 * userspace may attempt to read the value before rc6 is initialised,
9754 * before we have set the default VLV_COUNTER_CONTROL value. So always
9755 * set the high bit to be safe.
47c21d9a 9756 */
facbecad
CW
9757 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9758 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
47c21d9a
MK
9759 upper = I915_READ_FW(reg);
9760 do {
9761 tmp = upper;
9762
9763 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9764 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9765 lower = I915_READ_FW(reg);
9766
9767 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9768 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9769 upper = I915_READ_FW(reg);
71cc2b18 9770 } while (upper != tmp && --loop);
47c21d9a 9771
817cc079
TU
9772 /*
9773 * Everywhere else we always use VLV_COUNTER_CONTROL with the
facbecad
CW
9774 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9775 * now.
9776 */
9777
47c21d9a
MK
9778 return lower | (u64)upper << 8;
9779}
9780
36cc8b96 9781u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
c5a0ad11 9782 const i915_reg_t reg)
135bafa5 9783{
817cc079
TU
9784 u64 time_hw, prev_hw, overflow_hw;
9785 unsigned int fw_domains;
9786 unsigned long flags;
9787 unsigned int i;
36cc8b96 9788 u32 mul, div;
135bafa5 9789
fb6db0f5 9790 if (!HAS_RC6(dev_priv))
135bafa5
MK
9791 return 0;
9792
817cc079
TU
9793 /*
9794 * Store previous hw counter values for counter wrap-around handling.
9795 *
9796 * There are only four interesting registers and they live next to each
9797 * other so we can use the relative address, compared to the smallest
9798 * one as the index into driver storage.
9799 */
9800 i = (i915_mmio_reg_offset(reg) -
9801 i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9802 if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9803 return 0;
9804
9805 fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9806
9807 spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9808 intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9809
135bafa5
MK
9810 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9811 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
36cc8b96 9812 mul = 1000000;
135bafa5 9813 div = dev_priv->czclk_freq;
817cc079 9814 overflow_hw = BIT_ULL(40);
47c21d9a 9815 time_hw = vlv_residency_raw(dev_priv, reg);
47c21d9a 9816 } else {
36cc8b96
TU
9817 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9818 if (IS_GEN9_LP(dev_priv)) {
9819 mul = 10000;
9820 div = 12;
9821 } else {
9822 mul = 1280;
9823 div = 1;
9824 }
47c21d9a 9825
817cc079
TU
9826 overflow_hw = BIT_ULL(32);
9827 time_hw = I915_READ_FW(reg);
47c21d9a 9828 }
135bafa5 9829
817cc079
TU
9830 /*
9831 * Counter wrap handling.
9832 *
9833 * But relying on a sufficient frequency of queries otherwise counters
9834 * can still wrap.
9835 */
9836 prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9837 dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9838
9839 /* RC6 delta from last sample. */
9840 if (time_hw >= prev_hw)
9841 time_hw -= prev_hw;
9842 else
9843 time_hw += overflow_hw - prev_hw;
9844
9845 /* Add delta to RC6 extended raw driver copy. */
9846 time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9847 dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9848
9849 intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9850 spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9851
9852 return mul_u64_u32_div(time_hw, mul, div);
135bafa5 9853}
c84b2705
TU
9854
9855u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9856{
9857 u32 cagf;
9858
9859 if (INTEL_GEN(dev_priv) >= 9)
9860 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9861 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9862 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9863 else
9864 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9865
9866 return cagf;
9867}