drivers/gpu/drm/i915/intel_pm.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27
28 #include <linux/cpufreq.h>
29 #include <linux/module.h>
30 #include <linux/pm_runtime.h>
31
32 #include <drm/drm_atomic_helper.h>
33 #include <drm/drm_fourcc.h>
34 #include <drm/drm_plane_helper.h>
35
36 #include "i915_drv.h"
37 #include "intel_drv.h"
38 #include "../../../platform/x86/intel_ips.h"
39
40 /**
41  * DOC: RC6
42  *
43  * RC6 is a special power stage which allows the GPU to enter a very
44  * low-voltage mode when idle, drawing as little as 0V while in this stage. This
45  * stage is entered automatically when the GPU is idle, provided RC6 support is
46  * enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
47  *
48  * There are different RC6 modes available on Intel GPUs, which differ from
49  * each other in the latency required to enter and leave RC6, and in the
50  * voltage consumed by the GPU in the different states.
51  *
52  * The combination of the following flags defines which states the GPU is allowed
53  * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
54  * RC6pp is the deepest RC6. Their support by hardware varies according to the
55  * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
56  * which brings the most power savings; deeper states save more power, but
57  * require higher latency to switch to and wake up.
58  */
59
60 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
61 {
62         if (HAS_LLC(dev_priv)) {
63                 /*
64                  * WaCompressedResourceDisplayNewHashMode:skl,kbl
65                  * Display WA #0390: skl,kbl
66                  *
67                  * Must match Sampler, Pixel Back End, and Media. See
68                  * WaCompressedResourceSamplerPbeMediaNewHashMode.
69                  */
70                 I915_WRITE(CHICKEN_PAR1_1,
71                            I915_READ(CHICKEN_PAR1_1) |
72                            SKL_DE_COMPRESSED_HASH_MODE);
73         }
74
75         /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
76         I915_WRITE(CHICKEN_PAR1_1,
77                    I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
78
79         /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
80         I915_WRITE(GEN8_CHICKEN_DCPR_1,
81                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
82
83         /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
84         /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
85         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
86                    DISP_FBC_WM_DIS |
87                    DISP_FBC_MEMORY_WAKE);
88
89         /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
90         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
91                    ILK_DPFC_DISABLE_DUMMY0);
92
93         if (IS_SKYLAKE(dev_priv)) {
94                 /* WaDisableDopClockGating */
95                 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
96                            & ~GEN7_DOP_CLOCK_GATE_ENABLE);
97         }
98 }
99
100 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
101 {
102         gen9_init_clock_gating(dev_priv);
103
104         /* WaDisableSDEUnitClockGating:bxt */
105         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
106                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
107
108         /*
109          * FIXME:
110          * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
111          */
112         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
113                    GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
114
115         /*
116          * Wa: Backlight PWM may stop in the asserted state, causing backlight
117          * to stay fully on.
118          */
119         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
120                    PWM1_GATING_DIS | PWM2_GATING_DIS);
121 }
122
123 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
124 {
125         gen9_init_clock_gating(dev_priv);
126
127         /*
128          * WaDisablePWMClockGating:glk
129          * Backlight PWM may stop in the asserted state, causing backlight
130          * to stay fully on.
131          */
132         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
133                    PWM1_GATING_DIS | PWM2_GATING_DIS);
134
135         /* WaDDIIOTimeout:glk */
136         if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
137                 u32 val = I915_READ(CHICKEN_MISC_2);
138                 val &= ~(GLK_CL0_PWR_DOWN |
139                          GLK_CL1_PWR_DOWN |
140                          GLK_CL2_PWR_DOWN);
141                 I915_WRITE(CHICKEN_MISC_2, val);
142         }
143
144 }
145
146 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
147 {
148         u32 tmp;
149
150         tmp = I915_READ(CLKCFG);
151
152         switch (tmp & CLKCFG_FSB_MASK) {
153         case CLKCFG_FSB_533:
154                 dev_priv->fsb_freq = 533; /* 133*4 */
155                 break;
156         case CLKCFG_FSB_800:
157                 dev_priv->fsb_freq = 800; /* 200*4 */
158                 break;
159         case CLKCFG_FSB_667:
160                 dev_priv->fsb_freq =  667; /* 167*4 */
161                 break;
162         case CLKCFG_FSB_400:
163                 dev_priv->fsb_freq = 400; /* 100*4 */
164                 break;
165         }
166
167         switch (tmp & CLKCFG_MEM_MASK) {
168         case CLKCFG_MEM_533:
169                 dev_priv->mem_freq = 533;
170                 break;
171         case CLKCFG_MEM_667:
172                 dev_priv->mem_freq = 667;
173                 break;
174         case CLKCFG_MEM_800:
175                 dev_priv->mem_freq = 800;
176                 break;
177         }
178
179         /* detect pineview DDR3 setting */
180         tmp = I915_READ(CSHRDDR3CTL);
181         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
182 }
183
184 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
185 {
186         u16 ddrpll, csipll;
187
188         ddrpll = I915_READ16(DDRMPLL1);
189         csipll = I915_READ16(CSIPLL0);
190
191         switch (ddrpll & 0xff) {
192         case 0xc:
193                 dev_priv->mem_freq = 800;
194                 break;
195         case 0x10:
196                 dev_priv->mem_freq = 1066;
197                 break;
198         case 0x14:
199                 dev_priv->mem_freq = 1333;
200                 break;
201         case 0x18:
202                 dev_priv->mem_freq = 1600;
203                 break;
204         default:
205                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
206                                  ddrpll & 0xff);
207                 dev_priv->mem_freq = 0;
208                 break;
209         }
210
211         dev_priv->ips.r_t = dev_priv->mem_freq;
212
213         switch (csipll & 0x3ff) {
214         case 0x00c:
215                 dev_priv->fsb_freq = 3200;
216                 break;
217         case 0x00e:
218                 dev_priv->fsb_freq = 3733;
219                 break;
220         case 0x010:
221                 dev_priv->fsb_freq = 4266;
222                 break;
223         case 0x012:
224                 dev_priv->fsb_freq = 4800;
225                 break;
226         case 0x014:
227                 dev_priv->fsb_freq = 5333;
228                 break;
229         case 0x016:
230                 dev_priv->fsb_freq = 5866;
231                 break;
232         case 0x018:
233                 dev_priv->fsb_freq = 6400;
234                 break;
235         default:
236                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
237                                  csipll & 0x3ff);
238                 dev_priv->fsb_freq = 0;
239                 break;
240         }
241
242         if (dev_priv->fsb_freq == 3200) {
243                 dev_priv->ips.c_m = 0;
244         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
245                 dev_priv->ips.c_m = 1;
246         } else {
247                 dev_priv->ips.c_m = 2;
248         }
249 }
250
251 static const struct cxsr_latency cxsr_latency_table[] = {
252         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
253         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
254         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
255         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
256         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
257
258         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
259         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
260         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
261         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
262         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
263
264         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
265         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
266         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
267         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
268         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
269
270         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
271         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
272         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
273         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
274         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
275
276         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
277         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
278         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
279         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
280         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
281
282         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
283         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
284         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
285         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
286         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
287 };
288
289 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
290                                                          bool is_ddr3,
291                                                          int fsb,
292                                                          int mem)
293 {
294         const struct cxsr_latency *latency;
295         int i;
296
297         if (fsb == 0 || mem == 0)
298                 return NULL;
299
300         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
301                 latency = &cxsr_latency_table[i];
302                 if (is_desktop == latency->is_desktop &&
303                     is_ddr3 == latency->is_ddr3 &&
304                     fsb == latency->fsb_freq && mem == latency->mem_freq)
305                         return latency;
306         }
307
308         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
309
310         return NULL;
311 }
312
313 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
314 {
315         u32 val;
316
317         mutex_lock(&dev_priv->pcu_lock);
318
319         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
320         if (enable)
321                 val &= ~FORCE_DDR_HIGH_FREQ;
322         else
323                 val |= FORCE_DDR_HIGH_FREQ;
324         val &= ~FORCE_DDR_LOW_FREQ;
325         val |= FORCE_DDR_FREQ_REQ_ACK;
326         vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
327
328         if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
329                       FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
330                 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
331
332         mutex_unlock(&dev_priv->pcu_lock);
333 }
334
335 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
336 {
337         u32 val;
338
339         mutex_lock(&dev_priv->pcu_lock);
340
341         val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
342         if (enable)
343                 val |= DSP_MAXFIFO_PM5_ENABLE;
344         else
345                 val &= ~DSP_MAXFIFO_PM5_ENABLE;
346         vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);
347
348         mutex_unlock(&dev_priv->pcu_lock);
349 }
350
351 #define FW_WM(value, plane) \
352         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
353
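/*
 * Illustrative expansion of the helper above (not actual driver code):
 * FW_WM(12, SR), for example, becomes ((12 << DSPFW_SR_SHIFT) & DSPFW_SR_MASK),
 * i.e. the watermark value is shifted into the SR field of the DSPFW
 * register and masked to that field's width.
 */
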
354 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
355 {
356         bool was_enabled;
357         u32 val;
358
359         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
360                 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
361                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
362                 POSTING_READ(FW_BLC_SELF_VLV);
363         } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
364                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
365                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
366                 POSTING_READ(FW_BLC_SELF);
367         } else if (IS_PINEVIEW(dev_priv)) {
368                 val = I915_READ(DSPFW3);
369                 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
370                 if (enable)
371                         val |= PINEVIEW_SELF_REFRESH_EN;
372                 else
373                         val &= ~PINEVIEW_SELF_REFRESH_EN;
374                 I915_WRITE(DSPFW3, val);
375                 POSTING_READ(DSPFW3);
376         } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
377                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
378                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
379                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
380                 I915_WRITE(FW_BLC_SELF, val);
381                 POSTING_READ(FW_BLC_SELF);
382         } else if (IS_I915GM(dev_priv)) {
383                 /*
384                  * FIXME can't find a bit like this for 915G, and yet
385                  * it does have the related watermark in
386                  * FW_BLC_SELF. What's going on?
387                  */
388                 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
389                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
390                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
391                 I915_WRITE(INSTPM, val);
392                 POSTING_READ(INSTPM);
393         } else {
394                 return false;
395         }
396
397         trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
398
399         DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
400                       enableddisabled(enable),
401                       enableddisabled(was_enabled));
402
403         return was_enabled;
404 }
405
406 /**
407  * intel_set_memory_cxsr - Configure CxSR state
408  * @dev_priv: i915 device
409  * @enable: Allow vs. disallow CxSR
410  *
411  * Allow or disallow the system to enter a special CxSR
412  * (C-state self refresh) state. What typically happens in CxSR mode
413  * is that several display FIFOs may get combined into a single larger
414  * FIFO for a particular plane (so called max FIFO mode) to allow the
415  * system to defer memory fetches longer, and the memory will enter
416  * self refresh.
417  *
418  * Note that enabling CxSR does not guarantee that the system enters
419  * this special mode, nor does it guarantee that the system stays
420  * in that mode once entered. So this just allows/disallows the system
421  * to autonomously utilize the CxSR mode. Other factors such as core
422  * C-states will affect when/if the system actually enters/exits the
423  * CxSR mode.
424  *
425  * Note that on VLV/CHV this actually only controls the max FIFO mode,
426  * and the system is free to enter/exit memory self refresh at any time
427  * even when the use of CxSR has been disallowed.
428  *
429  * While the system is actually in the CxSR/max FIFO mode, some plane
430  * control registers will not get latched on vblank. Thus in order to
431  * guarantee the system will respond to changes in the plane registers
432  * we must always disallow CxSR prior to making changes to those registers.
433  * Unfortunately the system will re-evaluate the CxSR conditions at
434  * frame start which happens after vblank start (which is when the plane
435  * registers would get latched), so we can't proceed with the plane update
436  * during the same frame where we disallowed CxSR.
437  *
438  * Certain platforms also have a deeper HPLL SR mode. Fortunately the
439  * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
440  * the hardware w.r.t. HPLL SR when writing to plane registers.
441  * Disallowing just CxSR is sufficient.
442  */
443 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
444 {
445         bool ret;
446
447         mutex_lock(&dev_priv->wm.wm_mutex);
448         ret = _intel_set_memory_cxsr(dev_priv, enable);
449         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
450                 dev_priv->wm.vlv.cxsr = enable;
451         else if (IS_G4X(dev_priv))
452                 dev_priv->wm.g4x.cxsr = enable;
453         mutex_unlock(&dev_priv->wm.wm_mutex);
454
455         return ret;
456 }
457
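/*
 * Illustrative call pattern only (a sketch, not actual driver code): per the
 * kernel-doc above, a caller that needs plane register writes to latch
 * reliably would disallow CxSR first and re-allow it only once the update
 * has taken effect, e.g.:
 *
 *	intel_set_memory_cxsr(dev_priv, false);
 *	... wait for the next frame start so the disallow takes effect ...
 *	... write the plane registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 *
 * The driver normally sequences this through its atomic pre/post plane
 * update paths rather than open-coding it like this.
 */
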
458 /*
459  * Latency for FIFO fetches is dependent on several factors:
460  *   - memory configuration (speed, channels)
461  *   - chipset
462  *   - current MCH state
463  * It can be fairly high in some situations, so here we assume a fairly
464  * pessimal value.  It's a tradeoff between extra memory fetches (if we
465  * set this value too high, the FIFO will fetch frequently to stay full)
466  * and power consumption (set it too low to save power and we might see
467  * FIFO underruns and display "flicker").
468  *
469  * A value of 5us seems to be a good balance; safe for very low end
470  * platforms but not overly aggressive on lower latency configs.
471  */
472 static const int pessimal_latency_ns = 5000;
473
474 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
475         ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
476
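/*
 * Worked example of the macro above (purely illustrative): for pipe A,
 * VLV_FIFO_START(dsparb, dsparb2, 0, 0) evaluates to
 * (dsparb & 0xff) | ((dsparb2 & 0x1) << 8), i.e. bits 7:0 of DSPARB plus
 * bit 0 of DSPARB2 as bit 8, giving a 9-bit FIFO start offset
 * (0-511 cachelines) for sprite 0.
 */
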
477 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
478 {
479         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
480         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
481         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
482         enum pipe pipe = crtc->pipe;
483         int sprite0_start, sprite1_start;
484
485         switch (pipe) {
486                 u32 dsparb, dsparb2, dsparb3;
487         case PIPE_A:
488                 dsparb = I915_READ(DSPARB);
489                 dsparb2 = I915_READ(DSPARB2);
490                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
491                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
492                 break;
493         case PIPE_B:
494                 dsparb = I915_READ(DSPARB);
495                 dsparb2 = I915_READ(DSPARB2);
496                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
497                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
498                 break;
499         case PIPE_C:
500                 dsparb2 = I915_READ(DSPARB2);
501                 dsparb3 = I915_READ(DSPARB3);
502                 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
503                 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
504                 break;
505         default:
506                 MISSING_CASE(pipe);
507                 return;
508         }
509
510         fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
511         fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
512         fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
513         fifo_state->plane[PLANE_CURSOR] = 63;
514 }
515
516 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
517                               enum i9xx_plane_id i9xx_plane)
518 {
519         u32 dsparb = I915_READ(DSPARB);
520         int size;
521
522         size = dsparb & 0x7f;
523         if (i9xx_plane == PLANE_B)
524                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
525
526         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
527                       dsparb, plane_name(i9xx_plane), size);
528
529         return size;
530 }
531
532 static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
533                               enum i9xx_plane_id i9xx_plane)
534 {
535         u32 dsparb = I915_READ(DSPARB);
536         int size;
537
538         size = dsparb & 0x1ff;
539         if (i9xx_plane == PLANE_B)
540                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
541         size >>= 1; /* Convert to cachelines */
542
543         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
544                       dsparb, plane_name(i9xx_plane), size);
545
546         return size;
547 }
548
549 static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
550                               enum i9xx_plane_id i9xx_plane)
551 {
552         u32 dsparb = I915_READ(DSPARB);
553         int size;
554
555         size = dsparb & 0x7f;
556         size >>= 2; /* Convert to cachelines */
557
558         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
559                       dsparb, plane_name(i9xx_plane), size);
560
561         return size;
562 }
563
564 /* Pineview has different values for various configs */
565 static const struct intel_watermark_params pineview_display_wm = {
566         .fifo_size = PINEVIEW_DISPLAY_FIFO,
567         .max_wm = PINEVIEW_MAX_WM,
568         .default_wm = PINEVIEW_DFT_WM,
569         .guard_size = PINEVIEW_GUARD_WM,
570         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
571 };
572 static const struct intel_watermark_params pineview_display_hplloff_wm = {
573         .fifo_size = PINEVIEW_DISPLAY_FIFO,
574         .max_wm = PINEVIEW_MAX_WM,
575         .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
576         .guard_size = PINEVIEW_GUARD_WM,
577         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
578 };
579 static const struct intel_watermark_params pineview_cursor_wm = {
580         .fifo_size = PINEVIEW_CURSOR_FIFO,
581         .max_wm = PINEVIEW_CURSOR_MAX_WM,
582         .default_wm = PINEVIEW_CURSOR_DFT_WM,
583         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
584         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
585 };
586 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
587         .fifo_size = PINEVIEW_CURSOR_FIFO,
588         .max_wm = PINEVIEW_CURSOR_MAX_WM,
589         .default_wm = PINEVIEW_CURSOR_DFT_WM,
590         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
591         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
592 };
593 static const struct intel_watermark_params i965_cursor_wm_info = {
594         .fifo_size = I965_CURSOR_FIFO,
595         .max_wm = I965_CURSOR_MAX_WM,
596         .default_wm = I965_CURSOR_DFT_WM,
597         .guard_size = 2,
598         .cacheline_size = I915_FIFO_LINE_SIZE,
599 };
600 static const struct intel_watermark_params i945_wm_info = {
601         .fifo_size = I945_FIFO_SIZE,
602         .max_wm = I915_MAX_WM,
603         .default_wm = 1,
604         .guard_size = 2,
605         .cacheline_size = I915_FIFO_LINE_SIZE,
606 };
607 static const struct intel_watermark_params i915_wm_info = {
608         .fifo_size = I915_FIFO_SIZE,
609         .max_wm = I915_MAX_WM,
610         .default_wm = 1,
611         .guard_size = 2,
612         .cacheline_size = I915_FIFO_LINE_SIZE,
613 };
614 static const struct intel_watermark_params i830_a_wm_info = {
615         .fifo_size = I855GM_FIFO_SIZE,
616         .max_wm = I915_MAX_WM,
617         .default_wm = 1,
618         .guard_size = 2,
619         .cacheline_size = I830_FIFO_LINE_SIZE,
620 };
621 static const struct intel_watermark_params i830_bc_wm_info = {
622         .fifo_size = I855GM_FIFO_SIZE,
623         .max_wm = I915_MAX_WM/2,
624         .default_wm = 1,
625         .guard_size = 2,
626         .cacheline_size = I830_FIFO_LINE_SIZE,
627 };
628 static const struct intel_watermark_params i845_wm_info = {
629         .fifo_size = I830_FIFO_SIZE,
630         .max_wm = I915_MAX_WM,
631         .default_wm = 1,
632         .guard_size = 2,
633         .cacheline_size = I830_FIFO_LINE_SIZE,
634 };
635
636 /**
637  * intel_wm_method1 - Method 1 / "small buffer" watermark formula
638  * @pixel_rate: Pipe pixel rate in kHz
639  * @cpp: Plane bytes per pixel
640  * @latency: Memory wakeup latency in 0.1us units
641  *
642  * Compute the watermark using the method 1 or "small buffer"
643  * formula. The caller may additionally add extra cachelines
644  * to account for TLB misses and clock crossings.
645  *
646  * This method is concerned with the short term drain rate
647  * of the FIFO, i.e. it does not account for blanking periods
648  * which would effectively reduce the average drain rate across
649  * a longer period. The name "small" refers to the fact that the
650  * FIFO is relatively small compared to the amount of data
651  * fetched.
652  *
653  * The FIFO level vs. time graph might look something like:
654  *
655  *   |\   |\
656  *   | \  | \
657  * __---__---__ (- plane active, _ blanking)
658  * -> time
659  *
660  * or perhaps like this:
661  *
662  *   |\|\  |\|\
663  * __----__----__ (- plane active, _ blanking)
664  * -> time
665  *
666  * Returns:
667  * The watermark in bytes
668  */
669 static unsigned int intel_wm_method1(unsigned int pixel_rate,
670                                      unsigned int cpp,
671                                      unsigned int latency)
672 {
673         u64 ret;
674
675         ret = (u64)pixel_rate * cpp * latency;
676         ret = DIV_ROUND_UP_ULL(ret, 10000);
677
678         return ret;
679 }
680
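/*
 * Worked example (illustrative numbers, not from Bspec): with a 148500 kHz
 * pixel rate (1080p@60), cpp = 4 and latency = 50 (i.e. 5 usec),
 * 148500 * 4 * 50 / 10000 = 2970, so roughly 3 KiB of data must be buffered
 * to ride out the memory wakeup latency, before any cacheline rounding.
 */
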
681 /**
682  * intel_wm_method2 - Method 2 / "large buffer" watermark formula
683  * @pixel_rate: Pipe pixel rate in kHz
684  * @htotal: Pipe horizontal total
685  * @width: Plane width in pixels
686  * @cpp: Plane bytes per pixel
687  * @latency: Memory wakeup latency in 0.1us units
688  *
689  * Compute the watermark using the method 2 or "large buffer"
690  * formula. The caller may additionally add extra cachelines
691  * to account for TLB misses and clock crossings.
692  *
693  * This method is concerned with the long term drain rate
694  * of the FIFO, i.e. it does account for blanking periods
695  * which effectively reduce the average drain rate across
696  * a longer period. The name "large" refers to the fact that the
697  * FIFO is relatively large compared to the amount of data
698  * fetched.
699  *
700  * The FIFO level vs. time graph might look something like:
701  *
702  *    |\___       |\___
703  *    |    \___   |    \___
704  *    |        \  |        \
705  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
706  * -> time
707  *
708  * Returns:
709  * The watermark in bytes
710  */
711 static unsigned int intel_wm_method2(unsigned int pixel_rate,
712                                      unsigned int htotal,
713                                      unsigned int width,
714                                      unsigned int cpp,
715                                      unsigned int latency)
716 {
717         unsigned int ret;
718
719         /*
720          * FIXME remove once all users are computing
721          * watermarks in the correct place.
722          */
723         if (WARN_ON_ONCE(htotal == 0))
724                 htotal = 1;
725
726         ret = (latency * pixel_rate) / (htotal * 10000);
727         ret = (ret + 1) * width * cpp;
728
729         return ret;
730 }
731
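/*
 * Worked example (illustrative numbers): with the same 148500 kHz pixel
 * rate, latency = 50 (5 usec), htotal = 2200, width = 1920 and cpp = 4:
 * (50 * 148500) / (2200 * 10000) = 0 whole lines of latency, rounded up to
 * 1 by the "+ 1", so the watermark is 1 * 1920 * 4 = 7680 bytes, i.e. one
 * full line of plane data.
 */
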
732 /**
733  * intel_calculate_wm - calculate watermark level
734  * @pixel_rate: pixel clock
735  * @wm: chip FIFO params
736  * @fifo_size: size of the FIFO buffer
737  * @cpp: bytes per pixel
738  * @latency_ns: memory latency for the platform
739  *
740  * Calculate the watermark level (the level at which the display plane will
741  * start fetching from memory again).  Each chip has a different display
742  * FIFO size and allocation, so the caller needs to figure that out and pass
743  * in the correct intel_watermark_params structure.
744  *
745  * As the pixel clock runs, the FIFO will be drained at a rate that depends
746  * on the pixel size.  When it reaches the watermark level, it'll start
747  * fetching FIFO-line-sized chunks from memory until the FIFO fills
748  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
749  * will occur, and a display engine hang could result.
750  */
751 static unsigned int intel_calculate_wm(int pixel_rate,
752                                        const struct intel_watermark_params *wm,
753                                        int fifo_size, int cpp,
754                                        unsigned int latency_ns)
755 {
756         int entries, wm_size;
757
758         /*
759          * Note: we need to make sure we don't overflow for various clock &
760          * latency values.
761          * Clocks go from a few thousand to several hundred thousand (kHz).
762          * Latency is usually a few thousand nanoseconds.
763          */
764         entries = intel_wm_method1(pixel_rate, cpp,
765                                    latency_ns / 100);
766         entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
767                 wm->guard_size;
768         DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
769
770         wm_size = fifo_size - entries;
771         DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
772
773         /* Don't promote wm_size to unsigned... */
774         if (wm_size > wm->max_wm)
775                 wm_size = wm->max_wm;
776         if (wm_size <= 0)
777                 wm_size = wm->default_wm;
778
779         /*
780          * Bspec seems to indicate that the value shouldn't be lower than
781          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
782          * Let's go for 8 which is the burst size since certain platforms
783          * already use a hardcoded 8 (which is what the spec says should be
784          * done).
785          */
786         if (wm_size <= 8)
787                 wm_size = 8;
788
789         return wm_size;
790 }
791
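/*
 * Worked example (illustrative numbers): continuing the method 1 example
 * above, 2970 bytes with a 64-byte cacheline gives
 * DIV_ROUND_UP(2970, 64) = 47 entries, plus a guard of 2 = 49 entries.
 * Assuming a hypothetical 512-entry FIFO, the watermark level returned
 * above would be 512 - 49 = 463, subject to the max_wm/default_wm clamping.
 */
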
792 static bool is_disabling(int old, int new, int threshold)
793 {
794         return old >= threshold && new < threshold;
795 }
796
797 static bool is_enabling(int old, int new, int threshold)
798 {
799         return old < threshold && new >= threshold;
800 }
801
802 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
803 {
804         return dev_priv->wm.max_level + 1;
805 }
806
807 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
808                                    const struct intel_plane_state *plane_state)
809 {
810         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
811
812         /* FIXME check the 'enable' instead */
813         if (!crtc_state->base.active)
814                 return false;
815
816         /*
817          * Treat cursor with fb as always visible since cursor updates
818          * can happen faster than the vrefresh rate, and the current
819          * watermark code doesn't handle that correctly. Cursor updates
820          * which set/clear the fb or change the cursor size are going
821          * to get throttled by intel_legacy_cursor_update() to work
822          * around this problem with the watermark code.
823          */
824         if (plane->id == PLANE_CURSOR)
825                 return plane_state->base.fb != NULL;
826         else
827                 return plane_state->base.visible;
828 }
829
830 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
831 {
832         struct intel_crtc *crtc, *enabled = NULL;
833
834         for_each_intel_crtc(&dev_priv->drm, crtc) {
835                 if (intel_crtc_active(crtc)) {
836                         if (enabled)
837                                 return NULL;
838                         enabled = crtc;
839                 }
840         }
841
842         return enabled;
843 }
844
845 static void pineview_update_wm(struct intel_crtc *unused_crtc)
846 {
847         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
848         struct intel_crtc *crtc;
849         const struct cxsr_latency *latency;
850         u32 reg;
851         unsigned int wm;
852
853         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
854                                          dev_priv->is_ddr3,
855                                          dev_priv->fsb_freq,
856                                          dev_priv->mem_freq);
857         if (!latency) {
858                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
859                 intel_set_memory_cxsr(dev_priv, false);
860                 return;
861         }
862
863         crtc = single_enabled_crtc(dev_priv);
864         if (crtc) {
865                 const struct drm_display_mode *adjusted_mode =
866                         &crtc->config->base.adjusted_mode;
867                 const struct drm_framebuffer *fb =
868                         crtc->base.primary->state->fb;
869                 int cpp = fb->format->cpp[0];
870                 int clock = adjusted_mode->crtc_clock;
871
872                 /* Display SR */
873                 wm = intel_calculate_wm(clock, &pineview_display_wm,
874                                         pineview_display_wm.fifo_size,
875                                         cpp, latency->display_sr);
876                 reg = I915_READ(DSPFW1);
877                 reg &= ~DSPFW_SR_MASK;
878                 reg |= FW_WM(wm, SR);
879                 I915_WRITE(DSPFW1, reg);
880                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
881
882                 /* cursor SR */
883                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
884                                         pineview_display_wm.fifo_size,
885                                         4, latency->cursor_sr);
886                 reg = I915_READ(DSPFW3);
887                 reg &= ~DSPFW_CURSOR_SR_MASK;
888                 reg |= FW_WM(wm, CURSOR_SR);
889                 I915_WRITE(DSPFW3, reg);
890
891                 /* Display HPLL off SR */
892                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
893                                         pineview_display_hplloff_wm.fifo_size,
894                                         cpp, latency->display_hpll_disable);
895                 reg = I915_READ(DSPFW3);
896                 reg &= ~DSPFW_HPLL_SR_MASK;
897                 reg |= FW_WM(wm, HPLL_SR);
898                 I915_WRITE(DSPFW3, reg);
899
900                 /* cursor HPLL off SR */
901                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
902                                         pineview_display_hplloff_wm.fifo_size,
903                                         4, latency->cursor_hpll_disable);
904                 reg = I915_READ(DSPFW3);
905                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
906                 reg |= FW_WM(wm, HPLL_CURSOR);
907                 I915_WRITE(DSPFW3, reg);
908                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
909
910                 intel_set_memory_cxsr(dev_priv, true);
911         } else {
912                 intel_set_memory_cxsr(dev_priv, false);
913         }
914 }
915
916 /*
917  * Documentation says:
918  * "If the line size is small, the TLB fetches can get in the way of the
919  *  data fetches, causing some lag in the pixel data return which is not
920  *  accounted for in the above formulas. The following adjustment only
921  *  needs to be applied if eight whole lines fit in the buffer at once.
922  *  The WM is adjusted upwards by the difference between the FIFO size
923  *  and the size of 8 whole lines. This adjustment is always performed
924  *  in the actual pixel depth regardless of whether FBC is enabled or not."
925  */
926 static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
927 {
928         int tlb_miss = fifo_size * 64 - width * cpp * 8;
929
930         return max(0, tlb_miss);
931 }
932
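/*
 * Worked example for the adjustment above (illustrative numbers): a
 * 511-cacheline FIFO holds 511 * 64 = 32704 bytes. A 512-pixel wide plane
 * at cpp = 4 needs 512 * 4 * 8 = 16384 bytes for eight whole lines, so the
 * watermark gets bumped by 32704 - 16384 = 20320 bytes. A 1920-pixel wide
 * plane at cpp = 4 would need 61440 bytes, eight lines no longer fit, and
 * the adjustment is clamped to 0.
 */
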
933 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
934                                 const struct g4x_wm_values *wm)
935 {
936         enum pipe pipe;
937
938         for_each_pipe(dev_priv, pipe)
939                 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
940
941         I915_WRITE(DSPFW1,
942                    FW_WM(wm->sr.plane, SR) |
943                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
944                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
945                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
946         I915_WRITE(DSPFW2,
947                    (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
948                    FW_WM(wm->sr.fbc, FBC_SR) |
949                    FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
950                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
951                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
952                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
953         I915_WRITE(DSPFW3,
954                    (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
955                    FW_WM(wm->sr.cursor, CURSOR_SR) |
956                    FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
957                    FW_WM(wm->hpll.plane, HPLL_SR));
958
959         POSTING_READ(DSPFW1);
960 }
961
962 #define FW_WM_VLV(value, plane) \
963         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
964
965 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
966                                 const struct vlv_wm_values *wm)
967 {
968         enum pipe pipe;
969
970         for_each_pipe(dev_priv, pipe) {
971                 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
972
973                 I915_WRITE(VLV_DDL(pipe),
974                            (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
975                            (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
976                            (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
977                            (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
978         }
979
980         /*
981          * Zero the (unused) WM1 watermarks, and also clear all the
982          * high order bits so that there are no out of bounds values
983          * present in the registers during the reprogramming.
984          */
985         I915_WRITE(DSPHOWM, 0);
986         I915_WRITE(DSPHOWM1, 0);
987         I915_WRITE(DSPFW4, 0);
988         I915_WRITE(DSPFW5, 0);
989         I915_WRITE(DSPFW6, 0);
990
991         I915_WRITE(DSPFW1,
992                    FW_WM(wm->sr.plane, SR) |
993                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
994                    FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
995                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
996         I915_WRITE(DSPFW2,
997                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
998                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
999                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
1000         I915_WRITE(DSPFW3,
1001                    FW_WM(wm->sr.cursor, CURSOR_SR));
1002
1003         if (IS_CHERRYVIEW(dev_priv)) {
1004                 I915_WRITE(DSPFW7_CHV,
1005                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1006                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1007                 I915_WRITE(DSPFW8_CHV,
1008                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1009                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1010                 I915_WRITE(DSPFW9_CHV,
1011                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1012                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1013                 I915_WRITE(DSPHOWM,
1014                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1015                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1016                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1017                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1018                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1019                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1020                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1021                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1022                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1023                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1024         } else {
1025                 I915_WRITE(DSPFW7,
1026                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1027                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1028                 I915_WRITE(DSPHOWM,
1029                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1030                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1031                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1032                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1033                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1034                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1035                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1036         }
1037
1038         POSTING_READ(DSPFW1);
1039 }
1040
1041 #undef FW_WM_VLV
1042
1043 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1044 {
1045         /* all latencies in usec */
1046         dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1047         dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1048         dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1049
1050         dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1051 }
1052
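/*
 * Note: these latencies are in whole microseconds; g4x_compute_wm() below
 * multiplies them by 10 to obtain the 0.1 usec units expected by
 * intel_wm_method1()/intel_wm_method2() (e.g. the 5 usec NORMAL latency
 * becomes latency = 50).
 */
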
1053 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1054 {
1055         /*
1056          * DSPCNTR[13] supposedly controls whether the
1057          * primary plane can use the FIFO space otherwise
1058          * reserved for the sprite plane. It's not 100% clear
1059          * what the actual FIFO size is, but it looks like we
1060          * can happily set both primary and sprite watermarks
1061          * up to 127 cachelines. So that would seem to mean
1062          * that either DSPCNTR[13] doesn't do anything, or that
1063          * the total FIFO is >= 256 cachelines in size. Either
1064          * way, we don't seem to have to worry about this
1065          * repartitioning as the maximum watermark value the
1066          * register can hold for each plane is lower than the
1067          * minimum FIFO size.
1068          */
1069         switch (plane_id) {
1070         case PLANE_CURSOR:
1071                 return 63;
1072         case PLANE_PRIMARY:
1073                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1074         case PLANE_SPRITE0:
1075                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1076         default:
1077                 MISSING_CASE(plane_id);
1078                 return 0;
1079         }
1080 }
1081
1082 static int g4x_fbc_fifo_size(int level)
1083 {
1084         switch (level) {
1085         case G4X_WM_LEVEL_SR:
1086                 return 7;
1087         case G4X_WM_LEVEL_HPLL:
1088                 return 15;
1089         default:
1090                 MISSING_CASE(level);
1091                 return 0;
1092         }
1093 }
1094
1095 static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1096                           const struct intel_plane_state *plane_state,
1097                           int level)
1098 {
1099         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1100         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1101         const struct drm_display_mode *adjusted_mode =
1102                 &crtc_state->base.adjusted_mode;
1103         unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1104         unsigned int clock, htotal, cpp, width, wm;
1105
1106         if (latency == 0)
1107                 return USHRT_MAX;
1108
1109         if (!intel_wm_plane_visible(crtc_state, plane_state))
1110                 return 0;
1111
1112         /*
1113          * Not 100% sure which way ELK should go here as the
1114          * spec only says CL/CTG should assume 32bpp and BW
1115          * doesn't need to. But as these things followed the
1116          * mobile vs. desktop lines on gen3 as well, let's
1117          * assume ELK doesn't need this.
1118          *
1119          * The spec also fails to list such a restriction for
1120          * the HPLL watermark, which seems a little strange.
1121          * Let's use 32bpp for the HPLL watermark as well.
1122          */
1123         if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1124             level != G4X_WM_LEVEL_NORMAL)
1125                 cpp = 4;
1126         else
1127                 cpp = plane_state->base.fb->format->cpp[0];
1128
1129         clock = adjusted_mode->crtc_clock;
1130         htotal = adjusted_mode->crtc_htotal;
1131
1132         if (plane->id == PLANE_CURSOR)
1133                 width = plane_state->base.crtc_w;
1134         else
1135                 width = drm_rect_width(&plane_state->base.dst);
1136
1137         if (plane->id == PLANE_CURSOR) {
1138                 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1139         } else if (plane->id == PLANE_PRIMARY &&
1140                    level == G4X_WM_LEVEL_NORMAL) {
1141                 wm = intel_wm_method1(clock, cpp, latency);
1142         } else {
1143                 unsigned int small, large;
1144
1145                 small = intel_wm_method1(clock, cpp, latency);
1146                 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1147
1148                 wm = min(small, large);
1149         }
1150
1151         wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1152                               width, cpp);
1153
1154         wm = DIV_ROUND_UP(wm, 64) + 2;
1155
1156         return min_t(unsigned int, wm, USHRT_MAX);
1157 }
1158
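/*
 * Illustrative note: the "DIV_ROUND_UP(wm, 64) + 2" step above converts the
 * byte-based result into 64-byte cachelines and adds two guard entries, so
 * e.g. a 7680-byte method 2 result becomes 7680 / 64 + 2 = 122 cachelines.
 */
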
1159 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1160                                  int level, enum plane_id plane_id, u16 value)
1161 {
1162         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1163         bool dirty = false;
1164
1165         for (; level < intel_wm_num_levels(dev_priv); level++) {
1166                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1167
1168                 dirty |= raw->plane[plane_id] != value;
1169                 raw->plane[plane_id] = value;
1170         }
1171
1172         return dirty;
1173 }
1174
1175 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1176                                int level, u16 value)
1177 {
1178         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1179         bool dirty = false;
1180
1181         /* NORMAL level doesn't have an FBC watermark */
1182         level = max(level, G4X_WM_LEVEL_SR);
1183
1184         for (; level < intel_wm_num_levels(dev_priv); level++) {
1185                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1186
1187                 dirty |= raw->fbc != value;
1188                 raw->fbc = value;
1189         }
1190
1191         return dirty;
1192 }
1193
1194 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1195                               const struct intel_plane_state *pstate,
1196                               u32 pri_val);
1197
1198 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1199                                      const struct intel_plane_state *plane_state)
1200 {
1201         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1202         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1203         enum plane_id plane_id = plane->id;
1204         bool dirty = false;
1205         int level;
1206
1207         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1208                 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1209                 if (plane_id == PLANE_PRIMARY)
1210                         dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1211                 goto out;
1212         }
1213
1214         for (level = 0; level < num_levels; level++) {
1215                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1216                 int wm, max_wm;
1217
1218                 wm = g4x_compute_wm(crtc_state, plane_state, level);
1219                 max_wm = g4x_plane_fifo_size(plane_id, level);
1220
1221                 if (wm > max_wm)
1222                         break;
1223
1224                 dirty |= raw->plane[plane_id] != wm;
1225                 raw->plane[plane_id] = wm;
1226
1227                 if (plane_id != PLANE_PRIMARY ||
1228                     level == G4X_WM_LEVEL_NORMAL)
1229                         continue;
1230
1231                 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1232                                         raw->plane[plane_id]);
1233                 max_wm = g4x_fbc_fifo_size(level);
1234
1235                 /*
1236                  * FBC wm is not mandatory as we
1237                  * can always just disable its use.
1238                  */
1239                 if (wm > max_wm)
1240                         wm = USHRT_MAX;
1241
1242                 dirty |= raw->fbc != wm;
1243                 raw->fbc = wm;
1244         }
1245
1246         /* mark watermarks as invalid */
1247         dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1248
1249         if (plane_id == PLANE_PRIMARY)
1250                 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1251
1252  out:
1253         if (dirty) {
1254                 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1255                               plane->base.name,
1256                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1257                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1258                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1259
1260                 if (plane_id == PLANE_PRIMARY)
1261                         DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1262                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1263                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1264         }
1265
1266         return dirty;
1267 }
1268
1269 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1270                                       enum plane_id plane_id, int level)
1271 {
1272         const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1273
1274         return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1275 }
1276
1277 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1278                                      int level)
1279 {
1280         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1281
1282         if (level > dev_priv->wm.max_level)
1283                 return false;
1284
1285         return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1286                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1287                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1288 }
1289
1290 /* mark all levels starting from 'level' as invalid */
1291 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1292                                struct g4x_wm_state *wm_state, int level)
1293 {
1294         if (level <= G4X_WM_LEVEL_NORMAL) {
1295                 enum plane_id plane_id;
1296
1297                 for_each_plane_id_on_crtc(crtc, plane_id)
1298                         wm_state->wm.plane[plane_id] = USHRT_MAX;
1299         }
1300
1301         if (level <= G4X_WM_LEVEL_SR) {
1302                 wm_state->cxsr = false;
1303                 wm_state->sr.cursor = USHRT_MAX;
1304                 wm_state->sr.plane = USHRT_MAX;
1305                 wm_state->sr.fbc = USHRT_MAX;
1306         }
1307
1308         if (level <= G4X_WM_LEVEL_HPLL) {
1309                 wm_state->hpll_en = false;
1310                 wm_state->hpll.cursor = USHRT_MAX;
1311                 wm_state->hpll.plane = USHRT_MAX;
1312                 wm_state->hpll.fbc = USHRT_MAX;
1313         }
1314 }
1315
1316 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1317 {
1318         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1319         struct intel_atomic_state *state =
1320                 to_intel_atomic_state(crtc_state->base.state);
1321         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1322         int num_active_planes = hweight32(crtc_state->active_planes &
1323                                           ~BIT(PLANE_CURSOR));
1324         const struct g4x_pipe_wm *raw;
1325         const struct intel_plane_state *old_plane_state;
1326         const struct intel_plane_state *new_plane_state;
1327         struct intel_plane *plane;
1328         enum plane_id plane_id;
1329         int i, level;
1330         unsigned int dirty = 0;
1331
1332         for_each_oldnew_intel_plane_in_state(state, plane,
1333                                              old_plane_state,
1334                                              new_plane_state, i) {
1335                 if (new_plane_state->base.crtc != &crtc->base &&
1336                     old_plane_state->base.crtc != &crtc->base)
1337                         continue;
1338
1339                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1340                         dirty |= BIT(plane->id);
1341         }
1342
1343         if (!dirty)
1344                 return 0;
1345
1346         level = G4X_WM_LEVEL_NORMAL;
1347         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1348                 goto out;
1349
1350         raw = &crtc_state->wm.g4x.raw[level];
1351         for_each_plane_id_on_crtc(crtc, plane_id)
1352                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1353
1354         level = G4X_WM_LEVEL_SR;
1355
1356         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1357                 goto out;
1358
1359         raw = &crtc_state->wm.g4x.raw[level];
1360         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1361         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1362         wm_state->sr.fbc = raw->fbc;
1363
1364         wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1365
1366         level = G4X_WM_LEVEL_HPLL;
1367
1368         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1369                 goto out;
1370
1371         raw = &crtc_state->wm.g4x.raw[level];
1372         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1373         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1374         wm_state->hpll.fbc = raw->fbc;
1375
1376         wm_state->hpll_en = wm_state->cxsr;
1377
1378         level++;
1379
1380  out:
1381         if (level == G4X_WM_LEVEL_NORMAL)
1382                 return -EINVAL;
1383
1384         /* invalidate the higher levels */
1385         g4x_invalidate_wms(crtc, wm_state, level);
1386
1387         /*
1388          * Determine if the FBC watermark(s) can be used. If
1389          * this isn't the case we prefer to disable the FBC
1390          * watermark(s) rather than disable the SR/HPLL
1391          * level(s) entirely.
1392          */
1393         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1394
1395         if (level >= G4X_WM_LEVEL_SR &&
1396             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1397                 wm_state->fbc_en = false;
1398         else if (level >= G4X_WM_LEVEL_HPLL &&
1399                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1400                 wm_state->fbc_en = false;
1401
1402         return 0;
1403 }
1404
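/*
 * The intermediate watermarks must be safe for both the old and the new
 * plane configuration, so take the max of the optimal (new) and active
 * (old) values for each plane and level. cxsr, HPLL and FBC stay
 * enabled only if both states allow them.
 */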
1405 static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
1406 {
1407         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
1408         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1409         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1410         struct intel_atomic_state *intel_state =
1411                 to_intel_atomic_state(new_crtc_state->base.state);
1412         const struct intel_crtc_state *old_crtc_state =
1413                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1414         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1415         enum plane_id plane_id;
1416
1417         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1418                 *intermediate = *optimal;
1419
1420                 intermediate->cxsr = false;
1421                 intermediate->hpll_en = false;
1422                 goto out;
1423         }
1424
1425         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1426                 !new_crtc_state->disable_cxsr;
1427         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1428                 !new_crtc_state->disable_cxsr;
1429         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1430
1431         for_each_plane_id_on_crtc(crtc, plane_id) {
1432                 intermediate->wm.plane[plane_id] =
1433                         max(optimal->wm.plane[plane_id],
1434                             active->wm.plane[plane_id]);
1435
1436                 WARN_ON(intermediate->wm.plane[plane_id] >
1437                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1438         }
1439
1440         intermediate->sr.plane = max(optimal->sr.plane,
1441                                      active->sr.plane);
1442         intermediate->sr.cursor = max(optimal->sr.cursor,
1443                                       active->sr.cursor);
1444         intermediate->sr.fbc = max(optimal->sr.fbc,
1445                                    active->sr.fbc);
1446
1447         intermediate->hpll.plane = max(optimal->hpll.plane,
1448                                        active->hpll.plane);
1449         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1450                                         active->hpll.cursor);
1451         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1452                                      active->hpll.fbc);
1453
1454         WARN_ON((intermediate->sr.plane >
1455                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1456                  intermediate->sr.cursor >
1457                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1458                 intermediate->cxsr);
1459         WARN_ON((intermediate->sr.plane >
1460                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1461                  intermediate->sr.cursor >
1462                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1463                 intermediate->hpll_en);
1464
1465         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1466                 intermediate->fbc_en && intermediate->cxsr);
1467         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1468                 intermediate->fbc_en && intermediate->hpll_en);
1469
1470 out:
1471         /*
1472          * If our intermediate WM are identical to the final WM, then we can
1473          * omit the post-vblank programming; only update if it's different.
1474          */
1475         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1476                 new_crtc_state->wm.need_postvbl_update = true;
1477
1478         return 0;
1479 }
1480
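/*
 * Merge the active per-CRTC watermark states into a single set of
 * hardware values. cxsr/HPLL/FBC can only be used when exactly one CRTC
 * is active and that CRTC's state allows them.
 */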
1481 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1482                          struct g4x_wm_values *wm)
1483 {
1484         struct intel_crtc *crtc;
1485         int num_active_crtcs = 0;
1486
1487         wm->cxsr = true;
1488         wm->hpll_en = true;
1489         wm->fbc_en = true;
1490
1491         for_each_intel_crtc(&dev_priv->drm, crtc) {
1492                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1493
1494                 if (!crtc->active)
1495                         continue;
1496
1497                 if (!wm_state->cxsr)
1498                         wm->cxsr = false;
1499                 if (!wm_state->hpll_en)
1500                         wm->hpll_en = false;
1501                 if (!wm_state->fbc_en)
1502                         wm->fbc_en = false;
1503
1504                 num_active_crtcs++;
1505         }
1506
1507         if (num_active_crtcs != 1) {
1508                 wm->cxsr = false;
1509                 wm->hpll_en = false;
1510                 wm->fbc_en = false;
1511         }
1512
1513         for_each_intel_crtc(&dev_priv->drm, crtc) {
1514                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1515                 enum pipe pipe = crtc->pipe;
1516
1517                 wm->pipe[pipe] = wm_state->wm;
1518                 if (crtc->active && wm->cxsr)
1519                         wm->sr = wm_state->sr;
1520                 if (crtc->active && wm->hpll_en)
1521                         wm->hpll = wm_state->hpll;
1522         }
1523 }
1524
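/*
 * Write the merged watermarks to the hardware, disabling cxsr before
 * the update when it is being turned off and re-enabling it afterwards
 * when it is being turned on. Callers hold wm.wm_mutex.
 */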
1525 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1526 {
1527         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1528         struct g4x_wm_values new_wm = {};
1529
1530         g4x_merge_wm(dev_priv, &new_wm);
1531
1532         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1533                 return;
1534
1535         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1536                 _intel_set_memory_cxsr(dev_priv, false);
1537
1538         g4x_write_wm_values(dev_priv, &new_wm);
1539
1540         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1541                 _intel_set_memory_cxsr(dev_priv, true);
1542
1543         *old_wm = new_wm;
1544 }
1545
1546 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1547                                    struct intel_crtc_state *crtc_state)
1548 {
1549         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1550         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1551
1552         mutex_lock(&dev_priv->wm.wm_mutex);
1553         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1554         g4x_program_watermarks(dev_priv);
1555         mutex_unlock(&dev_priv->wm.wm_mutex);
1556 }
1557
1558 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1559                                     struct intel_crtc_state *crtc_state)
1560 {
1561         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1562         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1563
1564         if (!crtc_state->wm.need_postvbl_update)
1565                 return;
1566
1567         mutex_lock(&dev_priv->wm.wm_mutex);
1568         intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1569         g4x_program_watermarks(dev_priv);
1570         mutex_unlock(&dev_priv->wm.wm_mutex);
1571 }
1572
1573 /* latency must be in 0.1us units. */
1574 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1575                                    unsigned int htotal,
1576                                    unsigned int width,
1577                                    unsigned int cpp,
1578                                    unsigned int latency)
1579 {
1580         unsigned int ret;
1581
1582         ret = intel_wm_method2(pixel_rate, htotal,
1583                                width, cpp, latency);
1584         ret = DIV_ROUND_UP(ret, 64);
1585
1586         return ret;
1587 }
1588
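/*
 * Set up the VLV/CHV watermark latencies. The PM5 and DDR DVFS levels
 * only exist on Cherryview.
 */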
1589 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1590 {
1591         /* all latencies in usec */
1592         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1593
1594         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1595
1596         if (IS_CHERRYVIEW(dev_priv)) {
1597                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1598                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1599
1600                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1601         }
1602 }
1603
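/*
 * Compute the raw watermark for one plane at the given level. Returns 0
 * for invisible planes and USHRT_MAX when no latency value is available
 * for the level. The cursor watermark is hardcoded (see the FIXME
 * below).
 */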
1604 static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1605                                 const struct intel_plane_state *plane_state,
1606                                 int level)
1607 {
1608         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1609         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1610         const struct drm_display_mode *adjusted_mode =
1611                 &crtc_state->base.adjusted_mode;
1612         unsigned int clock, htotal, cpp, width, wm;
1613
1614         if (dev_priv->wm.pri_latency[level] == 0)
1615                 return USHRT_MAX;
1616
1617         if (!intel_wm_plane_visible(crtc_state, plane_state))
1618                 return 0;
1619
1620         cpp = plane_state->base.fb->format->cpp[0];
1621         clock = adjusted_mode->crtc_clock;
1622         htotal = adjusted_mode->crtc_htotal;
1623         width = crtc_state->pipe_src_w;
1624
1625         if (plane->id == PLANE_CURSOR) {
1626                 /*
1627                  * FIXME the formula gives values that are
1628                  * too big for the cursor FIFO, and hence we
1629                  * would never be able to use cursors. For
1630                  * now just hardcode the watermark.
1631                  */
1632                 wm = 63;
1633         } else {
1634                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1635                                     dev_priv->wm.pri_latency[level] * 10);
1636         }
1637
1638         return min_t(unsigned int, wm, USHRT_MAX);
1639 }
1640
1641 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1642 {
1643         return (active_planes & (BIT(PLANE_SPRITE0) |
1644                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1645 }
1646
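/*
 * Split the 511-entry pipe FIFO between the primary and sprite planes
 * in proportion to their PM2 raw watermarks, then spread any leftover
 * entries evenly across the active planes. The cursor always gets its
 * own fixed 63 entries.
 */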
1647 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1648 {
1649         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1650         const struct g4x_pipe_wm *raw =
1651                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1652         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1653         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1654         int num_active_planes = hweight32(active_planes);
1655         const int fifo_size = 511;
1656         int fifo_extra, fifo_left = fifo_size;
1657         int sprite0_fifo_extra = 0;
1658         unsigned int total_rate;
1659         enum plane_id plane_id;
1660
1661         /*
1662          * When enabling sprite0 after sprite1 has already been enabled
1663          * we tend to get an underrun unless sprite0 already has some
1664          * FIFO space allocated. Hence we always allocate at least one
1665          * cacheline for sprite0 whenever sprite1 is enabled.
1666          *
1667          * All other plane enable sequences appear immune to this problem.
1668          */
1669         if (vlv_need_sprite0_fifo_workaround(active_planes))
1670                 sprite0_fifo_extra = 1;
1671
1672         total_rate = raw->plane[PLANE_PRIMARY] +
1673                 raw->plane[PLANE_SPRITE0] +
1674                 raw->plane[PLANE_SPRITE1] +
1675                 sprite0_fifo_extra;
1676
1677         if (total_rate > fifo_size)
1678                 return -EINVAL;
1679
1680         if (total_rate == 0)
1681                 total_rate = 1;
1682
1683         for_each_plane_id_on_crtc(crtc, plane_id) {
1684                 unsigned int rate;
1685
1686                 if ((active_planes & BIT(plane_id)) == 0) {
1687                         fifo_state->plane[plane_id] = 0;
1688                         continue;
1689                 }
1690
1691                 rate = raw->plane[plane_id];
1692                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1693                 fifo_left -= fifo_state->plane[plane_id];
1694         }
1695
1696         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1697         fifo_left -= sprite0_fifo_extra;
1698
1699         fifo_state->plane[PLANE_CURSOR] = 63;
1700
1701         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1702
1703         /* spread the remainder evenly */
1704         for_each_plane_id_on_crtc(crtc, plane_id) {
1705                 int plane_extra;
1706
1707                 if (fifo_left == 0)
1708                         break;
1709
1710                 if ((active_planes & BIT(plane_id)) == 0)
1711                         continue;
1712
1713                 plane_extra = min(fifo_extra, fifo_left);
1714                 fifo_state->plane[plane_id] += plane_extra;
1715                 fifo_left -= plane_extra;
1716         }
1717
1718         WARN_ON(active_planes != 0 && fifo_left != 0);
1719
1720         /* give it all to the first plane if none are active */
1721         if (active_planes == 0) {
1722                 WARN_ON(fifo_left != fifo_size);
1723                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1724         }
1725
1726         return 0;
1727 }
1728
1729 /* mark all levels starting from 'level' as invalid */
1730 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1731                                struct vlv_wm_state *wm_state, int level)
1732 {
1733         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1734
1735         for (; level < intel_wm_num_levels(dev_priv); level++) {
1736                 enum plane_id plane_id;
1737
1738                 for_each_plane_id_on_crtc(crtc, plane_id)
1739                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1740
1741                 wm_state->sr[level].cursor = USHRT_MAX;
1742                 wm_state->sr[level].plane = USHRT_MAX;
1743         }
1744 }
1745
1746 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1747 {
1748         if (wm > fifo_size)
1749                 return USHRT_MAX;
1750         else
1751                 return fifo_size - wm;
1752 }
1753
1754 /*
1755  * Starting from 'level' set all higher
1756  * levels to 'value' in the "raw" watermarks.
1757  */
1758 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1759                                  int level, enum plane_id plane_id, u16 value)
1760 {
1761         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1762         int num_levels = intel_wm_num_levels(dev_priv);
1763         bool dirty = false;
1764
1765         for (; level < num_levels; level++) {
1766                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1767
1768                 dirty |= raw->plane[plane_id] != value;
1769                 raw->plane[plane_id] = value;
1770         }
1771
1772         return dirty;
1773 }
1774
1775 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1776                                      const struct intel_plane_state *plane_state)
1777 {
1778         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1779         enum plane_id plane_id = plane->id;
1780         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1781         int level;
1782         bool dirty = false;
1783
1784         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1785                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1786                 goto out;
1787         }
1788
1789         for (level = 0; level < num_levels; level++) {
1790                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1791                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1792                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1793
1794                 if (wm > max_wm)
1795                         break;
1796
1797                 dirty |= raw->plane[plane_id] != wm;
1798                 raw->plane[plane_id] = wm;
1799         }
1800
1801         /* mark all higher levels as invalid */
1802         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1803
1804 out:
1805         if (dirty)
1806                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1807                               plane->base.name,
1808                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1809                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1810                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1811
1812         return dirty;
1813 }
1814
1815 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1816                                       enum plane_id plane_id, int level)
1817 {
1818         const struct g4x_pipe_wm *raw =
1819                 &crtc_state->wm.vlv.raw[level];
1820         const struct vlv_fifo_state *fifo_state =
1821                 &crtc_state->wm.vlv.fifo_state;
1822
1823         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1824 }
1825
1826 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1827 {
1828         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1829                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1830                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1831                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1832 }
1833
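/*
 * Compute the optimal watermarks for the pipe: recompute the per-plane
 * raw watermarks, redistribute the FIFO when any non-cursor plane
 * changed, then convert the raw values into the inverted form the
 * hardware expects for every level that still fits the FIFO.
 */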
1834 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1835 {
1836         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1837         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1838         struct intel_atomic_state *state =
1839                 to_intel_atomic_state(crtc_state->base.state);
1840         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1841         const struct vlv_fifo_state *fifo_state =
1842                 &crtc_state->wm.vlv.fifo_state;
1843         int num_active_planes = hweight32(crtc_state->active_planes &
1844                                           ~BIT(PLANE_CURSOR));
1845         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1846         const struct intel_plane_state *old_plane_state;
1847         const struct intel_plane_state *new_plane_state;
1848         struct intel_plane *plane;
1849         enum plane_id plane_id;
1850         int level, ret, i;
1851         unsigned int dirty = 0;
1852
1853         for_each_oldnew_intel_plane_in_state(state, plane,
1854                                              old_plane_state,
1855                                              new_plane_state, i) {
1856                 if (new_plane_state->base.crtc != &crtc->base &&
1857                     old_plane_state->base.crtc != &crtc->base)
1858                         continue;
1859
1860                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1861                         dirty |= BIT(plane->id);
1862         }
1863
1864         /*
1865          * DSPARB registers may have been reset due to the
1866          * power well being turned off. Make sure we restore
1867          * them to a consistent state even if no primary/sprite
1868          * planes are initially active.
1869          */
1870         if (needs_modeset)
1871                 crtc_state->fifo_changed = true;
1872
1873         if (!dirty)
1874                 return 0;
1875
1876         /* cursor changes don't warrant a FIFO recompute */
1877         if (dirty & ~BIT(PLANE_CURSOR)) {
1878                 const struct intel_crtc_state *old_crtc_state =
1879                         intel_atomic_get_old_crtc_state(state, crtc);
1880                 const struct vlv_fifo_state *old_fifo_state =
1881                         &old_crtc_state->wm.vlv.fifo_state;
1882
1883                 ret = vlv_compute_fifo(crtc_state);
1884                 if (ret)
1885                         return ret;
1886
1887                 if (needs_modeset ||
1888                     memcmp(old_fifo_state, fifo_state,
1889                            sizeof(*fifo_state)) != 0)
1890                         crtc_state->fifo_changed = true;
1891         }
1892
1893         /* initially allow all levels */
1894         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1895         /*
1896          * Note that enabling cxsr with no primary/sprite planes
1897          * enabled can wedge the pipe. Hence we only allow cxsr
1898          * with exactly one enabled primary/sprite plane.
1899          */
1900         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1901
1902         for (level = 0; level < wm_state->num_levels; level++) {
1903                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1904                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1905
1906                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1907                         break;
1908
1909                 for_each_plane_id_on_crtc(crtc, plane_id) {
1910                         wm_state->wm[level].plane[plane_id] =
1911                                 vlv_invert_wm_value(raw->plane[plane_id],
1912                                                     fifo_state->plane[plane_id]);
1913                 }
1914
1915                 wm_state->sr[level].plane =
1916                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1917                                                  raw->plane[PLANE_SPRITE0],
1918                                                  raw->plane[PLANE_SPRITE1]),
1919                                             sr_fifo_size);
1920
1921                 wm_state->sr[level].cursor =
1922                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1923                                             63);
1924         }
1925
1926         if (level == 0)
1927                 return -EINVAL;
1928
1929         /* limit to only levels we can actually handle */
1930         wm_state->num_levels = level;
1931
1932         /* invalidate the higher levels */
1933         vlv_invalidate_wms(crtc, wm_state, level);
1934
1935         return 0;
1936 }
1937
1938 #define VLV_FIFO(plane, value) \
1939         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1940
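/*
 * Turn the per-plane FIFO sizes computed by vlv_compute_fifo() into
 * DSPARB/DSPARB2/DSPARB3 split points (each sprite's start offset is
 * the sum of the sizes of the planes before it) and program them for
 * this pipe.
 */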
1941 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1942                                    struct intel_crtc_state *crtc_state)
1943 {
1944         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1945         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1946         const struct vlv_fifo_state *fifo_state =
1947                 &crtc_state->wm.vlv.fifo_state;
1948         int sprite0_start, sprite1_start, fifo_size;
1949
1950         if (!crtc_state->fifo_changed)
1951                 return;
1952
1953         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1954         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1955         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1956
1957         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1958         WARN_ON(fifo_size != 511);
1959
1960         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1961
1962         /*
1963          * uncore.lock serves a double purpose here. It allows us to
1964          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1965          * it protects the DSPARB registers from getting clobbered by
1966          * parallel updates from multiple pipes.
1967          *
1968          * intel_pipe_update_start() has already disabled interrupts
1969          * for us, so a plain spin_lock() is sufficient here.
1970          */
1971         spin_lock(&dev_priv->uncore.lock);
1972
1973         switch (crtc->pipe) {
1974                 u32 dsparb, dsparb2, dsparb3;
1975         case PIPE_A:
1976                 dsparb = I915_READ_FW(DSPARB);
1977                 dsparb2 = I915_READ_FW(DSPARB2);
1978
1979                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1980                             VLV_FIFO(SPRITEB, 0xff));
1981                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1982                            VLV_FIFO(SPRITEB, sprite1_start));
1983
1984                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1985                              VLV_FIFO(SPRITEB_HI, 0x1));
1986                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1987                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1988
1989                 I915_WRITE_FW(DSPARB, dsparb);
1990                 I915_WRITE_FW(DSPARB2, dsparb2);
1991                 break;
1992         case PIPE_B:
1993                 dsparb = I915_READ_FW(DSPARB);
1994                 dsparb2 = I915_READ_FW(DSPARB2);
1995
1996                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1997                             VLV_FIFO(SPRITED, 0xff));
1998                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1999                            VLV_FIFO(SPRITED, sprite1_start));
2000
2001                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2002                              VLV_FIFO(SPRITED_HI, 0xff));
2003                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2004                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2005
2006                 I915_WRITE_FW(DSPARB, dsparb);
2007                 I915_WRITE_FW(DSPARB2, dsparb2);
2008                 break;
2009         case PIPE_C:
2010                 dsparb3 = I915_READ_FW(DSPARB3);
2011                 dsparb2 = I915_READ_FW(DSPARB2);
2012
2013                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2014                              VLV_FIFO(SPRITEF, 0xff));
2015                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2016                             VLV_FIFO(SPRITEF, sprite1_start));
2017
2018                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2019                              VLV_FIFO(SPRITEF_HI, 0xff));
2020                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2021                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2022
2023                 I915_WRITE_FW(DSPARB3, dsparb3);
2024                 I915_WRITE_FW(DSPARB2, dsparb2);
2025                 break;
2026         default:
2027                 break;
2028         }
2029
2030         POSTING_READ_FW(DSPARB);
2031
2032         spin_unlock(&dev_priv->uncore.lock);
2033 }
2034
2035 #undef VLV_FIFO
2036
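/*
 * As with g4x, the intermediate watermarks must be safe for both the
 * old and the new plane configuration. Since the VLV/CHV values are
 * stored in inverted form, the safe merge here is min() rather than the
 * max() used for g4x.
 */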
2037 static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
2038 {
2039         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
2040         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2041         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2042         struct intel_atomic_state *intel_state =
2043                 to_intel_atomic_state(new_crtc_state->base.state);
2044         const struct intel_crtc_state *old_crtc_state =
2045                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2046         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2047         int level;
2048
2049         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2050                 *intermediate = *optimal;
2051
2052                 intermediate->cxsr = false;
2053                 goto out;
2054         }
2055
2056         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2057         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2058                 !new_crtc_state->disable_cxsr;
2059
2060         for (level = 0; level < intermediate->num_levels; level++) {
2061                 enum plane_id plane_id;
2062
2063                 for_each_plane_id_on_crtc(crtc, plane_id) {
2064                         intermediate->wm[level].plane[plane_id] =
2065                                 min(optimal->wm[level].plane[plane_id],
2066                                     active->wm[level].plane[plane_id]);
2067                 }
2068
2069                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2070                                                     active->sr[level].plane);
2071                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2072                                                      active->sr[level].cursor);
2073         }
2074
2075         vlv_invalidate_wms(crtc, intermediate, level);
2076
2077 out:
2078         /*
2079          * If our intermediate WM are identical to the final WM, then we can
2080          * omit the post-vblank programming; only update if it's different.
2081          */
2082         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2083                 new_crtc_state->wm.need_postvbl_update = true;
2084
2085         return 0;
2086 }
2087
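/*
 * Merge the active per-CRTC watermark states: use the highest level
 * supported by every active CRTC, and allow cxsr only when exactly one
 * CRTC is active and it permits cxsr.
 */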
2088 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2089                          struct vlv_wm_values *wm)
2090 {
2091         struct intel_crtc *crtc;
2092         int num_active_crtcs = 0;
2093
2094         wm->level = dev_priv->wm.max_level;
2095         wm->cxsr = true;
2096
2097         for_each_intel_crtc(&dev_priv->drm, crtc) {
2098                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2099
2100                 if (!crtc->active)
2101                         continue;
2102
2103                 if (!wm_state->cxsr)
2104                         wm->cxsr = false;
2105
2106                 num_active_crtcs++;
2107                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2108         }
2109
2110         if (num_active_crtcs != 1)
2111                 wm->cxsr = false;
2112
2113         if (num_active_crtcs > 1)
2114                 wm->level = VLV_WM_LEVEL_PM2;
2115
2116         for_each_intel_crtc(&dev_priv->drm, crtc) {
2117                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2118                 enum pipe pipe = crtc->pipe;
2119
2120                 wm->pipe[pipe] = wm_state->wm[wm->level];
2121                 if (crtc->active && wm->cxsr)
2122                         wm->sr = wm_state->sr[wm->level];
2123
2124                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2125                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2126                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2127                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2128         }
2129 }
2130
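/*
 * Write the merged watermarks to the hardware. DDR DVFS, PM5 and cxsr
 * are disabled before the register update when they are being turned
 * off, and re-enabled afterwards (in the reverse order) when they are
 * being turned on.
 */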
2131 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2132 {
2133         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2134         struct vlv_wm_values new_wm = {};
2135
2136         vlv_merge_wm(dev_priv, &new_wm);
2137
2138         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2139                 return;
2140
2141         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2142                 chv_set_memory_dvfs(dev_priv, false);
2143
2144         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2145                 chv_set_memory_pm5(dev_priv, false);
2146
2147         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2148                 _intel_set_memory_cxsr(dev_priv, false);
2149
2150         vlv_write_wm_values(dev_priv, &new_wm);
2151
2152         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2153                 _intel_set_memory_cxsr(dev_priv, true);
2154
2155         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2156                 chv_set_memory_pm5(dev_priv, true);
2157
2158         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2159                 chv_set_memory_dvfs(dev_priv, true);
2160
2161         *old_wm = new_wm;
2162 }
2163
2164 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2165                                    struct intel_crtc_state *crtc_state)
2166 {
2167         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2168         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2169
2170         mutex_lock(&dev_priv->wm.wm_mutex);
2171         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2172         vlv_program_watermarks(dev_priv);
2173         mutex_unlock(&dev_priv->wm.wm_mutex);
2174 }
2175
2176 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2177                                     struct intel_crtc_state *crtc_state)
2178 {
2179         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2180         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2181
2182         if (!crtc_state->wm.need_postvbl_update)
2183                 return;
2184
2185         mutex_lock(&dev_priv->wm.wm_mutex);
2186         intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2187         vlv_program_watermarks(dev_priv);
2188         mutex_unlock(&dev_priv->wm.wm_mutex);
2189 }
2190
2191 static void i965_update_wm(struct intel_crtc *unused_crtc)
2192 {
2193         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2194         struct intel_crtc *crtc;
2195         int srwm = 1;
2196         int cursor_sr = 16;
2197         bool cxsr_enabled;
2198
2199         /* Calc SR entries for single-plane configs */
2200         crtc = single_enabled_crtc(dev_priv);
2201         if (crtc) {
2202                 /* self-refresh has much higher latency */
2203                 static const int sr_latency_ns = 12000;
2204                 const struct drm_display_mode *adjusted_mode =
2205                         &crtc->config->base.adjusted_mode;
2206                 const struct drm_framebuffer *fb =
2207                         crtc->base.primary->state->fb;
2208                 int clock = adjusted_mode->crtc_clock;
2209                 int htotal = adjusted_mode->crtc_htotal;
2210                 int hdisplay = crtc->config->pipe_src_w;
2211                 int cpp = fb->format->cpp[0];
2212                 int entries;
2213
2214                 entries = intel_wm_method2(clock, htotal,
2215                                            hdisplay, cpp, sr_latency_ns / 100);
2216                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2217                 srwm = I965_FIFO_SIZE - entries;
2218                 if (srwm < 0)
2219                         srwm = 1;
2220                 srwm &= 0x1ff;
2221                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2222                               entries, srwm);
2223
2224                 entries = intel_wm_method2(clock, htotal,
2225                                            crtc->base.cursor->state->crtc_w, 4,
2226                                            sr_latency_ns / 100);
2227                 entries = DIV_ROUND_UP(entries,
2228                                        i965_cursor_wm_info.cacheline_size) +
2229                         i965_cursor_wm_info.guard_size;
2230
2231                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2232                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2233                         cursor_sr = i965_cursor_wm_info.max_wm;
2234
2235                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2236                               "cursor %d\n", srwm, cursor_sr);
2237
2238                 cxsr_enabled = true;
2239         } else {
2240                 cxsr_enabled = false;
2241                 /* Turn off self refresh if both pipes are enabled */
2242                 intel_set_memory_cxsr(dev_priv, false);
2243         }
2244
2245         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2246                       srwm);
2247
2248         /* 965 has limitations... */
2249         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2250                    FW_WM(8, CURSORB) |
2251                    FW_WM(8, PLANEB) |
2252                    FW_WM(8, PLANEA));
2253         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2254                    FW_WM(8, PLANEC_OLD));
2255         /* update cursor SR watermark */
2256         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2257
2258         if (cxsr_enabled)
2259                 intel_set_memory_cxsr(dev_priv, true);
2260 }
2261
2262 #undef FW_WM
2263
2264 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2265 {
2266         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2267         const struct intel_watermark_params *wm_info;
2268         u32 fwater_lo;
2269         u32 fwater_hi;
2270         int cwm, srwm = 1;
2271         int fifo_size;
2272         int planea_wm, planeb_wm;
2273         struct intel_crtc *crtc, *enabled = NULL;
2274
2275         if (IS_I945GM(dev_priv))
2276                 wm_info = &i945_wm_info;
2277         else if (!IS_GEN(dev_priv, 2))
2278                 wm_info = &i915_wm_info;
2279         else
2280                 wm_info = &i830_a_wm_info;
2281
2282         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2283         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2284         if (intel_crtc_active(crtc)) {
2285                 const struct drm_display_mode *adjusted_mode =
2286                         &crtc->config->base.adjusted_mode;
2287                 const struct drm_framebuffer *fb =
2288                         crtc->base.primary->state->fb;
2289                 int cpp;
2290
2291                 if (IS_GEN(dev_priv, 2))
2292                         cpp = 4;
2293                 else
2294                         cpp = fb->format->cpp[0];
2295
2296                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2297                                                wm_info, fifo_size, cpp,
2298                                                pessimal_latency_ns);
2299                 enabled = crtc;
2300         } else {
2301                 planea_wm = fifo_size - wm_info->guard_size;
2302                 if (planea_wm > (long)wm_info->max_wm)
2303                         planea_wm = wm_info->max_wm;
2304         }
2305
2306         if (IS_GEN(dev_priv, 2))
2307                 wm_info = &i830_bc_wm_info;
2308
2309         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2310         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2311         if (intel_crtc_active(crtc)) {
2312                 const struct drm_display_mode *adjusted_mode =
2313                         &crtc->config->base.adjusted_mode;
2314                 const struct drm_framebuffer *fb =
2315                         crtc->base.primary->state->fb;
2316                 int cpp;
2317
2318                 if (IS_GEN(dev_priv, 2))
2319                         cpp = 4;
2320                 else
2321                         cpp = fb->format->cpp[0];
2322
2323                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2324                                                wm_info, fifo_size, cpp,
2325                                                pessimal_latency_ns);
2326                 if (enabled == NULL)
2327                         enabled = crtc;
2328                 else
2329                         enabled = NULL;
2330         } else {
2331                 planeb_wm = fifo_size - wm_info->guard_size;
2332                 if (planeb_wm > (long)wm_info->max_wm)
2333                         planeb_wm = wm_info->max_wm;
2334         }
2335
2336         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2337
2338         if (IS_I915GM(dev_priv) && enabled) {
2339                 struct drm_i915_gem_object *obj;
2340
2341                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2342
2343                 /* self-refresh seems busted with untiled */
2344                 if (!i915_gem_object_is_tiled(obj))
2345                         enabled = NULL;
2346         }
2347
2348         /*
2349          * Overlay gets an aggressive default since video jitter is bad.
2350          */
2351         cwm = 2;
2352
2353         /* Play safe and disable self-refresh before adjusting watermarks. */
2354         intel_set_memory_cxsr(dev_priv, false);
2355
2356         /* Calc SR entries for single-plane configs */
2357         if (HAS_FW_BLC(dev_priv) && enabled) {
2358                 /* self-refresh has much higher latency */
2359                 static const int sr_latency_ns = 6000;
2360                 const struct drm_display_mode *adjusted_mode =
2361                         &enabled->config->base.adjusted_mode;
2362                 const struct drm_framebuffer *fb =
2363                         enabled->base.primary->state->fb;
2364                 int clock = adjusted_mode->crtc_clock;
2365                 int htotal = adjusted_mode->crtc_htotal;
2366                 int hdisplay = enabled->config->pipe_src_w;
2367                 int cpp;
2368                 int entries;
2369
2370                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2371                         cpp = 4;
2372                 else
2373                         cpp = fb->format->cpp[0];
2374
2375                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2376                                            sr_latency_ns / 100);
2377                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2378                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2379                 srwm = wm_info->fifo_size - entries;
2380                 if (srwm < 0)
2381                         srwm = 1;
2382
2383                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2384                         I915_WRITE(FW_BLC_SELF,
2385                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2386                 else
2387                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2388         }
2389
2390         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2391                       planea_wm, planeb_wm, cwm, srwm);
2392
2393         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2394         fwater_hi = (cwm & 0x1f);
2395
2396         /* Set request length to 8 cachelines per fetch */
2397         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2398         fwater_hi = fwater_hi | (1 << 8);
2399
2400         I915_WRITE(FW_BLC, fwater_lo);
2401         I915_WRITE(FW_BLC2, fwater_hi);
2402
2403         if (enabled)
2404                 intel_set_memory_cxsr(dev_priv, true);
2405 }
2406
2407 static void i845_update_wm(struct intel_crtc *unused_crtc)
2408 {
2409         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2410         struct intel_crtc *crtc;
2411         const struct drm_display_mode *adjusted_mode;
2412         u32 fwater_lo;
2413         int planea_wm;
2414
2415         crtc = single_enabled_crtc(dev_priv);
2416         if (crtc == NULL)
2417                 return;
2418
2419         adjusted_mode = &crtc->config->base.adjusted_mode;
2420         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2421                                        &i845_wm_info,
2422                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2423                                        4, pessimal_latency_ns);
2424         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2425         fwater_lo |= (3<<8) | planea_wm;
2426
2427         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2428
2429         I915_WRITE(FW_BLC, fwater_lo);
2430 }
2431
2432 /* latency must be in 0.1us units. */
2433 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2434                                    unsigned int cpp,
2435                                    unsigned int latency)
2436 {
2437         unsigned int ret;
2438
2439         ret = intel_wm_method1(pixel_rate, cpp, latency);
2440         ret = DIV_ROUND_UP(ret, 64) + 2;
2441
2442         return ret;
2443 }
2444
2445 /* latency must be in 0.1us units. */
2446 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2447                                    unsigned int htotal,
2448                                    unsigned int width,
2449                                    unsigned int cpp,
2450                                    unsigned int latency)
2451 {
2452         unsigned int ret;
2453
2454         ret = intel_wm_method2(pixel_rate, htotal,
2455                                width, cpp, latency);
2456         ret = DIV_ROUND_UP(ret, 64) + 2;
2457
2458         return ret;
2459 }
2460
2461 static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
2462 {
2463         /*
2464          * Neither of these should be possible since this function shouldn't be
2465          * called if the CRTC is off or the plane is invisible.  But let's be
2466          * extra paranoid to avoid a potential divide-by-zero if we screw up
2467          * elsewhere in the driver.
2468          */
2469         if (WARN_ON(!cpp))
2470                 return 0;
2471         if (WARN_ON(!horiz_pixels))
2472                 return 0;
2473
2474         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2475 }
2476
2477 struct ilk_wm_maximums {
2478         u16 pri;
2479         u16 spr;
2480         u16 cur;
2481         u16 fbc;
2482 };
2483
2484 /*
2485  * For both WM_PIPE and WM_LP.
2486  * mem_value must be in 0.1us units.
2487  */
2488 static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2489                               const struct intel_plane_state *pstate,
2490                               u32 mem_value, bool is_lp)
2491 {
2492         u32 method1, method2;
2493         int cpp;
2494
2495         if (mem_value == 0)
2496                 return U32_MAX;
2497
2498         if (!intel_wm_plane_visible(cstate, pstate))
2499                 return 0;
2500
2501         cpp = pstate->base.fb->format->cpp[0];
2502
2503         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2504
2505         if (!is_lp)
2506                 return method1;
2507
2508         method2 = ilk_wm_method2(cstate->pixel_rate,
2509                                  cstate->base.adjusted_mode.crtc_htotal,
2510                                  drm_rect_width(&pstate->base.dst),
2511                                  cpp, mem_value);
2512
2513         return min(method1, method2);
2514 }
2515
2516 /*
2517  * For both WM_PIPE and WM_LP.
2518  * mem_value must be in 0.1us units.
2519  */
2520 static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2521                               const struct intel_plane_state *pstate,
2522                               u32 mem_value)
2523 {
2524         u32 method1, method2;
2525         int cpp;
2526
2527         if (mem_value == 0)
2528                 return U32_MAX;
2529
2530         if (!intel_wm_plane_visible(cstate, pstate))
2531                 return 0;
2532
2533         cpp = pstate->base.fb->format->cpp[0];
2534
2535         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2536         method2 = ilk_wm_method2(cstate->pixel_rate,
2537                                  cstate->base.adjusted_mode.crtc_htotal,
2538                                  drm_rect_width(&pstate->base.dst),
2539                                  cpp, mem_value);
2540         return min(method1, method2);
2541 }
2542
2543 /*
2544  * For both WM_PIPE and WM_LP.
2545  * mem_value must be in 0.1us units.
2546  */
2547 static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2548                               const struct intel_plane_state *pstate,
2549                               u32 mem_value)
2550 {
2551         int cpp;
2552
2553         if (mem_value == 0)
2554                 return U32_MAX;
2555
2556         if (!intel_wm_plane_visible(cstate, pstate))
2557                 return 0;
2558
2559         cpp = pstate->base.fb->format->cpp[0];
2560
2561         return ilk_wm_method2(cstate->pixel_rate,
2562                               cstate->base.adjusted_mode.crtc_htotal,
2563                               pstate->base.crtc_w, cpp, mem_value);
2564 }
2565
2566 /* Only for WM_LP. */
2567 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2568                               const struct intel_plane_state *pstate,
2569                               u32 pri_val)
2570 {
2571         int cpp;
2572
2573         if (!intel_wm_plane_visible(cstate, pstate))
2574                 return 0;
2575
2576         cpp = pstate->base.fb->format->cpp[0];
2577
2578         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2579 }
2580
2581 static unsigned int
2582 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2583 {
2584         if (INTEL_GEN(dev_priv) >= 8)
2585                 return 3072;
2586         else if (INTEL_GEN(dev_priv) >= 7)
2587                 return 768;
2588         else
2589                 return 512;
2590 }
2591
2592 static unsigned int
2593 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2594                      int level, bool is_sprite)
2595 {
2596         if (INTEL_GEN(dev_priv) >= 8)
2597                 /* BDW primary/sprite plane watermarks */
2598                 return level == 0 ? 255 : 2047;
2599         else if (INTEL_GEN(dev_priv) >= 7)
2600                 /* IVB/HSW primary/sprite plane watermarks */
2601                 return level == 0 ? 127 : 1023;
2602         else if (!is_sprite)
2603                 /* ILK/SNB primary plane watermarks */
2604                 return level == 0 ? 127 : 511;
2605         else
2606                 /* ILK/SNB sprite plane watermarks */
2607                 return level == 0 ? 63 : 255;
2608 }
2609
2610 static unsigned int
2611 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2612 {
2613         if (INTEL_GEN(dev_priv) >= 7)
2614                 return level == 0 ? 63 : 255;
2615         else
2616                 return level == 0 ? 31 : 63;
2617 }
2618
2619 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2620 {
2621         if (INTEL_GEN(dev_priv) >= 8)
2622                 return 31;
2623         else
2624                 return 15;
2625 }
2626
2627 /* Calculate the maximum primary/sprite plane watermark */
2628 static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
2629                                      int level,
2630                                      const struct intel_wm_config *config,
2631                                      enum intel_ddb_partitioning ddb_partitioning,
2632                                      bool is_sprite)
2633 {
2634         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2635
2636         /* if sprites aren't enabled, sprites get nothing */
2637         if (is_sprite && !config->sprites_enabled)
2638                 return 0;
2639
2640         /* HSW allows LP1+ watermarks even with multiple pipes */
2641         if (level == 0 || config->num_pipes_active > 1) {
2642                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2643
2644                 /*
2645                  * For some reason the non self refresh
2646                  * FIFO size is only half of the self
2647                  * refresh FIFO size on ILK/SNB.
2648                  */
2649                 if (INTEL_GEN(dev_priv) <= 6)
2650                         fifo_size /= 2;
2651         }
2652
2653         if (config->sprites_enabled) {
2654                 /* level 0 is always calculated with 1:1 split */
2655                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2656                         if (is_sprite)
2657                                 fifo_size *= 5;
2658                         fifo_size /= 6;
2659                 } else {
2660                         fifo_size /= 2;
2661                 }
2662         }
2663
2664         /* clamp to max that the registers can hold */
2665         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2666 }
2667
2668 /* Calculate the maximum cursor plane watermark */
2669 static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
2670                                       int level,
2671                                       const struct intel_wm_config *config)
2672 {
2673         /* HSW LP1+ watermarks w/ multiple pipes */
2674         if (level > 0 && config->num_pipes_active > 1)
2675                 return 64;
2676
2677         /* otherwise just report max that registers can hold */
2678         return ilk_cursor_wm_reg_max(dev_priv, level);
2679 }
2680
2681 static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
2682                                     int level,
2683                                     const struct intel_wm_config *config,
2684                                     enum intel_ddb_partitioning ddb_partitioning,
2685                                     struct ilk_wm_maximums *max)
2686 {
2687         max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2688         max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2689         max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2690         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2691 }
2692
2693 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2694                                         int level,
2695                                         struct ilk_wm_maximums *max)
2696 {
2697         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2698         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2699         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2700         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2701 }
2702
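/*
 * Check one computed watermark level against the register maximums.
 * Higher levels that exceed the limits are simply disabled, while LP0
 * is clamped to the maximums instead (see the HACK note below).
 */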
2703 static bool ilk_validate_wm_level(int level,
2704                                   const struct ilk_wm_maximums *max,
2705                                   struct intel_wm_level *result)
2706 {
2707         bool ret;
2708
2709         /* already determined to be invalid? */
2710         if (!result->enable)
2711                 return false;
2712
2713         result->enable = result->pri_val <= max->pri &&
2714                          result->spr_val <= max->spr &&
2715                          result->cur_val <= max->cur;
2716
2717         ret = result->enable;
2718
2719         /*
2720          * HACK until we can pre-compute everything,
2721          * and thus fail gracefully if LP0 watermarks
2722          * are exceeded...
2723          */
2724         if (level == 0 && !result->enable) {
2725                 if (result->pri_val > max->pri)
2726                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2727                                       level, result->pri_val, max->pri);
2728                 if (result->spr_val > max->spr)
2729                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2730                                       level, result->spr_val, max->spr);
2731                 if (result->cur_val > max->cur)
2732                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2733                                       level, result->cur_val, max->cur);
2734
2735                 result->pri_val = min_t(u32, result->pri_val, max->pri);
2736                 result->spr_val = min_t(u32, result->spr_val, max->spr);
2737                 result->cur_val = min_t(u32, result->cur_val, max->cur);
2738                 result->enable = true;
2739         }
2740
2741         return ret;
2742 }
2743
2744 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2745                                  const struct intel_crtc *intel_crtc,
2746                                  int level,
2747                                  struct intel_crtc_state *cstate,
2748                                  const struct intel_plane_state *pristate,
2749                                  const struct intel_plane_state *sprstate,
2750                                  const struct intel_plane_state *curstate,
2751                                  struct intel_wm_level *result)
2752 {
2753         u16 pri_latency = dev_priv->wm.pri_latency[level];
2754         u16 spr_latency = dev_priv->wm.spr_latency[level];
2755         u16 cur_latency = dev_priv->wm.cur_latency[level];
2756
2757         /* WM1+ latency values stored in 0.5us units */
2758         if (level > 0) {
2759                 pri_latency *= 5;
2760                 spr_latency *= 5;
2761                 cur_latency *= 5;
2762         }
2763
2764         if (pristate) {
2765                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2766                                                      pri_latency, level);
2767                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2768         }
2769
2770         if (sprstate)
2771                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2772
2773         if (curstate)
2774                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2775
2776         result->enable = true;
2777 }
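
/*
 * Worked example of the 0.5us -> 0.1us conversion above (the value is
 * made up, not from any particular platform): a raw WM2 latency of 4
 * means 4 * 0.5us = 2.0us and is scaled to 4 * 5 = 20, i.e. the same
 * 0.1us units the level 0 latency already uses (cf. the ILK LP0 value
 * below, wm[0] = 7 == 700 ns).
 */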
2778
2779 static u32
2780 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2781 {
2782         const struct intel_atomic_state *intel_state =
2783                 to_intel_atomic_state(cstate->base.state);
2784         const struct drm_display_mode *adjusted_mode =
2785                 &cstate->base.adjusted_mode;
2786         u32 linetime, ips_linetime;
2787
2788         if (!cstate->base.active)
2789                 return 0;
2790         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2791                 return 0;
2792         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2793                 return 0;
2794
2795         /* The WMs are computed based on how long it takes to fill a
2796          * single row at the given clock rate, multiplied by 8.
2797          */
2798         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2799                                      adjusted_mode->crtc_clock);
2800         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2801                                          intel_state->cdclk.logical.cdclk);
2802
2803         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2804                PIPE_WM_LINETIME_TIME(linetime);
2805 }
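
/*
 * For illustration, with hypothetical 1080p-ish timings of
 * crtc_htotal = 2200 and crtc_clock = 148500 kHz one line takes about
 * 2200 / 148500 kHz ~= 14.8us, which the formula above encodes as
 * DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119 in 1/8 us units.
 * ips_linetime is the same calculation done against the logical cdclk
 * instead of the pixel clock.
 */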
2806
2807 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2808                                   u16 wm[8])
2809 {
2810         if (INTEL_GEN(dev_priv) >= 9) {
2811                 u32 val;
2812                 int ret, i;
2813                 int level, max_level = ilk_wm_max_level(dev_priv);
2814
2815                 /* read the first set of memory latencies[0:3] */
2816                 val = 0; /* data0 to be programmed to 0 for first set */
2817                 mutex_lock(&dev_priv->pcu_lock);
2818                 ret = sandybridge_pcode_read(dev_priv,
2819                                              GEN9_PCODE_READ_MEM_LATENCY,
2820                                              &val);
2821                 mutex_unlock(&dev_priv->pcu_lock);
2822
2823                 if (ret) {
2824                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2825                         return;
2826                 }
2827
2828                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2829                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2830                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2831                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2832                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2833                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2834                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2835
2836                 /* read the second set of memory latencies[4:7] */
2837                 val = 1; /* data0 to be programmed to 1 for second set */
2838                 mutex_lock(&dev_priv->pcu_lock);
2839                 ret = sandybridge_pcode_read(dev_priv,
2840                                              GEN9_PCODE_READ_MEM_LATENCY,
2841                                              &val);
2842                 mutex_unlock(&dev_priv->pcu_lock);
2843                 if (ret) {
2844                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2845                         return;
2846                 }
2847
2848                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2849                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2850                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2851                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2852                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2853                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2854                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2855
2856                 /*
2857                  * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2858                  * need to be disabled. We make sure to sanitize the values out
2859                  * of the punit to satisfy this requirement.
2860                  */
2861                 for (level = 1; level <= max_level; level++) {
2862                         if (wm[level] == 0) {
2863                                 for (i = level + 1; i <= max_level; i++)
2864                                         wm[i] = 0;
2865                                 break;
2866                         }
2867                 }
2868
2869                 /*
2870                  * WaWmMemoryReadLatency:skl+,glk
2871                  *
2872                  * punit doesn't take into account the read latency so we need
2873                  * to add 2us to the various latency levels we retrieve from the
2874                  * punit when the level 0 response data is 0us.
2875                  */
2876                 if (wm[0] == 0) {
2877                         wm[0] += 2;
2878                         for (level = 1; level <= max_level; level++) {
2879                                 if (wm[level] == 0)
2880                                         break;
2881                                 wm[level] += 2;
2882                         }
2883                 }
2884
2885                 /*
2886                  * WA Level-0 adjustment for 16GB DIMMs: SKL+
2887                  * Enable this WA to prevent any underruns. If we could not
2888                  * get the DIMM info, assume a 16GB DIMM so the WA is still
2889                  * applied and underruns are avoided.
2890                  */
2891                 if (dev_priv->dram_info.is_16gb_dimm)
2892                         wm[0] += 1;
2893
2894         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2895                 u64 sskpd = I915_READ64(MCH_SSKPD);
2896
2897                 wm[0] = (sskpd >> 56) & 0xFF;
2898                 if (wm[0] == 0)
2899                         wm[0] = sskpd & 0xF;
2900                 wm[1] = (sskpd >> 4) & 0xFF;
2901                 wm[2] = (sskpd >> 12) & 0xFF;
2902                 wm[3] = (sskpd >> 20) & 0x1FF;
2903                 wm[4] = (sskpd >> 32) & 0x1FF;
2904         } else if (INTEL_GEN(dev_priv) >= 6) {
2905                 u32 sskpd = I915_READ(MCH_SSKPD);
2906
2907                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2908                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2909                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2910                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2911         } else if (INTEL_GEN(dev_priv) >= 5) {
2912                 u32 mltr = I915_READ(MLTR_ILK);
2913
2914                 /* ILK primary LP0 latency is 700 ns */
2915                 wm[0] = 7;
2916                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2917                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2918         } else {
2919                 MISSING_CASE(INTEL_DEVID(dev_priv));
2920         }
2921 }
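
/*
 * Sketch of a GEN9 mailbox decode (the value below is made up, not
 * real pcode output): each read packs four 8-bit latencies into one
 * dword, so val = 0x0c0a0804 would give wm[0] = 4us, wm[1] = 8us,
 * wm[2] = 10us and wm[3] = 12us. If, say, wm[2] came back as 0, the
 * sanitization loop above would zero wm[3..7] as well.
 */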
2922
2923 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2924                                        u16 wm[5])
2925 {
2926         /* ILK sprite LP0 latency is 1300 ns */
2927         if (IS_GEN(dev_priv, 5))
2928                 wm[0] = 13;
2929 }
2930
2931 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2932                                        u16 wm[5])
2933 {
2934         /* ILK cursor LP0 latency is 1300 ns */
2935         if (IS_GEN(dev_priv, 5))
2936                 wm[0] = 13;
2937 }
2938
2939 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2940 {
2941         /* how many WM levels are we expecting */
2942         if (INTEL_GEN(dev_priv) >= 9)
2943                 return 7;
2944         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2945                 return 4;
2946         else if (INTEL_GEN(dev_priv) >= 6)
2947                 return 3;
2948         else
2949                 return 2;
2950 }
2951
2952 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2953                                    const char *name,
2954                                    const u16 wm[8])
2955 {
2956         int level, max_level = ilk_wm_max_level(dev_priv);
2957
2958         for (level = 0; level <= max_level; level++) {
2959                 unsigned int latency = wm[level];
2960
2961                 if (latency == 0) {
2962                         DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2963                                       name, level);
2964                         continue;
2965                 }
2966
2967                 /*
2968                  * - latencies are in us on gen9.
2969                  * - before then, WM1+ latency values are in 0.5us units
2970                  */
2971                 if (INTEL_GEN(dev_priv) >= 9)
2972                         latency *= 10;
2973                 else if (level > 0)
2974                         latency *= 5;
2975
2976                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2977                               name, level, wm[level],
2978                               latency / 10, latency % 10);
2979         }
2980 }
2981
2982 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2983                                     u16 wm[5], u16 min)
2984 {
2985         int level, max_level = ilk_wm_max_level(dev_priv);
2986
2987         if (wm[0] >= min)
2988                 return false;
2989
2990         wm[0] = max(wm[0], min);
2991         for (level = 1; level <= max_level; level++)
2992                 wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));
2993
2994         return true;
2995 }
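
/*
 * Example of the bump above with hypothetical BIOS-provided values:
 * given wm = { 5, 2, 8 } and min = 12, wm[0] is raised to 12 (1.2us
 * in 0.1us units) and the WM1+ floor is DIV_ROUND_UP(12, 5) = 3
 * (1.5us in 0.5us units), giving wm = { 12, 3, 8 }.
 */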
2996
2997 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2998 {
2999         bool changed;
3000
3001         /*
3002          * The BIOS provided WM memory latency values are often
3003          * inadequate for high resolution displays. Adjust them.
3004          */
3005         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3006                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3007                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3008
3009         if (!changed)
3010                 return;
3011
3012         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3013         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3014         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3015         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3016 }
3017
3018 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3019 {
3020         /*
3021          * On some SNB machines (Thinkpad X220 Tablet at least)
3022          * LP3 usage can cause vblank interrupts to be lost.
3023          * The DEIIR bit will go high but it looks like the CPU
3024          * never gets interrupted.
3025          *
3026          * It's not clear whether other interrupt sources could
3027          * be affected or if this is somehow limited to vblank
3028          * interrupts only. To play it safe we disable LP3
3029          * watermarks entirely.
3030          */
3031         if (dev_priv->wm.pri_latency[3] == 0 &&
3032             dev_priv->wm.spr_latency[3] == 0 &&
3033             dev_priv->wm.cur_latency[3] == 0)
3034                 return;
3035
3036         dev_priv->wm.pri_latency[3] = 0;
3037         dev_priv->wm.spr_latency[3] = 0;
3038         dev_priv->wm.cur_latency[3] = 0;
3039
3040         DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3041         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3042         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3043         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3044 }
3045
3046 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3047 {
3048         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3049
3050         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3051                sizeof(dev_priv->wm.pri_latency));
3052         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3053                sizeof(dev_priv->wm.pri_latency));
3054
3055         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3056         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3057
3058         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3059         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3060         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3061
3062         if (IS_GEN(dev_priv, 6)) {
3063                 snb_wm_latency_quirk(dev_priv);
3064                 snb_wm_lp3_irq_quirk(dev_priv);
3065         }
3066 }
3067
3068 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3069 {
3070         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3071         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3072 }
3073
3074 static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
3075                                  struct intel_pipe_wm *pipe_wm)
3076 {
3077         /* LP0 watermark maximums depend on this pipe alone */
3078         const struct intel_wm_config config = {
3079                 .num_pipes_active = 1,
3080                 .sprites_enabled = pipe_wm->sprites_enabled,
3081                 .sprites_scaled = pipe_wm->sprites_scaled,
3082         };
3083         struct ilk_wm_maximums max;
3084
3085         /* LP0 watermarks always use 1/2 DDB partitioning */
3086         ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
3087
3088         /* At least LP0 must be valid */
3089         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3090                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3091                 return false;
3092         }
3093
3094         return true;
3095 }
3096
3097 /* Compute new watermarks for the pipe */
3098 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3099 {
3100         struct drm_atomic_state *state = cstate->base.state;
3101         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3102         struct intel_pipe_wm *pipe_wm;
3103         struct drm_device *dev = state->dev;
3104         const struct drm_i915_private *dev_priv = to_i915(dev);
3105         struct drm_plane *plane;
3106         const struct drm_plane_state *plane_state;
3107         const struct intel_plane_state *pristate = NULL;
3108         const struct intel_plane_state *sprstate = NULL;
3109         const struct intel_plane_state *curstate = NULL;
3110         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3111         struct ilk_wm_maximums max;
3112
3113         pipe_wm = &cstate->wm.ilk.optimal;
3114
3115         drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3116                 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3117
3118                 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3119                         pristate = ps;
3120                 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3121                         sprstate = ps;
3122                 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3123                         curstate = ps;
3124         }
3125
3126         pipe_wm->pipe_enabled = cstate->base.active;
3127         if (sprstate) {
3128                 pipe_wm->sprites_enabled = sprstate->base.visible;
3129                 pipe_wm->sprites_scaled = sprstate->base.visible &&
3130                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3131                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3132         }
3133
3134         usable_level = max_level;
3135
3136         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3137         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3138                 usable_level = 1;
3139
3140         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3141         if (pipe_wm->sprites_scaled)
3142                 usable_level = 0;
3143
3144         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3145         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3146                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3147
3148         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3149                 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3150
3151         if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
3152                 return -EINVAL;
3153
3154         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3155
3156         for (level = 1; level <= usable_level; level++) {
3157                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3158
3159                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3160                                      pristate, sprstate, curstate, wm);
3161
3162                 /*
3163                  * Disable any watermark level that exceeds the
3164                  * register maximums since such watermarks are
3165                  * always invalid.
3166                  */
3167                 if (!ilk_validate_wm_level(level, &max, wm)) {
3168                         memset(wm, 0, sizeof(*wm));
3169                         break;
3170                 }
3171         }
3172
3173         return 0;
3174 }
3175
3176 /*
3177  * Build a set of 'intermediate' watermark values that satisfy both the old
3178  * state and the new state.  These can be programmed to the hardware
3179  * immediately.
3180  */
3181 static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
3182 {
3183         struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
3184         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
3185         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3186         struct intel_atomic_state *intel_state =
3187                 to_intel_atomic_state(newstate->base.state);
3188         const struct intel_crtc_state *oldstate =
3189                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3190         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3191         int level, max_level = ilk_wm_max_level(dev_priv);
3192
3193         /*
3194          * Start with the final, target watermarks, then combine with the
3195          * currently active watermarks to get values that are safe both before
3196          * and after the vblank.
3197          */
3198         *a = newstate->wm.ilk.optimal;
3199         if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
3200             intel_state->skip_intermediate_wm)
3201                 return 0;
3202
3203         a->pipe_enabled |= b->pipe_enabled;
3204         a->sprites_enabled |= b->sprites_enabled;
3205         a->sprites_scaled |= b->sprites_scaled;
3206
3207         for (level = 0; level <= max_level; level++) {
3208                 struct intel_wm_level *a_wm = &a->wm[level];
3209                 const struct intel_wm_level *b_wm = &b->wm[level];
3210
3211                 a_wm->enable &= b_wm->enable;
3212                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3213                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3214                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3215                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3216         }
3217
3218         /*
3219          * We need to make sure that these merged watermark values are
3220          * actually a valid configuration themselves.  If they're not,
3221          * there's no safe way to transition from the old state to
3222          * the new state, so we need to fail the atomic transaction.
3223          */
3224         if (!ilk_validate_pipe_wm(dev_priv, a))
3225                 return -EINVAL;
3226
3227         /*
3228          * If our intermediate WM are identical to the final WM, then we can
3229          * omit the post-vblank programming; only update if it's different.
3230          */
3231         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3232                 newstate->wm.need_postvbl_update = true;
3233
3234         return 0;
3235 }
3236
3237 /*
3238  * Merge the watermarks from all active pipes for a specific level.
3239  */
3240 static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
3241                                int level,
3242                                struct intel_wm_level *ret_wm)
3243 {
3244         const struct intel_crtc *intel_crtc;
3245
3246         ret_wm->enable = true;
3247
3248         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3249                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3250                 const struct intel_wm_level *wm = &active->wm[level];
3251
3252                 if (!active->pipe_enabled)
3253                         continue;
3254
3255                 /*
3256                  * The watermark values may have been used in the past,
3257                  * so we must maintain them in the registers for some
3258                  * time even if the level is now disabled.
3259                  */
3260                 if (!wm->enable)
3261                         ret_wm->enable = false;
3262
3263                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3264                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3265                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3266                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3267         }
3268 }
3269
3270 /*
3271  * Merge all low power watermarks for all active pipes.
3272  */
3273 static void ilk_wm_merge(struct drm_i915_private *dev_priv,
3274                          const struct intel_wm_config *config,
3275                          const struct ilk_wm_maximums *max,
3276                          struct intel_pipe_wm *merged)
3277 {
3278         int level, max_level = ilk_wm_max_level(dev_priv);
3279         int last_enabled_level = max_level;
3280
3281         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3282         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3283             config->num_pipes_active > 1)
3284                 last_enabled_level = 0;
3285
3286         /* ILK: FBC WM must be disabled always */
3287         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3288
3289         /* merge each WM1+ level */
3290         for (level = 1; level <= max_level; level++) {
3291                 struct intel_wm_level *wm = &merged->wm[level];
3292
3293                 ilk_merge_wm_level(dev_priv, level, wm);
3294
3295                 if (level > last_enabled_level)
3296                         wm->enable = false;
3297                 else if (!ilk_validate_wm_level(level, max, wm))
3298                         /* make sure all following levels get disabled */
3299                         last_enabled_level = level - 1;
3300
3301                 /*
3302                  * The spec says it is preferred to disable
3303                  * FBC WMs instead of disabling a WM level.
3304                  */
3305                 if (wm->fbc_val > max->fbc) {
3306                         if (wm->enable)
3307                                 merged->fbc_wm_enabled = false;
3308                         wm->fbc_val = 0;
3309                 }
3310         }
3311
3312         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3313         /*
3314          * FIXME this is racy. FBC might get enabled later.
3315          * What we should check here is whether FBC can be
3316          * enabled sometime later.
3317          */
3318         if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
3319             intel_fbc_is_active(dev_priv)) {
3320                 for (level = 2; level <= max_level; level++) {
3321                         struct intel_wm_level *wm = &merged->wm[level];
3322
3323                         wm->enable = false;
3324                 }
3325         }
3326 }
3327
3328 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3329 {
3330         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3331         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3332 }
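
/*
 * Concretely: when wm[4] is enabled (only HSW/BDW have a level 4, cf.
 * ilk_wm_max_level()) the LP registers map as LP1 -> level 1,
 * LP2 -> level 3, LP3 -> level 4; otherwise LP1/LP2/LP3 map to
 * levels 1/2/3.
 */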
3333
3334 /* The value we need to program into the WM_LPx latency field */
3335 static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3336                                       int level)
3337 {
3338         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3339                 return 2 * level;
3340         else
3341                 return dev_priv->wm.pri_latency[level];
3342 }
3343
3344 static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
3345                                    const struct intel_pipe_wm *merged,
3346                                    enum intel_ddb_partitioning partitioning,
3347                                    struct ilk_wm_values *results)
3348 {
3349         struct intel_crtc *intel_crtc;
3350         int level, wm_lp;
3351
3352         results->enable_fbc_wm = merged->fbc_wm_enabled;
3353         results->partitioning = partitioning;
3354
3355         /* LP1+ register values */
3356         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3357                 const struct intel_wm_level *r;
3358
3359                 level = ilk_wm_lp_to_level(wm_lp, merged);
3360
3361                 r = &merged->wm[level];
3362
3363                 /*
3364                  * Maintain the watermark values even if the level is
3365                  * disabled. Doing otherwise could cause underruns.
3366                  */
3367                 results->wm_lp[wm_lp - 1] =
3368                         (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
3369                         (r->pri_val << WM1_LP_SR_SHIFT) |
3370                         r->cur_val;
3371
3372                 if (r->enable)
3373                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3374
3375                 if (INTEL_GEN(dev_priv) >= 8)
3376                         results->wm_lp[wm_lp - 1] |=
3377                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3378                 else
3379                         results->wm_lp[wm_lp - 1] |=
3380                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3381
3382                 /*
3383                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3384                  * level is disabled. Doing otherwise could cause underruns.
3385                  */
3386                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3387                         WARN_ON(wm_lp != 1);
3388                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3389                 } else
3390                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3391         }
3392
3393         /* LP0 register values */
3394         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3395                 enum pipe pipe = intel_crtc->pipe;
3396                 const struct intel_wm_level *r =
3397                         &intel_crtc->wm.active.ilk.wm[0];
3398
3399                 if (WARN_ON(!r->enable))
3400                         continue;
3401
3402                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3403
3404                 results->wm_pipe[pipe] =
3405                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3406                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3407                         r->cur_val;
3408         }
3409 }
3410
3411 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
3412  * case both are at the same level. Prefer r1 in case they're the same. */
3413 static struct intel_pipe_wm *
3414 ilk_find_best_result(struct drm_i915_private *dev_priv,
3415                      struct intel_pipe_wm *r1,
3416                      struct intel_pipe_wm *r2)
3417 {
3418         int level, max_level = ilk_wm_max_level(dev_priv);
3419         int level1 = 0, level2 = 0;
3420
3421         for (level = 1; level <= max_level; level++) {
3422                 if (r1->wm[level].enable)
3423                         level1 = level;
3424                 if (r2->wm[level].enable)
3425                         level2 = level;
3426         }
3427
3428         if (level1 == level2) {
3429                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3430                         return r2;
3431                 else
3432                         return r1;
3433         } else if (level1 > level2) {
3434                 return r1;
3435         } else {
3436                 return r2;
3437         }
3438 }
3439
3440 /* dirty bits used to track which watermarks need changes */
3441 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3442 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3443 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3444 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3445 #define WM_DIRTY_FBC (1 << 24)
3446 #define WM_DIRTY_DDB (1 << 25)
3447
3448 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3449                                          const struct ilk_wm_values *old,
3450                                          const struct ilk_wm_values *new)
3451 {
3452         unsigned int dirty = 0;
3453         enum pipe pipe;
3454         int wm_lp;
3455
3456         for_each_pipe(dev_priv, pipe) {
3457                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3458                         dirty |= WM_DIRTY_LINETIME(pipe);
3459                         /* Must disable LP1+ watermarks too */
3460                         dirty |= WM_DIRTY_LP_ALL;
3461                 }
3462
3463                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3464                         dirty |= WM_DIRTY_PIPE(pipe);
3465                         /* Must disable LP1+ watermarks too */
3466                         dirty |= WM_DIRTY_LP_ALL;
3467                 }
3468         }
3469
3470         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3471                 dirty |= WM_DIRTY_FBC;
3472                 /* Must disable LP1+ watermarks too */
3473                 dirty |= WM_DIRTY_LP_ALL;
3474         }
3475
3476         if (old->partitioning != new->partitioning) {
3477                 dirty |= WM_DIRTY_DDB;
3478                 /* Must disable LP1+ watermarks too */
3479                 dirty |= WM_DIRTY_LP_ALL;
3480         }
3481
3482         /* LP1+ watermarks already deemed dirty, no need to continue */
3483         if (dirty & WM_DIRTY_LP_ALL)
3484                 return dirty;
3485
3486         /* Find the lowest numbered LP1+ watermark in need of an update... */
3487         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3488                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3489                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3490                         break;
3491         }
3492
3493         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3494         for (; wm_lp <= 3; wm_lp++)
3495                 dirty |= WM_DIRTY_LP(wm_lp);
3496
3497         return dirty;
3498 }
3499
3500 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3501                                unsigned int dirty)
3502 {
3503         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3504         bool changed = false;
3505
3506         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3507                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3508                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3509                 changed = true;
3510         }
3511         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3512                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3513                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3514                 changed = true;
3515         }
3516         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3517                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3518                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3519                 changed = true;
3520         }
3521
3522         /*
3523          * Don't touch WM1S_LP_EN here.
3524          * Doing so could cause underruns.
3525          */
3526
3527         return changed;
3528 }
3529
3530 /*
3531  * The spec says we shouldn't write when we don't need to, because every write
3532  * causes WMs to be re-evaluated, expending some power.
3533  */
3534 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3535                                 struct ilk_wm_values *results)
3536 {
3537         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3538         unsigned int dirty;
3539         u32 val;
3540
3541         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3542         if (!dirty)
3543                 return;
3544
3545         _ilk_disable_lp_wm(dev_priv, dirty);
3546
3547         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3548                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3549         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3550                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3551         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3552                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3553
3554         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3555                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3556         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3557                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3558         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3559                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3560
3561         if (dirty & WM_DIRTY_DDB) {
3562                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3563                         val = I915_READ(WM_MISC);
3564                         if (results->partitioning == INTEL_DDB_PART_1_2)
3565                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3566                         else
3567                                 val |= WM_MISC_DATA_PARTITION_5_6;
3568                         I915_WRITE(WM_MISC, val);
3569                 } else {
3570                         val = I915_READ(DISP_ARB_CTL2);
3571                         if (results->partitioning == INTEL_DDB_PART_1_2)
3572                                 val &= ~DISP_DATA_PARTITION_5_6;
3573                         else
3574                                 val |= DISP_DATA_PARTITION_5_6;
3575                         I915_WRITE(DISP_ARB_CTL2, val);
3576                 }
3577         }
3578
3579         if (dirty & WM_DIRTY_FBC) {
3580                 val = I915_READ(DISP_ARB_CTL);
3581                 if (results->enable_fbc_wm)
3582                         val &= ~DISP_FBC_WM_DIS;
3583                 else
3584                         val |= DISP_FBC_WM_DIS;
3585                 I915_WRITE(DISP_ARB_CTL, val);
3586         }
3587
3588         if (dirty & WM_DIRTY_LP(1) &&
3589             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3590                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3591
3592         if (INTEL_GEN(dev_priv) >= 7) {
3593                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3594                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3595                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3596                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3597         }
3598
3599         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3600                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3601         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3602                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3603         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3604                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3605
3606         dev_priv->wm.hw = *results;
3607 }
3608
3609 bool ilk_disable_lp_wm(struct drm_device *dev)
3610 {
3611         struct drm_i915_private *dev_priv = to_i915(dev);
3612
3613         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3614 }
3615
3616 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3617 {
3618         u8 enabled_slices;
3619
3620         /* Slice 1 will always be enabled */
3621         enabled_slices = 1;
3622
3623         /* Gens prior to GEN11 have only one DBuf slice */
3624         if (INTEL_GEN(dev_priv) < 11)
3625                 return enabled_slices;
3626
3627         /*
3628          * FIXME: for now we'll only ever use 1 slice; pretend that we have
3629          * only that 1 slice enabled until we have a proper way for on-demand
3630          * toggling of the second slice.
3631          */
3632         if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3633                 enabled_slices++;
3634
3635         return enabled_slices;
3636 }
3637
3638 /*
3639  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3640  * so assume we'll always need it in order to avoid underruns.
3641  */
3642 static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
3643 {
3644         return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
3645 }
3646
3647 static bool
3648 intel_has_sagv(struct drm_i915_private *dev_priv)
3649 {
3650         return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3651                 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
3652 }
3653
3654 /*
3655  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3656  * depending on power and performance requirements. The display engine access
3657  * to system memory is blocked during the adjustment time. Because of the
3658  * blocking time, having this enabled can cause full system hangs and/or pipe
3659  * underruns if we don't meet all of the following requirements:
3660  *
3661  *  - <= 1 pipe enabled
3662  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3663  *  - We're not using an interlaced display configuration
3664  */
3665 int
3666 intel_enable_sagv(struct drm_i915_private *dev_priv)
3667 {
3668         int ret;
3669
3670         if (!intel_has_sagv(dev_priv))
3671                 return 0;
3672
3673         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3674                 return 0;
3675
3676         DRM_DEBUG_KMS("Enabling SAGV\n");
3677         mutex_lock(&dev_priv->pcu_lock);
3678
3679         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3680                                       GEN9_SAGV_ENABLE);
3681
3682         /* We don't need to wait for SAGV when enabling */
3683         mutex_unlock(&dev_priv->pcu_lock);
3684
3685         /*
3686          * Some skl systems, pre-release machines in particular,
3687          * don't actually have SAGV.
3688          */
3689         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3690                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3691                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3692                 return 0;
3693         } else if (ret < 0) {
3694                 DRM_ERROR("Failed to enable SAGV\n");
3695                 return ret;
3696         }
3697
3698         dev_priv->sagv_status = I915_SAGV_ENABLED;
3699         return 0;
3700 }
3701
3702 int
3703 intel_disable_sagv(struct drm_i915_private *dev_priv)
3704 {
3705         int ret;
3706
3707         if (!intel_has_sagv(dev_priv))
3708                 return 0;
3709
3710         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3711                 return 0;
3712
3713         DRM_DEBUG_KMS("Disabling SAGV\n");
3714         mutex_lock(&dev_priv->pcu_lock);
3715
3716         /* bspec says to keep retrying for at least 1 ms */
3717         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3718                                 GEN9_SAGV_DISABLE,
3719                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3720                                 1);
3721         mutex_unlock(&dev_priv->pcu_lock);
3722
3723         /*
3724          * Some skl systems, pre-release machines in particular,
3725          * don't actually have SAGV.
3726          */
3727         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3728                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3729                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3730                 return 0;
3731         } else if (ret < 0) {
3732                 DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
3733                 return ret;
3734         }
3735
3736         dev_priv->sagv_status = I915_SAGV_DISABLED;
3737         return 0;
3738 }
3739
3740 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3741 {
3742         struct drm_device *dev = state->dev;
3743         struct drm_i915_private *dev_priv = to_i915(dev);
3744         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3745         struct intel_crtc *crtc;
3746         struct intel_plane *plane;
3747         struct intel_crtc_state *cstate;
3748         enum pipe pipe;
3749         int level, latency;
3750         int sagv_block_time_us;
3751
3752         if (!intel_has_sagv(dev_priv))
3753                 return false;
3754
3755         if (IS_GEN(dev_priv, 9))
3756                 sagv_block_time_us = 30;
3757         else if (IS_GEN(dev_priv, 10))
3758                 sagv_block_time_us = 20;
3759         else
3760                 sagv_block_time_us = 10;
3761
3762         /*
3763          * SKL+ workaround: bspec recommends we disable SAGV when we have
3764          * more than one pipe enabled
3765          *
3766          * If there are no active CRTCs, no additional checks need be performed
3767          */
3768         if (hweight32(intel_state->active_crtcs) == 0)
3769                 return true;
3770         else if (hweight32(intel_state->active_crtcs) > 1)
3771                 return false;
3772
3773         /* Since we're now guaranteed to only have one active CRTC... */
3774         pipe = ffs(intel_state->active_crtcs) - 1;
3775         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3776         cstate = to_intel_crtc_state(crtc->base.state);
3777
3778         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3779                 return false;
3780
3781         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3782                 struct skl_plane_wm *wm =
3783                         &cstate->wm.skl.optimal.planes[plane->id];
3784
3785                 /* Skip this plane if it's not enabled */
3786                 if (!wm->wm[0].plane_en)
3787                         continue;
3788
3789                 /* Find the highest enabled wm level for this plane */
3790                 for (level = ilk_wm_max_level(dev_priv);
3791                      !wm->wm[level].plane_en; --level)
3792                      { }
3793
3794                 latency = dev_priv->wm.skl_latency[level];
3795
3796                 if (skl_needs_memory_bw_wa(dev_priv) &&
3797                     plane->base.state->fb->modifier ==
3798                     I915_FORMAT_MOD_X_TILED)
3799                         latency += 15;
3800
3801                 /*
3802                  * If any of the planes on this pipe don't enable wm levels that
3803                  * incur memory latencies higher than sagv_block_time_us we
3804                  * can't enable SAGV.
3805                  */
3806                 if (latency < sagv_block_time_us)
3807                         return false;
3808         }
3809
3810         return true;
3811 }
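
/*
 * To illustrate the check above with hypothetical latencies: on gen9
 * the SAGV block time is 30us, so a plane whose highest enabled WM
 * level has a 25us latency makes this function return false; with the
 * X-tiled memory BW WA the compared value becomes 25 + 15 = 40us,
 * which is >= 30us, so that plane alone would not block SAGV.
 */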
3812
3813 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3814                               const struct intel_crtc_state *cstate,
3815                               const u64 total_data_rate,
3816                               const int num_active,
3817                               struct skl_ddb_allocation *ddb)
3818 {
3819         const struct drm_display_mode *adjusted_mode;
3820         u64 total_data_bw;
3821         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3822
3823         WARN_ON(ddb_size == 0);
3824
3825         if (INTEL_GEN(dev_priv) < 11)
3826                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3827
3828         adjusted_mode = &cstate->base.adjusted_mode;
3829         total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
3830
3831         /*
3832          * 12GB/s is the maximum BW supported by a single DBuf slice.
3833          *
3834          * FIXME dbuf slice code is broken:
3835          * - must wait for planes to stop using the slice before powering it off
3836          * - plane straddling both slices is illegal in multi-pipe scenarios
3837          * - should validate we stay within the hw bandwidth limits
3838          */
3839         if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
3840                 ddb->enabled_slices = 2;
3841         } else {
3842                 ddb->enabled_slices = 1;
3843                 ddb_size /= 2;
3844         }
3845
3846         return ddb_size;
3847 }
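
/*
 * Rough numbers, assuming a hypothetical ddb_size of 1024 blocks (not
 * any specific platform): pre-gen11 this returns 1024 - 4 = 1020
 * blocks, while gen11+ with only one DBuf slice enabled returns
 * 1024 / 2 = 512 blocks.
 */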
3848
3849 static void
3850 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
3851                                    const struct intel_crtc_state *cstate,
3852                                    const u64 total_data_rate,
3853                                    struct skl_ddb_allocation *ddb,
3854                                    struct skl_ddb_entry *alloc, /* out */
3855                                    int *num_active /* out */)
3856 {
3857         struct drm_atomic_state *state = cstate->base.state;
3858         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3859         struct drm_crtc *for_crtc = cstate->base.crtc;
3860         const struct drm_crtc_state *crtc_state;
3861         const struct drm_crtc *crtc;
3862         u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3863         enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3864         u16 ddb_size;
3865         u32 i;
3866
3867         if (WARN_ON(!state) || !cstate->base.active) {
3868                 alloc->start = 0;
3869                 alloc->end = 0;
3870                 *num_active = hweight32(dev_priv->active_crtcs);
3871                 return;
3872         }
3873
3874         if (intel_state->active_pipe_changes)
3875                 *num_active = hweight32(intel_state->active_crtcs);
3876         else
3877                 *num_active = hweight32(dev_priv->active_crtcs);
3878
3879         ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3880                                       *num_active, ddb);
3881
3882         /*
3883          * If the state doesn't change the active CRTCs or there is no
3884          * modeset request, then there's no need to recalculate;
3885          * the existing pipe allocation limits should remain unchanged.
3886          * Note that we're safe from racing commits since any racing commit
3887          * that changes the active CRTC list or does a modeset would need to
3888          * grab _all_ crtc locks, including the one we currently hold.
3889          */
3890         if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3891                 /*
3892                  * alloc may be cleared by clear_intel_crtc_state,
3893                  * copy from old state to be sure
3894                  */
3895                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3896                 return;
3897         }
3898
3899         /*
3900          * The watermark/ddb requirement depends highly on the width of the
3901          * framebuffer, so instead of allocating DDB equally among pipes,
3902          * distribute it based on the resolution/width of each display.
3903          */
3904         for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3905                 const struct drm_display_mode *adjusted_mode;
3906                 int hdisplay, vdisplay;
3907                 enum pipe pipe;
3908
3909                 if (!crtc_state->enable)
3910                         continue;
3911
3912                 pipe = to_intel_crtc(crtc)->pipe;
3913                 adjusted_mode = &crtc_state->adjusted_mode;
3914                 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3915                 total_width += hdisplay;
3916
3917                 if (pipe < for_pipe)
3918                         width_before_pipe += hdisplay;
3919                 else if (pipe == for_pipe)
3920                         pipe_width = hdisplay;
3921         }
3922
3923         alloc->start = ddb_size * width_before_pipe / total_width;
3924         alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
3925 }
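
/*
 * Example of the width-proportional split above (hypothetical setup):
 * with two active pipes of hdisplay 1920 (pipe A) and 1280 (pipe B)
 * and a ddb_size of 512 blocks, total_width = 3200, so pipe A is
 * allocated [0, 512 * 1920 / 3200) = [0, 307) and pipe B gets
 * [307, 512 * 3200 / 3200) = [307, 512).
 */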
3926
3927 static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
3928                                  int width, const struct drm_format_info *format,
3929                                  u64 modifier, unsigned int rotation,
3930                                  u32 plane_pixel_rate, struct skl_wm_params *wp,
3931                                  int color_plane);
3932 static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
3933                                  int level,
3934                                  const struct skl_wm_params *wp,
3935                                  const struct skl_wm_level *result_prev,
3936                                  struct skl_wm_level *result /* out */);
3937
3938 static unsigned int
3939 skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
3940                       int num_active)
3941 {
3942         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
3943         int level, max_level = ilk_wm_max_level(dev_priv);
3944         struct skl_wm_level wm = {};
3945         int ret, min_ddb_alloc = 0;
3946         struct skl_wm_params wp;
3947
3948         ret = skl_compute_wm_params(crtc_state, 256,
3949                                     drm_format_info(DRM_FORMAT_ARGB8888),
3950                                     DRM_FORMAT_MOD_LINEAR,
3951                                     DRM_MODE_ROTATE_0,
3952                                     crtc_state->pixel_rate, &wp, 0);
3953         WARN_ON(ret);
3954
3955         for (level = 0; level <= max_level; level++) {
3956                 skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
3957                 if (wm.min_ddb_alloc == U16_MAX)
3958                         break;
3959
3960                 min_ddb_alloc = wm.min_ddb_alloc;
3961         }
3962
3963         return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
3964 }
3965
3966 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3967                                        struct skl_ddb_entry *entry, u32 reg)
3968 {
3969
3970         entry->start = reg & DDB_ENTRY_MASK;
3971         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;
3972
3973         if (entry->end)
3974                 entry->end += 1;
3975 }
3976
3977 static void
3978 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3979                            const enum pipe pipe,
3980                            const enum plane_id plane_id,
3981                            struct skl_ddb_entry *ddb_y,
3982                            struct skl_ddb_entry *ddb_uv)
3983 {
3984         u32 val, val2;
3985         u32 fourcc = 0;
3986
3987         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3988         if (plane_id == PLANE_CURSOR) {
3989                 val = I915_READ(CUR_BUF_CFG(pipe));
3990                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3991                 return;
3992         }
3993
3994         val = I915_READ(PLANE_CTL(pipe, plane_id));
3995
3996         /* No DDB allocated for disabled planes */
3997         if (val & PLANE_CTL_ENABLE)
3998                 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
3999                                               val & PLANE_CTL_ORDER_RGBX,
4000                                               val & PLANE_CTL_ALPHA_MASK);
4001
4002         if (INTEL_GEN(dev_priv) >= 11) {
4003                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4004                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4005         } else {
4006                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4007                 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
4008
4009                 if (is_planar_yuv_format(fourcc))
4010                         swap(val, val2);
4011
4012                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4013                 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
4014         }
4015 }
4016
4017 void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
4018                                struct skl_ddb_entry *ddb_y,
4019                                struct skl_ddb_entry *ddb_uv)
4020 {
4021         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4022         enum intel_display_power_domain power_domain;
4023         enum pipe pipe = crtc->pipe;
4024         intel_wakeref_t wakeref;
4025         enum plane_id plane_id;
4026
4027         power_domain = POWER_DOMAIN_PIPE(pipe);
4028         wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
4029         if (!wakeref)
4030                 return;
4031
4032         for_each_plane_id_on_crtc(crtc, plane_id)
4033                 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4034                                            plane_id,
4035                                            &ddb_y[plane_id],
4036                                            &ddb_uv[plane_id]);
4037
4038         intel_display_power_put(dev_priv, power_domain, wakeref);
4039 }
4040
4041 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4042                           struct skl_ddb_allocation *ddb /* out */)
4043 {
4044         ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
4045 }
4046
4047 /*
4048  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4049  * The bspec defines downscale amount as:
4050  *
4051  * """
4052  * Horizontal down scale amount = maximum[1, Horizontal source size /
4053  *                                           Horizontal destination size]
4054  * Vertical down scale amount = maximum[1, Vertical source size /
4055  *                                         Vertical destination size]
4056  * Total down scale amount = Horizontal down scale amount *
4057  *                           Vertical down scale amount
4058  * """
4059  *
4060  * Return value is provided in 16.16 fixed point form to retain fractional part.
4061  * Caller should take care of dividing & rounding off the value.
4062  */
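/*
 * For example, a 3840x2160 source fitted into a 1920x1080 destination gives
 * horizontal and vertical ratios of 2.0 each, so a total downscale amount of
 * 4.0, returned as 0x40000 in 16.16 fixed point.
 */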
4063 static uint_fixed_16_16_t
4064 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4065                            const struct intel_plane_state *pstate)
4066 {
4067         struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4068         u32 src_w, src_h, dst_w, dst_h;
4069         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4070         uint_fixed_16_16_t downscale_h, downscale_w;
4071
4072         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4073                 return u32_to_fixed16(0);
4074
4075         /* n.b., src is 16.16 fixed point, dst is whole integer */
4076         if (plane->id == PLANE_CURSOR) {
4077                 /*
4078                  * Cursors only support 0/180 degree rotation,
4079                  * hence no need to account for rotation here.
4080                  */
4081                 src_w = pstate->base.src_w >> 16;
4082                 src_h = pstate->base.src_h >> 16;
4083                 dst_w = pstate->base.crtc_w;
4084                 dst_h = pstate->base.crtc_h;
4085         } else {
4086                 /*
4087                  * Src coordinates are already rotated by 270 degrees for
4088                  * the 90/270 degree plane rotation cases (to match the
4089                  * GTT mapping), hence no need to account for rotation here.
4090                  */
4091                 src_w = drm_rect_width(&pstate->base.src) >> 16;
4092                 src_h = drm_rect_height(&pstate->base.src) >> 16;
4093                 dst_w = drm_rect_width(&pstate->base.dst);
4094                 dst_h = drm_rect_height(&pstate->base.dst);
4095         }
4096
4097         fp_w_ratio = div_fixed16(src_w, dst_w);
4098         fp_h_ratio = div_fixed16(src_h, dst_h);
4099         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4100         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4101
4102         return mul_fixed16(downscale_w, downscale_h);
4103 }
4104
4105 static uint_fixed_16_16_t
4106 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4107 {
4108         uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4109
4110         if (!crtc_state->base.enable)
4111                 return pipe_downscale;
4112
4113         if (crtc_state->pch_pfit.enabled) {
4114                 u32 src_w, src_h, dst_w, dst_h;
4115                 u32 pfit_size = crtc_state->pch_pfit.size;
4116                 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4117                 uint_fixed_16_16_t downscale_h, downscale_w;
4118
4119                 src_w = crtc_state->pipe_src_w;
4120                 src_h = crtc_state->pipe_src_h;
4121                 dst_w = pfit_size >> 16;
4122                 dst_h = pfit_size & 0xffff;
4123
4124                 if (!dst_w || !dst_h)
4125                         return pipe_downscale;
4126
4127                 fp_w_ratio = div_fixed16(src_w, dst_w);
4128                 fp_h_ratio = div_fixed16(src_h, dst_h);
4129                 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4130                 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4131
4132                 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4133         }
4134
4135         return pipe_downscale;
4136 }
4137
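/*
 * Downscaling effectively multiplies the pixel rate the pipe must sustain,
 * so reject configurations whose crtc clock exceeds the cdclk (doubled on
 * GLK and gen10+) divided by the worst-case combined plane and pipe
 * downscale amount. 64bpp planes are additionally weighted by 9/8.
 */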
4138 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4139                                   struct intel_crtc_state *cstate)
4140 {
4141         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4142         struct drm_crtc_state *crtc_state = &cstate->base;
4143         struct drm_atomic_state *state = crtc_state->state;
4144         struct drm_plane *plane;
4145         const struct drm_plane_state *pstate;
4146         struct intel_plane_state *intel_pstate;
4147         int crtc_clock, dotclk;
4148         u32 pipe_max_pixel_rate;
4149         uint_fixed_16_16_t pipe_downscale;
4150         uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4151
4152         if (!cstate->base.enable)
4153                 return 0;
4154
4155         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4156                 uint_fixed_16_16_t plane_downscale;
4157                 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4158                 int bpp;
4159
4160                 if (!intel_wm_plane_visible(cstate,
4161                                             to_intel_plane_state(pstate)))
4162                         continue;
4163
4164                 if (WARN_ON(!pstate->fb))
4165                         return -EINVAL;
4166
4167                 intel_pstate = to_intel_plane_state(pstate);
4168                 plane_downscale = skl_plane_downscale_amount(cstate,
4169                                                              intel_pstate);
4170                 bpp = pstate->fb->format->cpp[0] * 8;
4171                 if (bpp == 64)
4172                         plane_downscale = mul_fixed16(plane_downscale,
4173                                                       fp_9_div_8);
4174
4175                 max_downscale = max_fixed16(plane_downscale, max_downscale);
4176         }
4177         pipe_downscale = skl_pipe_downscale_amount(cstate);
4178
4179         pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4180
4181         crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4182         dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4183
4184         if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4185                 dotclk *= 2;
4186
4187         pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4188
4189         if (pipe_max_pixel_rate < crtc_clock) {
4190                 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4191                 return -EINVAL;
4192         }
4193
4194         return 0;
4195 }
4196
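/*
 * The relative data rate is only a weight for dividing up the DDB: visible
 * source pixels (halved in each direction for the UV plane of planar
 * formats), scaled by the downscale amount and the bytes per pixel of the
 * colour plane in question. Cursors contribute zero since they get a fixed
 * allocation of their own.
 */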
4197 static u64
4198 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4199                              const struct intel_plane_state *intel_pstate,
4200                              const int plane)
4201 {
4202         struct intel_plane *intel_plane =
4203                 to_intel_plane(intel_pstate->base.plane);
4204         u32 data_rate;
4205         u32 width = 0, height = 0;
4206         struct drm_framebuffer *fb;
4207         u32 format;
4208         uint_fixed_16_16_t down_scale_amount;
4209         u64 rate;
4210
4211         if (!intel_pstate->base.visible)
4212                 return 0;
4213
4214         fb = intel_pstate->base.fb;
4215         format = fb->format->format;
4216
4217         if (intel_plane->id == PLANE_CURSOR)
4218                 return 0;
4219         if (plane == 1 && !is_planar_yuv_format(format))
4220                 return 0;
4221
4222         /*
4223          * Src coordinates are already rotated by 270 degrees for
4224          * the 90/270 degree plane rotation cases (to match the
4225          * GTT mapping), hence no need to account for rotation here.
4226          */
4227         width = drm_rect_width(&intel_pstate->base.src) >> 16;
4228         height = drm_rect_height(&intel_pstate->base.src) >> 16;
4229
4230         /* UV plane does 1/2 pixel sub-sampling */
4231         if (plane == 1 && is_planar_yuv_format(format)) {
4232                 width /= 2;
4233                 height /= 2;
4234         }
4235
4236         data_rate = width * height;
4237
4238         down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4239
4240         rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4241
4242         rate *= fb->format->cpp[plane];
4243         return rate;
4244 }
4245
4246 static u64
4247 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4248                                  u64 *plane_data_rate,
4249                                  u64 *uv_plane_data_rate)
4250 {
4251         struct drm_crtc_state *cstate = &intel_cstate->base;
4252         struct drm_atomic_state *state = cstate->state;
4253         struct drm_plane *plane;
4254         const struct drm_plane_state *pstate;
4255         u64 total_data_rate = 0;
4256
4257         if (WARN_ON(!state))
4258                 return 0;
4259
4260         /* Calculate and cache data rate for each plane */
4261         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4262                 enum plane_id plane_id = to_intel_plane(plane)->id;
4263                 u64 rate;
4264                 const struct intel_plane_state *intel_pstate =
4265                         to_intel_plane_state(pstate);
4266
4267                 /* packed/y */
4268                 rate = skl_plane_relative_data_rate(intel_cstate,
4269                                                     intel_pstate, 0);
4270                 plane_data_rate[plane_id] = rate;
4271                 total_data_rate += rate;
4272
4273                 /* uv-plane */
4274                 rate = skl_plane_relative_data_rate(intel_cstate,
4275                                                     intel_pstate, 1);
4276                 uv_plane_data_rate[plane_id] = rate;
4277                 total_data_rate += rate;
4278         }
4279
4280         return total_data_rate;
4281 }
4282
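/*
 * On gen11+ a planar framebuffer is scanned out by two linked planes. Both
 * rates are computed from the master plane's state, because the slave may
 * not be visited by the iterator below: the Y rate is credited to the
 * linked (slave) plane's id and the UV rate stays with the master.
 */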
4283 static u64
4284 icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4285                                  u64 *plane_data_rate)
4286 {
4287         struct drm_crtc_state *cstate = &intel_cstate->base;
4288         struct drm_atomic_state *state = cstate->state;
4289         struct drm_plane *plane;
4290         const struct drm_plane_state *pstate;
4291         u64 total_data_rate = 0;
4292
4293         if (WARN_ON(!state))
4294                 return 0;
4295
4296         /* Calculate and cache data rate for each plane */
4297         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4298                 const struct intel_plane_state *intel_pstate =
4299                         to_intel_plane_state(pstate);
4300                 enum plane_id plane_id = to_intel_plane(plane)->id;
4301                 u64 rate;
4302
4303                 if (!intel_pstate->linked_plane) {
4304                         rate = skl_plane_relative_data_rate(intel_cstate,
4305                                                             intel_pstate, 0);
4306                         plane_data_rate[plane_id] = rate;
4307                         total_data_rate += rate;
4308                 } else {
4309                         enum plane_id y_plane_id;
4310
4311                         /*
4312                          * The slave plane might not be iterated by
4313                          * drm_atomic_crtc_state_for_each_plane_state(),
4314                          * and its master plane state may be NULL if we
4315                          * try get_new_plane_state(), so we always
4316                          * calculate from the master.
4317                          */
4318                         if (intel_pstate->slave)
4319                                 continue;
4320
4321                         /* Y plane rate is calculated on the slave */
4322                         rate = skl_plane_relative_data_rate(intel_cstate,
4323                                                             intel_pstate, 0);
4324                         y_plane_id = intel_pstate->linked_plane->id;
4325                         plane_data_rate[y_plane_id] = rate;
4326                         total_data_rate += rate;
4327
4328                         rate = skl_plane_relative_data_rate(intel_cstate,
4329                                                             intel_pstate, 1);
4330                         plane_data_rate[plane_id] = rate;
4331                         total_data_rate += rate;
4332                 }
4333         }
4334
4335         return total_data_rate;
4336 }
4337
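/*
 * DDB allocation in outline: reserve a fixed chunk for the cursor, find the
 * highest watermark level whose minimum block requirements all fit in this
 * pipe's share of the DDB, give each plane its minimum for that level plus
 * a slice of the leftover blocks proportional to its data rate, and finally
 * disable any levels (and transition watermarks) that still don't fit.
 */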
4338 static int
4339 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4340                       struct skl_ddb_allocation *ddb /* out */)
4341 {
4342         struct drm_atomic_state *state = cstate->base.state;
4343         struct drm_crtc *crtc = cstate->base.crtc;
4344         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
4345         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4346         struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4347         u16 alloc_size, start = 0;
4348         u16 total[I915_MAX_PLANES] = {};
4349         u16 uv_total[I915_MAX_PLANES] = {};
4350         u64 total_data_rate;
4351         enum plane_id plane_id;
4352         int num_active;
4353         u64 plane_data_rate[I915_MAX_PLANES] = {};
4354         u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
4355         u32 blocks;
4356         int level;
4357
4358         /* Clear the partitioning for disabled planes. */
4359         memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
4360         memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
4361
4362         if (WARN_ON(!state))
4363                 return 0;
4364
4365         if (!cstate->base.active) {
4366                 alloc->start = alloc->end = 0;
4367                 return 0;
4368         }
4369
4370         if (INTEL_GEN(dev_priv) < 11)
4371                 total_data_rate =
4372                         skl_get_total_relative_data_rate(cstate,
4373                                                          plane_data_rate,
4374                                                          uv_plane_data_rate);
4375         else
4376                 total_data_rate =
4377                         icl_get_total_relative_data_rate(cstate,
4378                                                          plane_data_rate);
4379
4380         skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
4381                                            ddb, alloc, &num_active);
4382         alloc_size = skl_ddb_entry_size(alloc);
4383         if (alloc_size == 0)
4384                 return 0;
4385
4386         /* Allocate fixed number of blocks for cursor. */
4387         total[PLANE_CURSOR] = skl_cursor_allocation(cstate, num_active);
4388         alloc_size -= total[PLANE_CURSOR];
4389         cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4390                 alloc->end - total[PLANE_CURSOR];
4391         cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4392
4393         if (total_data_rate == 0)
4394                 return 0;
4395
4396         /*
4397          * Find the highest watermark level for which we can satisfy the block
4398          * requirement of active planes.
4399          */
4400         for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
4401                 blocks = 0;
4402                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4403                         const struct skl_plane_wm *wm =
4404                                 &cstate->wm.skl.optimal.planes[plane_id];
4405
4406                         if (plane_id == PLANE_CURSOR) {
4407                                 if (WARN_ON(wm->wm[level].min_ddb_alloc >
4408                                             total[PLANE_CURSOR])) {
4409                                         blocks = U32_MAX;
4410                                         break;
4411                                 }
4412                                 continue;
4413                         }
4414
4415                         blocks += wm->wm[level].min_ddb_alloc;
4416                         blocks += wm->uv_wm[level].min_ddb_alloc;
4417                 }
4418
4419                 if (blocks <= alloc_size) {
4420                         alloc_size -= blocks;
4421                         break;
4422                 }
4423         }
4424
4425         if (level < 0) {
4426                 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations\n");
4427                 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4428                               alloc_size);
4429                 return -EINVAL;
4430         }
4431
4432         /*
4433          * Grant each plane the blocks it requires at the highest achievable
4434          * watermark level, plus an extra share of the leftover blocks
4435          * proportional to its relative data rate.
4436          */
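	/*
	 * For instance, with 100 spare blocks and a plane carrying 30% of the
	 * remaining data rate, that plane receives its level minimum plus
	 * DIV64_U64_ROUND_UP(100 * rate, total_data_rate) = 30 extra blocks;
	 * the spare pool and the remaining data rate then shrink before the
	 * next plane is considered.
	 */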
4437         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4438                 const struct skl_plane_wm *wm =
4439                         &cstate->wm.skl.optimal.planes[plane_id];
4440                 u64 rate;
4441                 u16 extra;
4442
4443                 if (plane_id == PLANE_CURSOR)
4444                         continue;
4445
4446                 /*
4447                  * We've accounted for all active planes; remaining planes are
4448                  * all disabled.
4449                  */
4450                 if (total_data_rate == 0)
4451                         break;
4452
4453                 rate = plane_data_rate[plane_id];
4454                 extra = min_t(u16, alloc_size,
4455                               DIV64_U64_ROUND_UP(alloc_size * rate,
4456                                                  total_data_rate));
4457                 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
4458                 alloc_size -= extra;
4459                 total_data_rate -= rate;
4460
4461                 if (total_data_rate == 0)
4462                         break;
4463
4464                 rate = uv_plane_data_rate[plane_id];
4465                 extra = min_t(u16, alloc_size,
4466                               DIV64_U64_ROUND_UP(alloc_size * rate,
4467                                                  total_data_rate));
4468                 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
4469                 alloc_size -= extra;
4470                 total_data_rate -= rate;
4471         }
4472         WARN_ON(alloc_size != 0 || total_data_rate != 0);
4473
4474         /* Set the actual DDB start/end points for each plane */
4475         start = alloc->start;
4476         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4477                 struct skl_ddb_entry *plane_alloc =
4478                         &cstate->wm.skl.plane_ddb_y[plane_id];
4479                 struct skl_ddb_entry *uv_plane_alloc =
4480                         &cstate->wm.skl.plane_ddb_uv[plane_id];
4481
4482                 if (plane_id == PLANE_CURSOR)
4483                         continue;
4484
4485                 /* Gen11+ uses a separate plane for UV watermarks */
4486                 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4487
4488                 /* Leave disabled planes at (0,0) */
4489                 if (total[plane_id]) {
4490                         plane_alloc->start = start;
4491                         start += total[plane_id];
4492                         plane_alloc->end = start;
4493                 }
4494
4495                 if (uv_total[plane_id]) {
4496                         uv_plane_alloc->start = start;
4497                         start += uv_total[plane_id];
4498                         uv_plane_alloc->end = start;
4499                 }
4500         }
4501
4502         /*
4503          * When we calculated watermark values we didn't know how high
4504          * of a level we'd actually be able to hit, so we just marked
4505          * all levels as "enabled."  Go back now and disable the ones
4506          * that aren't actually possible.
4507          */
4508         for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4509                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4510                         struct skl_plane_wm *wm =
4511                                 &cstate->wm.skl.optimal.planes[plane_id];
4512
4513                         /*
4514                          * We only disable the watermarks for each plane if
4515                          * they exceed the ddb allocation of said plane. This
4516                          * is done so that we don't end up touching cursor
4517                          * watermarks needlessly when some other plane reduces
4518                          * our max possible watermark level.
4519                          *
4520                          * Bspec has this to say about the PLANE_WM enable bit:
4521                          * "All the watermarks at this level for all enabled
4522                          *  planes must be enabled before the level will be used."
4523                          * So this is actually safe to do.
4524                          */
4525                         if (wm->wm[level].min_ddb_alloc > total[plane_id] ||
4526                             wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id])
4527                                 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4528
4529                         /*
4530                          * Wa_1408961008:icl
4531                          * Underruns with WM1+ disabled
4532                          */
4533                         if (IS_ICELAKE(dev_priv) &&
4534                             level == 1 && wm->wm[0].plane_en) {
4535                                 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
4536                                 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4537                                 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
4538                         }
4539                 }
4540         }
4541
4542         /*
4543          * Go back and disable the transition watermark if it turns out we
4544          * don't have enough DDB blocks for it.
4545          */
4546         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4547                 struct skl_plane_wm *wm =
4548                         &cstate->wm.skl.optimal.planes[plane_id];
4549
4550                 if (wm->trans_wm.plane_res_b >= total[plane_id])
4551                         memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
4552         }
4553
4554         return 0;
4555 }
4556
4557 /*
4558  * The max latency should be 257 (max the punit can code is 255 and we add 2us
4559  * for the read latency) and cpp should always be <= 8, so that
4560  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4561  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4562 */
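/*
 * Concretely: wm_intermediate_val is a u32, so latency * pixel_rate * cpp
 * must stay below 2^32 (~4.29e9). With the worst-case latency of 257 and
 * cpp of 8 that leaves room for a pixel rate value of about 4.29e9 / 2056
 * ~= 2.09e6, i.e. roughly 2 GHz assuming the pixel rate is tracked in kHz,
 * which is what the note above alludes to.
 */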
4563 static uint_fixed_16_16_t
4564 skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4565                u8 cpp, u32 latency, u32 dbuf_block_size)
4566 {
4567         u32 wm_intermediate_val;
4568         uint_fixed_16_16_t ret;
4569
4570         if (latency == 0)
4571                 return FP_16_16_MAX;
4572
4573         wm_intermediate_val = latency * pixel_rate * cpp;
4574         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4575
4576         if (INTEL_GEN(dev_priv) >= 10)
4577                 ret = add_fixed16_u32(ret, 1);
4578
4579         return ret;
4580 }
4581
4582 static uint_fixed_16_16_t
4583 skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4584                uint_fixed_16_16_t plane_blocks_per_line)
4585 {
4586         u32 wm_intermediate_val;
4587         uint_fixed_16_16_t ret;
4588
4589         if (latency == 0)
4590                 return FP_16_16_MAX;
4591
4592         wm_intermediate_val = latency * pixel_rate;
4593         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4594                                            pipe_htotal * 1000);
4595         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4596         return ret;
4597 }
4598
4599 static uint_fixed_16_16_t
4600 intel_get_linetime_us(const struct intel_crtc_state *cstate)
4601 {
4602         u32 pixel_rate;
4603         u32 crtc_htotal;
4604         uint_fixed_16_16_t linetime_us;
4605
4606         if (!cstate->base.active)
4607                 return u32_to_fixed16(0);
4608
4609         pixel_rate = cstate->pixel_rate;
4610
4611         if (WARN_ON(pixel_rate == 0))
4612                 return u32_to_fixed16(0);
4613
4614         crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4615         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4616
4617         return linetime_us;
4618 }
4619
4620 static u32
4621 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4622                               const struct intel_plane_state *pstate)
4623 {
4624         u64 adjusted_pixel_rate;
4625         uint_fixed_16_16_t downscale_amount;
4626
4627         /* Shouldn't reach here on disabled planes... */
4628         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4629                 return 0;
4630
4631         /*
4632          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4633          * with additional adjustments for plane-specific scaling.
4634          */
4635         adjusted_pixel_rate = cstate->pixel_rate;
4636         downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4637
4638         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4639                                             downscale_amount);
4640 }
4641
4642 static int
4643 skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
4644                       int width, const struct drm_format_info *format,
4645                       u64 modifier, unsigned int rotation,
4646                       u32 plane_pixel_rate, struct skl_wm_params *wp,
4647                       int color_plane)
4648 {
4649         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
4650         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4651         u32 interm_pbpl;
4652
4653         /* only planar formats have two planes */
4654         if (color_plane == 1 && !is_planar_yuv_format(format->format)) {
4655                 DRM_DEBUG_KMS("Non-planar formats have a single plane\n");
4656                 return -EINVAL;
4657         }
4658
4659         wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
4660                       modifier == I915_FORMAT_MOD_Yf_TILED ||
4661                       modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4662                       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4663         wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
4664         wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4665                          modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4666         wp->is_planar = is_planar_yuv_format(format->format);
4667
4668         wp->width = width;
4669         if (color_plane == 1 && wp->is_planar)
4670                 wp->width /= 2;
4671
4672         wp->cpp = format->cpp[color_plane];
4673         wp->plane_pixel_rate = plane_pixel_rate;
4674
4675         if (INTEL_GEN(dev_priv) >= 11 &&
4676             modifier == I915_FORMAT_MOD_Yf_TILED  && wp->cpp == 1)
4677                 wp->dbuf_block_size = 256;
4678         else
4679                 wp->dbuf_block_size = 512;
4680
4681         if (drm_rotation_90_or_270(rotation)) {
4682                 switch (wp->cpp) {
4683                 case 1:
4684                         wp->y_min_scanlines = 16;
4685                         break;
4686                 case 2:
4687                         wp->y_min_scanlines = 8;
4688                         break;
4689                 case 4:
4690                         wp->y_min_scanlines = 4;
4691                         break;
4692                 default:
4693                         MISSING_CASE(wp->cpp);
4694                         return -EINVAL;
4695                 }
4696         } else {
4697                 wp->y_min_scanlines = 4;
4698         }
4699
4700         if (skl_needs_memory_bw_wa(dev_priv))
4701                 wp->y_min_scanlines *= 2;
4702
4703         wp->plane_bytes_per_line = wp->width * wp->cpp;
4704         if (wp->y_tiled) {
4705                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4706                                            wp->y_min_scanlines,
4707                                            wp->dbuf_block_size);
4708
4709                 if (INTEL_GEN(dev_priv) >= 10)
4710                         interm_pbpl++;
4711
4712                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4713                                                         wp->y_min_scanlines);
4714         } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
4715                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4716                                            wp->dbuf_block_size);
4717                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4718         } else {
4719                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4720                                            wp->dbuf_block_size) + 1;
4721                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4722         }
4723
4724         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4725                                              wp->plane_blocks_per_line);
4726
4727         wp->linetime_us = fixed16_to_u32_round_up(
4728                                         intel_get_linetime_us(crtc_state));
4729
4730         return 0;
4731 }
4732
4733 static int
4734 skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
4735                             const struct intel_plane_state *plane_state,
4736                             struct skl_wm_params *wp, int color_plane)
4737 {
4738         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4739         const struct drm_framebuffer *fb = plane_state->base.fb;
4740         int width;
4741
4742         if (plane->id == PLANE_CURSOR) {
4743                 width = plane_state->base.crtc_w;
4744         } else {
4745                 /*
4746                  * Src coordinates are already rotated by 270 degrees for
4747                  * the 90/270 degree plane rotation cases (to match the
4748                  * GTT mapping), hence no need to account for rotation here.
4749                  */
4750                 width = drm_rect_width(&plane_state->base.src) >> 16;
4751         }
4752
4753         return skl_compute_wm_params(crtc_state, width,
4754                                      fb->format, fb->modifier,
4755                                      plane_state->base.rotation,
4756                                      skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
4757                                      wp, color_plane);
4758 }
4759
4760 static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4761 {
4762         if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4763                 return true;
4764
4765         /* The number of lines is ignored for the level 0 watermark. */
4766         return level > 0;
4767 }
4768
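/*
 * Two candidate results are computed per level: method 1 converts the
 * latency directly into blocks (latency * pixel rate * cpp / block size),
 * while method 2 first converts it into whole lines and then into blocks
 * via plane_blocks_per_line. Y-tiled surfaces must additionally cover at
 * least y_tile_minimum blocks, which is why they take the max of method 2
 * and that minimum below.
 */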
4769 static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
4770                                  int level,
4771                                  const struct skl_wm_params *wp,
4772                                  const struct skl_wm_level *result_prev,
4773                                  struct skl_wm_level *result /* out */)
4774 {
4775         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4776         u32 latency = dev_priv->wm.skl_latency[level];
4777         uint_fixed_16_16_t method1, method2;
4778         uint_fixed_16_16_t selected_result;
4779         u32 res_blocks, res_lines, min_ddb_alloc = 0;
4780
4781         if (latency == 0) {
4782                 /* reject it */
4783                 result->min_ddb_alloc = U16_MAX;
4784                 return;
4785         }
4786
4787         /* Display WA #1141: kbl,cfl */
4788         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4789             IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4790             dev_priv->ipc_enabled)
4791                 latency += 4;
4792
4793         if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
4794                 latency += 15;
4795
4796         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4797                                  wp->cpp, latency, wp->dbuf_block_size);
4798         method2 = skl_wm_method2(wp->plane_pixel_rate,
4799                                  cstate->base.adjusted_mode.crtc_htotal,
4800                                  latency,
4801                                  wp->plane_blocks_per_line);
4802
4803         if (wp->y_tiled) {
4804                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4805         } else {
4806                 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4807                      wp->dbuf_block_size < 1) &&
4808                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
4809                         selected_result = method2;
4810                 } else if (latency >= wp->linetime_us) {
4811                         if (IS_GEN(dev_priv, 9) &&
4812                             !IS_GEMINILAKE(dev_priv))
4813                                 selected_result = min_fixed16(method1, method2);
4814                         else
4815                                 selected_result = method2;
4816                 } else {
4817                         selected_result = method1;
4818                 }
4819         }
4820
4821         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4822         res_lines = div_round_up_fixed16(selected_result,
4823                                          wp->plane_blocks_per_line);
4824
4825         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4826                 /* Display WA #1125: skl,bxt,kbl */
4827                 if (level == 0 && wp->rc_surface)
4828                         res_blocks +=
4829                                 fixed16_to_u32_round_up(wp->y_tile_minimum);
4830
4831                 /* Display WA #1126: skl,bxt,kbl */
4832                 if (level >= 1 && level <= 7) {
4833                         if (wp->y_tiled) {
4834                                 res_blocks +=
4835                                     fixed16_to_u32_round_up(wp->y_tile_minimum);
4836                                 res_lines += wp->y_min_scanlines;
4837                         } else {
4838                                 res_blocks++;
4839                         }
4840
4841                         /*
4842                          * Make sure the result blocks for higher latency levels are
4843                          * at least as high as the level below the current one.
4844                          * The DDB algorithm optimization for special cases relies on
4845                          * this assumption. Also covers Display WA #1125 for RC.
4846                          */
4847                         if (result_prev->plane_res_b > res_blocks)
4848                                 res_blocks = result_prev->plane_res_b;
4849                 }
4850         }
4851
4852         if (INTEL_GEN(dev_priv) >= 11) {
4853                 if (wp->y_tiled) {
4854                         int extra_lines;
4855
4856                         if (res_lines % wp->y_min_scanlines == 0)
4857                                 extra_lines = wp->y_min_scanlines;
4858                         else
4859                                 extra_lines = wp->y_min_scanlines * 2 -
4860                                         res_lines % wp->y_min_scanlines;
4861
4862                         min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
4863                                                                  wp->plane_blocks_per_line);
4864                 } else {
4865                         min_ddb_alloc = res_blocks +
4866                                 DIV_ROUND_UP(res_blocks, 10);
4867                 }
4868         }
4869
4870         if (!skl_wm_has_lines(dev_priv, level))
4871                 res_lines = 0;
4872
4873         if (res_lines > 31) {
4874                 /* reject it */
4875                 result->min_ddb_alloc = U16_MAX;
4876                 return;
4877         }
4878
4879         /*
4880          * If res_lines is valid, assume we can use this watermark level
4881          * for now.  We'll come back and disable it after we calculate the
4882          * DDB allocation if it turns out we don't actually have enough
4883          * blocks to satisfy it.
4884          */
4885         result->plane_res_b = res_blocks;
4886         result->plane_res_l = res_lines;
4887         /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4888         result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
4889         result->plane_en = true;
4890 }
4891
4892 static void
4893 skl_compute_wm_levels(const struct intel_crtc_state *cstate,
4894                       const struct skl_wm_params *wm_params,
4895                       struct skl_wm_level *levels)
4896 {
4897         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4898         int level, max_level = ilk_wm_max_level(dev_priv);
4899         struct skl_wm_level *result_prev = &levels[0];
4900
4901         for (level = 0; level <= max_level; level++) {
4902                 struct skl_wm_level *result = &levels[level];
4903
4904                 skl_compute_plane_wm(cstate, level, wm_params,
4905                                      result_prev, result);
4906
4907                 result_prev = result;
4908         }
4909 }
4910
4911 static u32
4912 skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
4913 {
4914         struct drm_atomic_state *state = cstate->base.state;
4915         struct drm_i915_private *dev_priv = to_i915(state->dev);
4916         uint_fixed_16_16_t linetime_us;
4917         u32 linetime_wm;
4918
4919         linetime_us = intel_get_linetime_us(cstate);
4920         linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4921
4922         /* Display WA #1135: BXT:ALL GLK:ALL */
4923         if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
4924                 linetime_wm /= 2;
4925
4926         return linetime_wm;
4927 }
4928
4929 static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
4930                                       const struct skl_wm_params *wp,
4931                                       struct skl_plane_wm *wm)
4932 {
4933         struct drm_device *dev = cstate->base.crtc->dev;
4934         const struct drm_i915_private *dev_priv = to_i915(dev);
4935         u16 trans_min, trans_y_tile_min;
4936         const u16 trans_amount = 10; /* This is a configurable amount */
4937         u16 wm0_sel_res_b, trans_offset_b, res_blocks;
4938
4939         /* Transition WMs are not recommended by the HW team for GEN9 */
4940         if (INTEL_GEN(dev_priv) <= 9)
4941                 return;
4942
4943         /* Transition WMs don't make any sense if IPC is disabled */
4944         if (!dev_priv->ipc_enabled)
4945                 return;
4946
4947         trans_min = 14;
4948         if (INTEL_GEN(dev_priv) >= 11)
4949                 trans_min = 4;
4950
4951         trans_offset_b = trans_min + trans_amount;
4952
4953         /*
4954          * The spec asks for Selected Result Blocks for wm0 (the real value),
4955          * not Result Blocks (the integer value). Pay attention to the capital
4956          * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4957          * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4958          * and since we later will have to get the ceiling of the sum in the
4959          * transition watermarks calculation, we can just pretend Selected
4960          * Result Blocks is Result Blocks minus 1 and it should work for the
4961          * current platforms.
4962          */
4963         wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
4964
4965         if (wp->y_tiled) {
4966                 trans_y_tile_min =
4967                         (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
4968                 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
4969                                 trans_offset_b;
4970         } else {
4971                 res_blocks = wm0_sel_res_b + trans_offset_b;
4972
4973                 /* WA BUG:1938466 add one block for non y-tile planes */
4974                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4975                         res_blocks += 1;
4976
4977         }
4978
4979         /*
4980          * Just assume we can enable the transition watermark.  After
4981          * computing the DDB we'll come back and disable it if that
4982          * assumption turns out to be false.
4983          */
4984         wm->trans_wm.plane_res_b = res_blocks + 1;
4985         wm->trans_wm.plane_en = true;
4986 }
4987
4988 static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
4989                                      const struct intel_plane_state *plane_state,
4990                                      enum plane_id plane_id, int color_plane)
4991 {
4992         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
4993         struct skl_wm_params wm_params;
4994         int ret;
4995
4996         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
4997                                           &wm_params, color_plane);
4998         if (ret)
4999                 return ret;
5000
5001         skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
5002         skl_compute_transition_wm(crtc_state, &wm_params, wm);
5003
5004         return 0;
5005 }
5006
5007 static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
5008                                  const struct intel_plane_state *plane_state,
5009                                  enum plane_id plane_id)
5010 {
5011         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
5012         struct skl_wm_params wm_params;
5013         int ret;
5014
5015         wm->is_planar = true;
5016
5017         /* uv plane watermarks must also be validated for NV12/Planar */
5018         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5019                                           &wm_params, 1);
5020         if (ret)
5021                 return ret;
5022
5023         skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
5024
5025         return 0;
5026 }
5027
5028 static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
5029                               const struct intel_plane_state *plane_state)
5030 {
5031         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
5032         const struct drm_framebuffer *fb = plane_state->base.fb;
5033         enum plane_id plane_id = plane->id;
5034         int ret;
5035
5036         if (!intel_wm_plane_visible(crtc_state, plane_state))
5037                 return 0;
5038
5039         ret = skl_build_plane_wm_single(crtc_state, plane_state,
5040                                         plane_id, 0);
5041         if (ret)
5042                 return ret;
5043
5044         if (fb->format->is_yuv && fb->format->num_planes > 1) {
5045                 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
5046                                             plane_id);
5047                 if (ret)
5048                         return ret;
5049         }
5050
5051         return 0;
5052 }
5053
5054 static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
5055                               const struct intel_plane_state *plane_state)
5056 {
5057         enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
5058         int ret;
5059
5060         /* Watermarks calculated in master */
5061         if (plane_state->slave)
5062                 return 0;
5063
5064         if (plane_state->linked_plane) {
5065                 const struct drm_framebuffer *fb = plane_state->base.fb;
5066                 enum plane_id y_plane_id = plane_state->linked_plane->id;
5067
5068                 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
5069                 WARN_ON(!fb->format->is_yuv ||
5070                         fb->format->num_planes == 1);
5071
5072                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5073                                                 y_plane_id, 0);
5074                 if (ret)
5075                         return ret;
5076
5077                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5078                                                 plane_id, 1);
5079                 if (ret)
5080                         return ret;
5081         } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
5082                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5083                                                 plane_id, 0);
5084                 if (ret)
5085                         return ret;
5086         }
5087
5088         return 0;
5089 }
5090
5091 static int skl_build_pipe_wm(struct intel_crtc_state *cstate)
5092 {
5093         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5094         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5095         struct drm_crtc_state *crtc_state = &cstate->base;
5096         struct drm_plane *plane;
5097         const struct drm_plane_state *pstate;
5098         int ret;
5099
5100         /*
5101          * We'll only calculate watermarks for planes that are actually
5102          * enabled, so make sure all other planes are set as disabled.
5103          */
5104         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5105
5106         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
5107                 const struct intel_plane_state *intel_pstate =
5108                                                 to_intel_plane_state(pstate);
5109
5110                 if (INTEL_GEN(dev_priv) >= 11)
5111                         ret = icl_build_plane_wm(cstate, intel_pstate);
5112                 else
5113                         ret = skl_build_plane_wm(cstate, intel_pstate);
5114                 if (ret)
5115                         return ret;
5116         }
5117
5118         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
5119
5120         return 0;
5121 }
5122
5123 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5124                                 i915_reg_t reg,
5125                                 const struct skl_ddb_entry *entry)
5126 {
5127         if (entry->end)
5128                 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
5129         else
5130                 I915_WRITE_FW(reg, 0);
5131 }
5132
5133 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5134                                i915_reg_t reg,
5135                                const struct skl_wm_level *level)
5136 {
5137         u32 val = 0;
5138
5139         if (level->plane_en)
5140                 val |= PLANE_WM_EN;
5141         if (level->ignore_lines)
5142                 val |= PLANE_WM_IGNORE_LINES;
5143         val |= level->plane_res_b;
5144         val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5145
5146         I915_WRITE_FW(reg, val);
5147 }
5148
5149 void skl_write_plane_wm(struct intel_plane *plane,
5150                         const struct intel_crtc_state *crtc_state)
5151 {
5152         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5153         int level, max_level = ilk_wm_max_level(dev_priv);
5154         enum plane_id plane_id = plane->id;
5155         enum pipe pipe = plane->pipe;
5156         const struct skl_plane_wm *wm =
5157                 &crtc_state->wm.skl.optimal.planes[plane_id];
5158         const struct skl_ddb_entry *ddb_y =
5159                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5160         const struct skl_ddb_entry *ddb_uv =
5161                 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
5162
5163         for (level = 0; level <= max_level; level++) {
5164                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5165                                    &wm->wm[level]);
5166         }
5167         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5168                            &wm->trans_wm);
5169
5170         if (INTEL_GEN(dev_priv) >= 11) {
5171                 skl_ddb_entry_write(dev_priv,
5172                                     PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5173                 return;
5174         }
5175
5176         if (wm->is_planar)
5177                 swap(ddb_y, ddb_uv);
5178
5179         skl_ddb_entry_write(dev_priv,
5180                             PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5181         skl_ddb_entry_write(dev_priv,
5182                             PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
5183 }
5184
5185 void skl_write_cursor_wm(struct intel_plane *plane,
5186                          const struct intel_crtc_state *crtc_state)
5187 {
5188         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5189         int level, max_level = ilk_wm_max_level(dev_priv);
5190         enum plane_id plane_id = plane->id;
5191         enum pipe pipe = plane->pipe;
5192         const struct skl_plane_wm *wm =
5193                 &crtc_state->wm.skl.optimal.planes[plane_id];
5194         const struct skl_ddb_entry *ddb =
5195                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5196
5197         for (level = 0; level <= max_level; level++) {
5198                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5199                                    &wm->wm[level]);
5200         }
5201         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5202
5203         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
5204 }
5205
5206 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5207                          const struct skl_wm_level *l2)
5208 {
5209         return l1->plane_en == l2->plane_en &&
5210                 l1->ignore_lines == l2->ignore_lines &&
5211                 l1->plane_res_l == l2->plane_res_l &&
5212                 l1->plane_res_b == l2->plane_res_b;
5213 }
5214
5215 static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5216                                 const struct skl_plane_wm *wm1,
5217                                 const struct skl_plane_wm *wm2)
5218 {
5219         int level, max_level = ilk_wm_max_level(dev_priv);
5220
5221         for (level = 0; level <= max_level; level++) {
5222                 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5223                     !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5224                         return false;
5225         }
5226
5227         return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
5228 }
5229
5230 static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
5231                                const struct skl_pipe_wm *wm1,
5232                                const struct skl_pipe_wm *wm2)
5233 {
5234         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5235         enum plane_id plane_id;
5236
5237         for_each_plane_id_on_crtc(crtc, plane_id) {
5238                 if (!skl_plane_wm_equals(dev_priv,
5239                                          &wm1->planes[plane_id],
5240                                          &wm2->planes[plane_id]))
5241                         return false;
5242         }
5243
5244         return wm1->linetime == wm2->linetime;
5245 }
5246
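/*
 * DDB entries use an exclusive end, so two entries overlap exactly when
 * each starts before the other ends; adjacent [start, end) ranges where one
 * ends right where the other begins do not count as overlapping.
 */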
5247 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5248                                            const struct skl_ddb_entry *b)
5249 {
5250         return a->start < b->end && b->start < a->end;
5251 }
5252
5253 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5254                                  const struct skl_ddb_entry entries[],
5255                                  int num_entries, int ignore_idx)
5256 {
5257         int i;
5258
5259         for (i = 0; i < num_entries; i++) {
5260                 if (i != ignore_idx &&
5261                     skl_ddb_entries_overlap(ddb, &entries[i]))
5262                         return true;
5263         }
5264
5265         return false;
5266 }
5267
5268 static u32
5269 pipes_modified(struct intel_atomic_state *state)
5270 {
5271         struct intel_crtc *crtc;
5272         struct intel_crtc_state *cstate;
5273         u32 i, ret = 0;
5274
5275         for_each_new_intel_crtc_in_state(state, crtc, cstate, i)
5276                 ret |= drm_crtc_mask(&crtc->base);
5277
5278         return ret;
5279 }
5280
5281 static int
5282 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5283                             struct intel_crtc_state *new_crtc_state)
5284 {
5285         struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5286         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5287         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5288         struct intel_plane *plane;
5289
5290         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5291                 struct intel_plane_state *plane_state;
5292                 enum plane_id plane_id = plane->id;
5293
5294                 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5295                                         &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5296                     skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5297                                         &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
5298                         continue;
5299
5300                 plane_state = intel_atomic_get_plane_state(state, plane);
5301                 if (IS_ERR(plane_state))
5302                         return PTR_ERR(plane_state);
5303
5304                 new_crtc_state->update_planes |= BIT(plane_id);
5305         }
5306
5307         return 0;
5308 }
5309
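/*
 * Start from the driver's copy of the current hardware DDB state, recompute
 * the per-pipe allocations for every crtc in the atomic state, and pull in
 * any plane whose entry changed so that it gets reprogrammed on commit.
 */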
5310 static int
5311 skl_compute_ddb(struct intel_atomic_state *state)
5312 {
5313         const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5314         struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5315         struct intel_crtc_state *old_crtc_state;
5316         struct intel_crtc_state *new_crtc_state;
5317         struct intel_crtc *crtc;
5318         int ret, i;
5319
5320         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5321
5322         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5323                                             new_crtc_state, i) {
5324                 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
5325                 if (ret)
5326                         return ret;
5327
5328                 ret = skl_ddb_add_affected_planes(old_crtc_state,
5329                                                   new_crtc_state);
5330                 if (ret)
5331                         return ret;
5332         }
5333
5334         return 0;
5335 }
5336
5337 static char enast(bool enable)
5338 {
5339         return enable ? '*' : ' ';
5340 }
5341
5342 static void
5343 skl_print_wm_changes(struct intel_atomic_state *state)
5344 {
5345         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5346         const struct intel_crtc_state *old_crtc_state;
5347         const struct intel_crtc_state *new_crtc_state;
5348         struct intel_plane *plane;
5349         struct intel_crtc *crtc;
5350         int i;
5351
5352         if ((drm_debug & DRM_UT_KMS) == 0)
5353                 return;
5354
5355         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5356                                             new_crtc_state, i) {
5357                 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5358
5359                 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5360                 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5361
5362                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5363                         enum plane_id plane_id = plane->id;
5364                         const struct skl_ddb_entry *old, *new;
5365
5366                         old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5367                         new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
5368
5369                         if (skl_ddb_entry_equal(old, new))
5370                                 continue;
5371
5372                         DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5373                                       plane->base.base.id, plane->base.name,
5374                                       old->start, old->end, new->start, new->end,
5375                                       skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5376                 }
5377
5378                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5379                         enum plane_id plane_id = plane->id;
5380                         const struct skl_plane_wm *old_wm, *new_wm;
5381
5382                         old_wm = &old_pipe_wm->planes[plane_id];
5383                         new_wm = &new_pipe_wm->planes[plane_id];
5384
5385                         if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5386                                 continue;
5387
5388                         DRM_DEBUG_KMS("[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5389                                       " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5390                                       plane->base.base.id, plane->base.name,
5391                                       enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5392                                       enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5393                                       enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5394                                       enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5395                                       enast(old_wm->trans_wm.plane_en),
5396                                       enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5397                                       enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5398                                       enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5399                                       enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5400                                       enast(new_wm->trans_wm.plane_en));
5401
5402                         DRM_DEBUG_KMS("[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5403                                       " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
5404                                       plane->base.base.id, plane->base.name,
5405                                       enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5406                                       enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5407                                       enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5408                                       enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5409                                       enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5410                                       enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5411                                       enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5412                                       enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5413                                       enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5414
5415                                       enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5416                                       enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5417                                       enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5418                                       enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5419                                       enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5420                                       enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5421                                       enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5422                                       enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5423                                       enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
5424
5425                         DRM_DEBUG_KMS("[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5426                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5427                                       plane->base.base.id, plane->base.name,
5428                                       old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5429                                       old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5430                                       old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5431                                       old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5432                                       old_wm->trans_wm.plane_res_b,
5433                                       new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5434                                       new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5435                                       new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5436                                       new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5437                                       new_wm->trans_wm.plane_res_b);
5438
5439                         DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5440                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5441                                       plane->base.base.id, plane->base.name,
5442                                       old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5443                                       old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5444                                       old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5445                                       old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5446                                       old_wm->trans_wm.min_ddb_alloc,
5447                                       new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5448                                       new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5449                                       new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5450                                       new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5451                                       new_wm->trans_wm.min_ddb_alloc);
5452                 }
5453         }
5454 }
5455
5456 static int
5457 skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
5458 {
5459         struct drm_device *dev = state->base.dev;
5460         const struct drm_i915_private *dev_priv = to_i915(dev);
5461         struct intel_crtc *crtc;
5462         struct intel_crtc_state *crtc_state;
5463         u32 realloc_pipes = pipes_modified(state);
5464         int ret, i;
5465
5466         /*
5467          * When we distrust bios wm we always need to recompute to set the
5468          * expected DDB allocations for each CRTC.
5469          */
5470         if (dev_priv->wm.distrust_bios_wm)
5471                 (*changed) = true;
5472
5473         /*
5474          * If this transaction isn't actually touching any CRTC's, don't
5475          * bother with watermark calculation.  Note that if we pass this
5476          * test, we're guaranteed to hold at least one CRTC state mutex,
5477          * which means we can safely use values like dev_priv->active_crtcs
5478          * since any racing commits that want to update them would need to
5479          * hold _all_ CRTC state mutexes.
5480          */
5481         for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
5482                 (*changed) = true;
5483
5484         if (!*changed)
5485                 return 0;
5486
5487         /*
5488          * If this is our first atomic update following hardware readout,
5489          * we can't trust the DDB that the BIOS programmed for us.  Let's
5490          * pretend that all pipes switched active status so that we'll
5491          * ensure a full DDB recompute.
5492          */
5493         if (dev_priv->wm.distrust_bios_wm) {
5494                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5495                                        state->base.acquire_ctx);
5496                 if (ret)
5497                         return ret;
5498
5499                 state->active_pipe_changes = ~0;
5500
5501                 /*
5502                  * We usually only initialize state->active_crtcs if we're
5503                  * doing a modeset; make sure this field is always
5504                  * initialized during the sanitization process that happens
5505                  * on the first commit too.
5506                  */
5507                 if (!state->modeset)
5508                         state->active_crtcs = dev_priv->active_crtcs;
5509         }
5510
5511         /*
5512          * If the modeset changes which CRTC's are active, we need to
5513          * recompute the DDB allocation for *all* active pipes, even
5514          * those that weren't otherwise being modified in any way by this
5515          * atomic commit.  Due to the shrinking of the per-pipe allocations
5516          * when new active CRTC's are added, it's possible for a pipe that
5517          * we were already using and aren't changing at all here to suddenly
5518          * become invalid if its DDB needs exceed its new allocation.
5519          *
5520          * Note that if we wind up doing a full DDB recompute, we can't let
5521          * any other display updates race with this transaction, so we need
5522          * to grab the lock on *all* CRTC's.
5523          */
5524         if (state->active_pipe_changes || state->modeset) {
5525                 realloc_pipes = ~0;
5526                 state->wm_results.dirty_pipes = ~0;
5527         }
5528
5529         /*
5530          * We're not recomputing for the pipes not included in the commit, so
5531          * make sure we start with the current state.
5532          */
5533         for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5534                 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5535                 if (IS_ERR(crtc_state))
5536                         return PTR_ERR(crtc_state);
5537         }
5538
5539         return 0;
5540 }
5541
5542 /*
5543  * To make sure the cursor watermark registers are always consistent
5544  * with our computed state the following scenario needs special
5545  * treatment:
5546  *
5547  * 1. enable cursor
5548  * 2. move cursor entirely offscreen
5549  * 3. disable cursor
5550  *
5551  * Step 2. does call .disable_plane() but does not zero the watermarks
5552  * (since we consider an offscreen cursor still active for the purposes
5553  * of watermarks). Step 3. would not normally call .disable_plane()
5554  * because the actual plane visibility isn't changing, and we don't
5555  * deallocate the cursor ddb until the pipe gets disabled. So we must
5556  * force step 3. to call .disable_plane() to update the watermark
5557  * registers properly.
5558  *
5559  * Other planes do not suffer from this issue as their watermarks are
5560  * calculated based on the actual plane visibility. The only time this
5561  * can trigger for the other planes is during the initial readout as the
5562  * default value of the watermarks registers is not zero.
5563  */
5564 static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5565                                       struct intel_crtc *crtc)
5566 {
5567         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5568         const struct intel_crtc_state *old_crtc_state =
5569                 intel_atomic_get_old_crtc_state(state, crtc);
5570         struct intel_crtc_state *new_crtc_state =
5571                 intel_atomic_get_new_crtc_state(state, crtc);
5572         struct intel_plane *plane;
5573
5574         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5575                 struct intel_plane_state *plane_state;
5576                 enum plane_id plane_id = plane->id;
5577
5578                 /*
5579                  * Force a full wm update for every plane on modeset.
5580                  * Required because the reset value of the wm registers
5581                  * is non-zero, whereas we want all disabled planes to
5582                  * have zero watermarks. So if we turn off the relevant
5583                  * power well the hardware state will go out of sync
5584                  * with the software state.
5585                  */
5586                 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5587                     skl_plane_wm_equals(dev_priv,
5588                                         &old_crtc_state->wm.skl.optimal.planes[plane_id],
5589                                         &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5590                         continue;
5591
5592                 plane_state = intel_atomic_get_plane_state(state, plane);
5593                 if (IS_ERR(plane_state))
5594                         return PTR_ERR(plane_state);
5595
5596                 new_crtc_state->update_planes |= BIT(plane_id);
5597         }
5598
5599         return 0;
5600 }
5601
5602 static int
5603 skl_compute_wm(struct intel_atomic_state *state)
5604 {
5605         struct intel_crtc *crtc;
5606         struct intel_crtc_state *new_crtc_state;
5607         struct intel_crtc_state *old_crtc_state;
5608         struct skl_ddb_values *results = &state->wm_results;
5609         bool changed = false;
5610         int ret, i;
5611
5612         /* Clear all dirty flags */
5613         results->dirty_pipes = 0;
5614
5615         ret = skl_ddb_add_affected_pipes(state, &changed);
5616         if (ret || !changed)
5617                 return ret;
5618
5619         /*
5620          * Calculate WM's for all pipes that are part of this transaction.
5621          * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
5622          * weren't otherwise being modified (and set bits in dirty_pipes) if
5623          * pipe allocations had to change.
5624          */
5625         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5626                                             new_crtc_state, i) {
5627                 ret = skl_build_pipe_wm(new_crtc_state);
5628                 if (ret)
5629                         return ret;
5630
5631                 ret = skl_wm_add_affected_planes(state, crtc);
5632                 if (ret)
5633                         return ret;
5634
5635                 if (!skl_pipe_wm_equals(crtc,
5636                                         &old_crtc_state->wm.skl.optimal,
5637                                         &new_crtc_state->wm.skl.optimal))
5638                         results->dirty_pipes |= drm_crtc_mask(&crtc->base);
5639         }
5640
5641         ret = skl_compute_ddb(state);
5642         if (ret)
5643                 return ret;
5644
5645         skl_print_wm_changes(state);
5646
5647         return 0;
5648 }
5649
5650 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5651                                       struct intel_crtc_state *cstate)
5652 {
5653         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5654         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5655         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5656         enum pipe pipe = crtc->pipe;
5657
5658         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5659                 return;
5660
5661         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5662 }
5663
5664 static void skl_initial_wm(struct intel_atomic_state *state,
5665                            struct intel_crtc_state *cstate)
5666 {
5667         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5668         struct drm_device *dev = intel_crtc->base.dev;
5669         struct drm_i915_private *dev_priv = to_i915(dev);
5670         struct skl_ddb_values *results = &state->wm_results;
5671
5672         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5673                 return;
5674
5675         mutex_lock(&dev_priv->wm.wm_mutex);
5676
5677         if (cstate->base.active_changed)
5678                 skl_atomic_update_crtc_wm(state, cstate);
5679
5680         mutex_unlock(&dev_priv->wm.wm_mutex);
5681 }
5682
5683 static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
5684                                   struct intel_wm_config *config)
5685 {
5686         struct intel_crtc *crtc;
5687
5688         /* Compute the currently _active_ config */
5689         for_each_intel_crtc(&dev_priv->drm, crtc) {
5690                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5691
5692                 if (!wm->pipe_enabled)
5693                         continue;
5694
5695                 config->sprites_enabled |= wm->sprites_enabled;
5696                 config->sprites_scaled |= wm->sprites_scaled;
5697                 config->num_pipes_active++;
5698         }
5699 }
5700
5701 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5702 {
5703         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5704         struct ilk_wm_maximums max;
5705         struct intel_wm_config config = {};
5706         struct ilk_wm_values results = {};
5707         enum intel_ddb_partitioning partitioning;
5708
5709         ilk_compute_wm_config(dev_priv, &config);
5710
5711         ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5712         ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
5713
5714         /* 5/6 split only in single pipe config on IVB+ */
5715         if (INTEL_GEN(dev_priv) >= 7 &&
5716             config.num_pipes_active == 1 && config.sprites_enabled) {
5717                 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5718                 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
5719
5720                 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
5721         } else {
5722                 best_lp_wm = &lp_wm_1_2;
5723         }
5724
5725         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5726                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5727
5728         ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
5729
5730         ilk_write_wm_values(dev_priv, &results);
5731 }
5732
5733 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5734                                    struct intel_crtc_state *cstate)
5735 {
5736         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5737         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5738
5739         mutex_lock(&dev_priv->wm.wm_mutex);
5740         intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5741         ilk_program_watermarks(dev_priv);
5742         mutex_unlock(&dev_priv->wm.wm_mutex);
5743 }
5744
5745 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5746                                     struct intel_crtc_state *cstate)
5747 {
5748         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5749         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5750
5751         mutex_lock(&dev_priv->wm.wm_mutex);
5752         if (cstate->wm.need_postvbl_update) {
5753                 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5754                 ilk_program_watermarks(dev_priv);
5755         }
5756         mutex_unlock(&dev_priv->wm.wm_mutex);
5757 }
5758
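/*
 * Decode a PLANE_WM/CUR_WM style register value into a struct skl_wm_level.
 * Illustrative (hypothetical) example: a value with PLANE_WM_EN set, 42 in
 * the blocks field (PLANE_WM_BLOCKS_MASK) and 4 in the lines field (at
 * PLANE_WM_LINES_SHIFT) decodes to plane_en = true, plane_res_b = 42 and
 * plane_res_l = 4.
 */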
5759 static inline void skl_wm_level_from_reg_val(u32 val,
5760                                              struct skl_wm_level *level)
5761 {
5762         level->plane_en = val & PLANE_WM_EN;
5763         level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
5764         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5765         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5766                 PLANE_WM_LINES_MASK;
5767 }
5768
5769 void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
5770                               struct skl_pipe_wm *out)
5771 {
5772         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5773         enum pipe pipe = crtc->pipe;
5774         int level, max_level;
5775         enum plane_id plane_id;
5776         u32 val;
5777
5778         max_level = ilk_wm_max_level(dev_priv);
5779
5780         for_each_plane_id_on_crtc(crtc, plane_id) {
5781                 struct skl_plane_wm *wm = &out->planes[plane_id];
5782
5783                 for (level = 0; level <= max_level; level++) {
5784                         if (plane_id != PLANE_CURSOR)
5785                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5786                         else
5787                                 val = I915_READ(CUR_WM(pipe, level));
5788
5789                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5790                 }
5791
5792                 if (plane_id != PLANE_CURSOR)
5793                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5794                 else
5795                         val = I915_READ(CUR_WM_TRANS(pipe));
5796
5797                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5798         }
5799
5800         if (!crtc->active)
5801                 return;
5802
5803         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5804 }
5805
5806 void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
5807 {
5808         struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5809         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5810         struct intel_crtc *crtc;
5811         struct intel_crtc_state *cstate;
5812
5813         skl_ddb_get_hw_state(dev_priv, ddb);
5814         for_each_intel_crtc(&dev_priv->drm, crtc) {
5815                 cstate = to_intel_crtc_state(crtc->base.state);
5816
5817                 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5818
5819                 if (crtc->active)
5820                         hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
5821         }
5822
5823         if (dev_priv->active_crtcs) {
5824                 /* Fully recompute DDB on first atomic commit */
5825                 dev_priv->wm.distrust_bios_wm = true;
5826         }
5827 }
5828
5829 static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
5830 {
5831         struct drm_device *dev = crtc->base.dev;
5832         struct drm_i915_private *dev_priv = to_i915(dev);
5833         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5834         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state);
5835         struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5836         enum pipe pipe = crtc->pipe;
5837         static const i915_reg_t wm0_pipe_reg[] = {
5838                 [PIPE_A] = WM0_PIPEA_ILK,
5839                 [PIPE_B] = WM0_PIPEB_ILK,
5840                 [PIPE_C] = WM0_PIPEC_IVB,
5841         };
5842
5843         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5844         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5845                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5846
5847         memset(active, 0, sizeof(*active));
5848
5849         active->pipe_enabled = crtc->active;
5850
5851         if (active->pipe_enabled) {
5852                 u32 tmp = hw->wm_pipe[pipe];
5853
5854                 /*
5855                  * For active pipes LP0 watermark is marked as
5856                  * enabled, and LP1+ watermarks as disabled since
5857                  * we can't really reverse compute them in case
5858                  * multiple pipes are active.
5859                  */
5860                 active->wm[0].enable = true;
5861                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5862                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5863                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5864                 active->linetime = hw->wm_linetime[pipe];
5865         } else {
5866                 int level, max_level = ilk_wm_max_level(dev_priv);
5867
5868                 /*
5869                  * For inactive pipes, all watermark levels
5870                  * should be marked as enabled but zeroed,
5871                  * which is what we'd compute them to.
5872                  */
5873                 for (level = 0; level <= max_level; level++)
5874                         active->wm[level].enable = true;
5875         }
5876
5877         crtc->wm.active.ilk = *active;
5878 }
5879
5880 #define _FW_WM(value, plane) \
5881         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5882 #define _FW_WM_VLV(value, plane) \
5883         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
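/*
 * These helpers simply paste the plane name into the matching mask/shift
 * macros, e.g. _FW_WM(tmp, CURSORB) expands to
 * (((tmp) & DSPFW_CURSORB_MASK) >> DSPFW_CURSORB_SHIFT); the _VLV variant
 * does the same with the wider DSPFW_*_MASK_VLV masks.
 */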
5884
5885 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5886                                struct g4x_wm_values *wm)
5887 {
5888         u32 tmp;
5889
5890         tmp = I915_READ(DSPFW1);
5891         wm->sr.plane = _FW_WM(tmp, SR);
5892         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5893         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5894         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5895
5896         tmp = I915_READ(DSPFW2);
5897         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5898         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5899         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5900         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5901         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5902         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5903
5904         tmp = I915_READ(DSPFW3);
5905         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5906         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5907         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5908         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5909 }
5910
5911 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5912                                struct vlv_wm_values *wm)
5913 {
5914         enum pipe pipe;
5915         u32 tmp;
5916
5917         for_each_pipe(dev_priv, pipe) {
5918                 tmp = I915_READ(VLV_DDL(pipe));
5919
5920                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5921                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5922                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5923                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5924                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5925                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5926                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5927                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5928         }
5929
5930         tmp = I915_READ(DSPFW1);
5931         wm->sr.plane = _FW_WM(tmp, SR);
5932         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5933         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5934         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5935
5936         tmp = I915_READ(DSPFW2);
5937         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5938         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5939         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5940
5941         tmp = I915_READ(DSPFW3);
5942         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5943
5944         if (IS_CHERRYVIEW(dev_priv)) {
5945                 tmp = I915_READ(DSPFW7_CHV);
5946                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5947                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5948
5949                 tmp = I915_READ(DSPFW8_CHV);
5950                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5951                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5952
5953                 tmp = I915_READ(DSPFW9_CHV);
5954                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5955                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5956
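                /*
                 * DSPHOWM supplies the high bit of the plane, sprite and SR
                 * watermark values: the final value is the low field read
                 * from DSPFW1/2/7-9 above ORed with the HI bit shifted into
                 * place (by 8, or by 9 for the SR plane value).
                 */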
5957                 tmp = I915_READ(DSPHOWM);
5958                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5959                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5960                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5961                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5962                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5963                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5964                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5965                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5966                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5967                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5968         } else {
5969                 tmp = I915_READ(DSPFW7);
5970                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5971                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5972
5973                 tmp = I915_READ(DSPHOWM);
5974                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5975                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5976                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5977                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5978                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5979                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5980                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5981         }
5982 }
5983
5984 #undef _FW_WM
5985 #undef _FW_WM_VLV
5986
5987 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
5988 {
5989         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5990         struct intel_crtc *crtc;
5991
5992         g4x_read_wm_values(dev_priv, wm);
5993
5994         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5995
5996         for_each_intel_crtc(&dev_priv->drm, crtc) {
5997                 struct intel_crtc_state *crtc_state =
5998                         to_intel_crtc_state(crtc->base.state);
5999                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
6000                 struct g4x_pipe_wm *raw;
6001                 enum pipe pipe = crtc->pipe;
6002                 enum plane_id plane_id;
6003                 int level, max_level;
6004
6005                 active->cxsr = wm->cxsr;
6006                 active->hpll_en = wm->hpll_en;
6007                 active->fbc_en = wm->fbc_en;
6008
6009                 active->sr = wm->sr;
6010                 active->hpll = wm->hpll;
6011
6012                 for_each_plane_id_on_crtc(crtc, plane_id) {
6013                         active->wm.plane[plane_id] =
6014                                 wm->pipe[pipe].plane[plane_id];
6015                 }
6016
6017                 if (wm->cxsr && wm->hpll_en)
6018                         max_level = G4X_WM_LEVEL_HPLL;
6019                 else if (wm->cxsr)
6020                         max_level = G4X_WM_LEVEL_SR;
6021                 else
6022                         max_level = G4X_WM_LEVEL_NORMAL;
6023
6024                 level = G4X_WM_LEVEL_NORMAL;
6025                 raw = &crtc_state->wm.g4x.raw[level];
6026                 for_each_plane_id_on_crtc(crtc, plane_id)
6027                         raw->plane[plane_id] = active->wm.plane[plane_id];
6028
6029                 if (++level > max_level)
6030                         goto out;
6031
6032                 raw = &crtc_state->wm.g4x.raw[level];
6033                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
6034                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
6035                 raw->plane[PLANE_SPRITE0] = 0;
6036                 raw->fbc = active->sr.fbc;
6037
6038                 if (++level > max_level)
6039                         goto out;
6040
6041                 raw = &crtc_state->wm.g4x.raw[level];
6042                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
6043                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
6044                 raw->plane[PLANE_SPRITE0] = 0;
6045                 raw->fbc = active->hpll.fbc;
6046
6047         out:
6048                 for_each_plane_id_on_crtc(crtc, plane_id)
6049                         g4x_raw_plane_wm_set(crtc_state, level,
6050                                              plane_id, USHRT_MAX);
6051                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6052
6053                 crtc_state->wm.g4x.optimal = *active;
6054                 crtc_state->wm.g4x.intermediate = *active;
6055
6056                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6057                               pipe_name(pipe),
6058                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6059                               wm->pipe[pipe].plane[PLANE_CURSOR],
6060                               wm->pipe[pipe].plane[PLANE_SPRITE0]);
6061         }
6062
6063         DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6064                       wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6065         DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
6066                       wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6067         DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
6068                       yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6069 }
6070
6071 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6072 {
6073         struct intel_plane *plane;
6074         struct intel_crtc *crtc;
6075
6076         mutex_lock(&dev_priv->wm.wm_mutex);
6077
6078         for_each_intel_plane(&dev_priv->drm, plane) {
6079                 struct intel_crtc *crtc =
6080                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6081                 struct intel_crtc_state *crtc_state =
6082                         to_intel_crtc_state(crtc->base.state);
6083                 struct intel_plane_state *plane_state =
6084                         to_intel_plane_state(plane->base.state);
6085                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6086                 enum plane_id plane_id = plane->id;
6087                 int level;
6088
6089                 if (plane_state->base.visible)
6090                         continue;
6091
6092                 for (level = 0; level < 3; level++) {
6093                         struct g4x_pipe_wm *raw =
6094                                 &crtc_state->wm.g4x.raw[level];
6095
6096                         raw->plane[plane_id] = 0;
6097                         wm_state->wm.plane[plane_id] = 0;
6098                 }
6099
6100                 if (plane_id == PLANE_PRIMARY) {
6101                         for (level = 0; level < 3; level++) {
6102                                 struct g4x_pipe_wm *raw =
6103                                         &crtc_state->wm.g4x.raw[level];
6104                                 raw->fbc = 0;
6105                         }
6106
6107                         wm_state->sr.fbc = 0;
6108                         wm_state->hpll.fbc = 0;
6109                         wm_state->fbc_en = false;
6110                 }
6111         }
6112
6113         for_each_intel_crtc(&dev_priv->drm, crtc) {
6114                 struct intel_crtc_state *crtc_state =
6115                         to_intel_crtc_state(crtc->base.state);
6116
6117                 crtc_state->wm.g4x.intermediate =
6118                         crtc_state->wm.g4x.optimal;
6119                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6120         }
6121
6122         g4x_program_watermarks(dev_priv);
6123
6124         mutex_unlock(&dev_priv->wm.wm_mutex);
6125 }
6126
6127 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6128 {
6129         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
6130         struct intel_crtc *crtc;
6131         u32 val;
6132
6133         vlv_read_wm_values(dev_priv, wm);
6134
6135         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6136         wm->level = VLV_WM_LEVEL_PM2;
6137
6138         if (IS_CHERRYVIEW(dev_priv)) {
6139                 mutex_lock(&dev_priv->pcu_lock);
6140
6141                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6142                 if (val & DSP_MAXFIFO_PM5_ENABLE)
6143                         wm->level = VLV_WM_LEVEL_PM5;
6144
6145                 /*
6146                  * If DDR DVFS is disabled in the BIOS, Punit
6147                  * will never ack the request. So if that happens
6148                  * assume we don't have to enable/disable DDR DVFS
6149                  * dynamically. To test that just set the REQ_ACK
6150                  * bit to poke the Punit, but don't change the
6151                  * HIGH/LOW bits so that we don't actually change
6152                  * the current state.
6153                  */
6154                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6155                 val |= FORCE_DDR_FREQ_REQ_ACK;
6156                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6157
6158                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6159                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6160                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6161                                       "assuming DDR DVFS is disabled\n");
6162                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6163                 } else {
6164                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6165                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6166                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6167                 }
6168
6169                 mutex_unlock(&dev_priv->pcu_lock);
6170         }
6171
6172         for_each_intel_crtc(&dev_priv->drm, crtc) {
6173                 struct intel_crtc_state *crtc_state =
6174                         to_intel_crtc_state(crtc->base.state);
6175                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6176                 const struct vlv_fifo_state *fifo_state =
6177                         &crtc_state->wm.vlv.fifo_state;
6178                 enum pipe pipe = crtc->pipe;
6179                 enum plane_id plane_id;
6180                 int level;
6181
6182                 vlv_get_fifo_size(crtc_state);
6183
6184                 active->num_levels = wm->level + 1;
6185                 active->cxsr = wm->cxsr;
6186
6187                 for (level = 0; level < active->num_levels; level++) {
6188                         struct g4x_pipe_wm *raw =
6189                                 &crtc_state->wm.vlv.raw[level];
6190
6191                         active->sr[level].plane = wm->sr.plane;
6192                         active->sr[level].cursor = wm->sr.cursor;
6193
6194                         for_each_plane_id_on_crtc(crtc, plane_id) {
6195                                 active->wm[level].plane[plane_id] =
6196                                         wm->pipe[pipe].plane[plane_id];
6197
6198                                 raw->plane[plane_id] =
6199                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
6200                                                             fifo_state->plane[plane_id]);
6201                         }
6202                 }
6203
6204                 for_each_plane_id_on_crtc(crtc, plane_id)
6205                         vlv_raw_plane_wm_set(crtc_state, level,
6206                                              plane_id, USHRT_MAX);
6207                 vlv_invalidate_wms(crtc, active, level);
6208
6209                 crtc_state->wm.vlv.optimal = *active;
6210                 crtc_state->wm.vlv.intermediate = *active;
6211
6212                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6213                               pipe_name(pipe),
6214                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6215                               wm->pipe[pipe].plane[PLANE_CURSOR],
6216                               wm->pipe[pipe].plane[PLANE_SPRITE0],
6217                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
6218         }
6219
6220         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6221                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6222 }
6223
6224 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6225 {
6226         struct intel_plane *plane;
6227         struct intel_crtc *crtc;
6228
6229         mutex_lock(&dev_priv->wm.wm_mutex);
6230
6231         for_each_intel_plane(&dev_priv->drm, plane) {
6232                 struct intel_crtc *crtc =
6233                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6234                 struct intel_crtc_state *crtc_state =
6235                         to_intel_crtc_state(crtc->base.state);
6236                 struct intel_plane_state *plane_state =
6237                         to_intel_plane_state(plane->base.state);
6238                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6239                 const struct vlv_fifo_state *fifo_state =
6240                         &crtc_state->wm.vlv.fifo_state;
6241                 enum plane_id plane_id = plane->id;
6242                 int level;
6243
6244                 if (plane_state->base.visible)
6245                         continue;
6246
6247                 for (level = 0; level < wm_state->num_levels; level++) {
6248                         struct g4x_pipe_wm *raw =
6249                                 &crtc_state->wm.vlv.raw[level];
6250
6251                         raw->plane[plane_id] = 0;
6252
6253                         wm_state->wm[level].plane[plane_id] =
6254                                 vlv_invert_wm_value(raw->plane[plane_id],
6255                                                     fifo_state->plane[plane_id]);
6256                 }
6257         }
6258
6259         for_each_intel_crtc(&dev_priv->drm, crtc) {
6260                 struct intel_crtc_state *crtc_state =
6261                         to_intel_crtc_state(crtc->base.state);
6262
6263                 crtc_state->wm.vlv.intermediate =
6264                         crtc_state->wm.vlv.optimal;
6265                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6266         }
6267
6268         vlv_program_watermarks(dev_priv);
6269
6270         mutex_unlock(&dev_priv->wm.wm_mutex);
6271 }
6272
6273 /*
6274  * FIXME should probably kill this and improve
6275  * the real watermark readout/sanitation instead
6276  */
6277 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6278 {
6279         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6280         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6281         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6282
6283         /*
6284          * Don't touch WM1S_LP_EN here.
6285          * Doing so could cause underruns.
6286          */
6287 }
6288
6289 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
6290 {
6291         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6292         struct intel_crtc *crtc;
6293
6294         ilk_init_lp_watermarks(dev_priv);
6295
6296         for_each_intel_crtc(&dev_priv->drm, crtc)
6297                 ilk_pipe_wm_get_hw_state(crtc);
6298
6299         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6300         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6301         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6302
6303         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6304         if (INTEL_GEN(dev_priv) >= 7) {
6305                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6306                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6307         }
6308
6309         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6310                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6311                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6312         else if (IS_IVYBRIDGE(dev_priv))
6313                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6314                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6315
6316         hw->enable_fbc_wm =
6317                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6318 }
6319
6320 /**
6321  * intel_update_watermarks - update FIFO watermark values based on current modes
6322  * @crtc: the #intel_crtc on which to compute the WM
6323  *
6324  * Calculate watermark values for the various WM regs based on current mode
6325  * and plane configuration.
6326  *
6327  * There are several cases to deal with here:
6328  *   - normal (i.e. non-self-refresh)
6329  *   - self-refresh (SR) mode
6330  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6331  *   - lines are small relative to FIFO size (buffer can hold more than 2
6332  *     lines), so need to account for TLB latency
6333  *
6334  *   The normal calculation is:
6335  *     watermark = dotclock * bytes per pixel * latency
6336  *   where latency is platform & configuration dependent (we assume pessimal
6337  *   values here).
6338  *
6339  *   The SR calculation is:
6340  *     watermark = (trunc(latency/line time)+1) * surface width *
6341  *       bytes per pixel
6342  *   where
6343  *     line time = htotal / dotclock
6344  *     surface width = hdisplay for normal plane and 64 for cursor
6345  *   and latency is assumed to be high, as above.
6346  *
6347  * The final value programmed to the register should always be rounded up,
6348  * and include an extra 2 entries to account for clock crossings.
6349  *
6350  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6351  * to set the non-SR watermarks to 8.
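 *
 * Purely as an illustration of the formulas above (hypothetical numbers,
 * not from any real platform): with a 100 MHz dot clock, 4 bytes per pixel
 * and 10 us of latency, the normal calculation has to cover roughly
 * 100e6 * 4 * 10e-6 = 4000 bytes worth of FIFO entries; and with a ~15 us
 * line time and 30 us of latency, the SR calculation covers
 * trunc(30/15) + 1 = 3 lines of the surface.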
6352  */
6353 void intel_update_watermarks(struct intel_crtc *crtc)
6354 {
6355         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6356
6357         if (dev_priv->display.update_wm)
6358                 dev_priv->display.update_wm(crtc);
6359 }
6360
6361 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6362 {
6363         u32 val;
6364
6365         if (!HAS_IPC(dev_priv))
6366                 return;
6367
6368         val = I915_READ(DISP_ARB_CTL2);
6369
6370         if (dev_priv->ipc_enabled)
6371                 val |= DISP_IPC_ENABLE;
6372         else
6373                 val &= ~DISP_IPC_ENABLE;
6374
6375         I915_WRITE(DISP_ARB_CTL2, val);
6376 }
6377
6378 void intel_init_ipc(struct drm_i915_private *dev_priv)
6379 {
6380         if (!HAS_IPC(dev_priv))
6381                 return;
6382
6383         /* Display WA #1141: SKL:all KBL:all CFL */
6384         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6385                 dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
6386         else
6387                 dev_priv->ipc_enabled = true;
6388
6389         intel_enable_ipc(dev_priv);
6390 }
6391
6392 /*
6393  * Lock protecting IPS related data structures
6394  */
6395 DEFINE_SPINLOCK(mchdev_lock);
6396
6397 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6398 {
6399         u16 rgvswctl;
6400
6401         lockdep_assert_held(&mchdev_lock);
6402
6403         rgvswctl = I915_READ16(MEMSWCTL);
6404         if (rgvswctl & MEMCTL_CMD_STS) {
6405                 DRM_DEBUG("gpu busy, RCS change rejected\n");
6406                 return false; /* still busy with another command */
6407         }
6408
6409         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6410                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6411         I915_WRITE16(MEMSWCTL, rgvswctl);
6412         POSTING_READ16(MEMSWCTL);
6413
6414         rgvswctl |= MEMCTL_CMD_STS;
6415         I915_WRITE16(MEMSWCTL, rgvswctl);
6416
6417         return true;
6418 }
6419
6420 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6421 {
6422         u32 rgvmodectl;
6423         u8 fmax, fmin, fstart, vstart;
6424
6425         spin_lock_irq(&mchdev_lock);
6426
6427         rgvmodectl = I915_READ(MEMMODECTL);
6428
6429         /* Enable temp reporting */
6430         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6431         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6432
6433         /* 100ms RC evaluation intervals */
6434         I915_WRITE(RCUPEI, 100000);
6435         I915_WRITE(RCDNEI, 100000);
6436
6437         /* Set max/min thresholds to 90ms and 80ms respectively */
6438         I915_WRITE(RCBMAXAVG, 90000);
6439         I915_WRITE(RCBMINAVG, 80000);
6440
6441         I915_WRITE(MEMIHYST, 1);
6442
6443         /* Set up min, max, and cur for interrupt handling */
6444         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6445         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6446         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6447                 MEMMODE_FSTART_SHIFT;
6448
6449         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6450                 PXVFREQ_PX_SHIFT;
6451
6452         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6453         dev_priv->ips.fstart = fstart;
6454
6455         dev_priv->ips.max_delay = fstart;
6456         dev_priv->ips.min_delay = fmin;
6457         dev_priv->ips.cur_delay = fstart;
6458
6459         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6460                          fmax, fmin, fstart);
6461
6462         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6463
6464         /*
6465          * Interrupts will be enabled in ironlake_irq_postinstall
6466          */
6467
6468         I915_WRITE(VIDSTART, vstart);
6469         POSTING_READ(VIDSTART);
6470
6471         rgvmodectl |= MEMMODE_SWMODE_EN;
6472         I915_WRITE(MEMMODECTL, rgvmodectl);
6473
6474         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6475                 DRM_ERROR("stuck trying to change perf mode\n");
6476         mdelay(1);
6477
6478         ironlake_set_drps(dev_priv, fstart);
6479
6480         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6481                 I915_READ(DDREC) + I915_READ(CSIEC);
6482         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6483         dev_priv->ips.last_count2 = I915_READ(GFXEC);
6484         dev_priv->ips.last_time2 = ktime_get_raw_ns();
6485
6486         spin_unlock_irq(&mchdev_lock);
6487 }
6488
6489 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6490 {
6491         u16 rgvswctl;
6492
6493         spin_lock_irq(&mchdev_lock);
6494
6495         rgvswctl = I915_READ16(MEMSWCTL);
6496
6497         /* Ack interrupts, disable EFC interrupt */
6498         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6499         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6500         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6501         I915_WRITE(DEIIR, DE_PCU_EVENT);
6502         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6503
6504         /* Go back to the starting frequency */
6505         ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6506         mdelay(1);
6507         rgvswctl |= MEMCTL_CMD_STS;
6508         I915_WRITE(MEMSWCTL, rgvswctl);
6509         mdelay(1);
6510
6511         spin_unlock_irq(&mchdev_lock);
6512 }
6513
6514 /* There's a funny hw issue where the hw returns all 0 when reading from
6515  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6516  * ourselves, instead of doing a rmw cycle (which might result in us clearing
6517  * all limits and the gpu stuck at whatever frequency it is at atm).
6518  */
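/*
 * For illustration (made-up frequency codes): on gen9+, with
 * max_freq_softlimit 0xa and min_freq_softlimit 0x3, a request at or below
 * the minimum packs 0xa << 23 | 0x3 << 14 = 0x0500c000 into the limits
 * value; older generations use shifts of 24 and 16 instead.
 */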
6519 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6520 {
6521         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6522         u32 limits;
6523
6524         /* Only set the down limit when we've reached the lowest level to avoid
6525          * getting more interrupts, otherwise leave this clear. This prevents a
6526          * race in the hw when coming out of rc6: There's a tiny window where
6527          * the hw runs at the minimal clock before selecting the desired
6528          * frequency, if the down threshold expires in that window we will not
6529          * receive a down interrupt. */
6530         if (INTEL_GEN(dev_priv) >= 9) {
6531                 limits = (rps->max_freq_softlimit) << 23;
6532                 if (val <= rps->min_freq_softlimit)
6533                         limits |= (rps->min_freq_softlimit) << 14;
6534         } else {
6535                 limits = rps->max_freq_softlimit << 24;
6536                 if (val <= rps->min_freq_softlimit)
6537                         limits |= rps->min_freq_softlimit << 16;
6538         }
6539
6540         return limits;
6541 }
6542
6543 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6544 {
6545         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6546         u32 threshold_up = 0, threshold_down = 0; /* in % */
6547         u32 ei_up = 0, ei_down = 0;
6548
6549         lockdep_assert_held(&rps->power.mutex);
6550
6551         if (new_power == rps->power.mode)
6552                 return;
6553
6554         /* Note the units here are not exactly 1us, but 1280ns. */
6555         switch (new_power) {
6556         case LOW_POWER:
6557                 /* Upclock if more than 95% busy over 16ms */
6558                 ei_up = 16000;
6559                 threshold_up = 95;
6560
6561                 /* Downclock if less than 85% busy over 32ms */
6562                 ei_down = 32000;
6563                 threshold_down = 85;
6564                 break;
6565
6566         case BETWEEN:
6567                 /* Upclock if more than 90% busy over 13ms */
6568                 ei_up = 13000;
6569                 threshold_up = 90;
6570
6571                 /* Downclock if less than 75% busy over 32ms */
6572                 ei_down = 32000;
6573                 threshold_down = 75;
6574                 break;
6575
6576         case HIGH_POWER:
6577                 /* Upclock if more than 85% busy over 10ms */
6578                 ei_up = 10000;
6579                 threshold_up = 85;
6580
6581                 /* Downclock if less than 60% busy over 32ms */
6582                 ei_down = 32000;
6583                 threshold_down = 60;
6584                 break;
6585         }
6586
6587         /* Once byt can survive dynamic sw freq adjustments without
6588          * hanging the system, this restriction can be lifted.
6589          */
6590         if (IS_VALLEYVIEW(dev_priv))
6591                 goto skip_hw_write;
6592
6593         I915_WRITE(GEN6_RP_UP_EI,
6594                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
6595         I915_WRITE(GEN6_RP_UP_THRESHOLD,
6596                    GT_INTERVAL_FROM_US(dev_priv,
6597                                        ei_up * threshold_up / 100));
6598
6599         I915_WRITE(GEN6_RP_DOWN_EI,
6600                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
6601         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6602                    GT_INTERVAL_FROM_US(dev_priv,
6603                                        ei_down * threshold_down / 100));
6604
6605         I915_WRITE(GEN6_RP_CONTROL,
6606                    GEN6_RP_MEDIA_TURBO |
6607                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
6608                    GEN6_RP_MEDIA_IS_GFX |
6609                    GEN6_RP_ENABLE |
6610                    GEN6_RP_UP_BUSY_AVG |
6611                    GEN6_RP_DOWN_IDLE_AVG);
6612
6613 skip_hw_write:
6614         rps->power.mode = new_power;
6615         rps->power.up_threshold = threshold_up;
6616         rps->power.down_threshold = threshold_down;
6617 }
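
/*
 * Worked example (illustrative): in LOW_POWER mode the up evaluation window
 * is ei_up = 16000us with threshold_up = 95, so GEN6_RP_UP_THRESHOLD is
 * programmed with GT_INTERVAL_FROM_US(dev_priv, 16000 * 95 / 100), i.e. the
 * hw must observe 15200us of busyness within a 16000us window before it
 * raises an up-threshold event.
 */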
6618
6619 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6620 {
6621         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6622         int new_power;
6623
6624         new_power = rps->power.mode;
6625         switch (rps->power.mode) {
6626         case LOW_POWER:
6627                 if (val > rps->efficient_freq + 1 &&
6628                     val > rps->cur_freq)
6629                         new_power = BETWEEN;
6630                 break;
6631
6632         case BETWEEN:
6633                 if (val <= rps->efficient_freq &&
6634                     val < rps->cur_freq)
6635                         new_power = LOW_POWER;
6636                 else if (val >= rps->rp0_freq &&
6637                          val > rps->cur_freq)
6638                         new_power = HIGH_POWER;
6639                 break;
6640
6641         case HIGH_POWER:
6642                 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6643                     val < rps->cur_freq)
6644                         new_power = BETWEEN;
6645                 break;
6646         }
6647         /* Max/min bins are special */
6648         if (val <= rps->min_freq_softlimit)
6649                 new_power = LOW_POWER;
6650         if (val >= rps->max_freq_softlimit)
6651                 new_power = HIGH_POWER;
6652
6653         mutex_lock(&rps->power.mutex);
6654         if (rps->power.interactive)
6655                 new_power = HIGH_POWER;
6656         rps_set_power(dev_priv, new_power);
6657         mutex_unlock(&rps->power.mutex);
6658 }
6659
6660 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6661 {
6662         struct intel_rps *rps = &i915->gt_pm.rps;
6663
6664         if (INTEL_GEN(i915) < 6)
6665                 return;
6666
6667         mutex_lock(&rps->power.mutex);
6668         if (interactive) {
6669                 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6670                         rps_set_power(i915, HIGH_POWER);
6671         } else {
6672                 GEM_BUG_ON(!rps->power.interactive);
6673                 rps->power.interactive--;
6674         }
6675         mutex_unlock(&rps->power.mutex);
6676 }
6677
6678 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6679 {
6680         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6681         u32 mask = 0;
6682
6683         /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6684         if (val > rps->min_freq_softlimit)
6685                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6686         if (val < rps->max_freq_softlimit)
6687                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6688
6689         mask &= dev_priv->pm_rps_events;
6690
6691         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6692 }
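
/*
 * Illustration: at a frequency strictly between the softlimits both the up
 * and down event groups are left unmasked; once we sit at the minimum
 * softlimit only the up events remain enabled, so we stop taking down
 * interrupts that could never be acted upon (and vice versa at the maximum).
 */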
6693
6694 /* gen6_set_rps is called to update the frequency request, but should also be
6695  * called when the range (min_delay and max_delay) is modified so that we can
6696  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6697 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6698 {
6699         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6700
6701         /* min/max delay may still have been modified so be sure to
6702          * write the limits value.
6703          */
6704         if (val != rps->cur_freq) {
6705                 gen6_set_rps_thresholds(dev_priv, val);
6706
6707                 if (INTEL_GEN(dev_priv) >= 9)
6708                         I915_WRITE(GEN6_RPNSWREQ,
6709                                    GEN9_FREQUENCY(val));
6710                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6711                         I915_WRITE(GEN6_RPNSWREQ,
6712                                    HSW_FREQUENCY(val));
6713                 else
6714                         I915_WRITE(GEN6_RPNSWREQ,
6715                                    GEN6_FREQUENCY(val) |
6716                                    GEN6_OFFSET(0) |
6717                                    GEN6_AGGRESSIVE_TURBO);
6718         }
6719
6720         /* Make sure we continue to get interrupts
6721          * until we hit the minimum or maximum frequencies.
6722          */
6723         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6724         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6725
6726         rps->cur_freq = val;
6727         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6728
6729         return 0;
6730 }
6731
6732 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6733 {
6734         int err;
6735
6736         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6737                       "Odd GPU freq value\n"))
6738                 val &= ~1;
6739
6740         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6741
6742         if (val != dev_priv->gt_pm.rps.cur_freq) {
6743                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6744                 if (err)
6745                         return err;
6746
6747                 gen6_set_rps_thresholds(dev_priv, val);
6748         }
6749
6750         dev_priv->gt_pm.rps.cur_freq = val;
6751         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6752
6753         return 0;
6754 }
6755
6756 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6757  *
6758  * If Gfx is idle:
6759  * 1. Forcewake the Media well.
6760  * 2. Request the idle frequency.
6761  * 3. Release forcewake of the Media well.
6762  */
6763 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6764 {
6765         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6766         u32 val = rps->idle_freq;
6767         int err;
6768
6769         if (rps->cur_freq <= val)
6770                 return;
6771
6772         /* The punit delays the write of the frequency and voltage until it
6773          * determines the GPU is awake. During normal usage we don't want to
6774          * waste power changing the frequency if the GPU is sleeping (rc6).
6775          * However, the GPU and driver is now idle and we do not want to delay
6776          * switching to minimum voltage (reducing power whilst idle) as we do
6777          * not expect to be woken in the near future and so must flush the
6778          * change by waking the device.
6779          *
6780          * We choose to take the media powerwell (either would do to trick the
6781          * punit into committing the voltage change) as that takes a lot less
6782          * power than the render powerwell.
6783          */
6784         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA);
6785         err = valleyview_set_rps(dev_priv, val);
6786         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA);
6787
6788         if (err)
6789                 DRM_ERROR("Failed to set RPS for idle\n");
6790 }
6791
6792 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6793 {
6794         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6795
6796         mutex_lock(&dev_priv->pcu_lock);
6797         if (rps->enabled) {
6798                 u8 freq;
6799
6800                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6801                         gen6_rps_reset_ei(dev_priv);
6802                 I915_WRITE(GEN6_PMINTRMSK,
6803                            gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6804
6805                 gen6_enable_rps_interrupts(dev_priv);
6806
6807                 /* Use the user's desired frequency as a guide, but for better
6808                  * performance, jump directly to RPe as our starting frequency.
6809                  */
6810                 freq = max(rps->cur_freq,
6811                            rps->efficient_freq);
6812
6813                 if (intel_set_rps(dev_priv,
6814                                   clamp(freq,
6815                                         rps->min_freq_softlimit,
6816                                         rps->max_freq_softlimit)))
6817                         DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6818         }
6819         mutex_unlock(&dev_priv->pcu_lock);
6820 }
6821
6822 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6823 {
6824         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6825
6826         /* Flush our bottom-half so that it does not race with us
6827          * setting the idle frequency and so that it is bounded by
6828          * our rpm wakeref. And then disable the interrupts to stop any
6829          * further RPS reclocking whilst we are asleep.
6830          */
6831         gen6_disable_rps_interrupts(dev_priv);
6832
6833         mutex_lock(&dev_priv->pcu_lock);
6834         if (rps->enabled) {
6835                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6836                         vlv_set_rps_idle(dev_priv);
6837                 else
6838                         gen6_set_rps(dev_priv, rps->idle_freq);
6839                 rps->last_adj = 0;
6840                 I915_WRITE(GEN6_PMINTRMSK,
6841                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6842         }
6843         mutex_unlock(&dev_priv->pcu_lock);
6844 }
6845
6846 void gen6_rps_boost(struct i915_request *rq)
6847 {
6848         struct intel_rps *rps = &rq->i915->gt_pm.rps;
6849         unsigned long flags;
6850         bool boost;
6851
6852         /* This is intentionally racy! We peek at the state here, then
6853          * validate inside the RPS worker.
6854          */
6855         if (!rps->enabled)
6856                 return;
6857
6858         if (i915_request_signaled(rq))
6859                 return;
6860
6861         /* Serializes with i915_request_retire() */
6862         boost = false;
6863         spin_lock_irqsave(&rq->lock, flags);
6864         if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6865                 boost = !atomic_fetch_inc(&rps->num_waiters);
6866                 rq->waitboost = true;
6867         }
6868         spin_unlock_irqrestore(&rq->lock, flags);
6869         if (!boost)
6870                 return;
6871
6872         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6873                 schedule_work(&rps->work);
6874
6875         atomic_inc(&rps->boosts);
6876 }
6877
6878 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6879 {
6880         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6881         int err;
6882
6883         lockdep_assert_held(&dev_priv->pcu_lock);
6884         GEM_BUG_ON(val > rps->max_freq);
6885         GEM_BUG_ON(val < rps->min_freq);
6886
6887         if (!rps->enabled) {
6888                 rps->cur_freq = val;
6889                 return 0;
6890         }
6891
6892         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6893                 err = valleyview_set_rps(dev_priv, val);
6894         else
6895                 err = gen6_set_rps(dev_priv, val);
6896
6897         return err;
6898 }
6899
6900 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6901 {
6902         I915_WRITE(GEN6_RC_CONTROL, 0);
6903         I915_WRITE(GEN9_PG_ENABLE, 0);
6904 }
6905
6906 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6907 {
6908         I915_WRITE(GEN6_RP_CONTROL, 0);
6909 }
6910
6911 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6912 {
6913         I915_WRITE(GEN6_RC_CONTROL, 0);
6914 }
6915
6916 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6917 {
6918         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6919         I915_WRITE(GEN6_RP_CONTROL, 0);
6920 }
6921
6922 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6923 {
6924         I915_WRITE(GEN6_RC_CONTROL, 0);
6925 }
6926
6927 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6928 {
6929         I915_WRITE(GEN6_RP_CONTROL, 0);
6930 }
6931
6932 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6933 {
6934         /* We take forcewake before disabling RC6;
6935          * this is what the BIOS expects when going into suspend */
6936         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
6937
6938         I915_WRITE(GEN6_RC_CONTROL, 0);
6939
6940         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
6941 }
6942
6943 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6944 {
6945         I915_WRITE(GEN6_RP_CONTROL, 0);
6946 }
6947
6948 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6949 {
6950         bool enable_rc6 = true;
6951         unsigned long rc6_ctx_base;
6952         u32 rc_ctl;
6953         int rc_sw_target;
6954
6955         rc_ctl = I915_READ(GEN6_RC_CONTROL);
6956         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6957                        RC_SW_TARGET_STATE_SHIFT;
6958         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6959                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6960                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6961                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6962                          rc_sw_target);
6963
6964         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6965                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6966                 enable_rc6 = false;
6967         }
6968
6969         /*
6970          * The exact context size is not known for BXT, so assume a page size
6971          * for this check.
6972          */
6973         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6974         if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6975               (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6976                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6977                 enable_rc6 = false;
6978         }
6979
6980         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6981               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6982               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6983               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6984                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6985                 enable_rc6 = false;
6986         }
6987
6988         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6989             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6990             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6991                 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6992                 enable_rc6 = false;
6993         }
6994
6995         if (!I915_READ(GEN6_GFXPAUSE)) {
6996                 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6997                 enable_rc6 = false;
6998         }
6999
7000         if (!I915_READ(GEN8_MISC_CTRL0)) {
7001                 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
7002                 enable_rc6 = false;
7003         }
7004
7005         return enable_rc6;
7006 }
7007
7008 static bool sanitize_rc6(struct drm_i915_private *i915)
7009 {
7010         struct intel_device_info *info = mkwrite_device_info(i915);
7011
7012         /* Powersaving is controlled by the host when inside a VM */
7013         if (intel_vgpu_active(i915))
7014                 info->has_rc6 = 0;
7015
7016         if (info->has_rc6 &&
7017             IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
7018                 DRM_INFO("RC6 disabled by BIOS\n");
7019                 info->has_rc6 = 0;
7020         }
7021
7022         /*
7023          * We assume that we do not have any deep rc6 levels if we don't
7024          * have the previous rc6 level supported, i.e. we use HAS_RC6()
7025          * as the initial coarse check for rc6 in general, moving on to
7026          * progressively finer/deeper levels.
7027          */
7028         if (!info->has_rc6 && info->has_rc6p)
7029                 info->has_rc6p = 0;
7030
7031         return info->has_rc6;
7032 }
7033
7034 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
7035 {
7036         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7037
7038         /* All of these values are in units of 50MHz */
7039
7040         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
7041         if (IS_GEN9_LP(dev_priv)) {
7042                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
7043                 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
7044                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
7045                 rps->min_freq = (rp_state_cap >>  0) & 0xff;
7046         } else {
7047                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
7048                 rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
7049                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
7050                 rps->min_freq = (rp_state_cap >> 16) & 0xff;
7051         }
7052         /* hw_max = RP0 until we check for overclocking */
7053         rps->max_freq = rps->rp0_freq;
7054
7055         rps->efficient_freq = rps->rp1_freq;
7056         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
7057             IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7058                 u32 ddcc_status = 0;
7059
7060                 if (sandybridge_pcode_read(dev_priv,
7061                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
7062                                            &ddcc_status) == 0)
7063                         rps->efficient_freq =
7064                                 clamp_t(u8,
7065                                         ((ddcc_status >> 8) & 0xff),
7066                                         rps->min_freq,
7067                                         rps->max_freq);
7068         }
7069
7070         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7071                 /* Store the frequency values in 16.66 MHz units, which is
7072                  * the natural hardware unit for SKL
7073                  */
7074                 rps->rp0_freq *= GEN9_FREQ_SCALER;
7075                 rps->rp1_freq *= GEN9_FREQ_SCALER;
7076                 rps->min_freq *= GEN9_FREQ_SCALER;
7077                 rps->max_freq *= GEN9_FREQ_SCALER;
7078                 rps->efficient_freq *= GEN9_FREQ_SCALER;
7079         }
7080 }
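
/*
 * Worked example (illustrative): a raw RP0 fuse value of 36 means
 * 36 * 50MHz = 1800MHz. On SKL-class parts it is then multiplied by
 * GEN9_FREQ_SCALER (50MHz / ~16.66MHz == 3), so rp0_freq is stored as
 * 108 units of ~16.66MHz - still 1800MHz, just in the hw's native unit.
 */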
7081
7082 static void reset_rps(struct drm_i915_private *dev_priv,
7083                       int (*set)(struct drm_i915_private *, u8))
7084 {
7085         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7086         u8 freq = rps->cur_freq;
7087
7088         /* force a reset */
7089         rps->power.mode = -1;
7090         rps->cur_freq = -1;
7091
7092         if (set(dev_priv, freq))
7093                 DRM_ERROR("Failed to reset RPS to initial values\n");
7094 }
7095
7096 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
7097 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
7098 {
7099         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7100
7101         /* Program defaults and thresholds for RPS */
7102         if (IS_GEN(dev_priv, 9))
7103                 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7104                         GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
7105
7106         /* 1 second timeout */
7107         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
7108                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
7109
7110         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
7111
7112         /* Lean on the call to gen6_set_rps below to program/set up the
7113          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
7114          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
7115         reset_rps(dev_priv, gen6_set_rps);
7116
7117         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7118 }
7119
7120 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
7121 {
7122         struct intel_engine_cs *engine;
7123         enum intel_engine_id id;
7124         u32 rc6_mode;
7125
7126         /* 1a: Software RC state - RC0 */
7127         I915_WRITE(GEN6_RC_STATE, 0);
7128
7129         /* 1b: Get forcewake during program sequence. Although the driver
7130          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7131         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7132
7133         /* 2a: Disable RC states. */
7134         I915_WRITE(GEN6_RC_CONTROL, 0);
7135
7136         /* 2b: Program RC6 thresholds.*/
7137         if (INTEL_GEN(dev_priv) >= 10) {
7138                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7139                 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7140         } else if (IS_SKYLAKE(dev_priv)) {
7141                 /*
7142                  * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
7143                  * when CPG is enabled
7144                  */
7145                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
7146         } else {
7147                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
7148         }
7149
7150         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7151         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7152         for_each_engine(engine, dev_priv, id)
7153                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7154
7155         if (HAS_GUC(dev_priv))
7156                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7157
7158         I915_WRITE(GEN6_RC_SLEEP, 0);
7159
7160         /*
7161          * 2c: Program Coarse Power Gating Policies.
7162          *
7163          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7164          * use instead is a more conservative estimate for the maximum time
7165          * it takes us to service a CS interrupt and submit a new ELSP - that
7166          * is the time which the GPU is idle waiting for the CPU to select the
7167          * next request to execute. If the idle hysteresis is less than that
7168          * interrupt service latency, the hardware will automatically gate
7169          * the power well and we will then incur the wake up cost on top of
7170          * the service latency. A similar guide from intel_pstate is that we
7171          * do not want the enable hysteresis to be less than the wakeup latency.
7172          *
7173          * igt/gem_exec_nop/sequential provides a rough estimate for the
7174          * service latency, and puts it around 10us for Broadwell (and other
7175          * big core) and around 40us for Broxton (and other low power cores).
7176          * [Note that for legacy ringbuffer submission, this is less than 1us!]
7177          * However, the wakeup latency on Broxton is closer to 100us. To be
7178          * conservative, we have to factor in a context switch on top (due
7179          * to ksoftirqd).
7180          */
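        /*
         * Illustration: the value 250 programmed just below corresponds to
         * 250 * 1280ns = 320us of idle hysteresis, comfortably above the
         * ~100us wakeup latency plus scheduling slack estimated above.
         */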
7181         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7182         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7183
7184         /* 3a: Enable RC6 */
7185         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
7186
7187         /* WaRsUseTimeoutMode:cnl (pre-prod) */
7188         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7189                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7190         else
7191                 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7192
7193         I915_WRITE(GEN6_RC_CONTROL,
7194                    GEN6_RC_CTL_HW_ENABLE |
7195                    GEN6_RC_CTL_RC6_ENABLE |
7196                    rc6_mode);
7197
7198         /*
7199          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7200          * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7201          */
7202         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7203                 I915_WRITE(GEN9_PG_ENABLE, 0);
7204         else
7205                 I915_WRITE(GEN9_PG_ENABLE,
7206                            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7207
7208         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7209 }
7210
7211 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7212 {
7213         struct intel_engine_cs *engine;
7214         enum intel_engine_id id;
7215
7216         /* 1a: Software RC state - RC0 */
7217         I915_WRITE(GEN6_RC_STATE, 0);
7218
7219         /* 1b: Get forcewake during program sequence. Although the driver
7220          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7221         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7222
7223         /* 2a: Disable RC states. */
7224         I915_WRITE(GEN6_RC_CONTROL, 0);
7225
7226         /* 2b: Program RC6 thresholds.*/
7227         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7228         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7229         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7230         for_each_engine(engine, dev_priv, id)
7231                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7232         I915_WRITE(GEN6_RC_SLEEP, 0);
7233         I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7234
7235         /* 3: Enable RC6 */
7236
7237         I915_WRITE(GEN6_RC_CONTROL,
7238                    GEN6_RC_CTL_HW_ENABLE |
7239                    GEN7_RC_CTL_TO_MODE |
7240                    GEN6_RC_CTL_RC6_ENABLE);
7241
7242         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7243 }
7244
7245 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7246 {
7247         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7248
7249         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7250
7251         /* 1 Program defaults and thresholds for RPS*/
7252         I915_WRITE(GEN6_RPNSWREQ,
7253                    HSW_FREQUENCY(rps->rp1_freq));
7254         I915_WRITE(GEN6_RC_VIDEO_FREQ,
7255                    HSW_FREQUENCY(rps->rp1_freq));
7256         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7257         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7258
7259         /* Docs recommend 900MHz, and 300 MHz respectively */
7260         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7261                    rps->max_freq_softlimit << 24 |
7262                    rps->min_freq_softlimit << 16);
7263
7264         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7265         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7266         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7267         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7268
7269         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7270
7271         /* 2: Enable RPS */
7272         I915_WRITE(GEN6_RP_CONTROL,
7273                    GEN6_RP_MEDIA_TURBO |
7274                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7275                    GEN6_RP_MEDIA_IS_GFX |
7276                    GEN6_RP_ENABLE |
7277                    GEN6_RP_UP_BUSY_AVG |
7278                    GEN6_RP_DOWN_IDLE_AVG);
7279
7280         reset_rps(dev_priv, gen6_set_rps);
7281
7282         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7283 }
7284
7285 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7286 {
7287         struct intel_engine_cs *engine;
7288         enum intel_engine_id id;
7289         u32 rc6vids, rc6_mask;
7290         u32 gtfifodbg;
7291         int ret;
7292
7293         I915_WRITE(GEN6_RC_STATE, 0);
7294
7295         /* Clear the DBG now so we don't confuse earlier errors */
7296         gtfifodbg = I915_READ(GTFIFODBG);
7297         if (gtfifodbg) {
7298                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7299                 I915_WRITE(GTFIFODBG, gtfifodbg);
7300         }
7301
7302         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7303
7304         /* disable the counters and set deterministic thresholds */
7305         I915_WRITE(GEN6_RC_CONTROL, 0);
7306
7307         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7308         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7309         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7310         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7311         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7312
7313         for_each_engine(engine, dev_priv, id)
7314                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7315
7316         I915_WRITE(GEN6_RC_SLEEP, 0);
7317         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7318         if (IS_IVYBRIDGE(dev_priv))
7319                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7320         else
7321                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7322         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7323         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7324
7325         /* We don't use those on Haswell */
7326         rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7327         if (HAS_RC6p(dev_priv))
7328                 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7329         if (HAS_RC6pp(dev_priv))
7330                 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7331         I915_WRITE(GEN6_RC_CONTROL,
7332                    rc6_mask |
7333                    GEN6_RC_CTL_EI_MODE(1) |
7334                    GEN6_RC_CTL_HW_ENABLE);
7335
7336         rc6vids = 0;
7337         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7338         if (IS_GEN(dev_priv, 6) && ret) {
7339                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7340         } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7341                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7342                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7343                 rc6vids &= 0xffff00;
7344                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7345                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7346                 if (ret)
7347                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7348         }
7349
7350         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7351 }
7352
7353 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7354 {
7355         /* Here begins a magic sequence of register writes to enable
7356          * auto-downclocking.
7357          *
7358          * Perhaps there might be some value in exposing these to
7359          * userspace...
7360          */
7361         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7362
7363         /* Power down if completely idle for over 50ms */
7364         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7365         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7366
7367         reset_rps(dev_priv, gen6_set_rps);
7368
7369         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7370 }
7371
7372 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7373 {
7374         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7375         const int min_freq = 15;
7376         const int scaling_factor = 180;
7377         unsigned int gpu_freq;
7378         unsigned int max_ia_freq, min_ring_freq;
7379         unsigned int max_gpu_freq, min_gpu_freq;
7380         struct cpufreq_policy *policy;
7381
7382         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7383
7384         if (rps->max_freq <= rps->min_freq)
7385                 return;
7386
7387         policy = cpufreq_cpu_get(0);
7388         if (policy) {
7389                 max_ia_freq = policy->cpuinfo.max_freq;
7390                 cpufreq_cpu_put(policy);
7391         } else {
7392                 /*
7393                  * Default to measured freq if none found, PCU will ensure we
7394                  * don't go over
7395                  */
7396                 max_ia_freq = tsc_khz;
7397         }
7398
7399         /* Convert from kHz to MHz */
7400         max_ia_freq /= 1000;
7401
7402         min_ring_freq = I915_READ(DCLK) & 0xf;
7403         /* convert DDR frequency from units of 266.6MHz to bandwidth */
7404         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7405
7406         min_gpu_freq = rps->min_freq;
7407         max_gpu_freq = rps->max_freq;
7408         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7409                 /* Convert GT frequency to 50 MHz units */
7410                 min_gpu_freq /= GEN9_FREQ_SCALER;
7411                 max_gpu_freq /= GEN9_FREQ_SCALER;
7412         }
7413
7414         /*
7415          * For each potential GPU frequency, load a ring frequency we'd like
7416          * to use for memory access.  We do this by specifying the IA frequency
7417          * the PCU should use as a reference to determine the ring frequency.
7418          */
7419         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7420                 const int diff = max_gpu_freq - gpu_freq;
7421                 unsigned int ia_freq = 0, ring_freq = 0;
7422
7423                 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7424                         /*
7425                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
7426                          * No floor required for ring frequency on SKL.
7427                          */
7428                         ring_freq = gpu_freq;
7429                 } else if (INTEL_GEN(dev_priv) >= 8) {
7430                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
7431                         ring_freq = max(min_ring_freq, gpu_freq);
7432                 } else if (IS_HASWELL(dev_priv)) {
7433                         ring_freq = mult_frac(gpu_freq, 5, 4);
7434                         ring_freq = max(min_ring_freq, ring_freq);
7435                         /* leave ia_freq as the default, chosen by cpufreq */
7436                 } else {
7437                         /* On older processors, there is no separate ring
7438                          * clock domain, so in order to boost the bandwidth
7439                          * of the ring, we need to upclock the CPU (ia_freq).
7440                          *
7441                          * For GPU frequencies less than 750MHz,
7442                          * just use the lowest ring freq.
7443                          */
7444                         if (gpu_freq < min_freq)
7445                                 ia_freq = 800;
7446                         else
7447                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7448                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7449                 }
7450
7451                 sandybridge_pcode_write(dev_priv,
7452                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7453                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7454                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7455                                         gpu_freq);
7456         }
7457 }
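
/*
 * Worked example (illustrative numbers): on a pre-Haswell part with
 * max_ia_freq = 3400MHz and scaling_factor = 180, a GPU frequency four bins
 * below the maximum gives
 *
 *   ia_freq = 3400 - (4 * 180) / 2 = 3040MHz
 *
 * which DIV_ROUND_CLOSEST(ia_freq, 100) turns into 30 (100MHz units) before
 * it is written into the pcode min-frequency table.
 */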
7458
7459 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7460 {
7461         u32 val, rp0;
7462
7463         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7464
7465         switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
7466         case 8:
7467                 /* (2 * 4) config */
7468                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7469                 break;
7470         case 12:
7471                 /* (2 * 6) config */
7472                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7473                 break;
7474         case 16:
7475                 /* (2 * 8) config */
7476         default:
7477                 /* Setting (2 * 8) Min RP0 for any other combination */
7478                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7479                 break;
7480         }
7481
7482         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7483
7484         return rp0;
7485 }
7486
7487 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7488 {
7489         u32 val, rpe;
7490
7491         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7492         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7493
7494         return rpe;
7495 }
7496
7497 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7498 {
7499         u32 val, rp1;
7500
7501         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7502         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7503
7504         return rp1;
7505 }
7506
7507 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7508 {
7509         u32 val, rpn;
7510
7511         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7512         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7513                        FB_GFX_FREQ_FUSE_MASK);
7514
7515         return rpn;
7516 }
7517
7518 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7519 {
7520         u32 val, rp1;
7521
7522         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7523
7524         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7525
7526         return rp1;
7527 }
7528
7529 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7530 {
7531         u32 val, rp0;
7532
7533         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7534
7535         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7536         /* Clamp to max */
7537         rp0 = min_t(u32, rp0, 0xea);
7538
7539         return rp0;
7540 }
7541
7542 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7543 {
7544         u32 val, rpe;
7545
7546         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7547         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7548         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7549         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7550
7551         return rpe;
7552 }
7553
7554 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7555 {
7556         u32 val;
7557
7558         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7559         /*
7560          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7561          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7562          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7563          * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7564          * to make sure it matches what Punit accepts.
7565          */
7566         return max_t(u32, val, 0xc0);
7567 }
7568
7569 /* Check that the pctx buffer wasn't moved under us. */
7570 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7571 {
7572         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7573
7574         WARN_ON(pctx_addr != dev_priv->dsm.start +
7575                              dev_priv->vlv_pctx->stolen->start);
7576 }
7577
7578
7579 /* Check that the pcbr address is not empty. */
7580 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7581 {
7582         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7583
7584         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7585 }
7586
7587 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7588 {
7589         resource_size_t pctx_paddr, paddr;
7590         resource_size_t pctx_size = 32*1024;
7591         u32 pcbr;
7592
7593         pcbr = I915_READ(VLV_PCBR);
7594         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7595                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7596                 paddr = dev_priv->dsm.end + 1 - pctx_size;
7597                 GEM_BUG_ON(paddr > U32_MAX);
7598
7599                 pctx_paddr = (paddr & (~4095));
7600                 I915_WRITE(VLV_PCBR, pctx_paddr);
7601         }
7602
7603         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7604 }
7605
7606 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7607 {
7608         struct drm_i915_gem_object *pctx;
7609         resource_size_t pctx_paddr;
7610         resource_size_t pctx_size = 24*1024;
7611         u32 pcbr;
7612
7613         pcbr = I915_READ(VLV_PCBR);
7614         if (pcbr) {
7615                 /* BIOS set it up already, grab the pre-alloc'd space */
7616                 resource_size_t pcbr_offset;
7617
7618                 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7619                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7620                                                                       pcbr_offset,
7621                                                                       I915_GTT_OFFSET_NONE,
7622                                                                       pctx_size);
7623                 goto out;
7624         }
7625
7626         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7627
7628         /*
7629          * From the Gunit register HAS:
7630          * The Gfx driver is expected to program this register and ensure
7631          * proper allocation within Gfx stolen memory.  For example, this
7632          * register should be programmed such that the PCBR range does not
7633          * overlap with other ranges, such as the frame buffer, protected
7634          * memory, or any other relevant ranges.
7635          */
7636         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7637         if (!pctx) {
7638                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7639                 goto out;
7640         }
7641
7642         GEM_BUG_ON(range_overflows_t(u64,
7643                                      dev_priv->dsm.start,
7644                                      pctx->stolen->start,
7645                                      U32_MAX));
7646         pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7647         I915_WRITE(VLV_PCBR, pctx_paddr);
7648
7649 out:
7650         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7651         dev_priv->vlv_pctx = pctx;
7652 }
7653
7654 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7655 {
7656         struct drm_i915_gem_object *pctx;
7657
7658         pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7659         if (pctx)
7660                 i915_gem_object_put(pctx);
7661 }
7662
7663 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7664 {
7665         dev_priv->gt_pm.rps.gpll_ref_freq =
7666                 vlv_get_cck_clock(dev_priv, "GPLL ref",
7667                                   CCK_GPLL_CLOCK_CONTROL,
7668                                   dev_priv->czclk_freq);
7669
7670         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7671                          dev_priv->gt_pm.rps.gpll_ref_freq);
7672 }
7673
7674 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7675 {
7676         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7677         u32 val;
7678
7679         valleyview_setup_pctx(dev_priv);
7680
7681         vlv_init_gpll_ref_freq(dev_priv);
7682
7683         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7684         switch ((val >> 6) & 3) {
7685         case 0:
7686         case 1:
7687                 dev_priv->mem_freq = 800;
7688                 break;
7689         case 2:
7690                 dev_priv->mem_freq = 1066;
7691                 break;
7692         case 3:
7693                 dev_priv->mem_freq = 1333;
7694                 break;
7695         }
7696         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7697
7698         rps->max_freq = valleyview_rps_max_freq(dev_priv);
7699         rps->rp0_freq = rps->max_freq;
7700         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7701                          intel_gpu_freq(dev_priv, rps->max_freq),
7702                          rps->max_freq);
7703
7704         rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7705         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7706                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7707                          rps->efficient_freq);
7708
7709         rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7710         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7711                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7712                          rps->rp1_freq);
7713
7714         rps->min_freq = valleyview_rps_min_freq(dev_priv);
7715         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7716                          intel_gpu_freq(dev_priv, rps->min_freq),
7717                          rps->min_freq);
7718 }
7719
7720 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7721 {
7722         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7723         u32 val;
7724
7725         cherryview_setup_pctx(dev_priv);
7726
7727         vlv_init_gpll_ref_freq(dev_priv);
7728
7729         mutex_lock(&dev_priv->sb_lock);
7730         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7731         mutex_unlock(&dev_priv->sb_lock);
7732
7733         switch ((val >> 2) & 0x7) {
7734         case 3:
7735                 dev_priv->mem_freq = 2000;
7736                 break;
7737         default:
7738                 dev_priv->mem_freq = 1600;
7739                 break;
7740         }
7741         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7742
7743         rps->max_freq = cherryview_rps_max_freq(dev_priv);
7744         rps->rp0_freq = rps->max_freq;
7745         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7746                          intel_gpu_freq(dev_priv, rps->max_freq),
7747                          rps->max_freq);
7748
7749         rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7750         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7751                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7752                          rps->efficient_freq);
7753
7754         rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7755         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7756                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7757                          rps->rp1_freq);
7758
7759         rps->min_freq = cherryview_rps_min_freq(dev_priv);
7760         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7761                          intel_gpu_freq(dev_priv, rps->min_freq),
7762                          rps->min_freq);
7763
7764         WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7765                    rps->min_freq) & 1,
7766                   "Odd GPU freq values\n");
7767 }
7768
7769 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7770 {
7771         valleyview_cleanup_pctx(dev_priv);
7772 }
7773
7774 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7775 {
7776         struct intel_engine_cs *engine;
7777         enum intel_engine_id id;
7778         u32 gtfifodbg, rc6_mode, pcbr;
7779
7780         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7781                                              GT_FIFO_FREE_ENTRIES_CHV);
7782         if (gtfifodbg) {
7783                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7784                                  gtfifodbg);
7785                 I915_WRITE(GTFIFODBG, gtfifodbg);
7786         }
7787
7788         cherryview_check_pctx(dev_priv);
7789
7790         /* 1a & 1b: Get forcewake during program sequence. Although the driver
7791          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7792         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7793
7794         /*  Disable RC states. */
7795         I915_WRITE(GEN6_RC_CONTROL, 0);
7796
7797         /* 2a: Program RC6 thresholds.*/
7798         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7799         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7800         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7801
7802         for_each_engine(engine, dev_priv, id)
7803                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7804         I915_WRITE(GEN6_RC_SLEEP, 0);
7805
7806         /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
7807         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7808
7809         /* Allows RC6 residency counter to work */
7810         I915_WRITE(VLV_COUNTER_CONTROL,
7811                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7812                                       VLV_MEDIA_RC6_COUNT_EN |
7813                                       VLV_RENDER_RC6_COUNT_EN));
7814
7815         /* For now we assume BIOS is allocating and populating the PCBR */
7816         pcbr = I915_READ(VLV_PCBR);
7817
7818         /* 3: Enable RC6 */
7819         rc6_mode = 0;
7820         if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7821                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7822         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7823
7824         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7825 }
7826
7827 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7828 {
7829         u32 val;
7830
7831         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7832
7833         /* 1: Program defaults and thresholds for RPS*/
7834         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7835         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7836         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7837         I915_WRITE(GEN6_RP_UP_EI, 66000);
7838         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7839
7840         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7841
7842         /* 2: Enable RPS */
7843         I915_WRITE(GEN6_RP_CONTROL,
7844                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7845                    GEN6_RP_MEDIA_IS_GFX |
7846                    GEN6_RP_ENABLE |
7847                    GEN6_RP_UP_BUSY_AVG |
7848                    GEN6_RP_DOWN_IDLE_AVG);
7849
7850         /* Setting Fixed Bias */
7851         val = VLV_OVERRIDE_EN |
7852                   VLV_SOC_TDP_EN |
7853                   CHV_BIAS_CPU_50_SOC_50;
7854         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7855
7856         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7857
7858         /* RPS code assumes GPLL is used */
7859         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7860
7861         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7862         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7863
7864         reset_rps(dev_priv, valleyview_set_rps);
7865
7866         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7867 }
7868
7869 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7870 {
7871         struct intel_engine_cs *engine;
7872         enum intel_engine_id id;
7873         u32 gtfifodbg;
7874
7875         valleyview_check_pctx(dev_priv);
7876
7877         gtfifodbg = I915_READ(GTFIFODBG);
7878         if (gtfifodbg) {
7879                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7880                                  gtfifodbg);
7881                 I915_WRITE(GTFIFODBG, gtfifodbg);
7882         }
7883
7884         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7885
7886         /*  Disable RC states. */
7887         I915_WRITE(GEN6_RC_CONTROL, 0);
7888
7889         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7890         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7891         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7892
7893         for_each_engine(engine, dev_priv, id)
7894                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7895
7896         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7897
7898         /* Allows RC6 residency counter to work */
7899         I915_WRITE(VLV_COUNTER_CONTROL,
7900                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7901                                       VLV_MEDIA_RC0_COUNT_EN |
7902                                       VLV_RENDER_RC0_COUNT_EN |
7903                                       VLV_MEDIA_RC6_COUNT_EN |
7904                                       VLV_RENDER_RC6_COUNT_EN));
7905
7906         I915_WRITE(GEN6_RC_CONTROL,
7907                    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7908
7909         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7910 }
7911
7912 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7913 {
7914         u32 val;
7915
7916         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7917
7918         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7919         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7920         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7921         I915_WRITE(GEN6_RP_UP_EI, 66000);
7922         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7923
7924         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7925
7926         I915_WRITE(GEN6_RP_CONTROL,
7927                    GEN6_RP_MEDIA_TURBO |
7928                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7929                    GEN6_RP_MEDIA_IS_GFX |
7930                    GEN6_RP_ENABLE |
7931                    GEN6_RP_UP_BUSY_AVG |
7932                    GEN6_RP_DOWN_IDLE_CONT);
7933
7934         /* Setting Fixed Bias */
7935         val = VLV_OVERRIDE_EN |
7936                   VLV_SOC_TDP_EN |
7937                   VLV_BIAS_CPU_125_SOC_875;
7938         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7939
7940         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7941
7942         /* RPS code assumes GPLL is used */
7943         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7944
7945         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7946         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7947
7948         reset_rps(dev_priv, valleyview_set_rps);
7949
7950         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7951 }
7952
7953 static unsigned long intel_pxfreq(u32 vidfreq)
7954 {
7955         unsigned long freq;
7956         int div = (vidfreq & 0x3f0000) >> 16;
7957         int post = (vidfreq & 0x3000) >> 12;
7958         int pre = (vidfreq & 0x7);
7959
7960         if (!pre)
7961                 return 0;
7962
7963         freq = ((div * 133333) / ((1<<post) * pre));
7964
7965         return freq;
7966 }
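
/*
 * Worked example (illustrative field values): for a vidfreq encoding with
 * div = 16, post = 1 and pre = 1 the formula above yields
 *
 *   freq = (16 * 133333) / ((1 << 1) * 1) = 1066664
 *
 * i.e. roughly 1.07GHz, assuming the 133333 base is read as kHz.
 */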
7967
7968 static const struct cparams {
7969         u16 i;
7970         u16 t;
7971         u16 m;
7972         u16 c;
7973 } cparams[] = {
7974         { 1, 1333, 301, 28664 },
7975         { 1, 1066, 294, 24460 },
7976         { 1, 800, 294, 25192 },
7977         { 0, 1333, 276, 27605 },
7978         { 0, 1066, 276, 27605 },
7979         { 0, 800, 231, 23784 },
7980 };
7981
7982 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7983 {
7984         u64 total_count, diff, ret;
7985         u32 count1, count2, count3, m = 0, c = 0;
7986         unsigned long now = jiffies_to_msecs(jiffies), diff1;
7987         int i;
7988
7989         lockdep_assert_held(&mchdev_lock);
7990
7991         diff1 = now - dev_priv->ips.last_time1;
7992
7993         /* Prevent division-by-zero if we are asking too fast.
7994          * Also, we don't get interesting results if we are polling
7995          * faster than once in 10ms, so just return the saved value
7996          * in such cases.
7997          */
7998         if (diff1 <= 10)
7999                 return dev_priv->ips.chipset_power;
8000
8001         count1 = I915_READ(DMIEC);
8002         count2 = I915_READ(DDREC);
8003         count3 = I915_READ(CSIEC);
8004
8005         total_count = count1 + count2 + count3;
8006
8007         /* FIXME: handle per-counter overflow */
8008         if (total_count < dev_priv->ips.last_count1) {
8009                 diff = ~0UL - dev_priv->ips.last_count1;
8010                 diff += total_count;
8011         } else {
8012                 diff = total_count - dev_priv->ips.last_count1;
8013         }
8014
8015         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
8016                 if (cparams[i].i == dev_priv->ips.c_m &&
8017                     cparams[i].t == dev_priv->ips.r_t) {
8018                         m = cparams[i].m;
8019                         c = cparams[i].c;
8020                         break;
8021                 }
8022         }
8023
8024         diff = div_u64(diff, diff1);
8025         ret = ((m * diff) + c);
8026         ret = div_u64(ret, 10);
8027
8028         dev_priv->ips.last_count1 = total_count;
8029         dev_priv->ips.last_time1 = now;
8030
8031         dev_priv->ips.chipset_power = ret;
8032
8033         return ret;
8034 }
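/*
 * Illustrative arithmetic for the fit above, using the first cparams row
 * (m = 301, c = 28664) and an assumed count delta of 100 per millisecond:
 * ret = (301 * 100 + 28664) / 10 = 58764 / 10 = 5876.  The coefficients are
 * selected by matching ips.c_m and ips.r_t against the cparams table.
 */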
8035
8036 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
8037 {
8038         intel_wakeref_t wakeref;
8039         unsigned long val = 0;
8040
8041         if (!IS_GEN(dev_priv, 5))
8042                 return 0;
8043
8044         with_intel_runtime_pm(dev_priv, wakeref) {
8045                 spin_lock_irq(&mchdev_lock);
8046                 val = __i915_chipset_val(dev_priv);
8047                 spin_unlock_irq(&mchdev_lock);
8048         }
8049
8050         return val;
8051 }
8052
8053 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
8054 {
8055         unsigned long m, x, b;
8056         u32 tsfs;
8057
8058         tsfs = I915_READ(TSFS);
8059
8060         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
8061         x = I915_READ8(TR1);
8062
8063         b = tsfs & TSFS_INTR_MASK;
8064
8065         return ((m * x) / 127) - b;
8066 }
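/*
 * Worked example with hypothetical register contents: a TSFS slope field of
 * m = 64, a TR1 reading of x = 100 and an offset of b = 10 gives
 * ((64 * 100) / 127) - 10 = 50 - 10 = 40.  The real values come straight
 * from the TSFS/TR1 hardware registers read above.
 */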
8067
8068 static int _pxvid_to_vd(u8 pxvid)
8069 {
8070         if (pxvid == 0)
8071                 return 0;
8072
8073         if (pxvid >= 8 && pxvid < 31)
8074                 pxvid = 31;
8075
8076         return (pxvid + 2) * 125;
8077 }
8078
8079 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
8080 {
8081         const int vd = _pxvid_to_vd(pxvid);
8082         const int vm = vd - 1125;
8083
8084         if (INTEL_INFO(dev_priv)->is_mobile)
8085                 return vm > 0 ? vm : 0;
8086
8087         return vd;
8088 }
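/*
 * Worked example of the conversion above: pxvid = 16 lies in [8, 31) and is
 * clamped to 31, so vd = (31 + 2) * 125 = 4125.  On mobile parts the result
 * is further reduced by 1125, i.e. vm = 4125 - 1125 = 3000; non-mobile parts
 * return vd unchanged.
 */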
8089
8090 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
8091 {
8092         u64 now, diff, diffms;
8093         u32 count;
8094
8095         lockdep_assert_held(&mchdev_lock);
8096
8097         now = ktime_get_raw_ns();
8098         diffms = now - dev_priv->ips.last_time2;
8099         do_div(diffms, NSEC_PER_MSEC);
8100
8101         /* Don't divide by 0 */
8102         if (!diffms)
8103                 return;
8104
8105         count = I915_READ(GFXEC);
8106
8107         if (count < dev_priv->ips.last_count2) {
8108                 diff = ~0UL - dev_priv->ips.last_count2;
8109                 diff += count;
8110         } else {
8111                 diff = count - dev_priv->ips.last_count2;
8112         }
8113
8114         dev_priv->ips.last_count2 = count;
8115         dev_priv->ips.last_time2 = now;
8116
8117         /* More magic constants... */
8118         diff = diff * 1181;
8119         diff = div_u64(diff, diffms * 10);
8120         dev_priv->ips.gfx_power = diff;
8121 }
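/*
 * Illustrative arithmetic for the scaling above, with an assumed GFXEC delta
 * of 5000 counts over diffms = 50: diff = 5000 * 1181 = 5905000, then
 * 5905000 / (50 * 10) = 11810 is stored in ips.gfx_power.
 */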
8122
8123 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
8124 {
8125         intel_wakeref_t wakeref;
8126
8127         if (!IS_GEN(dev_priv, 5))
8128                 return;
8129
8130         with_intel_runtime_pm(dev_priv, wakeref) {
8131                 spin_lock_irq(&mchdev_lock);
8132                 __i915_update_gfx_val(dev_priv);
8133                 spin_unlock_irq(&mchdev_lock);
8134         }
8135 }
8136
8137 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
8138 {
8139         unsigned long t, corr, state1, corr2, state2;
8140         u32 pxvid, ext_v;
8141
8142         lockdep_assert_held(&mchdev_lock);
8143
8144         pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
8145         pxvid = (pxvid >> 24) & 0x7f;
8146         ext_v = pvid_to_extvid(dev_priv, pxvid);
8147
8148         state1 = ext_v;
8149
8150         t = i915_mch_val(dev_priv);
8151
8152         /* Revel in the empirically derived constants */
8153
8154         /* Correction factor in 1/100000 units */
8155         if (t > 80)
8156                 corr = ((t * 2349) + 135940);
8157         else if (t >= 50)
8158                 corr = ((t * 964) + 29317);
8159         else /* < 50 */
8160                 corr = ((t * 301) + 1004);
8161
8162         corr = corr * ((150142 * state1) / 10000 - 78642);
8163         corr /= 100000;
8164         corr2 = (corr * dev_priv->ips.corr);
8165
8166         state2 = (corr2 * state1) / 10000;
8167         state2 /= 100; /* convert to mW */
8168
8169         __i915_update_gfx_val(dev_priv);
8170
8171         return dev_priv->ips.gfx_power + state2;
8172 }
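/*
 * Example of the piecewise correction above, assuming t = 60 as returned by
 * i915_mch_val(): that falls in the 50..80 branch, so
 * corr = (60 * 964) + 29317 = 87157, in the 1/100000 units noted above,
 * before it is combined with state1 and the ips.corr fuse value.
 */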
8173
8174 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
8175 {
8176         intel_wakeref_t wakeref;
8177         unsigned long val = 0;
8178
8179         if (!IS_GEN(dev_priv, 5))
8180                 return 0;
8181
8182         with_intel_runtime_pm(dev_priv, wakeref) {
8183                 spin_lock_irq(&mchdev_lock);
8184                 val = __i915_gfx_val(dev_priv);
8185                 spin_unlock_irq(&mchdev_lock);
8186         }
8187
8188         return val;
8189 }
8190
8191 static struct drm_i915_private *i915_mch_dev;
8192
8193 static struct drm_i915_private *mchdev_get(void)
8194 {
8195         struct drm_i915_private *i915;
8196
8197         rcu_read_lock();
8198         i915 = i915_mch_dev;
8199         if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
8200                 i915 = NULL;
8201         rcu_read_unlock();
8202
8203         return i915;
8204 }
8205
8206 /**
8207  * i915_read_mch_val - return value for IPS use
8208  *
8209  * Calculate and return a value for the IPS driver to use when deciding whether
8210  * we have thermal and power headroom to increase CPU or GPU power budget.
8211  */
8212 unsigned long i915_read_mch_val(void)
8213 {
8214         struct drm_i915_private *i915;
8215         unsigned long chipset_val = 0;
8216         unsigned long graphics_val = 0;
8217         intel_wakeref_t wakeref;
8218
8219         i915 = mchdev_get();
8220         if (!i915)
8221                 return 0;
8222
8223         with_intel_runtime_pm(i915, wakeref) {
8224                 spin_lock_irq(&mchdev_lock);
8225                 chipset_val = __i915_chipset_val(i915);
8226                 graphics_val = __i915_gfx_val(i915);
8227                 spin_unlock_irq(&mchdev_lock);
8228         }
8229
8230         drm_dev_put(&i915->drm);
8231         return chipset_val + graphics_val;
8232 }
8233 EXPORT_SYMBOL_GPL(i915_read_mch_val);
8234
8235 /**
8236  * i915_gpu_raise - raise GPU frequency limit
8237  *
8238  * Raise the limit; IPS indicates we have thermal headroom.
8239  */
8240 bool i915_gpu_raise(void)
8241 {
8242         struct drm_i915_private *i915;
8243
8244         i915 = mchdev_get();
8245         if (!i915)
8246                 return false;
8247
8248         spin_lock_irq(&mchdev_lock);
8249         if (i915->ips.max_delay > i915->ips.fmax)
8250                 i915->ips.max_delay--;
8251         spin_unlock_irq(&mchdev_lock);
8252
8253         drm_dev_put(&i915->drm);
8254         return true;
8255 }
8256 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8257
8258 /**
8259  * i915_gpu_lower - lower GPU frequency limit
8260  *
8261  * IPS indicates we're close to a thermal limit, so throttle back the GPU
8262  * frequency maximum.
8263  */
8264 bool i915_gpu_lower(void)
8265 {
8266         struct drm_i915_private *i915;
8267
8268         i915 = mchdev_get();
8269         if (!i915)
8270                 return false;
8271
8272         spin_lock_irq(&mchdev_lock);
8273         if (i915->ips.max_delay < i915->ips.min_delay)
8274                 i915->ips.max_delay++;
8275         spin_unlock_irq(&mchdev_lock);
8276
8277         drm_dev_put(&i915->drm);
8278         return true;
8279 }
8280 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8281
8282 /**
8283  * i915_gpu_busy - indicate GPU busyness to IPS
8284  *
8285  * Tell the IPS driver whether or not the GPU is busy.
8286  */
8287 bool i915_gpu_busy(void)
8288 {
8289         struct drm_i915_private *i915;
8290         bool ret;
8291
8292         i915 = mchdev_get();
8293         if (!i915)
8294                 return false;
8295
8296         ret = i915->gt.awake;
8297
8298         drm_dev_put(&i915->drm);
8299         return ret;
8300 }
8301 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8302
8303 /**
8304  * i915_gpu_turbo_disable - disable graphics turbo
8305  *
8306  * Disable graphics turbo by resetting the max frequency and setting the
8307  * current frequency to the default.
8308  */
8309 bool i915_gpu_turbo_disable(void)
8310 {
8311         struct drm_i915_private *i915;
8312         bool ret;
8313
8314         i915 = mchdev_get();
8315         if (!i915)
8316                 return false;
8317
8318         spin_lock_irq(&mchdev_lock);
8319         i915->ips.max_delay = i915->ips.fstart;
8320         ret = ironlake_set_drps(i915, i915->ips.fstart);
8321         spin_unlock_irq(&mchdev_lock);
8322
8323         drm_dev_put(&i915->drm);
8324         return ret;
8325 }
8326 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8327
8328 /**
8329  * Tells the intel_ips driver that the i915 driver is now loaded, if
8330  * IPS got loaded first.
8331  *
8332  * This awkward dance is so that neither module has to depend on the
8333  * other in order for IPS to do the appropriate communication of
8334  * GPU turbo limits to i915.
8335  */
8336 static void
8337 ips_ping_for_i915_load(void)
8338 {
8339         void (*link)(void);
8340
8341         link = symbol_get(ips_link_to_i915_driver);
8342         if (link) {
8343                 link();
8344                 symbol_put(ips_link_to_i915_driver);
8345         }
8346 }
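/*
 * For illustration only (this is not the actual intel_ips code): the
 * consumer side of this dance resolves the exported hooks at runtime with
 * symbol_get()/symbol_put() instead of linking against i915, e.g.
 *
 *	unsigned long (*read_mch)(void) = symbol_get(i915_read_mch_val);
 *
 *	if (read_mch) {
 *		budget = read_mch();
 *		symbol_put(i915_read_mch_val);
 *	}
 *
 * so neither module has a load-time dependency on the other.
 */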
8347
8348 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8349 {
8350         /* We only register the i915 ips part with intel-ips once everything is
8351          * set up, to avoid intel-ips sneaking in and reading bogus values. */
8352         rcu_assign_pointer(i915_mch_dev, dev_priv);
8353
8354         ips_ping_for_i915_load();
8355 }
8356
8357 void intel_gpu_ips_teardown(void)
8358 {
8359         rcu_assign_pointer(i915_mch_dev, NULL);
8360 }
8361
8362 static void intel_init_emon(struct drm_i915_private *dev_priv)
8363 {
8364         u32 lcfuse;
8365         u8 pxw[16];
8366         int i;
8367
8368         /* Disable PMON while we program the event weights */
8369         I915_WRITE(ECR, 0);
8370         POSTING_READ(ECR);
8371
8372         /* Program energy weights for various events */
8373         I915_WRITE(SDEW, 0x15040d00);
8374         I915_WRITE(CSIEW0, 0x007f0000);
8375         I915_WRITE(CSIEW1, 0x1e220004);
8376         I915_WRITE(CSIEW2, 0x04000004);
8377
8378         for (i = 0; i < 5; i++)
8379                 I915_WRITE(PEW(i), 0);
8380         for (i = 0; i < 3; i++)
8381                 I915_WRITE(DEW(i), 0);
8382
8383         /* Program P-state weights to account for frequency power adjustment */
8384         for (i = 0; i < 16; i++) {
8385                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8386                 unsigned long freq = intel_pxfreq(pxvidfreq);
8387                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8388                         PXVFREQ_PX_SHIFT;
8389                 unsigned long val;
8390
8391                 val = vid * vid;
8392                 val *= (freq / 1000);
8393                 val *= 255;
8394                 val /= (127*127*900);
8395                 if (val > 0xff)
8396                         DRM_ERROR("bad pxval: %ld\n", val);
8397                 pxw[i] = val;
8398         }
8399         /* Render standby states get 0 weight */
8400         pxw[14] = 0;
8401         pxw[15] = 0;
8402
8403         for (i = 0; i < 4; i++) {
8404                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8405                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8406                 I915_WRITE(PXW(i), val);
8407         }
8408
8409         /* Adjust magic regs to magic values (more experimental results) */
8410         I915_WRITE(OGW0, 0);
8411         I915_WRITE(OGW1, 0);
8412         I915_WRITE(EG0, 0x00007f00);
8413         I915_WRITE(EG1, 0x0000000e);
8414         I915_WRITE(EG2, 0x000e0000);
8415         I915_WRITE(EG3, 0x68000300);
8416         I915_WRITE(EG4, 0x42000000);
8417         I915_WRITE(EG5, 0x00140031);
8418         I915_WRITE(EG6, 0);
8419         I915_WRITE(EG7, 0);
8420
8421         for (i = 0; i < 8; i++)
8422                 I915_WRITE(PXWL(i), 0);
8423
8424         /* Enable PMON + select events */
8425         I915_WRITE(ECR, 0x80000019);
8426
8427         lcfuse = I915_READ(LCFUSE02);
8428
8429         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8430 }
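/*
 * Worked example of the P-state weight formula above, with assumed values
 * vid = 60 and freq = 266666 (i.e. intel_pxfreq() for div = 2, post = 0,
 * pre = 1): val = 60 * 60 * (266666 / 1000) * 255 / (127 * 127 * 900)
 * = 3600 * 266 * 255 / 14516100 = 16, which fits in the byte programmed
 * into PXW.
 */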
8431
8432 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8433 {
8434         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8435
8436         /*
8437          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8438          * requirement.
8439          */
8440         if (!sanitize_rc6(dev_priv)) {
8441                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8442                 pm_runtime_get(&dev_priv->drm.pdev->dev);
8443         }
8444
8445         mutex_lock(&dev_priv->pcu_lock);
8446
8447         /* Initialize RPS limits (for userspace) */
8448         if (IS_CHERRYVIEW(dev_priv))
8449                 cherryview_init_gt_powersave(dev_priv);
8450         else if (IS_VALLEYVIEW(dev_priv))
8451                 valleyview_init_gt_powersave(dev_priv);
8452         else if (INTEL_GEN(dev_priv) >= 6)
8453                 gen6_init_rps_frequencies(dev_priv);
8454
8455         /* Derive initial user preferences/limits from the hardware limits */
8456         rps->idle_freq = rps->min_freq;
8457         rps->cur_freq = rps->idle_freq;
8458
8459         rps->max_freq_softlimit = rps->max_freq;
8460         rps->min_freq_softlimit = rps->min_freq;
8461
8462         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8463                 rps->min_freq_softlimit =
8464                         max_t(int,
8465                               rps->efficient_freq,
8466                               intel_freq_opcode(dev_priv, 450));
8467
8468         /* After setting max-softlimit, find the overclock max freq */
8469         if (IS_GEN(dev_priv, 6) ||
8470             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8471                 u32 params = 0;
8472
8473                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8474                 if (params & BIT(31)) { /* OC supported */
8475                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8476                                          (rps->max_freq & 0xff) * 50,
8477                                          (params & 0xff) * 50);
8478                         rps->max_freq = params & 0xff;
8479                 }
8480         }
8481
8482         /* Finally allow us to boost to max by default */
8483         rps->boost_freq = rps->max_freq;
8484
8485         mutex_unlock(&dev_priv->pcu_lock);
8486 }
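/*
 * Illustrative decode of the overclocking reply above, with a hypothetical
 * pcode response: params = 0x80000018 has bit 31 set (OC supported) and an
 * OC limit of 0x18 = 24, i.e. 24 * 50 = 1200 MHz, so rps->max_freq would be
 * raised from, say, 22 (1100 MHz) to 24.
 */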
8487
8488 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8489 {
8490         if (IS_VALLEYVIEW(dev_priv))
8491                 valleyview_cleanup_gt_powersave(dev_priv);
8492
8493         if (!HAS_RC6(dev_priv))
8494                 pm_runtime_put(&dev_priv->drm.pdev->dev);
8495 }
8496
8497 /**
8498  * intel_suspend_gt_powersave - suspend PM work and helper threads
8499  * @dev_priv: i915 device
8500  *
8501  * We don't want to disable RC6 or other features here; we just want
8502  * to make sure any work we've queued has finished and won't bother
8503  * us while we're suspended.
8504  */
8505 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8506 {
8507         if (INTEL_GEN(dev_priv) < 6)
8508                 return;
8509
8510         /* gen6_rps_idle() will be called later to disable interrupts */
8511 }
8512
8513 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8514 {
8515         dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8516         dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8517         intel_disable_gt_powersave(dev_priv);
8518
8519         if (INTEL_GEN(dev_priv) >= 11)
8520                 gen11_reset_rps_interrupts(dev_priv);
8521         else if (INTEL_GEN(dev_priv) >= 6)
8522                 gen6_reset_rps_interrupts(dev_priv);
8523 }
8524
8525 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8526 {
8527         lockdep_assert_held(&i915->pcu_lock);
8528
8529         if (!i915->gt_pm.llc_pstate.enabled)
8530                 return;
8531
8532         /* Currently there is no HW configuration to be done to disable. */
8533
8534         i915->gt_pm.llc_pstate.enabled = false;
8535 }
8536
8537 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8538 {
8539         lockdep_assert_held(&dev_priv->pcu_lock);
8540
8541         if (!dev_priv->gt_pm.rc6.enabled)
8542                 return;
8543
8544         if (INTEL_GEN(dev_priv) >= 9)
8545                 gen9_disable_rc6(dev_priv);
8546         else if (IS_CHERRYVIEW(dev_priv))
8547                 cherryview_disable_rc6(dev_priv);
8548         else if (IS_VALLEYVIEW(dev_priv))
8549                 valleyview_disable_rc6(dev_priv);
8550         else if (INTEL_GEN(dev_priv) >= 6)
8551                 gen6_disable_rc6(dev_priv);
8552
8553         dev_priv->gt_pm.rc6.enabled = false;
8554 }
8555
8556 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8557 {
8558         lockdep_assert_held(&dev_priv->pcu_lock);
8559
8560         if (!dev_priv->gt_pm.rps.enabled)
8561                 return;
8562
8563         if (INTEL_GEN(dev_priv) >= 9)
8564                 gen9_disable_rps(dev_priv);
8565         else if (IS_CHERRYVIEW(dev_priv))
8566                 cherryview_disable_rps(dev_priv);
8567         else if (IS_VALLEYVIEW(dev_priv))
8568                 valleyview_disable_rps(dev_priv);
8569         else if (INTEL_GEN(dev_priv) >= 6)
8570                 gen6_disable_rps(dev_priv);
8571         else if (IS_IRONLAKE_M(dev_priv))
8572                 ironlake_disable_drps(dev_priv);
8573
8574         dev_priv->gt_pm.rps.enabled = false;
8575 }
8576
8577 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8578 {
8579         mutex_lock(&dev_priv->pcu_lock);
8580
8581         intel_disable_rc6(dev_priv);
8582         intel_disable_rps(dev_priv);
8583         if (HAS_LLC(dev_priv))
8584                 intel_disable_llc_pstate(dev_priv);
8585
8586         mutex_unlock(&dev_priv->pcu_lock);
8587 }
8588
8589 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8590 {
8591         lockdep_assert_held(&i915->pcu_lock);
8592
8593         if (i915->gt_pm.llc_pstate.enabled)
8594                 return;
8595
8596         gen6_update_ring_freq(i915);
8597
8598         i915->gt_pm.llc_pstate.enabled = true;
8599 }
8600
8601 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8602 {
8603         lockdep_assert_held(&dev_priv->pcu_lock);
8604
8605         if (dev_priv->gt_pm.rc6.enabled)
8606                 return;
8607
8608         if (IS_CHERRYVIEW(dev_priv))
8609                 cherryview_enable_rc6(dev_priv);
8610         else if (IS_VALLEYVIEW(dev_priv))
8611                 valleyview_enable_rc6(dev_priv);
8612         else if (INTEL_GEN(dev_priv) >= 9)
8613                 gen9_enable_rc6(dev_priv);
8614         else if (IS_BROADWELL(dev_priv))
8615                 gen8_enable_rc6(dev_priv);
8616         else if (INTEL_GEN(dev_priv) >= 6)
8617                 gen6_enable_rc6(dev_priv);
8618
8619         dev_priv->gt_pm.rc6.enabled = true;
8620 }
8621
8622 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8623 {
8624         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8625
8626         lockdep_assert_held(&dev_priv->pcu_lock);
8627
8628         if (rps->enabled)
8629                 return;
8630
8631         if (IS_CHERRYVIEW(dev_priv)) {
8632                 cherryview_enable_rps(dev_priv);
8633         } else if (IS_VALLEYVIEW(dev_priv)) {
8634                 valleyview_enable_rps(dev_priv);
8635         } else if (INTEL_GEN(dev_priv) >= 9) {
8636                 gen9_enable_rps(dev_priv);
8637         } else if (IS_BROADWELL(dev_priv)) {
8638                 gen8_enable_rps(dev_priv);
8639         } else if (INTEL_GEN(dev_priv) >= 6) {
8640                 gen6_enable_rps(dev_priv);
8641         } else if (IS_IRONLAKE_M(dev_priv)) {
8642                 ironlake_enable_drps(dev_priv);
8643                 intel_init_emon(dev_priv);
8644         }
8645
8646         WARN_ON(rps->max_freq < rps->min_freq);
8647         WARN_ON(rps->idle_freq > rps->max_freq);
8648
8649         WARN_ON(rps->efficient_freq < rps->min_freq);
8650         WARN_ON(rps->efficient_freq > rps->max_freq);
8651
8652         rps->enabled = true;
8653 }
8654
8655 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8656 {
8657         /* Powersaving is controlled by the host when inside a VM */
8658         if (intel_vgpu_active(dev_priv))
8659                 return;
8660
8661         mutex_lock(&dev_priv->pcu_lock);
8662
8663         if (HAS_RC6(dev_priv))
8664                 intel_enable_rc6(dev_priv);
8665         intel_enable_rps(dev_priv);
8666         if (HAS_LLC(dev_priv))
8667                 intel_enable_llc_pstate(dev_priv);
8668
8669         mutex_unlock(&dev_priv->pcu_lock);
8670 }
8671
8672 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8673 {
8674         /*
8675          * On Ibex Peak and Cougar Point, we need to disable clock
8676          * gating for the panel power sequencer or it will fail to
8677          * start up when no ports are active.
8678          */
8679         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8680 }
8681
8682 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8683 {
8684         enum pipe pipe;
8685
8686         for_each_pipe(dev_priv, pipe) {
8687                 I915_WRITE(DSPCNTR(pipe),
8688                            I915_READ(DSPCNTR(pipe)) |
8689                            DISPPLANE_TRICKLE_FEED_DISABLE);
8690
8691                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8692                 POSTING_READ(DSPSURF(pipe));
8693         }
8694 }
8695
8696 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8697 {
8698         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8699
8700         /*
8701          * Required for FBC
8702          * WaFbcDisableDpfcClockGating:ilk
8703          */
8704         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8705                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8706                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8707
8708         I915_WRITE(PCH_3DCGDIS0,
8709                    MARIUNIT_CLOCK_GATE_DISABLE |
8710                    SVSMUNIT_CLOCK_GATE_DISABLE);
8711         I915_WRITE(PCH_3DCGDIS1,
8712                    VFMUNIT_CLOCK_GATE_DISABLE);
8713
8714         /*
8715          * According to the spec the following bits should be set in
8716          * order to enable memory self-refresh
8717          * The bit 22/21 of 0x42004
8718          * The bit 5 of 0x42020
8719          * The bit 15 of 0x45000
8720          */
8721         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8722                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
8723                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8724         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8725         I915_WRITE(DISP_ARB_CTL,
8726                    (I915_READ(DISP_ARB_CTL) |
8727                     DISP_FBC_WM_DIS));
8728
8729         /*
8730          * Based on the document from hardware guys the following bits
8731          * should be set unconditionally in order to enable FBC.
8732          * The bit 22 of 0x42000
8733          * The bit 22 of 0x42004
8734          * The bit 7,8,9 of 0x42020.
8735          */
8736         if (IS_IRONLAKE_M(dev_priv)) {
8737                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8738                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8739                            I915_READ(ILK_DISPLAY_CHICKEN1) |
8740                            ILK_FBCQ_DIS);
8741                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8742                            I915_READ(ILK_DISPLAY_CHICKEN2) |
8743                            ILK_DPARB_GATE);
8744         }
8745
8746         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8747
8748         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8749                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8750                    ILK_ELPIN_409_SELECT);
8751         I915_WRITE(_3D_CHICKEN2,
8752                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8753                    _3D_CHICKEN2_WM_READ_PIPELINED);
8754
8755         /* WaDisableRenderCachePipelinedFlush:ilk */
8756         I915_WRITE(CACHE_MODE_0,
8757                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8758
8759         /* WaDisable_RenderCache_OperationalFlush:ilk */
8760         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8761
8762         g4x_disable_trickle_feed(dev_priv);
8763
8764         ibx_init_clock_gating(dev_priv);
8765 }
8766
8767 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8768 {
8769         int pipe;
8770         u32 val;
8771
8772         /*
8773          * On Ibex Peak and Cougar Point, we need to disable clock
8774          * gating for the panel power sequencer or it will fail to
8775          * start up when no ports are active.
8776          */
8777         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8778                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8779                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
8780         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8781                    DPLS_EDP_PPS_FIX_DIS);
8782         /* The below fixes a weird display corruption (a few pixels shifted
8783          * downward) seen only on the LVDS panels of some HP Ivy Bridge laptops.
8784          */
8785         for_each_pipe(dev_priv, pipe) {
8786                 val = I915_READ(TRANS_CHICKEN2(pipe));
8787                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8788                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8789                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8790                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8791                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8792                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8793                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8794                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8795         }
8796         /* WADP0ClockGatingDisable */
8797         for_each_pipe(dev_priv, pipe) {
8798                 I915_WRITE(TRANS_CHICKEN1(pipe),
8799                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8800         }
8801 }
8802
8803 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8804 {
8805         u32 tmp;
8806
8807         tmp = I915_READ(MCH_SSKPD);
8808         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8809                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
8810                               tmp);
8811 }
8812
8813 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8814 {
8815         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8816
8817         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8818
8819         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8820                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8821                    ILK_ELPIN_409_SELECT);
8822
8823         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8824         I915_WRITE(_3D_CHICKEN,
8825                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8826
8827         /* WaDisable_RenderCache_OperationalFlush:snb */
8828         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8829
8830         /*
8831          * BSpec recommends 8x4 when MSAA is used,
8832          * however in practice 16x4 seems fastest.
8833          *
8834          * Note that PS/WM thread counts depend on the WIZ hashing
8835          * disable bit, which we don't touch here, but it's good
8836          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8837          */
8838         I915_WRITE(GEN6_GT_MODE,
8839                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8840
8841         I915_WRITE(CACHE_MODE_0,
8842                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8843
8844         I915_WRITE(GEN6_UCGCTL1,
8845                    I915_READ(GEN6_UCGCTL1) |
8846                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8847                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8848
8849         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8850          * gating disable must be set.  Failure to set it results in
8851          * flickering pixels due to Z write ordering failures after
8852          * some amount of runtime in the Mesa "fire" demo, and Unigine
8853          * Sanctuary and Tropics, and apparently anything else with
8854          * alpha test or pixel discard.
8855          *
8856          * According to the spec, bit 11 (RCCUNIT) must also be set,
8857          * but we didn't debug actual testcases to find it out.
8858          *
8859          * WaDisableRCCUnitClockGating:snb
8860          * WaDisableRCPBUnitClockGating:snb
8861          */
8862         I915_WRITE(GEN6_UCGCTL2,
8863                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8864                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8865
8866         /* WaStripsFansDisableFastClipPerformanceFix:snb */
8867         I915_WRITE(_3D_CHICKEN3,
8868                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8869
8870         /*
8871          * Bspec says:
8872          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8873          * 3DSTATE_SF number of SF output attributes is more than 16."
8874          */
8875         I915_WRITE(_3D_CHICKEN3,
8876                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8877
8878         /*
8879          * According to the spec the following bits should be
8880          * set in order to enable memory self-refresh and fbc:
8881          * The bit21 and bit22 of 0x42000
8882          * The bit21 and bit22 of 0x42004
8883          * The bit5 and bit7 of 0x42020
8884          * The bit14 of 0x70180
8885          * The bit14 of 0x71180
8886          *
8887          * WaFbcAsynchFlipDisableFbcQueue:snb
8888          */
8889         I915_WRITE(ILK_DISPLAY_CHICKEN1,
8890                    I915_READ(ILK_DISPLAY_CHICKEN1) |
8891                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8892         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8893                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8894                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8895         I915_WRITE(ILK_DSPCLK_GATE_D,
8896                    I915_READ(ILK_DSPCLK_GATE_D) |
8897                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8898                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8899
8900         g4x_disable_trickle_feed(dev_priv);
8901
8902         cpt_init_clock_gating(dev_priv);
8903
8904         gen6_check_mch_setup(dev_priv);
8905 }
8906
8907 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8908 {
8909         u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
8910
8911         /*
8912          * WaVSThreadDispatchOverride:ivb,vlv
8913          *
8914          * This actually overrides the dispatch
8915          * mode for all thread types.
8916          */
8917         reg &= ~GEN7_FF_SCHED_MASK;
8918         reg |= GEN7_FF_TS_SCHED_HW;
8919         reg |= GEN7_FF_VS_SCHED_HW;
8920         reg |= GEN7_FF_DS_SCHED_HW;
8921
8922         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8923 }
8924
8925 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8926 {
8927         /*
8928          * TODO: this bit should only be enabled when really needed, then
8929          * disabled when not needed anymore in order to save power.
8930          */
8931         if (HAS_PCH_LPT_LP(dev_priv))
8932                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8933                            I915_READ(SOUTH_DSPCLK_GATE_D) |
8934                            PCH_LP_PARTITION_LEVEL_DISABLE);
8935
8936         /* WADPOClockGatingDisable:hsw */
8937         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8938                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8939                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8940 }
8941
8942 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8943 {
8944         if (HAS_PCH_LPT_LP(dev_priv)) {
8945                 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
8946
8947                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8948                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8949         }
8950 }
8951
8952 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8953                                    int general_prio_credits,
8954                                    int high_prio_credits)
8955 {
8956         u32 misccpctl;
8957         u32 val;
8958
8959         /* WaTempDisableDOPClkGating:bdw */
8960         misccpctl = I915_READ(GEN7_MISCCPCTL);
8961         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8962
8963         val = I915_READ(GEN8_L3SQCREG1);
8964         val &= ~L3_PRIO_CREDITS_MASK;
8965         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8966         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8967         I915_WRITE(GEN8_L3SQCREG1, val);
8968
8969         /*
8970          * Wait at least 100 clocks before re-enabling clock gating.
8971          * See the definition of L3SQCREG1 in BSpec.
8972          */
8973         POSTING_READ(GEN8_L3SQCREG1);
8974         udelay(1);
8975         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8976 }
8977
8978 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8979 {
8980         /* This is not a Wa. Enable to reduce Sampler power */
8981         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8982                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
8983
8984         /* WaEnable32PlaneMode:icl */
8985         I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
8986                    _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
8987 }
8988
8989 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8990 {
8991         if (!HAS_PCH_CNP(dev_priv))
8992                 return;
8993
8994         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8995         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8996                    CNP_PWM_CGE_GATING_DISABLE);
8997 }
8998
8999 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
9000 {
9001         u32 val;
9002         cnp_init_clock_gating(dev_priv);
9003
9004         /* This is not a Wa. Enable for better image quality */
9005         I915_WRITE(_3D_CHICKEN3,
9006                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
9007
9008         /* WaEnableChickenDCPR:cnl */
9009         I915_WRITE(GEN8_CHICKEN_DCPR_1,
9010                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
9011
9012         /* WaFbcWakeMemOn:cnl */
9013         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
9014                    DISP_FBC_MEMORY_WAKE);
9015
9016         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
9017         /* ReadHitWriteOnlyDisable:cnl */
9018         val |= RCCUNIT_CLKGATE_DIS;
9019         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
9020         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
9021                 val |= SARBUNIT_CLKGATE_DIS;
9022         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
9023
9024         /* Wa_2201832410:cnl */
9025         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
9026         val |= GWUNIT_CLKGATE_DIS;
9027         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
9028
9029         /* WaDisableVFclkgate:cnl */
9030         /* WaVFUnitClockGatingDisable:cnl */
9031         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
9032         val |= VFUNIT_CLKGATE_DIS;
9033         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
9034 }
9035
9036 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
9037 {
9038         cnp_init_clock_gating(dev_priv);
9039         gen9_init_clock_gating(dev_priv);
9040
9041         /* WaFbcNukeOnHostModify:cfl */
9042         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9043                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9044 }
9045
9046 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
9047 {
9048         gen9_init_clock_gating(dev_priv);
9049
9050         /* WaDisableSDEUnitClockGating:kbl */
9051         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9052                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9053                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9054
9055         /* WaDisableGamClockGating:kbl */
9056         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9057                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9058                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
9059
9060         /* WaFbcNukeOnHostModify:kbl */
9061         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9062                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9063 }
9064
9065 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
9066 {
9067         gen9_init_clock_gating(dev_priv);
9068
9069         /* WAC6entrylatency:skl */
9070         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
9071                    FBC_LLC_FULLY_OPEN);
9072
9073         /* WaFbcNukeOnHostModify:skl */
9074         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9075                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9076 }
9077
9078 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
9079 {
9080         /* The GTT cache must be disabled if the system is using 2M pages. */
9081         bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
9082                                                  I915_GTT_PAGE_SIZE_2M);
9083         enum pipe pipe;
9084
9085         /* WaSwitchSolVfFArbitrationPriority:bdw */
9086         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9087
9088         /* WaPsrDPAMaskVBlankInSRD:bdw */
9089         I915_WRITE(CHICKEN_PAR1_1,
9090                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
9091
9092         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
9093         for_each_pipe(dev_priv, pipe) {
9094                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
9095                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
9096                            BDW_DPRS_MASK_VBLANK_SRD);
9097         }
9098
9099         /* WaVSRefCountFullforceMissDisable:bdw */
9100         /* WaDSRefCountFullforceMissDisable:bdw */
9101         I915_WRITE(GEN7_FF_THREAD_MODE,
9102                    I915_READ(GEN7_FF_THREAD_MODE) &
9103                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9104
9105         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9106                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9107
9108         /* WaDisableSDEUnitClockGating:bdw */
9109         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9110                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9111
9112         /* WaProgramL3SqcReg1Default:bdw */
9113         gen8_set_l3sqc_credits(dev_priv, 30, 2);
9114
9115         /* WaGttCachingOffByDefault:bdw */
9116         I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
9117
9118         /* WaKVMNotificationOnConfigChange:bdw */
9119         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
9120                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
9121
9122         lpt_init_clock_gating(dev_priv);
9123
9124         /* WaDisableDopClockGating:bdw
9125          *
9126          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
9127          * clock gating.
9128          */
9129         I915_WRITE(GEN6_UCGCTL1,
9130                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
9131 }
9132
9133 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
9134 {
9135         /* L3 caching of data atomics doesn't work -- disable it. */
9136         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9137         I915_WRITE(HSW_ROW_CHICKEN3,
9138                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9139
9140         /* This is required by WaCatErrorRejectionIssue:hsw */
9141         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9142                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9143                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9144
9145         /* WaVSRefCountFullforceMissDisable:hsw */
9146         I915_WRITE(GEN7_FF_THREAD_MODE,
9147                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
9148
9149         /* WaDisable_RenderCache_OperationalFlush:hsw */
9150         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9151
9152         /* enable HiZ Raw Stall Optimization */
9153         I915_WRITE(CACHE_MODE_0_GEN7,
9154                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9155
9156         /* WaDisable4x2SubspanOptimization:hsw */
9157         I915_WRITE(CACHE_MODE_1,
9158                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9159
9160         /*
9161          * BSpec recommends 8x4 when MSAA is used,
9162          * however in practice 16x4 seems fastest.
9163          *
9164          * Note that PS/WM thread counts depend on the WIZ hashing
9165          * disable bit, which we don't touch here, but it's good
9166          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9167          */
9168         I915_WRITE(GEN7_GT_MODE,
9169                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9170
9171         /* WaSampleCChickenBitEnable:hsw */
9172         I915_WRITE(HALF_SLICE_CHICKEN3,
9173                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9174
9175         /* WaSwitchSolVfFArbitrationPriority:hsw */
9176         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9177
9178         lpt_init_clock_gating(dev_priv);
9179 }
9180
9181 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
9182 {
9183         u32 snpcr;
9184
9185         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
9186
9187         /* WaDisableEarlyCull:ivb */
9188         I915_WRITE(_3D_CHICKEN3,
9189                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9190
9191         /* WaDisableBackToBackFlipFix:ivb */
9192         I915_WRITE(IVB_CHICKEN3,
9193                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9194                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9195
9196         /* WaDisablePSDDualDispatchEnable:ivb */
9197         if (IS_IVB_GT1(dev_priv))
9198                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9199                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9200
9201         /* WaDisable_RenderCache_OperationalFlush:ivb */
9202         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9203
9204         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9205         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9206                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9207
9208         /* WaApplyL3ControlAndL3ChickenMode:ivb */
9209         I915_WRITE(GEN7_L3CNTLREG1,
9210                         GEN7_WA_FOR_GEN7_L3_CONTROL);
9211         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9212                    GEN7_WA_L3_CHICKEN_MODE);
9213         if (IS_IVB_GT1(dev_priv))
9214                 I915_WRITE(GEN7_ROW_CHICKEN2,
9215                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9216         else {
9217                 /* must write both registers */
9218                 I915_WRITE(GEN7_ROW_CHICKEN2,
9219                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9220                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9221                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9222         }
9223
9224         /* WaForceL3Serialization:ivb */
9225         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9226                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9227
9228         /*
9229          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9230          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9231          */
9232         I915_WRITE(GEN6_UCGCTL2,
9233                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9234
9235         /* This is required by WaCatErrorRejectionIssue:ivb */
9236         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9237                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9238                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9239
9240         g4x_disable_trickle_feed(dev_priv);
9241
9242         gen7_setup_fixed_func_scheduler(dev_priv);
9243
9244         if (0) { /* causes HiZ corruption on ivb:gt1 */
9245                 /* enable HiZ Raw Stall Optimization */
9246                 I915_WRITE(CACHE_MODE_0_GEN7,
9247                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9248         }
9249
9250         /* WaDisable4x2SubspanOptimization:ivb */
9251         I915_WRITE(CACHE_MODE_1,
9252                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9253
9254         /*
9255          * BSpec recommends 8x4 when MSAA is used,
9256          * however in practice 16x4 seems fastest.
9257          *
9258          * Note that PS/WM thread counts depend on the WIZ hashing
9259          * disable bit, which we don't touch here, but it's good
9260          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9261          */
9262         I915_WRITE(GEN7_GT_MODE,
9263                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9264
9265         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9266         snpcr &= ~GEN6_MBC_SNPCR_MASK;
9267         snpcr |= GEN6_MBC_SNPCR_MED;
9268         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9269
9270         if (!HAS_PCH_NOP(dev_priv))
9271                 cpt_init_clock_gating(dev_priv);
9272
9273         gen6_check_mch_setup(dev_priv);
9274 }
9275
9276 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9277 {
9278         /* WaDisableEarlyCull:vlv */
9279         I915_WRITE(_3D_CHICKEN3,
9280                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9281
9282         /* WaDisableBackToBackFlipFix:vlv */
9283         I915_WRITE(IVB_CHICKEN3,
9284                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9285                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9286
9287         /* WaPsdDispatchEnable:vlv */
9288         /* WaDisablePSDDualDispatchEnable:vlv */
9289         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9290                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9291                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9292
9293         /* WaDisable_RenderCache_OperationalFlush:vlv */
9294         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9295
9296         /* WaForceL3Serialization:vlv */
9297         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9298                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9299
9300         /* WaDisableDopClockGating:vlv */
9301         I915_WRITE(GEN7_ROW_CHICKEN2,
9302                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9303
9304         /* This is required by WaCatErrorRejectionIssue:vlv */
9305         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9306                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9307                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9308
9309         gen7_setup_fixed_func_scheduler(dev_priv);
9310
9311         /*
9312          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9313          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9314          */
9315         I915_WRITE(GEN6_UCGCTL2,
9316                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9317
9318         /* WaDisableL3Bank2xClockGate:vlv
9319          * Disabling L3 clock gating - MMIO 940c[25] = 1
9320          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
9321         I915_WRITE(GEN7_UCGCTL4,
9322                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9323
9324         /*
9325          * BSpec says this must be set, even though
9326          * WaDisable4x2SubspanOptimization isn't listed for VLV.
9327          */
9328         I915_WRITE(CACHE_MODE_1,
9329                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9330
9331         /*
9332          * BSpec recommends 8x4 when MSAA is used,
9333          * however in practice 16x4 seems fastest.
9334          *
9335          * Note that PS/WM thread counts depend on the WIZ hashing
9336          * disable bit, which we don't touch here, but it's good
9337          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9338          */
9339         I915_WRITE(GEN7_GT_MODE,
9340                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9341
9342         /*
9343          * WaIncreaseL3CreditsForVLVB0:vlv
9344          * This is the hardware default actually.
9345          */
9346         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9347
9348         /*
9349          * WaDisableVLVClockGating_VBIIssue:vlv
9350          * Disable clock gating on the GCFG unit to prevent a delay
9351          * in the reporting of vblank events.
9352          */
9353         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9354 }
9355
9356 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9357 {
9358         /* WaVSRefCountFullforceMissDisable:chv */
9359         /* WaDSRefCountFullforceMissDisable:chv */
9360         I915_WRITE(GEN7_FF_THREAD_MODE,
9361                    I915_READ(GEN7_FF_THREAD_MODE) &
9362                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9363
9364         /* WaDisableSemaphoreAndSyncFlipWait:chv */
9365         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9366                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9367
9368         /* WaDisableCSUnitClockGating:chv */
9369         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9370                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9371
9372         /* WaDisableSDEUnitClockGating:chv */
9373         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9374                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9375
9376         /*
9377          * WaProgramL3SqcReg1Default:chv
9378          * See gfxspecs/Related Documents/Performance Guide/
9379          * LSQC Setting Recommendations.
9380          */
9381         gen8_set_l3sqc_credits(dev_priv, 38, 2);
9382
9383         /*
9384          * GTT cache may not work with big pages, so if those
9385          * are ever enabled GTT cache may need to be disabled.
9386          */
9387         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9388 }
9389
9390 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9391 {
9392         u32 dspclk_gate;
9393
9394         I915_WRITE(RENCLK_GATE_D1, 0);
9395         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9396                    GS_UNIT_CLOCK_GATE_DISABLE |
9397                    CL_UNIT_CLOCK_GATE_DISABLE);
9398         I915_WRITE(RAMCLK_GATE_D, 0);
9399         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9400                 OVRUNIT_CLOCK_GATE_DISABLE |
9401                 OVCUNIT_CLOCK_GATE_DISABLE;
9402         if (IS_GM45(dev_priv))
9403                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9404         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9405
9406         /* WaDisableRenderCachePipelinedFlush */
9407         I915_WRITE(CACHE_MODE_0,
9408                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9409
9410         /* WaDisable_RenderCache_OperationalFlush:g4x */
9411         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9412
9413         g4x_disable_trickle_feed(dev_priv);
9414 }
9415
9416 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9417 {
9418         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9419         I915_WRITE(RENCLK_GATE_D2, 0);
9420         I915_WRITE(DSPCLK_GATE_D, 0);
9421         I915_WRITE(RAMCLK_GATE_D, 0);
9422         I915_WRITE16(DEUC, 0);
9423         I915_WRITE(MI_ARB_STATE,
9424                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9425
9426         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9427         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9428 }
9429
9430 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9431 {
9432         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9433                    I965_RCC_CLOCK_GATE_DISABLE |
9434                    I965_RCPB_CLOCK_GATE_DISABLE |
9435                    I965_ISC_CLOCK_GATE_DISABLE |
9436                    I965_FBC_CLOCK_GATE_DISABLE);
9437         I915_WRITE(RENCLK_GATE_D2, 0);
9438         I915_WRITE(MI_ARB_STATE,
9439                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9440
9441         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9442         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9443 }
9444
9445 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9446 {
9447         u32 dstate = I915_READ(D_STATE);
9448
9449         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9450                 DSTATE_DOT_CLOCK_GATING;
9451         I915_WRITE(D_STATE, dstate);
9452
9453         if (IS_PINEVIEW(dev_priv))
9454                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9455
9456         /* IIR "flip pending" means done if this bit is set */
9457         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9458
9459         /* interrupts should cause a wake up from C3 */
9460         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9461
9462         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9463         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9464
9465         I915_WRITE(MI_ARB_STATE,
9466                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9467 }
9468
9469 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9470 {
9471         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9472
9473         /* interrupts should cause a wake up from C3 */
9474         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9475                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9476
9477         I915_WRITE(MEM_MODE,
9478                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9479 }
9480
9481 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9482 {
9483         I915_WRITE(MEM_MODE,
9484                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9485                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9486 }
9487
9488 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9489 {
9490         dev_priv->display.init_clock_gating(dev_priv);
9491 }
9492
9493 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9494 {
9495         if (HAS_PCH_LPT(dev_priv))
9496                 lpt_suspend_hw(dev_priv);
9497 }
9498
9499 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9500 {
9501         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9502 }
9503
9504 /**
9505  * intel_init_clock_gating_hooks - setup the clock gating hooks
9506  * @dev_priv: device private
9507  *
9508  * Set up the hooks that configure which clocks of a given platform can be
9509  * gated and also apply various GT and display specific workarounds for these
9510  * platforms. Note that some GT specific workarounds are applied separately
9511  * when GPU contexts or batchbuffers start their execution.
9512  */
9513 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9514 {
9515         if (IS_ICELAKE(dev_priv))
9516                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9517         else if (IS_CANNONLAKE(dev_priv))
9518                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9519         else if (IS_COFFEELAKE(dev_priv))
9520                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9521         else if (IS_SKYLAKE(dev_priv))
9522                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9523         else if (IS_KABYLAKE(dev_priv))
9524                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9525         else if (IS_BROXTON(dev_priv))
9526                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9527         else if (IS_GEMINILAKE(dev_priv))
9528                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9529         else if (IS_BROADWELL(dev_priv))
9530                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9531         else if (IS_CHERRYVIEW(dev_priv))
9532                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9533         else if (IS_HASWELL(dev_priv))
9534                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9535         else if (IS_IVYBRIDGE(dev_priv))
9536                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9537         else if (IS_VALLEYVIEW(dev_priv))
9538                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9539         else if (IS_GEN(dev_priv, 6))
9540                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9541         else if (IS_GEN(dev_priv, 5))
9542                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9543         else if (IS_G4X(dev_priv))
9544                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9545         else if (IS_I965GM(dev_priv))
9546                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9547         else if (IS_I965G(dev_priv))
9548                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9549         else if (IS_GEN(dev_priv, 3))
9550                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9551         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9552                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9553         else if (IS_GEN(dev_priv, 2))
9554                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9555         else {
9556                 MISSING_CASE(INTEL_DEVID(dev_priv));
9557                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9558         }
9559 }
9560
9561 /* Set up chip specific power management-related functions */
9562 void intel_init_pm(struct drm_i915_private *dev_priv)
9563 {
9564         /* For cxsr */
9565         if (IS_PINEVIEW(dev_priv))
9566                 i915_pineview_get_mem_freq(dev_priv);
9567         else if (IS_GEN(dev_priv, 5))
9568                 i915_ironlake_get_mem_freq(dev_priv);
9569
9570         /* For FIFO watermark updates */
9571         if (INTEL_GEN(dev_priv) >= 9) {
9572                 skl_setup_wm_latency(dev_priv);
9573                 dev_priv->display.initial_watermarks = skl_initial_wm;
9574                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9575                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9576         } else if (HAS_PCH_SPLIT(dev_priv)) {
9577                 ilk_setup_wm_latency(dev_priv);
9578
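                /*
                 * Only install the ILK-style watermark hooks when the
                 * latencies read above look valid: non-zero WM1 latencies on
                 * gen5 (where the WM0 values come from fixed defaults),
                 * non-zero WM0 latencies on gen6+. Otherwise CxSR stays
                 * disabled.
                 */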
9579                 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
9580                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9581                     (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
9582                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9583                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9584                         dev_priv->display.compute_intermediate_wm =
9585                                 ilk_compute_intermediate_wm;
9586                         dev_priv->display.initial_watermarks =
9587                                 ilk_initial_watermarks;
9588                         dev_priv->display.optimize_watermarks =
9589                                 ilk_optimize_watermarks;
9590                 } else {
9591                         DRM_DEBUG_KMS("Failed to read display plane latency. "
9592                                       "Disabling CxSR\n");
9593                 }
9594         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9595                 vlv_setup_wm_latency(dev_priv);
9596                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9597                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9598                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9599                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9600                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9601         } else if (IS_G4X(dev_priv)) {
9602                 g4x_setup_wm_latency(dev_priv);
9603                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9604                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9605                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9606                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9607         } else if (IS_PINEVIEW(dev_priv)) {
9608                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9609                                             dev_priv->is_ddr3,
9610                                             dev_priv->fsb_freq,
9611                                             dev_priv->mem_freq)) {
9612                         DRM_INFO("failed to find known CxSR latency "
9613                                  "(found ddr%s fsb freq %d, mem freq %d), "
9614                                  "disabling CxSR\n",
9615                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
9616                                  dev_priv->fsb_freq, dev_priv->mem_freq);
9617                         /* Disable CxSR and never update its watermark again */
9618                         intel_set_memory_cxsr(dev_priv, false);
9619                         dev_priv->display.update_wm = NULL;
9620                 } else
9621                         dev_priv->display.update_wm = pineview_update_wm;
9622         } else if (IS_GEN(dev_priv, 4)) {
9623                 dev_priv->display.update_wm = i965_update_wm;
9624         } else if (IS_GEN(dev_priv, 3)) {
9625                 dev_priv->display.update_wm = i9xx_update_wm;
9626                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9627         } else if (IS_GEN(dev_priv, 2)) {
9628                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9629                         dev_priv->display.update_wm = i845_update_wm;
9630                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
9631                 } else {
9632                         dev_priv->display.update_wm = i9xx_update_wm;
9633                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
9634                 }
9635         } else {
9636                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9637         }
9638 }
9639
9640 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9641 {
9642         u32 flags =
9643                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9644
9645         switch (flags) {
9646         case GEN6_PCODE_SUCCESS:
9647                 return 0;
9648         case GEN6_PCODE_UNIMPLEMENTED_CMD:
9649                 return -ENODEV;
9650         case GEN6_PCODE_ILLEGAL_CMD:
9651                 return -ENXIO;
9652         case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9653         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9654                 return -EOVERFLOW;
9655         case GEN6_PCODE_TIMEOUT:
9656                 return -ETIMEDOUT;
9657         default:
9658                 MISSING_CASE(flags);
9659                 return 0;
9660         }
9661 }
9662
9663 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9664 {
9665         u32 flags =
9666                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9667
9668         switch (flags) {
9669         case GEN6_PCODE_SUCCESS:
9670                 return 0;
9671         case GEN6_PCODE_ILLEGAL_CMD:
9672                 return -ENXIO;
9673         case GEN7_PCODE_TIMEOUT:
9674                 return -ETIMEDOUT;
9675         case GEN7_PCODE_ILLEGAL_DATA:
9676                 return -EINVAL;
9677         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9678                 return -EOVERFLOW;
9679         default:
9680                 MISSING_CASE(flags);
9681                 return 0;
9682         }
9683 }
9684
9685 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9686 {
9687         int status;
9688
9689         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9690
9691         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9692          * use the fw I915_READ variants to reduce the amount of work
9693          * required when reading/writing.
9694          */
9695
9696         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9697                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9698                                  mbox, __builtin_return_address(0));
9699                 return -EAGAIN;
9700         }
9701
9702         I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9703         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9704         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9705
9706         if (__intel_wait_for_register_fw(dev_priv,
9707                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9708                                          500, 0, NULL)) {
9709                 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9710                           mbox, __builtin_return_address(0));
9711                 return -ETIMEDOUT;
9712         }
9713
9714         *val = I915_READ_FW(GEN6_PCODE_DATA);
9715         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9716
9717         if (INTEL_GEN(dev_priv) > 6)
9718                 status = gen7_check_mailbox_status(dev_priv);
9719         else
9720                 status = gen6_check_mailbox_status(dev_priv);
9721
9722         if (status) {
9723                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9724                                  mbox, __builtin_return_address(0), status);
9725                 return status;
9726         }
9727
9728         return 0;
9729 }
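
/*
 * Hedged usage sketch, not part of the driver: a typical read, modelled on
 * the RC6 VIDS query used elsewhere in this file. The caller is expected to
 * hold pcu_lock around the mailbox transaction.
 */
#if 0	/* illustration only */
        u32 rc6vids = 0;
        int ret;

        mutex_lock(&dev_priv->pcu_lock);
        ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
        mutex_unlock(&dev_priv->pcu_lock);
        if (ret)
                DRM_DEBUG_DRIVER("Could not read RC6 VIDS (%d)\n", ret);
#endif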
9730
9731 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9732                                     u32 mbox, u32 val,
9733                                     int fast_timeout_us, int slow_timeout_ms)
9734 {
9735         int status;
9736
9737         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9738
9739         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9740          * use the fw I915_READ variants to reduce the amount of work
9741          * required when reading/writing.
9742          */
9743
9744         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9745                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9746                                  val, mbox, __builtin_return_address(0));
9747                 return -EAGAIN;
9748         }
9749
9750         I915_WRITE_FW(GEN6_PCODE_DATA, val);
9751         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9752         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9753
9754         if (__intel_wait_for_register_fw(dev_priv,
9755                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9756                                          fast_timeout_us, slow_timeout_ms,
9757                                          NULL)) {
9758                 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9759                           val, mbox, __builtin_return_address(0));
9760                 return -ETIMEDOUT;
9761         }
9762
9763         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9764
9765         if (INTEL_GEN(dev_priv) > 6)
9766                 status = gen7_check_mailbox_status(dev_priv);
9767         else
9768                 status = gen6_check_mailbox_status(dev_priv);
9769
9770         if (status) {
9771                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9772                                  val, mbox, __builtin_return_address(0), status);
9773                 return status;
9774         }
9775
9776         return 0;
9777 }
9778
9779 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9780                                   u32 request, u32 reply_mask, u32 reply,
9781                                   u32 *status)
9782 {
9783         u32 val = request;
9784
9785         *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9786
9787         return *status || ((val & reply_mask) == reply);
9788 }
9789
9790 /**
9791  * skl_pcode_request - send PCODE request until acknowledgment
9792  * @dev_priv: device private
9793  * @mbox: PCODE mailbox ID the request is targeted for
9794  * @request: request ID
9795  * @reply_mask: mask used to check for request acknowledgment
9796  * @reply: value used to check for request acknowledgment
9797  * @timeout_base_ms: timeout for polling with preemption enabled
9798  *
9799  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9800  * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9801  * The request is acknowledged once the PCODE reply dword equals @reply after
9802  * applying @reply_mask. Polling is first attempted with preemption enabled
9803  * for @timeout_base_ms; if that times out, polling continues for another
9804  * 50 ms with preemption disabled.
9805  *
9806  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9807  * other error as reported by PCODE.
9808  */
9809 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9810                       u32 reply_mask, u32 reply, int timeout_base_ms)
9811 {
9812         u32 status;
9813         int ret;
9814
9815         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9816
9817 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9818                                    &status)
9819
9820         /*
9821          * Prime the PCODE by doing a request first. Normally it guarantees
9822          * that a subsequent request, at most @timeout_base_ms later, succeeds.
9823          * _wait_for() doesn't guarantee when it first evaluates the condition
9824          * passed to it, so send the first request explicitly.
9825          */
9826         if (COND) {
9827                 ret = 0;
9828                 goto out;
9829         }
9830         ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9831         if (!ret)
9832                 goto out;
9833
9834         /*
9835          * The above can time out if the number of requests was low (2 in the
9836          * worst case) _and_ PCODE was busy for some reason even after a
9837          * (queued) request and @timeout_base_ms delay. As a workaround retry
9838          * the poll with preemption disabled to maximize the number of
9839          * requests. Increase the timeout from @timeout_base_ms to 50ms to
9840          * account for interrupts that could reduce the number of these
9841          * requests, and for any quirks of the PCODE firmware that delays
9842          * the request completion.
9843          */
9844         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9845         WARN_ON_ONCE(timeout_base_ms > 3);
9846         preempt_disable();
9847         ret = wait_for_atomic(COND, 50);
9848         preempt_enable();
9849
9850 out:
9851         return ret ? ret : status;
9852 #undef COND
9853 }
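
/*
 * Hedged usage sketch, not part of the driver: the CDCLK change handshake is
 * the typical user of skl_pcode_request(). The SKL_PCODE_CDCLK_CONTROL /
 * SKL_CDCLK_*_CHANGE constants below are assumptions based on how the CDCLK
 * code uses this helper; they are not defined in this file.
 */
#if 0	/* illustration only */
        int ret;

        mutex_lock(&dev_priv->pcu_lock);
        ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
                                SKL_CDCLK_PREPARE_FOR_CHANGE,
                                SKL_CDCLK_READY_FOR_CHANGE,
                                SKL_CDCLK_READY_FOR_CHANGE, 3);
        mutex_unlock(&dev_priv->pcu_lock);
        if (ret)
                DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret);
#endif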
9854
9855 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9856 {
9857         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9858
9859         /*
9860          * N = val - 0xb7
9861          * Slow = Fast = GPLL ref * N
9862          */
9863         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9864 }
9865
9866 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9867 {
9868         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9869
9870         return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9871 }
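
/*
 * Worked example (hypothetical GPLL reference, for illustration only): with
 * rps->gpll_ref_freq == 20000 kHz, opcode 0xc8 gives N = 0xc8 - 0xb7 = 17,
 * so byt_gpu_freq() returns DIV_ROUND_CLOSEST(20000 * 17, 1000) = 340 MHz,
 * and byt_freq_opcode(dev_priv, 340) maps that back to 0xc8.
 */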
9872
9873 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9874 {
9875         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9876
9877         /*
9878          * N = val / 2
9879          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9880          */
9881         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9882 }
9883
9884 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9885 {
9886         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9887
9888         /* CHV needs even values */
9889         return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9890 }
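
/*
 * Worked example (same hypothetical 20000 kHz GPLL reference): opcode 200 on
 * CHV gives chv_gpu_freq() = DIV_ROUND_CLOSEST(20000 * 200, 4000) = 1000 MHz,
 * and chv_freq_opcode(dev_priv, 1000) rounds back to the even opcode 200.
 */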
9891
9892 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9893 {
9894         if (INTEL_GEN(dev_priv) >= 9)
9895                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9896                                          GEN9_FREQ_SCALER);
9897         else if (IS_CHERRYVIEW(dev_priv))
9898                 return chv_gpu_freq(dev_priv, val);
9899         else if (IS_VALLEYVIEW(dev_priv))
9900                 return byt_gpu_freq(dev_priv, val);
9901         else
9902                 return val * GT_FREQUENCY_MULTIPLIER;
9903 }
9904
9905 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9906 {
9907         if (INTEL_GEN(dev_priv) >= 9)
9908                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9909                                          GT_FREQUENCY_MULTIPLIER);
9910         else if (IS_CHERRYVIEW(dev_priv))
9911                 return chv_freq_opcode(dev_priv, val);
9912         else if (IS_VALLEYVIEW(dev_priv))
9913                 return byt_freq_opcode(dev_priv, val);
9914         else
9915                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9916 }
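
/*
 * Worked example (assuming GT_FREQUENCY_MULTIPLIER == 50 and
 * GEN9_FREQ_SCALER == 3, their values at the time of writing): a gen9+ RPS
 * opcode of 18 maps to DIV_ROUND_CLOSEST(18 * 50, 3) = 300 MHz and
 * intel_freq_opcode(dev_priv, 300) maps back to 18. On pre-gen9 big-core
 * platforms the opcode is simply the frequency in 50 MHz units.
 */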
9917
9918 void intel_pm_setup(struct drm_i915_private *dev_priv)
9919 {
9920         mutex_init(&dev_priv->pcu_lock);
9921         mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9922
9923         atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9924
9925         dev_priv->runtime_pm.suspended = false;
9926         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9927 }
9928
9929 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9930                              const i915_reg_t reg)
9931 {
9932         u32 lower, upper, tmp;
9933         int loop = 2;
9934
9935         /*
9936          * The registers accessed do not need forcewake. We borrow the
9937          * uncore lock to prevent concurrent access to the range register.
9938          */
9939         lockdep_assert_held(&dev_priv->uncore.lock);
9940
9941         /*
9942          * vlv and chv residency counters are 40 bits in width.
9943          * With a control bit, we can choose between the upper or lower
9944          * 32 bit window into this counter.
9945          *
9946          * Although we always use the counter in high-range mode elsewhere,
9947          * userspace may attempt to read the value before rc6 is initialised,
9948          * before we have set the default VLV_COUNTER_CONTROL value. So always
9949          * set the high bit to be safe.
9950          */
9951         I915_WRITE_FW(VLV_COUNTER_CONTROL,
9952                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9953         upper = I915_READ_FW(reg);
9954         do {
9955                 tmp = upper;
9956
9957                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9958                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9959                 lower = I915_READ_FW(reg);
9960
9961                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9962                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9963                 upper = I915_READ_FW(reg);
9964         } while (upper != tmp && --loop);
9965
9966         /*
9967          * Everywhere else we always use VLV_COUNTER_CONTROL with the
9968          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9969          * now.
9970          */
9971
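        /*
         * The high window is offset by 8 bits: it holds bits [39:8] of the
         * 40 bit counter, while the low window holds bits [31:0]. The
         * overlapping bits agree thanks to the re-read loop above.
         */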
9972         return lower | (u64)upper << 8;
9973 }
9974
9975 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9976                            const i915_reg_t reg)
9977 {
9978         u64 time_hw, prev_hw, overflow_hw;
9979         unsigned int fw_domains;
9980         unsigned long flags;
9981         unsigned int i;
9982         u32 mul, div;
9983
9984         if (!HAS_RC6(dev_priv))
9985                 return 0;
9986
9987         /*
9988          * Store previous hw counter values for counter wrap-around handling.
9989          *
9990          * There are only four interesting registers and they live next to each
9991          * other, so we can use the offset relative to the smallest one as
9992          * the index into driver storage.
9993          */
9994         i = (i915_mmio_reg_offset(reg) -
9995              i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9996         if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9997                 return 0;
9998
9999         fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
10000
10001         spin_lock_irqsave(&dev_priv->uncore.lock, flags);
10002         intel_uncore_forcewake_get__locked(&dev_priv->uncore, fw_domains);
10003
10004         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
10005         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
10006                 mul = 1000000;
10007                 div = dev_priv->czclk_freq;
10008                 overflow_hw = BIT_ULL(40);
10009                 time_hw = vlv_residency_raw(dev_priv, reg);
10010         } else {
10011                 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
10012                 if (IS_GEN9_LP(dev_priv)) {
10013                         mul = 10000;
10014                         div = 12;
10015                 } else {
10016                         mul = 1280;
10017                         div = 1;
10018                 }
10019
10020                 overflow_hw = BIT_ULL(32);
10021                 time_hw = I915_READ_FW(reg);
10022         }
10023
10024         /*
10025          * Counter wrap handling.
10026          *
10027          * This relies on a sufficient frequency of queries; otherwise the
10028          * counters can still wrap between two samples.
10029          */
10030         prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
10031         dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
10032
10033         /* RC6 delta from last sample. */
10034         if (time_hw >= prev_hw)
10035                 time_hw -= prev_hw;
10036         else
10037                 time_hw += overflow_hw - prev_hw;
10038
10039         /* Add delta to RC6 extended raw driver copy. */
10040         time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
10041         dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
10042
10043         intel_uncore_forcewake_put__locked(&dev_priv->uncore, fw_domains);
10044         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
10045
10046         return mul_u64_u32_div(time_hw, mul, div);
10047 }
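
/*
 * Worked example (illustration only): a raw delta of 1000 counter ticks
 * converts to 1000 * 1280 / 1 = 1,280,000 ns on most platforms (1.28 us per
 * tick) and to 1000 * 10000 / 12 ~= 833,333 ns on Gen9 LP (833.33 ns per
 * tick); on VLV/CHV the tick period instead depends on czclk_freq.
 */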
10048
10049 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
10050 {
10051         u32 cagf;
10052
10053         if (INTEL_GEN(dev_priv) >= 9)
10054                 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
10055         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
10056                 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
10057         else
10058                 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
10059
10060         return cagf;
10061 }
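
/*
 * Hedged usage sketch, not part of the driver: the CAGF field is an RPS
 * opcode, so callers typically pair it with intel_gpu_freq() to report MHz,
 * roughly as the debugfs/sysfs frequency reporting does.
 */
#if 0	/* illustration only */
        u32 rpstat = I915_READ(GEN6_RPSTAT1);
        int freq_mhz = intel_gpu_freq(dev_priv,
                                      intel_get_cagf(dev_priv, rpstat));
#endif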